Re: Question about parser/parsing technics

Jens Kallup <jkallup@web.de>
Tue, 06 Mar 2012 19:52:29 +0100

          From comp.compilers

Related articles
Question about parser/parsing technics spy974@gmail.com (2012-03-06)
Re: Question about parser/parsing technics jkallup@web.de (Jens Kallup) (2012-03-06)
| List of all articles for this month |
From: Jens Kallup <jkallup@web.de>
Newsgroups: comp.compilers
Date: Tue, 06 Mar 2012 19:52:29 +0100
Organization: CNNTP
References: 12-03-006
Keywords: parse
Posted-Date: 06 Mar 2012 16:31:48 EST

Hello,


you can try this grammar for a simple language:


//here the lexer.l
//compile: flex lexer.l


%{
#include "syntaxtree.h"
#include "string.h"
#include <stdlib.h>
#include <QMessageBox>
#include "y.tab.cc"


/* Current source line number. Declared here, defined elsewhere; the
 * scanner rules increment it on newlines and the error paths print it. */
extern int lineno;
/* Scratch buffer; not referenced anywhere in the code shown here. */
char num_text[2048];


/* End-of-input marker: EOF when FLEX_SCANNER is defined, 0 otherwise.
 * Only the disabled loop inside start_code_parser() refers to it. */
#ifdef FLEX_SCANNER
#define INPUT_EOF EOF
#else
#define INPUT_EOF 0
#endif


/* End-of-input hook for the generated scanner. Always answers 1, i.e.
 * "no further input file follows". Exported with C linkage. */
extern "C" int yywrap()
{
    return 1;
}


void start_code_parser(FILE *fp)
{
          /*
          YY_BUFFER_STATE old_flexer = YY_CURRENT_BUFFER;
          YY_BUFFER_STATE new_flexer = yy_create_buffer(fp,YY_BUF_SIZE);
          yy_switch_to_buffer(new_flexer);
          while (yyparse() != INPUT_EOF)


          yy_delete_buffer(new_flexer);
          if (old_flexer != NULL)
          yy_switch_to_buffer(old_flexer);
          */
          yyparse();
}


/* Defined elsewhere; the scanner's catch-all error rule calls it after
 * reporting an unexpected character and resetting the counters. */
extern void RestartApplication(void);
%}


/* Exclusive start conditions: COMMENT is entered by the block-comment
   opener; STRING_BEG is entered after an opening quote and STRING_END
   once the string body has been read. */
%x COMMENT
%x STRING_BEG
%x STRING_END


%%


"\*\*".*\n {
    /* Line comment introduced by two asterisks. The trailing newline is
       part of the match, so it is counted here. */
    lineno++;
}
"\*\*".* { /* the same comment on a last line that lacks a newline */ }
"/*" {
    /* Block comment: stay in the exclusive COMMENT state until the
       closing marker is found. Text inside is consumed piecewise so that
       comments may span lines and line numbers stay correct. */
    BEGIN(COMMENT);
}
<COMMENT>"*/" { BEGIN(INITIAL); }
<COMMENT>\n { lineno++; }
<COMMENT>[^*\n]+ { /* discard comment text */ }
<COMMENT>"*" { /* lone asterisk that does not close the comment */ }
"//".*\n { lineno++; }
"//".* { /* line comment on a last line that lacks a newline */ }
"if" { BEGIN(INITIAL); return _IF_;}
"else" { BEGIN(INITIAL); return _ELSE_;}
"endif" { BEGIN(INITIAL); return _END_IF_;}
"repeat" { BEGIN(INITIAL); return _REPEAT_;}
"until" { BEGIN(INITIAL); return _UNTIL_;}
"scan" { BEGIN(INITIAL); return _SCAN_;}
(\?) { BEGIN(INITIAL); return _PRINT_;}
"local" { BEGIN(INITIAL); return _LOCAL_;}
"parameter" { BEGIN(INITIAL); return _PARAMETER_;}
"endfor" { BEGIN(INITIAL); return _ENDFOR_;}
"new" { BEGIN(INITIAL); return _NEW_;}
"for" { BEGIN(INITIAL); return _FOR_;}
"while" { BEGIN(INITIAL); return _WHILE_;}
"return" { BEGIN(INITIAL); return _RETURN_;}
"==" { BEGIN(INITIAL); return _EQUAL_;}
">=" { BEGIN(INITIAL); return _GREQL_;}
"<=" { BEGIN(INITIAL); return _LWEQL_;}
"=>" { BEGIN(INITIAL); return _GREQL_;}
"=<" { BEGIN(INITIAL); return _LWEQL_;}
"<" {
    /* The grammar refers to the relational operators as the literal
       character tokens for less-than and greater-than, so hand the
       character code itself to the parser. */
    BEGIN(INITIAL); return '<';
}
">" { BEGIN(INITIAL); return '>'; }
"++" { BEGIN(INITIAL); return _PLUS_PLUS_; }
"--" { BEGIN(INITIAL); return _MINUS_MINUS_; }
"+=" { BEGIN(INITIAL); return _PLUS_ASSIGN_; }
"-=" { BEGIN(INITIAL); return _MINUS_ASSIGN_; }
"*=" { BEGIN(INITIAL); return _TIMES_ASSIGN_; }
"/=" { BEGIN(INITIAL); return _DIV_ASSIGN_; }
"class" { BEGIN(INITIAL); return _CLASS_; }
"of" { BEGIN(INITIAL); return _OF_; }
"endclass" { BEGIN(INITIAL); return _ENDCLASS_; }
"set" { BEGIN(INITIAL); return _SET_; }
"with" { BEGIN(INITIAL); return _WITH_; }
"endwith" { BEGIN(INITIAL); return _ENDWITH_; }
"to" { BEGIN(INITIAL); return _TO_; }
"precision" { BEGIN(INITIAL); return _PRECISION_; }
"round" { BEGIN(INITIAL); return _ROUND_; }
"space" { BEGIN(INITIAL); return _SPACE_; }
"replicate" { BEGIN(INITIAL); return _REPLICATE_; }
"trim" { BEGIN(INITIAL); return _TRIM_; }
"rtrim" { BEGIN(INITIAL); return _RTRIM_; }
"ltrim" { BEGIN(INITIAL); return _LTRIM_; }
"abs" { BEGIN(INITIAL); yylval.name = strdup(yytext); return _ABS_; }
"sin" { BEGIN(INITIAL); yylval.name = strdup(yytext); return _SIN_; }
"cos" { BEGIN(INITIAL); yylval.name = strdup(yytext); return _COS_; }
"tan" { BEGIN(INITIAL); yylval.name = strdup(yytext); return _TAN_; }
"asin" { BEGIN(INITIAL); yylval.name = strdup(yytext); return _ASIN_; }
"acos" { BEGIN(INITIAL); yylval.name = strdup(yytext); return _ACOS_; }
"atan" { BEGIN(INITIAL); yylval.name = strdup(yytext); return _ATAN_; }
"sqrt" { BEGIN(INITIAL); yylval.name = strdup(yytext); return _SQRT_; }
"=" { BEGIN(INITIAL); return _ASSIGN_; }
"+" { BEGIN(INITIAL); return _PLUS_; }
"-" { BEGIN(INITIAL); return _MINUS_; }
"*" { BEGIN(INITIAL); return _TIMES_; }
"/" { BEGIN(INITIAL); return _DIV_;}
"(" { BEGIN(INITIAL); return _OBR_;}
")" { BEGIN(INITIAL); return _CBR_;}
"," { BEGIN(INITIAL); return _COMMA_; }
[-+]?[0-9]+(\.[0-9]*)? {
    /* Numeric literal: optional sign, digits, and at most one decimal
       point followed by optional digits. The previous character class
       also accepted apostrophes and repeated dots.
       NOTE(review): because the sign is part of the literal, input such
       as a-1 scans as an identifier followed by the number -1 rather than
       as a subtraction. The grammar shown has no unary minus, so the sign
       is kept here; confirm which behaviour is wanted. */
    BEGIN(INITIAL);
    yylval.val = atof(yytext);
    return _NUM_;
}
[a-zA-Z_][0-9a-zA-Z_]* {
    /* Identifier: a letter or underscore followed by letters, digits or
       underscores. The token value is a heap copy made with strdup(). */
    yylval.name = strdup(yytext);
    return _ID_;
}
\" {
    /* Opening quote: continue in the exclusive STRING_BEG state. */
    BEGIN(STRING_BEG);
}
<STRING_BEG>\" {
    /* Closing quote directly after the opening one: an empty literal.
       Without this rule the body pattern could only match zero
       characters here and the quote fell through to the default rule. */
    yylval.name = strdup("");
    BEGIN(INITIAL);
    return _STRING_;
}
<STRING_BEG>[0-9a-zA-Z_ \t\.\!\=\?\(\)\[\]\:\.\,\+\-\*\/\^]+ {
    /* String body: keep a copy and wait for the closing quote.
       NOTE(review): characters outside this set are not matched by any
       string rule - confirm whether that restriction is intended. */
    yylval.name = strdup(yytext);
    BEGIN(STRING_END);
}
<STRING_END>\" { BEGIN(INITIAL); return _STRING_; }
\n+ {
    /* A run of newlines advances the line counter by its full length,
       not by one. */
    lineno += yyleng;
}
[ \t]+ { /* skip blanks and tabs; at least one, never the empty string */ }
[\.] { BEGIN(INITIAL); return _POINT_; }
. {
    /* Catch-all for any character no other rule accepts: report it,
       reset the scanner and the code-generator counters, then restart.
       The offending character and line are captured first because
       yyrestart() reinitialises the scanner's buffer, after which the
       matched text can no longer be relied on. */
    const char bad_char = yytext[0];
    const int bad_line = lineno;

    ::yyrestart(yyin);
    QMessageBox::about(NULL, "pFehler",
        QString("Fehlerzeichen: %1 in Zeile: %2").arg(bad_char).arg(bad_line));
    lineno = 1;

    if_label = 0;
    expr_type = 2;

    text_counter = 0;
    char_counter = 0;
    nvar_counter = 0;

    RestartApplication();
}
%%




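// here is the bison grammar file (lang.y)
// compile: bison.exe -d lang.y
// (the lexer above #includes the generated parser as "y.tab.cc"; name bison's output to match, e.g. with -o y.tab.cc)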
%{
#include <stdio.h>
#include <stdlib.h>
/* ... further includes and declarations omitted from this post ... */
%}


/* Semantic value type shared by the scanner (yylval) and the parser. */
%union {
/* value of a numeric literal (_NUM_) */
double val;
/* strdup()'d text of identifiers and of the tokens declared <name> */
char* name;
/* not referenced by the rules shown here */
char* str;
/* value type of the grammar's typed nonterminals */
int id;
/* not referenced by the rules shown here */
int label;
};


/* Token and nonterminal declarations.
 * Tokens listed without a tag carry no semantic value, except _NUM_ and
 * _ID_, which receive their tags (<val> and <name>) through the %type
 * lines below. Tokens tagged <name> use the char* member `name`; every
 * typed nonterminal uses the int member `id`.
 * NOTE(review): _THEN_, _DBLNUM_ and _STRBE_ are declared, but no
 * scanner rule shown in this post returns them. */
%token _IF_ _THEN_ _ELSE_ _END_IF_ _REPEAT_ _UNTIL_ _SCAN_ _PRINT_ _ID_
_NUM_ _EQUAL_ _FOR_ _WHILE_ _RETURN_ _DBLNUM_
%token _LWEQL_ _GREQL_ _ASSIGN_ _PLUS_ _MINUS_ _TIMES_ _DIV_ _STRBE_
_COMMA_ _TRIM_ _LTRIM_ _RTRIM_
%token _NEW_ _LOCAL_ _PARAMETER_ _ENDFOR_
%token _MINUS_ASSIGN_ _TIMES_ASSIGN_ _DIV_ASSIGN_ _PLUS_ASSIGN_ _ROUND_
_SET_ _TO_ _PRECISION_
%token _CLASS_ _OF_ _ENDCLASS_ _WITH_ _ENDWITH_ _POINT_
%token <name> _ASIN_ _ACOS_ _ATAN_ _SIN_ _COS_ _SQRT_ _TAN_ _STRING_
_OBR_ _CBR_ _SPACE_
%token <name> _PLUS_PLUS_ _MINUS_MINUS_
%token <name> _ABS_ _REPLICATE_
%type <val> _NUM_
%type <name> _ID_
%type <id> stmt_seq stmt if_stmt repeat_stmt for_stmt while_stmt
assign_stmt read_stmt write_stmt
%type <id> exp simple_exp term factor exec_stmt handle_string ari_stmt
set_stmt class_stmt with_stmt with_this
%type <id> object_vars class_cmd_stmt misc_stmt local_stmt




%%

/* Start symbol: a program is a (possibly empty) sequence of statements.
 * The empty case is derived through stmt_seq only; deriving it here as
 * well would give the parser two ways to reduce empty input. */
program
          : stmt_seq { }
          ;


/* Zero or more statements, collected left-recursively. A separate
 * single-statement alternative is not needed (and would conflict with
 * "empty sequence followed by one statement"). */
stmt_seq
          : /* empty */ { }
          | stmt_seq stmt { }
          ;


/* A single statement: dispatches to one of the statement kinds. No
 * action code runs at this level. Most of the referenced nonterminals
 * are not defined in the portion of the grammar shown in this post. */
stmt: if_stmt { }
          | repeat_stmt { }
          | assign_stmt { }
          | read_stmt { }
          | write_stmt { }
          | for_stmt { }
          | while_stmt { }
          | ari_stmt { }
          | set_stmt { }
          | class_stmt { }
          | misc_stmt { }
          ;




/*
 * Assignment: identifier, assignment operator, expression.
 * assign_flag is raised while the right-hand side is parsed and lowered
 * afterwards. The generated code pops the expression result from the FPU
 * stack into the variable's storage label (_LC followed by the name); the
 * label gets a data definition the first time the variable is seen.
 */
assign_stmt
          : _ID_ _ASSIGN_ { assign_flag = true; } exp
          {
                  /* Build the storage label once and reuse it below. */
                  const QString label = QString("_LC%1").arg($1);

                  code_str += QString("\tfstp qword [%1]\n").arg(label);

                  if (!symbol_labels.contains(label))
                  {
                          data_str += QString("%1: dq 0.00\n").arg(label);
                          symbol_labels << label;
                  }

                  /* The scanner allocated the identifier text with strdup()
                     and nothing keeps the pointer beyond this action. */
                  free($1);

                  assign_flag = false;
          }
;


/* Expression with an optional single comparison. Each relational
 * alternative only records which comparison was parsed in expr_type
 * (0 less-than, 1 greater-than, 2 equal, 3 greater-or-equal,
 * 4 less-or-equal); no code is emitted at this level. The first two
 * alternatives use literal character tokens, so the scanner has to
 * return those character codes for them to be reachable. */
exp:
              simple_exp '<' simple_exp { expr_type = 0; }
          | simple_exp '>' simple_exp { expr_type = 1; }
          | simple_exp _EQUAL_ simple_exp { expr_type = 2; }
          | simple_exp _GREQL_ simple_exp { expr_type = 3; }
          | simple_exp _LWEQL_ simple_exp { expr_type = 4; }
          | simple_exp { }
          ;


/*
 * Additive expressions. The rule is left-recursive so that chains group
 * from the left: a - b - c generates code for (a - b) - c. With the
 * recursion on the right, all operands were pushed before any operation
 * ran and the chain was evaluated as a - (b - c). Left recursion also
 * keeps the number of values held on the FPU stack small.
 * Each operator pops the right operand and combines it into the left.
 */
simple_exp :
          simple_exp _PLUS_ term
          {
                  code_str += QString("\tfaddp st1, st0\n");
          }
          | simple_exp _MINUS_ term
          {
                  code_str += QString("\tfsubp st1, st0\n");
          }
          | term { }
          ;


/*
 * Multiplicative expressions. Left-recursive so that chains group from
 * the left: a / b / c generates code for (a / b) / c. With the recursion
 * on the right the chain was evaluated as a / (b / c).
 * Each operator pops the right operand and combines it into the left.
 */
term:
          term _TIMES_ factor
          {
                  code_str += QString("\tfmulp st1, st0\n");
          }
          | term _DIV_ factor
          {
                  code_str += QString("\tfdivp st1, st0\n");
          }
          | factor {}
          ;


/*
 * Primary expressions: parenthesised expression, numeric literal,
 * round(), variable reference, postfix increment / decrement and the
 * built-in math functions. Every alternative appends x87 assembly text
 * to code_str; literals and variables also add data definitions to
 * data_str.
 */
factor
          : _OBR_ exp _CBR_ {
                  /* NOTE(review): the inner expression has already emitted
                     its own code, yet the most recently seen variable is
                     loaded again here - confirm this is intended. */
                  code_str += QString("\tfld qword [_LC%1]\n").arg(FLastID);
          }
          | _NUM_
          {
                  /* Format the literal; append ".00" when the text has no
                     decimal point. */
                  temp_str = QString("%1").arg((double)$1);
                  if (temp_str.contains('.') == false)
                          temp_str += QString(".00");

                  nvar_counter++;

                  /* Each literal gets its own numbered constant label. */
                  code_str.append(QString("\tfld qword [_LC%1]\n").arg(nvar_counter-1));
                  data_str += QString("_LC%1:\tdq %2\n").arg(nvar_counter-1).arg(temp_str);

                  last_result = NUMBER;
          }
          | _ROUND_ _OBR_ { assign_flag = true; } exp {
                  /* NOTE(review): the value is stored to _LCtmp, but the
                     two pushes read the most recent numbered constant;
                     assign_flag is not lowered again - confirm. */
                  code_str += QString(
                          "\tfstp qword [_LCtmp]\n"
                          "\tpush dword [_LC%1+4]\n"
                          "\tpush dword [_LC%1]\n"
                  ).arg(nvar_counter-1);
          } _COMMA_ exp _CBR_ {
                  code_str += QString(
                          "\tfstp qword [_LCtmp]\n"
                          "\tpush dword [_LC%1+4]\n"
                          "\tpush dword [_LC%1]\n"
                          "\tcall _round_val\n"
                  ).arg(nvar_counter-1);
          }
          | _ID_
          {
                  last_result = 0;

                  //if (do_function_call)
                  code_str += QString("\tfld qword [_LC%1]\n").arg($1);

                  FLastID = $1;
                  stack_counter += 4;

                  /* First use of this variable: define its storage. */
                  if (!symbol_labels.contains(QString("_LC%1").arg($1)))
                  {
                          data_str += QString("_LC%1: dq 0.00\n").arg($1);
                          symbol_labels << QString("_LC%1").arg($1);
                  }
          }
          | _ID_ _PLUS_PLUS_ {
                  /* Postfix increment: load the old value (it stays on the
                     stack as the result), then compute constant + variable
                     and store it back. faddp has no memory-operand form, so
                     the add and the store-and-pop are separate steps, as in
                     the decrement below.
                     NOTE(review): assumes _LCtmp_1 holds the step 1.0. */
                  code_str += QString(
                          "\tfld qword [_LC%1]\n"
                          "\tfld qword [_LCtmp_1]\n"
                          "\tfadd qword [_LC%1]\n"
                          "\tfstp qword [_LC%1]\n"
                  ).arg($1);
          }
          | _ID_ _MINUS_MINUS_ {
                  /* Postfix decrement: fsubr computes variable - constant;
                     plain fsub would compute constant - variable.
                     NOTE(review): assumes _LCtmp_1 holds the step 1.0. */
                  code_str += QString(
                          "\tfld qword [_LC%1]\n"
                          "\tfld qword [_LCtmp_1]\n"
                          "\tfsubr qword [_LC%1]\n"
                          "\tfstp qword [_LC%1]\n"
                  ).arg($1);
          }
          | _SIN_ _OBR_ exp _CBR_ { internal_pusher("sin" ); }
          | _ASIN_ _OBR_ exp _CBR_ { internal_pusher("asin"); }
          | _ACOS_ _OBR_ exp _CBR_ { internal_pusher("acos"); }
          | _ATAN_ _OBR_ exp _CBR_ { internal_pusher("atan"); }
          | _COS_ _OBR_ exp _CBR_ { internal_pusher("cos" ); }
          | _TAN_ _OBR_ exp _CBR_ { internal_pusher("tan" ); }
          | _SQRT_ _OBR_ exp _CBR_ { internal_pusher("sqrt"); }
          | _ABS_ _OBR_ exp _CBR_ {
                  /* Absolute value clears the sign; fchs would flip it. */
                  code_str += QString("\tfabs\n");
          }
          ;


/* Terminator of an if statement: just the endif keyword token, with no
 * action code. The rule that uses it is not part of the grammar shown. */
endif_stmt
          : _END_IF_ { }
          ;
%%


void yyerror(char * message)
{
          QMessageBox::about(NULL,"error",QString("%1: line
%2").arg(message).arg(lineno));
          ::yyrestart(yyin);
}




/*
 * Appends the assembly text for a built-in math function to code_str.
 *
 * id: function name as used in the grammar actions.
 *
 * "sin", "cos" and "sqrt" become the x87 instruction f<id>, followed by
 * a store to and reload from the label _LC<id>. "asin", "acos", "atan"
 * and "tan" store the operand to _LCtmp, push it as two dwords and call
 * the routine _<id>. Any other name emits nothing.
 *
 * NOTE(review): the call sequence pushes two dwords but then adds 12 to
 * esp - confirm the intended stack adjustment.
 */
void internal_pusher(QString id)
{
      const bool uses_fpu_instruction =
            (id == "sin" || id == "cos" || id == "sqrt");
      const bool uses_helper_call =
            (id == "asin" || id == "acos" || id == "atan" || id == "tan");

      if (uses_fpu_instruction)
      {
            const QString fpu_text = QString(
                  "\tf%1\n"
                  "\tfstp qword [_LC%1]\n"
                  "\tfld qword [_LC%1]\n"
            ).arg(id);
            code_str += fpu_text;
      }

      if (uses_helper_call)
      {
            const QString call_text = QString(
                  "\tfstp qword [_LCtmp]\n"
                  "\tpush dword [_LCtmp+4]\n"
                  "\tpush dword [_LCtmp]\n"
                  "\tcall _%1\n"
                  "\tadd esp, 12\n").arg(id);
            code_str += call_text;
      }
}


cheers
Jens



Post a followup to this message

Return to the comp.compilers page.
Search the comp.compilers archives again.