Related articles |
---|
Question about parser/parsing technics spy974@gmail.com (2012-03-06) |
Re: Question about parser/parsing technics jkallup@web.de (Jens Kallup) (2012-03-06) |
From: | Jens Kallup <jkallup@web.de> |
Newsgroups: | comp.compilers |
Date: | Tue, 06 Mar 2012 19:52:29 +0100 |
Organization: | CNNTP |
References: | 12-03-006 |
Keywords: | parse |
Posted-Date: | 06 Mar 2012 16:31:48 EST |
Hello,
you can try this grammar for a simple language:
//here the lexer.l
//compile: flex lexer.l
%{
#include "syntaxtree.h"
#include "string.h"
#include <stdlib.h>
#include <QMessageBox>
#include "y.tab.cc"
extern int lineno;
char num_text[2048];
#ifdef FLEX_SCANNER
#define INPUT_EOF EOF
#else
#define INPUT_EOF 0
#endif
extern "C" int yywrap() { return 1; }
/*
 * Run the parser over the program read from `fp`.
 *
 * fp: input stream to parse. When it is NULL the scanner keeps whatever
 *     input it is already attached to.
 *
 * The earlier version ignored `fp` altogether (its buffer-switching code
 * was commented out), so the file handed in was never read unless the
 * caller had also set yyin. yyrestart() attaches the scanner to `fp` and
 * discards any buffered input left over from a previous run.
 */
void start_code_parser(FILE *fp)
{
    if (fp != NULL)
        yyrestart(fp);
    yyparse();
}
extern void RestartApplication(void);
%}
%x COMMENT
%x STRING_BEG
%x STRING_END
%%
"\*\*".*\n { /* two stars start a comment that runs to the end of the line */ lineno++; }
"/*" { /* switch to the exclusive COMMENT state until the terminator is seen */ BEGIN(COMMENT); }
<COMMENT>"*/" { /* the first terminator ends the comment, so code after it on the same line is kept */ BEGIN(INITIAL); }
<COMMENT>\n { /* keep line numbers correct inside multi-line comments */ lineno++; }
<COMMENT>. { /* discard comment text one character at a time */ }
"//".*\n { /* line comment; the rule consumes the newline, so count it */ lineno++; }
"if"        { /* these rules are only active in INITIAL (all other start conditions are exclusive), so no state reset is needed */ return _IF_; }
"else"      { return _ELSE_; }
"endif"     { return _END_IF_; }
"repeat"    { return _REPEAT_; }
"until"     { return _UNTIL_; }
"scan"      { return _SCAN_; }
(\?)        { /* a question mark is the print command */ return _PRINT_; }
"local"     { return _LOCAL_; }
"parameter" { return _PARAMETER_; }
"endfor"    { return _ENDFOR_; }
"new"       { return _NEW_; }
"for"       { return _FOR_; }
"while"     { return _WHILE_; }
"return"    { return _RETURN_; }
"==" { /* comparison operators; both spellings of each two-character form map to the same token */ return _EQUAL_; }
">=" { return _GREQL_; }
"<=" { return _LWEQL_; }
"=>" { return _GREQL_; }
"=<" { return _LWEQL_; }
"<"  { /* the grammar uses these two as character tokens; without a rule they fell through to the error rule */ return '<'; }
">"  { return '>'; }
"++"        { /* rules here only fire in INITIAL, so the start condition is left untouched */ return _PLUS_PLUS_; }
"--"        { return _MINUS_MINUS_; }
"+="        { return _PLUS_ASSIGN_; }
"-="        { return _MINUS_ASSIGN_; }
"*="        { return _TIMES_ASSIGN_; }
"/="        { return _DIV_ASSIGN_; }
"class"     { return _CLASS_; }
"of"        { return _OF_; }
"endclass"  { return _ENDCLASS_; }
"set"       { return _SET_; }
"with"      { return _WITH_; }
"endwith"   { return _ENDWITH_; }
"to"        { return _TO_; }
"precision" { return _PRECISION_; }
"round"     { return _ROUND_; }
"space"     { return _SPACE_; }
"replicate" { return _REPLICATE_; }
"trim"      { return _TRIM_; }
"rtrim"     { return _RTRIM_; }
"ltrim"     { return _LTRIM_; }
"abs"       { /* math function names also hand a heap copy of their spelling to the parser */ yylval.name = strdup(yytext); return _ABS_; }
"sin"       { yylval.name = strdup(yytext); return _SIN_; }
"cos"       { yylval.name = strdup(yytext); return _COS_; }
"tan"       { yylval.name = strdup(yytext); return _TAN_; }
"asin"      { yylval.name = strdup(yytext); return _ASIN_; }
"acos"      { yylval.name = strdup(yytext); return _ACOS_; }
"atan"      { yylval.name = strdup(yytext); return _ATAN_; }
"sqrt"      { yylval.name = strdup(yytext); return _SQRT_; }
"=" { /* single-character operators and punctuation; longer operators are matched by earlier rules */ return _ASSIGN_; }
"+" { return _PLUS_; }
"-" { return _MINUS_; }
"*" { return _TIMES_; }
"/" { return _DIV_; }
"(" { return _OBR_; }
")" { return _CBR_; }
"," { return _COMMA_; }
(([-+]?)([0-9]+[.0-9]*)) {
    /* Numeric literal: optional sign, digits, then any mix of digits and dots.
       The class used to be written with quote characters inside it, which made
       an apostrophe part of a number; inside a character class a dot is already
       literal, so no quoting is needed.
       NOTE(review): because the sign belongs to the literal, input such as a-1
       is scanned as an identifier followed by the number -1 -- confirm intended. */
    yylval.val = atof(yytext);
    return _NUM_;
}
[a-zA-Z_]+[0-9a-zA-Z_]* {
    /* identifier: the parser receives a heap copy of the spelling
       (nothing in this excerpt frees it) */
    yylval.name = strdup(yytext);
    return _ID_;
}
(\") { /* opening quote: the string body is scanned in its own state */ BEGIN(STRING_BEG); }
<STRING_BEG>([0-9a-zA-Z_ \t\.\!\=\?\(\)\[\]\:\.\,\+\-\*\/\^]*) {
    /* string body: keep a copy of the text and wait for the closing quote */
    yylval.name = strdup(yytext);
    BEGIN(STRING_END);
}
<STRING_END>(\") { /* closing quote: the token is delivered only now */ BEGIN(INITIAL); return _STRING_; }
\n+ { /* one match can span several newlines, so count every one of them (previously a whole run counted as a single line) */ lineno += yyleng; }
[ \t]* ;
[\.] { return _POINT_; }
. {
    /* Any character no earlier rule accepts: report it, reset the
       translator state and restart the application. */

    /* Save the character first: yyrestart() resets the scan buffer that
       yytext points into. */
    const char bad_char = yytext[0];
    ::yyrestart(yyin);

    /* The message literal was split across two source lines, which does not
       compile; it is a single line again. */
    QMessageBox::about(NULL,"pFehler",QString("Fehlerzeichen: %1 in Zeile: %2").arg(bad_char).arg(lineno));

    /* Bring the counters back to their start-of-run values. */
    lineno = 1;
    if_label = 0;
    expr_type = 2;
    text_counter = 0;
    char_counter = 0;
    nvar_counter = 0;
    RestartApplication();
}
%%
// here the bison lang.y file
// compile: bison.exe -d lang.y
%{
#include <stdio.h>
#include <stdlib.h>
,,,
%}
/* Semantic value carried by tokens and nonterminals. */
%union {
double val;   /* numeric value; the scanner fills it in for _NUM_ */
char* name;   /* heap copy of a token's text (identifiers, strings, math function names) */
char* str;    /* not used by any rule in this excerpt */
int id;       /* value type of the nonterminals listed in the %type <id> declarations */
int label;    /* not used by any rule in this excerpt */
};
%token _IF_ _THEN_ _ELSE_ _END_IF_ _REPEAT_ _UNTIL_ _SCAN_ _PRINT_ _ID_
_NUM_ _EQUAL_ _FOR_ _WHILE_ _RETURN_ _DBLNUM_
%token _LWEQL_ _GREQL_ _ASSIGN_ _PLUS_ _MINUS_ _TIMES_ _DIV_ _STRBE_
_COMMA_ _TRIM_ _LTRIM_ _RTRIM_
%token _NEW_ _LOCAL_ _PARAMETER_ _ENDFOR_
%token _MINUS_ASSIGN_ _TIMES_ASSIGN_ _DIV_ASSIGN_ _PLUS_ASSIGN_ _ROUND_
_SET_ _TO_ _PRECISION_
%token _CLASS_ _OF_ _ENDCLASS_ _WITH_ _ENDWITH_ _POINT_
%token <name> _ASIN_ _ACOS_ _ATAN_ _SIN_ _COS_ _SQRT_ _TAN_ _STRING_
_OBR_ _CBR_ _SPACE_
%token <name> _PLUS_PLUS_ _MINUS_MINUS_
%token <name> _ABS_ _REPLICATE_
%type <val> _NUM_
%type <name> _ID_
%type <id> stmt_seq stmt if_stmt repeat_stmt for_stmt while_stmt
assign_stmt read_stmt write_stmt
%type <id> exp simple_exp term factor exec_stmt handle_string ari_stmt
set_stmt class_stmt with_stmt with_this
%type <id> object_vars class_cmd_stmt misc_stmt local_stmt
%%
/* The separator above was missing: bison needs it between the declarations
   and the grammar rules. */

/* program: the start symbol -- a possibly empty sequence of statements. */
program
: stmt_seq { }
;
/* stmt_seq: zero or more statements, built left-recursively.
   The empty case is written only once (here). Previously both program and
   stmt_seq could be empty and a lone stmt was also a stmt_seq, which gave
   the parser several ways to derive the same input and produced conflicts. */
stmt_seq
: /* empty */ { }
| stmt_seq stmt { }
;
/* stmt: one statement of the language. Every alternative just forwards to
   the rule for that statement kind; of these, only assign_stmt is defined
   in this excerpt. */
stmt: if_stmt { }
| repeat_stmt { }
| assign_stmt { }
| read_stmt { }
| write_stmt { }
| for_stmt { }
| while_stmt { }
| ari_stmt { }
| set_stmt { }
| class_stmt { }
| misc_stmt { }
;
/* assign_stmt: identifier = expression.
   assign_flag is raised while the right-hand side is parsed. Once the
   expression code has been emitted, its result is popped from the FPU stack
   into the variable's storage; that storage is declared in the data section
   the first time the variable is seen. */
assign_stmt
: _ID_ _ASSIGN_ { assign_flag = true; } exp
{
    const QString slot = QString("_LC%1").arg($1);

    code_str += QString("\tfstp qword [_LC%1]\n").arg($1);
    if (symbol_labels.contains(slot) == false)
    {
        symbol_labels << slot;
        data_str += QString("_LC%1: dq 0.00\n").arg($1);
    }
    assign_flag = false;
}
;
/* exp: a simple expression, optionally compared with a second one.
   The actions emit no code; they only record the operator in expr_type:
   0 for less-than, 1 for greater-than, 2 for equal, 3 for greater-or-equal,
   4 for less-or-equal. A plain simple_exp leaves expr_type unchanged. */
exp:
simple_exp '<' simple_exp { expr_type = 0; }
| simple_exp '>' simple_exp { expr_type = 1; }
| simple_exp _EQUAL_ simple_exp { expr_type = 2; }
| simple_exp _GREQL_ simple_exp { expr_type = 3; }
| simple_exp _LWEQL_ simple_exp { expr_type = 4; }
| simple_exp { }
;
/* simple_exp: sums and differences of terms.
   Both operands are already on the FPU stack when an action runs (left
   operand in st1, right operand in st0); the emitted instruction combines
   them and pops, leaving the result in st0.
   The rule is left-recursive so that a - b - c is computed as (a - b) - c.
   The former right-recursive form grouped it as a - (b - c). */
simple_exp :
simple_exp _PLUS_ term
{
code_str += QString("\tfaddp st1, st0\n");
}
| simple_exp _MINUS_ term
{
code_str += QString("\tfsubp st1, st0\n");
}
| term { }
;
/* term: products and quotients of factors.
   Both operands are already on the FPU stack when an action runs (left
   operand in st1, right operand in st0); the emitted instruction combines
   them and pops, leaving the result in st0.
   The rule is left-recursive so that a / b / c is computed as (a / b) / c.
   The former right-recursive form grouped it as a / (b / c). */
term:
term _TIMES_ factor
{
code_str += QString("\tfmulp st1, st0\n");
}
| term _DIV_ factor
{
code_str += QString("\tfdivp st1, st0\n");
}
| factor {}
;
/* factor: the operands of an expression. Each alternative appends x87 code
   to code_str and, where storage is needed, a definition to data_str. */
factor
: _OBR_ exp _CBR_ {
/* NOTE(review): after the parenthesised expression this loads the storage of
   FLastID once more. FLastID is only assigned in the identifier alternative
   below -- confirm that the extra load is intended. */
code_str += QString("\tfld qword [_LC%1]\n").arg(FLastID);
}
| _NUM_
{
/* Render the literal as text and append ".00" when it contains no dot. */
temp_str = QString("%1").arg((double)$1); if (temp_str.contains('.')
== false)
temp_str += QString(".00");
/* Every literal gets its own numbered data label, which is loaded here. */
nvar_counter++;
code_str.append(QString("\tfld qword [_LC%1]\n").arg(nvar_counter-1));
data_str += QString("_LC%1:\tdq %2\n").arg(nvar_counter-1).arg(temp_str);
last_result = NUMBER;
}
| _ROUND_ _OBR_ { assign_flag = true; } exp {
/* Mid-rule action: runs once the first argument has been parsed.
   NOTE(review): st0 is stored to _LCtmp, but the two pushes read the slot
   numbered nvar_counter-1 instead -- confirm which one is intended. */
code_str += QString(
"\tfstp qword [_LCtmp]\n"
"\tpush dword [_LC%1+4]\n"
"\tpush dword [_LC%1]\n"
).arg(nvar_counter-1);
} _COMMA_ exp _CBR_ {
/* Final action: same store-and-push sequence for the second argument,
   followed by the call to the rounding helper. No stack clean-up is
   emitted here. */
code_str += QString(
"\tfstp qword [_LCtmp]\n"
"\tpush dword [_LC%1+4]\n"
"\tpush dword [_LC%1]\n"
"\tcall _round_val\n"
).arg(nvar_counter-1);
}
| _ID_
{
/* Variable reference: load its value and remember the name in FLastID. */
last_result = 0;
//if (do_function_call)
code_str += QString("\tfld qword [_LC%1]\n").arg($1);
FLastID = $1;
stack_counter += 4;
/* Declare storage for the variable the first time it is seen. */
if (!symbol_labels.contains(QString("_LC%1").arg($1)))
{
data_str += QString("_LC%1: dq 0.00\n").arg($1);
symbol_labels << QString("_LC%1").arg($1);
}
}
| _ID_ _PLUS_PLUS_ {
/* Post-increment. The first load leaves the old value on the FPU stack as
   the value of the expression; the remaining instructions compute the new
   value and store it back into the variable.
   The previous sequence used faddp with a memory operand, which is not a
   valid x87 form (faddp only takes stack registers), and never stored the
   sum. This version mirrors the post-decrement alternative.
   Presumably _LCtmp_1 holds the step 1.0; it is defined outside this
   excerpt -- confirm. */
code_str += QString(
"\tfld qword [_LC%1]\n"
"\tfld qword [_LCtmp_1]\n"
"\tfadd qword [_LC%1]\n"
"\tfstp qword [_LC%1]\n"
).arg($1);
}
| _ID_ _MINUS_MINUS_ {
/* Post-decrement: load the variable, load _LCtmp_1, subtract, store back.
   NOTE(review): fsub with a memory operand computes st0 minus memory, i.e.
   _LCtmp_1 minus the variable. If _LCtmp_1 holds 1.0 this stores 1 - x
   rather than x - 1 (fsubr would give x - 1) -- confirm against the
   definition of _LCtmp_1, which is outside this excerpt. */
code_str += QString(
"\tfld qword [_LC%1]\n"
"\tfld qword [_LCtmp_1]\n"
"\tfsub qword [_LC%1]\n"
"\tfstp qword [_LC%1]\n"
).arg($1);
}
/* Built-in math functions: the argument expression has already left its
   value in st0; internal_pusher() appends the code for the function. */
| _SIN_ _OBR_ exp _CBR_ { internal_pusher("sin" ); }
| _ASIN_ _OBR_ exp _CBR_ { internal_pusher("asin"); }
| _ACOS_ _OBR_ exp _CBR_ { internal_pusher("acos"); }
| _ATAN_ _OBR_ exp _CBR_ { internal_pusher("atan"); }
| _COS_ _OBR_ exp _CBR_ { internal_pusher("cos" ); }
| _TAN_ _OBR_ exp _CBR_ { internal_pusher("tan" ); }
| _SQRT_ _OBR_ exp _CBR_ { internal_pusher("sqrt"); }
/* abs: fabs clears the sign of st0. The previous code emitted fchs, which
   flips the sign and therefore returned -x for positive x. */
| _ABS_ _OBR_ exp _CBR_ { code_str += QString("\tfabs\n"); }
;
/* endif_stmt: matches the token the scanner returns for the endif keyword.
   No code is emitted, and no rule in this excerpt refers to it. */
endif_stmt
: _END_IF_ { }
;
%%
void yyerror(char * message)
{
QMessageBox::about(NULL,"error",QString("%1: line
%2").arg(message).arg(lineno));
::yyrestart(yyin);
}
/*
 * Append the code for one built-in math function to code_str.
 *
 * id: function name. "sin", "cos" and "sqrt" are emitted as the x87
 *     instruction of the same name; "asin", "acos", "atan" and "tan" are
 *     emitted as a call to an external routine named _<id>. Any other name
 *     emits nothing.
 */
void internal_pusher(QString id)
{
    const bool has_fpu_instruction =
        (id == "sin") || (id == "cos") || (id == "sqrt");

    if (has_fpu_instruction) {
        /* Apply the instruction to st0, then spill and reload the result
           through a slot named after the function. */
        code_str += QString(
            "\tf%1\n"
            "\tfstp qword [_LC%1]\n"
            "\tfld qword [_LC%1]\n"
            ).arg(id);
        return;
    }

    const bool needs_helper_call =
        (id == "asin") || (id == "acos") || (id == "atan") || (id == "tan");

    if (!needs_helper_call)
        return;

    /* Pass st0 as a double on the stack (two dwords) and call the helper.
       NOTE(review): two dwords are pushed (8 bytes) but 12 bytes are removed
       from the stack afterwards -- confirm against the helper's calling
       convention. */
    code_str += QString(
        "\tfstp qword [_LCtmp]\n"
        "\tpush dword [_LCtmp+4]\n"
        "\tpush dword [_LCtmp]\n"
        "\tcall _%1\n"
        "\tadd esp, 12\n").arg(id);
}
cheers
Jens
Return to the
comp.compilers page.
Search the
comp.compilers archives again.