Re: Diplodicus: A parser with infinite tokens of lookahead

"tj bandrowsky" <tbandrow@unitedsoftworks.com>
23 Aug 2002 11:05:47 -0400

          From comp.compilers

Related articles
Diplodicus: A parser with infinite tokens of lookahead tbandrow@unitedsoftworks.com (tj bandrowsky) (2002-07-24)
Re: Diplodicus: A parser with infinite tokens of lookahead Mark.van.den.Brand@cwi.nl (M.G.J. van den Brand) (2002-07-25)
Re: Diplodicus: A parser with infinite tokens of lookahead tbandrow@unitedsoftworks.com (tj bandrowsky) (2002-07-31)
Re: Diplodicus: A parser with infinite tokens of lookahead clint@0lsen.net (Clint Olsen) (2002-07-31)
Re: Diplodicus: A parser with infinite tokens of lookahead tbandrow@unitedsoftworks.com (tj bandrowsky) (2002-08-23)
| List of all articles for this month |

From: "tj bandrowsky" <tbandrow@unitedsoftworks.com>
Newsgroups: comp.compilers
Date: 23 Aug 2002 11:05:47 -0400
Organization: http://groups.google.com/
References: 02-07-099 02-07-115 02-07-135
Keywords: parse
Posted-Date: 23 Aug 2002 11:05:46 EDT

As an FYI, I've managed to fix most of the shortcomings in Diplodicus
so that it does what I need for now. It falls somewhere short of
generalized LR parsing because it imposes some of its own "thinking"
in order to avoid ambiguity. I don't do operator precedence at all.


Still, it can handle grammars like B:=A, A:=B, and it handles unary
minus pretty simply. It's convenient, which was the main thing I wanted
when I wrote it. Here is the quintessential simple calculator
interpreter, written using Diplodicus:


// Simple calculator interpreter built on shift_reduce_parser_type.
//
// The constructor registers the lexical and grammatical rules; on_reduce()
// evaluates each reduction numerically and hands the result back to the
// parser as a dictionary word id.
class calculator_type : public shift_reduce_parser_type {
public:


    // Registers the calculator's rules and turns parser tracing off.
    calculator_type()
    {
        trace_reduce = false;
        trace_shift = false;

        // Lexical rules: (rule id, symbol name, pattern).
        // NOTE(review): "/d" and "/w" look like the engine's digit and
        // whitespace classes -- confirm against the pattern syntax.
        add_lexical_rule( 2, "expr", "[/d]+" );
        add_lexical_rule( 5, "*", "*" );
        add_lexical_rule( 6, "/", "/" );
        add_lexical_rule( 7, "+", "+" );
        add_lexical_rule( 8, "-", "-" );
        add_lexical_rule( 9, "(", "(" );
        add_lexical_rule( 10, ")", ")" );
        add_lexical_rule( 11, "ws", "/w" );

        // Binary operators, parentheses and the top-level result.
        add_grammatical_rule( 12, "expr", "expr + expr" );
        add_grammatical_rule( 13, "expr", "expr - expr" );
        add_grammatical_rule( 14, "expr", "expr * expr" );
        add_grammatical_rule( 15, "expr", "expr / expr" );
        add_grammatical_rule( 16, "expr", "( expr )" );
        add_grammatical_rule( 17, "result", "expr" );

        // Fold trailing "ws" into the preceding symbol.
        add_grammatical_rule( 18, "expr", "expr ws" );
        add_grammatical_rule( 19, "+", "+ ws" );
        add_grammatical_rule( 20, "-", "- ws" );
        add_grammatical_rule( 21, "/", "/ ws" );
        add_grammatical_rule( 22, "*", "* ws" );
        add_grammatical_rule( 23, "(", "( ws" );
        add_grammatical_rule( 24, ")", ") ws" );

        // Unary minus.
        add_grammatical_rule( 25, "expr", "- expr" );

        // Five-symbol rules in which "*" or "/" follows "+" or "-";
        // on_reduce() evaluates them with the multiplicative part first.
        add_grammatical_rule( 26, "expr", "expr - expr / expr" );
        add_grammatical_rule( 27, "expr", "expr - expr * expr" );
        add_grammatical_rule( 28, "expr", "expr + expr / expr" );
        add_grammatical_rule( 29, "expr", "expr + expr * expr" );
    }


    virtual ~calculator_type()
    {
    }


    // Evaluates one reduction.
    //
    // Calls the base-class handler first, then computes a double from the
    // matched symbols according to the rule id, formats it with "%f" and
    // returns the id that dictionary.word_to_id() gives for that text.
    // Rule 17 ("result") also prints the value text to stdout.
    virtual int on_reduce( reduce_event_type *_reduce_event )
    {
        TCHAR buff[1024];
        int value;
        // Initialised because rules 19-24 (operator/paren followed by "ws")
        // have no case below; previously such a reduction formatted an
        // uninitialised double, which is undefined behaviour.
        double r = 0.0;

        shift_reduce_parser_type::on_reduce( _reduce_event );

        dictionary.lock();

        switch (_reduce_event->get_rule_id()) {
        case 12:
            r = operand( _reduce_event, 0 ) + operand( _reduce_event, 2 );
            break;
        case 13:
            r = operand( _reduce_event, 0 ) - operand( _reduce_event, 2 );
            break;
        case 14:
            r = operand( _reduce_event, 0 ) * operand( _reduce_event, 2 );
            break;
        case 15:
            r = operand( _reduce_event, 0 ) / operand( _reduce_event, 2 );
            break;
        case 16:
            // "( expr )": the value is the middle symbol.
            r = operand( _reduce_event, 1 );
            break;
        case 17:
            printf( "\nresult = %s\n", _reduce_event->get_value( 0 ) );
            r = operand( _reduce_event, 0 );
            break;
        case 18:
            // "expr ws": keep the expression's value.
            r = operand( _reduce_event, 0 );
            break;
        case 25:
            r = -operand( _reduce_event, 1 );
            break;
        case 26:
            r = operand( _reduce_event, 0 )
                - operand( _reduce_event, 2 ) / operand( _reduce_event, 4 );
            break;
        case 27:
            r = operand( _reduce_event, 0 )
                - operand( _reduce_event, 2 ) * operand( _reduce_event, 4 );
            break;
        case 28:
            r = operand( _reduce_event, 0 )
                + operand( _reduce_event, 2 ) / operand( _reduce_event, 4 );
            break;
        case 29:
            r = operand( _reduce_event, 0 )
                + operand( _reduce_event, 2 ) * operand( _reduce_event, 4 );
            break;
        default:
            // Rules with no numeric meaning (19-24): r stays 0.0.
            // NOTE(review): returning the base-class result for these rules
            // may be more appropriate -- confirm what the base returns.
            break;
        }

        sprintf( buff, "%f", r );
        value = dictionary.word_to_id( buff );
        dictionary.unlock();

        return value;
    }


private:

    // Returns atof() of the text of matched symbol number `index`.
    static double operand( reduce_event_type *_reduce_event, int index )
    {
        return atof( _reduce_event->get_value( index ) );
    }

};




And here is a simple XML parser:


void test_parser3( char *_filename )
{
// then, test the engine
shift_reduce_parser_type parser;


printf( "test parser 3\n" );


parser.trace_reduce = true;
parser.trace_shift = true;


parser.add_lexical_rule( 1, "start", "<" );
parser.add_lexical_rule( 2, "endstart", "<//" );
parser.add_lexical_rule( 3, "end", ">" );
parser.add_lexical_rule( 4, "commentstart", "<!--" );
parser.add_lexical_rule( 5, "dtdstart", "<!DOCTYPE" );
parser.add_lexical_rule( 6, "docrefstart", "<?xml" );
parser.add_lexical_rule( 7, "docrefend", "?>" );
parser.add_lexical_rule( 8, "ws", "/w+" );
parser.add_lexical_rule( 9, "name", "[/a][/a/d_]+" );
parser.add_lexical_rule( 10, "number", "[/d]+" );
parser.add_lexical_rule( 11, "floatnumber", "[/d]+/.[/d]+" );
parser.add_lexical_rule( 12, "floatnumber", "/.[/d]+" );
parser.add_lexical_rule( 13, "quote", "\"" );
parser.add_lexical_rule( 14, "otherpunct",
"['-~!@#$%^&/*()_;|:+-/]/[/+{}//\\?,/.]" );
parser.add_lexical_rule( 15, "equal", "=" );
parser.add_lexical_rule( 16, "attrtagend", "//>" );
parser.add_lexical_rule( 17, "system", "SYSTEM" );


parser.add_grammatical_rule( 50, "$delete", "ws" );
parser.add_grammatical_rule( 51, "otherpunct", "otherpunct
otherpunct" );
parser.add_grammatical_rule( 52, "quotedstring", "quote start" );
parser.add_grammatical_rule( 53, "quotedstring", "quote name" );
parser.add_grammatical_rule( 54, "quotedstring", "quote number" );
parser.add_grammatical_rule( 55, "quotedstring", "quote floatnumber"
);
parser.add_grammatical_rule( 56, "quotedstring", "quote otherpunct"
);
parser.add_grammatical_rule( 57, "quotedstring", "quote end" );
parser.add_grammatical_rule( 58, "quotedstring", "quotedstring name"
);
parser.add_grammatical_rule( 59, "quotedstring", "quotedstring
number" );
parser.add_grammatical_rule( 60, "quotedstring", "quotedstring
floatnumber" );
parser.add_grammatical_rule( 61, "quotedstring", "quotedstring
otherpunct" );
parser.add_grammatical_rule( 62, "quotedstring", "quotedstring end"
);
parser.add_grammatical_rule( 63, "quotedstring", "quotedstring start"
);
parser.add_grammatical_rule( 64, "string", "quotedstring quote" );
parser.add_grammatical_rule( 65, "commenting", "commentstart name" );
parser.add_grammatical_rule( 66, "commenting", "commentstart number"
);
parser.add_grammatical_rule( 67, "commenting", "commentstart
floatnumber" );
parser.add_grammatical_rule( 68, "commenting", "commentstart
otherpunct" );
parser.add_grammatical_rule( 69, "commenting", "commenting name" );
parser.add_grammatical_rule( 70, "commenting", "commenting number" );
parser.add_grammatical_rule( 71, "commenting", "commenting
floatnumber" );
parser.add_grammatical_rule( 72, "commenting", "commenting
otherpunct" );
parser.add_grammatical_rule( 73, "comment", "commenting end" );
parser.add_grammatical_rule( 74, "$delete", "comment" );
parser.add_grammatical_rule( 75, "property", "name equal number" );
parser.add_grammatical_rule( 76, "property", "name equal floatnumber"
);
parser.add_grammatical_rule( 77, "property", "name equal string" );
parser.add_grammatical_rule( 78, "property", "property property" );
parser.add_grammatical_rule( 79, "xmlattribute", "start name property
attrtagend" );
parser.add_grammatical_rule( 80, "xmlattribute", "xmlattribute
xmlattribute" );
parser.add_grammatical_rule( 81, "xmltagstart", "start name property
end" );
parser.add_grammatical_rule( 82, "xmltagstart", "start name end" );
parser.add_grammatical_rule( 83, "xmltagend", "endstart name end" );


parser.add_grammatical_rule( 84, "xmldatatag", "xmltagstart name" );
parser.add_grammatical_rule( 85, "xmldatatag", "xmltagstart number"
);
parser.add_grammatical_rule( 86, "xmldatatag", "xmltagstart
floatnumber" );
parser.add_grammatical_rule( 87, "xmldatatag", "xmltagstart
otherpunct" );
parser.add_grammatical_rule( 88, "xmldatatag", "xmltagstart equal" );
parser.add_grammatical_rule( 90, "xmldatatag", "xmltagstart quote" );
parser.add_grammatical_rule( 91, "xmldatatag", "xmldatatag name" );
parser.add_grammatical_rule( 92, "xmldatatag", "xmldatatag number" );
parser.add_grammatical_rule( 93, "xmldatatag", "xmldatatag
floatnumber" );
parser.add_grammatical_rule( 94, "xmldatatag", "xmldatatag
otherpunct" );
parser.add_grammatical_rule( 95, "xmldatatag", "xmldatatag equal" );
parser.add_grammatical_rule( 97, "xmldatatag", "xmldatatag quote" );


parser.add_grammatical_rule( 98, "xmltag", "xmldatatag xmltagend" );
parser.add_grammatical_rule( 99, "xmltag", "xmltagstart xmlattribute
xmltagend" );
parser.add_grammatical_rule( 100, "xmltag", "xmltagstart xmlattribute
xmltag xmltagend" );
parser.add_grammatical_rule( 101, "xmltag", "xmltagstart xmltag
xmlattribute xmltagend" );
parser.add_grammatical_rule( 102, "xmltag", "xmltagstart xmltag
xmltagend" );
parser.add_grammatical_rule( 103, "xmltag", "xmltag xmltag" );
parser.add_grammatical_rule( 104, "xmldescr", "docrefstart property
docrefend" );
parser.add_grammatical_rule( 105, "xmldtd", "dtdstart name system
string end" );
parser.add_grammatical_rule( 106, "xmldtd", "dtdstart name end" );
parser.add_grammatical_rule( 107, "xmldocument", "xmldescr xmldtd
xmltag" );


parser.print_rules();


parser.start();


page_string_type<char> my_data;
int l = read_file( _filename, my_data );


parser.parse( my_data, l );


parser.finish();
}


Post a followup to this message

Return to the comp.compilers page.
Search the comp.compilers archives again.