RFC - VBS Grammar

"Larry Lewis" <larlew@home.com>
23 Dec 2000 02:17:24 -0500

          From comp.compilers

Related articles
RFC - VBS Grammar larlew@home.com (Larry Lewis) (2000-12-23)
Re: RFC - VBS Grammar joachim_d@gmx.de (Joachim Durchholz) (2000-12-31)
Re: RFC - VBS Grammar snicol@apk.net (Scott Nicol) (2000-12-31)
Re: RFC - VBS Grammar snicol@apk.net (Scott Nicol) (2001-01-04)
Re: RFC - VBS Grammar Arargh@Enteract.com (Arargh!) (2001-01-04)
| List of all articles for this month |

From: "Larry Lewis" <larlew@home.com>
Newsgroups: comp.compilers
Date: 23 Dec 2000 02:17:24 -0500
Organization: Excite@Home - The Leader in Broadband http://home.com/faster
Keywords: Basic, lex, yacc
Posted-Date: 23 Dec 2000 02:17:24 EST

I have attached a simple grammar (lex/yacc) for VBScript. There are still a
few things missing, but I am wondering if I'm on the right track here.


  Does anyone care to review and comment?


*** filename="vbs.yacc" ***


%{
/* C prologue copied into the generated parser. */
/* NOTE(review): symbol.h is presumably where struct symtab, symtab[] and NSYMS are declared - confirm. */
#include "symbol.h"
extern void yyerror();
extern int yylex();
extern char *yytext;
/* NOTE(review): nError/nWarning are not referenced anywhere in this post; presumably counters kept by the driver - confirm. */
extern int nError, nWarning;
/* Current input line; the rule actions store it in line_declared. */
extern int yylineno;
%}
/* Semantic value type shared by the scanner and the parser. */
%union {
/* Symbol-table entry; this is the value type of IDENTIFIER tokens. */
struct symtab *symp;
/* No token or rule in this grammar is declared with this member. */
int val;
}
/*
 * Token declarations.
 * IDENTIFIER is the only token with a semantic value (a symbol-table
 * pointer); it is declared once, with its type.
 */
%token <symp> IDENTIFIER


%token EOL
%token CONSTANT STRING_LITERAL


/* Keywords and intrinsic function names, one token per scanner keyword rule. */
%token ABS AND ARRAY ASC ATN CALL CBOOL CBYTE CCUR CDATE
%token CDBL CHR CINT CLASS CLNG CONST COS CREATEOBJECT
%token CSNG CSTR DATE DATEADD DATEDIFF DATEPART DATESERIAL
%token DATEVALUE DAY DEFAULT DIM DO LOOP EMPTY END ERASE ERR EVAL EXECUTE
%token EXECUTEGLOBAL EXIT EXP FALSE FILTER FIX FOR EACH NEXT
%token FORMATCURRENCY FORMATDATETIME FORMATNUMBER FORMATPERCENT
%token FUNCTION GETLOCALE GETOBJECT GETREF HEX HOUR IF THEN ELSE
%token INPUTBOX INSTR INSTRREV IN INT IS ISARRAY ISDATE ISEMPTY
%token ISNULL ISNUMERIC ISOBJECT JOIN LBOUND LCASE LEFT LEN
%token LOADPICTURE LOG LTRIM MID MINUTE MONTH MONTHNAME MSGBOX
%token NOT NOW NOTHING NUL OCT ONERRORRESUME ONERRORGOTO OPTIONEXP OR
%token PRESERVE PRIVATE PROPERTY
%token PUBLIC RAISE RANDOMIZE REDIM REM REPLACE RGB RIGHT RND
%token ROUND RTRIM SCRIPTENGINE SCRIPTBUILD
%token SCRIPTMAJOR SCRIPTMINOR SECOND SELECT CASE SET
%token SETLOCALE SGN SIN SPACE SPLIT SQR STRCOMP STRING
%token STRREVERSE SUB TAN TIME TIMER TIMESERIAL TIMEVALUE TRIM
%token TRUE TYPENAME UBOUND UCASE UNTIL VARTYPE WEEKDAY
%token WEEKDAYNAME WHILE WEND WITH YEAR TO STEP
/* Word operators and multi-character comparison operators. */
%token XOR EQV IMP MOD LE_OP GE_OP NE_OP EQ_OP


%start statement_list
%%
/*
 * Any built-in function name. It is accepted wherever a primary expression
 * is, so a call such as Len(x) is parsed through postfix_expression.
 */
intrinsic_function
: ABS | ARRAY | ASC | ATN | CBOOL | CBYTE
| CCUR | CDATE | CDBL | CHR | CINT | CLNG
| COS | CREATEOBJECT | CSNG | CSTR | DATE
| DATEADD | DATEDIFF | DATEPART | DATESERIAL
| DATEVALUE | DAY | EVAL | EXP | FILTER | FIX
| FORMATCURRENCY | FORMATDATETIME | FORMATNUMBER
| FORMATPERCENT | GETLOCALE | GETOBJECT | GETREF
| HEX | HOUR | INPUTBOX | INSTR | INSTRREV
| INT | ISARRAY | ISDATE | ISEMPTY | ISNULL
| ISNUMERIC | ISOBJECT | JOIN | LBOUND | LCASE
| LEFT | LEN | LOADPICTURE | LOG | LTRIM | MID
| MINUTE | MONTH | MONTHNAME | MSGBOX | NOW
| OCT | REPLACE | RGB | RIGHT | RND | ROUND
| RTRIM | SCRIPTENGINE | SCRIPTBUILD | SCRIPTMAJOR
| SCRIPTMINOR | SECOND | SETLOCALE | SGN | SIN
| SPACE | SPLIT | SQR | STRCOMP | STRING | STRREVERSE
| TAN | TIME | TIMER | TIMESERIAL | TIMEVALUE | TRIM
| TYPENAME | UBOUND | UCASE | VARTYPE | WEEKDAY
| WEEKDAYNAME | YEAR
;


/* Top of the expression grammar; simply an assignment_expression. */
expression
: assignment_expression
;


/*
 * Either a plain expression or "target = value". The rule is
 * right-recursive, so a = b = c groups as a = (b = c).
 * The scanner returns every "=" as the character token '=', so an "="
 * used as a comparison is also parsed through this rule.
 */
assignment_expression
: logical_imp_expression
| uminus_expression '=' assignment_expression
;


/*
 * Atoms of an expression: a name, a literal, a built-in function name,
 * or a parenthesised expression.
 */
primary_expression
/* Each use of a name increments that symbol's line_referenced field. */
: IDENTIFIER { $1->line_referenced++;}
| CONSTANT
| STRING_LITERAL
| intrinsic_function
| '(' expression ')'
;


/*
 * A primary expression followed by any number of call/index argument
 * lists "(...)" and member selections ".name".
 */
postfix_expression
: primary_expression
| postfix_expression '(' ')'
| postfix_expression '(' argument_expression_list ')'
| postfix_expression '.' IDENTIFIER
;


/*
 * Prefix operators: unary minus and NOT, both applied to a postfix
 * expression and both binding tighter than every binary operator below.
 */
uminus_expression
: postfix_expression
| '-' uminus_expression
| NOT uminus_expression
;

/*
 * Binary operators, tightest-binding first. Every rule below is
 * left-recursive, so operators on the same level group left to right,
 * and each level takes the level above it as its operand.
 */
/* Exponentiation: ^ */
exponent_expression
: uminus_expression
| exponent_expression '^' uminus_expression
;


/* Multiplication and floating-point division: * / */
multiplicative_expression
: exponent_expression
| multiplicative_expression '*' exponent_expression
| multiplicative_expression '/' exponent_expression
;


/* Integer division: \ */
intdiv_expression
: multiplicative_expression
| intdiv_expression '\\' multiplicative_expression
;


/* Remainder: MOD */
modulo_expression
: intdiv_expression
| modulo_expression MOD intdiv_expression
;


/* Addition and subtraction: + - */
additive_expression
: modulo_expression
| additive_expression '+' modulo_expression
| additive_expression '-' modulo_expression
;


/* String concatenation: & */
concatenation_expression
: additive_expression
| concatenation_expression '&' additive_expression
;


/* Ordering comparisons: < > <= >= */
relational_expression
: concatenation_expression
| relational_expression '<' concatenation_expression
| relational_expression '>' concatenation_expression
| relational_expression LE_OP concatenation_expression
| relational_expression GE_OP concatenation_expression
;


/*
 * Equality comparisons.
 * NOTE(review): no scanner rule in this post returns EQ_OP ("=" is
 * returned as '='), so only the NE_OP alternative can currently match.
 */
equality_expression
: relational_expression
| equality_expression EQ_OP relational_expression
| equality_expression NE_OP relational_expression
;


/* Object identity: IS */
is_expression
: equality_expression
| is_expression IS equality_expression
;


/* Logical/bitwise operators, from AND (tightest) down to IMP (loosest). */
logical_and_expression
: is_expression
| logical_and_expression AND is_expression
;


logical_or_expression
: logical_and_expression
| logical_or_expression OR logical_and_expression
;


logical_xor_expression
: logical_or_expression
| logical_xor_expression XOR logical_or_expression
;


logical_eqv_expression
: logical_xor_expression
| logical_eqv_expression EQV logical_xor_expression
;


logical_imp_expression
: logical_eqv_expression
| logical_imp_expression IMP logical_eqv_expression
;


/* One or more comma-separated expressions (call arguments, CASE values). */
argument_expression_list
: assignment_expression
| argument_expression_list ',' assignment_expression
;


/* One or more comma-separated CONSTANT tokens (used for array bounds). */
constant_list
: CONSTANT
| constant_list ',' CONSTANT
;


/*
 * A name being used (not declared), optionally followed by constant
 * subscripts; either form increments the symbol's line_referenced field.
 */
identifier
: IDENTIFIER { $1->line_referenced++;}
| IDENTIFIER '(' constant_list ')' { $1->line_referenced++;}
;


/*
 * A name being declared, optionally with constant array bounds.
 * Records the current input line in the symbol's line_declared field.
 */
dec_identifier
: IDENTIFIER { $1->line_declared = yylineno;}
| IDENTIFIER '(' constant_list ')' { $1->line_declared = yylineno;}
;


/* A dotted name: one or more dec_identifiers joined by '.'. */
ref_identifier
: dec_identifier
| ref_identifier '.' dec_identifier
;
/* A name followed by a parenthesised list of dimension expressions (REDIM). */
array_identifier
: dec_identifier '(' argument_expression_list ')'
;


/* One or more comma-separated array_identifiers. */
array_identifier_list
: array_identifier
| array_identifier_list ',' array_identifier
;


/*
 * One or more ref_identifiers (operand list of ERASE).
 * NOTE(review): unlike the other list rules, the elements here are not
 * separated by ',' - confirm whether that is intended.
 */
ref_identifier_list
: ref_identifier
| ref_identifier_list ref_identifier
;


/* One or more comma-separated declared names (DIM / PUBLIC / PRIVATE). */
identifier_list
: dec_identifier
| identifier_list ',' dec_identifier
;


/*
 * Any single statement. Each alternative is defined by its own rule
 * elsewhere in this grammar ("declaration_statment" is spelled the same
 * way at its definition).
 */
statement
: expression_statement
| declaration_statment
| selection_statement
| iteration_statement
| subroutine
| implicit_subroutine_call
| explicit_subroutine_call
| jump_statement
| optionexp_statement
| erase_statement
| onerror_statement
| redim_statement
| set_statement
;


/* Call without CALL and without parentheses: name arg, arg, ... */
implicit_subroutine_call
: identifier argument_expression_list
;


/* Call with the CALL keyword; the argument list is parenthesised and non-empty. */
explicit_subroutine_call
: CALL identifier '(' argument_expression_list ')'
;


/* One or more statements; this is also the grammar's start symbol. */
statement_list
: statement
| statement_list statement
;


/* ONERRORRESUME and ONERRORGOTO are each returned by the scanner as a single token. */
onerror_statement
: ONERRORRESUME EOL
| ONERRORGOTO EOL
;


/* An expression on a line of its own, or an empty line. */
expression_statement
: expression EOL
| EOL
;
/* OPTIONEXP is a single token returned by the scanner. */
optionexp_statement
: OPTIONEXP EOL
;


/*
 * Variable and constant declarations.
 * The DIM/PUBLIC/PRIVATE forms consume the end-of-line token themselves.
 * The CONST forms do not; the EOL that follows them is then accepted as
 * an empty expression_statement. Each CONST form records the current
 * input line in the declared symbol's line_declared field.
 */
declaration_statment
: DIM identifier_list EOL
| PUBLIC identifier_list EOL
| PRIVATE identifier_list EOL
| PUBLIC CONST IDENTIFIER '=' expression { $3->line_declared = yylineno;}
| PRIVATE CONST IDENTIFIER '=' expression { $3->line_declared = yylineno;}
| CONST IDENTIFIER '=' expression { $2->line_declared = yylineno;}
;


/* ERASE followed by a list of (possibly dotted) names. */
erase_statement
: ERASE ref_identifier_list EOL
;


/* REDIM, optionally with PRESERVE, followed by comma-separated name(dimensions) items. */
redim_statement
: REDIM array_identifier_list EOL
| REDIM PRESERVE array_identifier_list EOL
;


/*
 * Formal parameter names of a SUB declaration, separated by commas like
 * every other list in this grammar (e.g. "Sub f(a, b)").
 */
argument_list
: IDENTIFIER
| argument_list ',' IDENTIFIER
;
/* Object assignment: SET target = expression. */
set_statement
: SET ref_identifier '=' expression EOL
;


/* Optional visibility prefix of a SUB declaration. */
subroutine_scope
: PUBLIC
| PUBLIC DEFAULT
| PRIVATE
;


/*
 * Header line of a subroutine: optional scope, SUB, the name, and an
 * optional parenthesised (possibly empty) parameter list. Every form
 * records the current input line in the name's line_declared field.
 */
subroutine_declaration
: subroutine_scope SUB IDENTIFIER { $3->line_declared = yylineno;}
| SUB IDENTIFIER { $2->line_declared = yylineno;}
| subroutine_scope SUB IDENTIFIER '(' argument_list ')' { $3->line_declared = yylineno;}
| SUB IDENTIFIER '(' argument_list ')' { $2->line_declared = yylineno;}
| subroutine_scope SUB IDENTIFIER '(' ')' { $3->line_declared = yylineno;}
| SUB IDENTIFIER '(' ')' { $2->line_declared = yylineno;}
;


/* A complete subroutine: header line, a non-empty body, then END SUB. */
subroutine
: subroutine_declaration EOL statement_list END SUB EOL
;


/* One or more CASE lines, each optionally followed by a body of statements. */
case_list
: case_statement
| case_statement statement_list
| case_list case_statement
| case_list case_statement statement_list
;
/* A single "CASE value, value, ..." line. */
case_statement
: CASE argument_expression_list EOL
;
/*
 * Loops: FOR ... NEXT (with or without a body, a STEP clause, or a
 * trailing expression after NEXT), FOR EACH, WHILE ... WEND, and the
 * DO ... LOOP forms with a leading WHILE/UNTIL condition.
 * The expression after FOR is where the "i = start" assignment is parsed.
 * The STEP form carries a loop body and a closing NEXT like the other
 * FOR forms.
 */
iteration_statement
: FOR expression TO logical_imp_expression EOL NEXT EOL
| FOR expression TO logical_imp_expression EOL statement_list NEXT EOL
| FOR expression TO logical_imp_expression EOL statement_list NEXT expression EOL
| FOR expression TO logical_imp_expression STEP logical_imp_expression EOL statement_list NEXT EOL
| FOR EACH identifier IN ref_identifier EOL statement_list NEXT EOL
| WHILE logical_imp_expression EOL statement_list WEND EOL
| DO EOL statement_list LOOP EOL
| DO WHILE expression EOL statement_list LOOP EOL
| DO UNTIL expression EOL statement_list LOOP EOL
;


/* EXIT followed by the kind of construct being left. */
jump_statement
: EXIT DO EOL
| EXIT FOR EOL
| EXIT FUNCTION EOL
| EXIT PROPERTY EOL
| EXIT SUB EOL
;


/*
 * Block IF (with an optional ELSE part) and SELECT CASE.
 * The "Select Case <expr>" header line is parsed by reusing
 * case_statement (CASE argument_expression_list EOL) directly after the
 * SELECT token; the CASE lines of the body come from case_list.
 */
selection_statement
: IF expression THEN EOL statement_list END IF EOL
| IF expression THEN EOL statement_list ELSE EOL statement_list END IF EOL
| SELECT case_statement END SELECT EOL
| SELECT case_statement case_list END SELECT EOL
| SELECT case_statement case_list CASE ELSE statement_list END SELECT EOL
;




%%
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>


extern char *yytext;
extern int column;


/*
 * symlook - find the symbol-table entry for a name, creating it if needed.
 *
 * Scans symtab[0 .. NSYMS) in order. A slot whose name matches `s`
 * (compared case-insensitively with stricmp) is returned as is. The first
 * slot without a name is claimed for `s`: it receives a private copy of
 * the string and line_declared is set to -1.
 *
 * Does not return on failure: reports the problem through yyerror() and
 * exits with status 1 when the table is full or the name cannot be copied.
 */
struct symtab *symlook(char *s) {
	struct symtab *sp;

	for (sp = symtab; sp < &symtab[NSYMS]; sp++) {
		/* Already known: hand back the existing entry. */
		if (sp->name && !stricmp(sp->name, s))
			return sp;

		/* First unused slot: no later slot is examined, so create the entry here. */
		if (!sp->name) {
			sp->name = strdup(s);
			if (!sp->name) {
				/* Without this check the caller would receive a nameless entry. */
				yyerror("Out of memory");
				exit(1);
			}
			sp->line_declared = -1;
			return sp;
		}
	}

	yyerror("Too many symbols");
	exit(1);
}


*** name="vbs.lex" ***


%option case-insensitive
%option yylineno


/* Named character classes: D = decimal digit, L = letter or underscore, H = hexadecimal digit. */
D [0-9]
L [a-zA-Z_]
H [a-fA-F0-9]




%{
/* NOTE(review): y_tab.h is presumably the token/YYSTYPE header generated from vbs.yacc - confirm the file name used by the build. */
#include "y_tab.h"
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "symbol.h"


/* Both helpers are defined in the user-code section at the end of this file. */
void count();
int check_type();
%}


%x COMMENT
%%


<COMMENT>[^\n_]*\n {count();BEGIN 0;}
<COMMENT>[^\n]*_[ \t\v\f]*\n {count();}=20
<COMMENT>. {count();}




    /*
     * Keyword rules. Matching is case-insensitive (see the %option line
     * above). These rules precede the identifier rule, so a word that is
     * exactly a keyword is returned as that keyword's token.
     * "empty", "false", "nothing" and "null" are returned as CONSTANT
     * rather than as tokens of their own.
     */
"abs" { count(); return (ABS);}
"and" { count(); return (AND);}
"array" { count(); return (ARRAY);}
"asc" { count(); return (ASC);}
"atn" { count(); return (ATN);}
"call" { count(); return (CALL);}
"cbool" { count(); return (CBOOL);}
"cbyte" { count(); return (CBYTE);}
"ccur" { count(); return (CCUR);}
"cdate" { count(); return (CDATE);}
"cdbl" { count(); return (CDBL);}
"chr" { count(); return (CHR);}
"cint" { count(); return (CINT);}
"class" { count(); return (CLASS);}
"clng" { count(); return (CLNG);}
"const" { count(); return (CONST);}
"cos" { count(); return (COS);}
"createobject" { count(); return (CREATEOBJECT);}
"csng" { count(); return (CSNG);}
"cstr" { count(); return (CSTR);}
"date" { count(); return (DATE);}
"dateadd" { count(); return (DATEADD);}
"datediff" { count(); return (DATEDIFF);}
"datepart" { count(); return (DATEPART);}
"dateserial" { count(); return (DATESERIAL);}
"datevalue" { count(); return (DATEVALUE);}
"day" { count(); return (DAY);}
"dim" { count(); return (DIM);}
"do" { count(); return (DO);}
"loop" { count(); return (LOOP);}
"empty" { count(); return (CONSTANT);}
"end" { count(); return (END);}
"erase" { count(); return (ERASE);}
"err" { count(); return (ERR);}
"eval" { count(); return (EVAL);}
"execute" { count(); return (EXECUTE);}
"executeglobal" { count(); return (EXECUTEGLOBAL);}
"exit" { count(); return (EXIT);}
"exp" { count(); return (EXP);}
"false" { count(); return (CONSTANT);}
"filter" { count(); return (FILTER);}
"fix" { count(); return (FIX);}
"for" { count(); return (FOR);}
"each" { count(); return (EACH);}
"next" { count(); return (NEXT);}
"formatcurrency" { count(); return (FORMATCURRENCY);}
"formatdatetime" { count(); return (FORMATDATETIME);}
"formatnumber" { count(); return (FORMATNUMBER);}
"formatpercent" { count(); return (FORMATPERCENT);}
"function" { count(); return (FUNCTION);}
"getlocale" { count(); return (GETLOCALE);}
"getobject" { count(); return (GETOBJECT);}
"getref" { count(); return (GETREF);}
"hex" { count(); return (HEX);}
"hour" { count(); return (HOUR);}
"if" { count(); return (IF);}
"then" { count(); return (THEN);}
"else" { count(); return (ELSE);}
"inputbox" { count(); return (INPUTBOX);}
"instr" { count(); return (INSTR);}
"instrrev" { count(); return (INSTRREV);}
"int" { count(); return (INT);}
"in" { count(); return (IN);}
"is" { count(); return (IS);}
"isarray" { count(); return (ISARRAY);}
"isdate" { count(); return (ISDATE);}
"isempty" { count(); return (ISEMPTY);}
"isnull" { count(); return (ISNULL);}
"isnumeric" { count(); return (ISNUMERIC);}
"isobject" { count(); return (ISOBJECT);}
"join" { count(); return (JOIN);}
"lbound" { count(); return (LBOUND);}
"lcase" { count(); return (LCASE);}
"left" { count(); return (LEFT);}
"len" { count(); return (LEN);}
"loadpicture" { count(); return (LOADPICTURE);}
"log" { count(); return (LOG);}
"ltrim" { count(); return (LTRIM);}
"mid" { count(); return (MID);}
"minute" { count(); return (MINUTE);}
"month" { count(); return (MONTH);}
"monthname" { count(); return (MONTHNAME);}
"msgbox" { count(); return (MSGBOX);}
"not" { count(); return (NOT);}
"now" { count(); return (NOW);}
"nothing" { count(); return (CONSTANT);}
"null" { count(); return (CONSTANT);}
"oct" { count(); return (OCT);}
"on[ \t\v\f]+error[ \t\v\f]+resume[ \t\v\f]+next" { count(); return =
(ONERRORRESUME);}
"on[ \t\v\f]+error[ \t\v\f]+goto[ \t\v\f]+0" { count(); return =
(ONERRORGOTO);}
"option explict" { count(); return (OPTIONEXP);}
"or" { count(); return (OR);}
"preserve" { count(); return (PRESERVE);}
"private" { count(); return (PRIVATE);}
"property" { count(); return (PROPERTY);}
"public" { count(); return (PUBLIC);}
"raise" { count(); return (RAISE);}
"randomize" { count(); return (RANDOMIZE);}
"redim" { count(); return (REDIM);}
"rem" { BEGIN COMMENT; count(); return (EOL);}
"replace" { count(); return (REPLACE);}
"rgb" { count(); return (RGB);}
"right" { count(); return (RIGHT);}
"rnd" { count(); return (RND);} =09
    /* Remaining keyword rules; "true" is returned as CONSTANT rather than as a token of its own. */
"round" { count(); return (ROUND);}
"rtrim" { count(); return (RTRIM);}
"scriptengine" { count(); return (SCRIPTENGINE);}
"scriptenginebuild" { count(); return (SCRIPTBUILD);}
"scriptengineversionmajor" { count(); return (SCRIPTMAJOR);}
"scriptengineversionminor" { count(); return (SCRIPTMINOR);}
"second" { count(); return (SECOND);}
"select" { count(); return (SELECT);}
"case" { count(); return (CASE);}
"set" { count(); return (SET);}
"setlocale" { count(); return (SETLOCALE);}
"sgn" { count(); return (SGN);}
"sin" { count(); return (SIN);}
"space" { count(); return (SPACE);}
"split" { count(); return (SPLIT);}
"sqr" { count(); return (SQR);}
"step" { count(); return (STEP);}
"strcomp" { count(); return (STRCOMP);}
"string" { count(); return (STRING);}
"strreverse" { count(); return (STRREVERSE);}
"sub" { count(); return (SUB);}
"tan" { count(); return (TAN);}
"time" { count(); return (TIME);}
"timer" { count(); return (TIMER);}
"timeserial" { count(); return (TIMESERIAL);}
"timevalue" { count(); return (TIMEVALUE);}
"to" { count(); return (TO);}
"trim" { count(); return (TRIM);}
"true" { count(); return (CONSTANT);}
"typename" { count(); return (TYPENAME);}
"ubound" { count(); return (UBOUND);}
"until" { count(); return (UNTIL);}
"ucase" { count(); return (UCASE);}
"vartype" { count(); return (VARTYPE);}
"weekday" { count(); return (WEEKDAY);}
"weekdayname" { count(); return (WEEKDAYNAME);}
"while" { count(); return (WHILE);}
"wend" { count(); return (WEND);}
"with" { count(); return (WITH);}
"year" { count(); return (YEAR);}
"xor" { count(); return (XOR);}
"eqv" { count(); return (EQV);}
"imp" { count(); return (IMP);}
"mod" { count(); return (MOD);}


{L}({L}|{D})* { count(); return(check_type()); }


0[xX]{H}+ { count(); return(CONSTANT); }
0{D}+ { count(); return(CONSTANT); }
{D}+ { count(); return(CONSTANT); }
      /* L?'(\\.|[^\\'])+' { count(); return(CONSTANT); } */


{D}+ { count(); return(CONSTANT); }
{D}*"."{D}+ { count(); return(CONSTANT); }
{D}+"."{D}* { count(); return(CONSTANT); }


L?\"(\\.|[^\\"])*\" { count(); return(STRING_LITERAL); }


"," { count(); return(','); }
":" { count(); return(':'); }
"<>" { count(); return(NE_OP); }
"><" { count(); return(NE_OP); }
"<=3D" { count(); return(LE_OP); }
"=3D<" { count(); return(LE_OP); }
">=3D" { count(); return(GE_OP); }
"=3D>" { count(); return(GE_OP); }
"=3D" { count(); return('=3D'); }
"(" { count(); return('('); }
")" { count(); return(')'); }
"." { count(); return('.'); }
"&" { count(); return('&'); }
"-" { count(); return('-'); }
"+" { count(); return('+'); }
"*" { count(); return('*'); }
"/" { count(); return('/'); }
"%" { count(); return('%'); }
"<" { count(); return('<'); }
">" { count(); return('>'); }
"^" { count(); return('^'); }
"|" { count(); return('|'); }
"?" { count(); return('?'); }
"'" {BEGIN COMMENT; count(); return(EOL);}


[ \t\v\f] { count(); }
_[ \t\v\f]*\n { count(); /* line continuation */}
"\n" { count(); return(EOL); }
.. { /* ignore bad characters */ }


%%


/*
 * yywrap - end-of-input hook used by the generated scanner.
 *
 * Always returns 1, which tells the scanner that there is no further
 * input to switch to.
 */
int yywrap(void)
{
	return 1;
}




/* Current output column of the scanner; maintained by count(). */
int column = 0;


/*
 * count - account for the text just matched, then echo it.
 *
 * Walks yytext and updates the global `column`: a newline resets it to 0,
 * a tab advances it to the next multiple of 8, and any other character
 * advances it by one. Finally the matched text is written out via ECHO.
 */
void count(void)
{
	int i;

	for (i = 0; yytext[i] != '\0'; i++) {
		if (yytext[i] == '\n') {
			column = 0;
		} else if (yytext[i] == '\t') {
			column += 8 - (column % 8);
		} else {
			column++;
		}
	}

	ECHO;
}




int check_type()
{
yylval.symp =3D symlook(yytext);
return(IDENTIFIER);
}


Post a followup to this message

Return to the comp.compilers page.
Search the comp.compilers archives again.