Related articles |
---|
Lex/Yacc inputs for C and C pre-processor trwrb!mcgillis@sol.SPS.TRW.COM (1987-07-21) |
Re: Lex/Yacc inputs for C and C pre-processor decvax!utzoo!henry (1987-08-06) |
From: | decvax!utzoo!henry |
Date: | Thu, 6 Aug 87 05:18:33 edt |
References: | <625@ima.ISC.COM> |
This isn't quite what was asked for, but still might be of general interest.
This is a lex program which tokenizes C source, with minor limitations as
described in the leading comment. (In fact it does C++, unless you give it
the -C option that restricts it to ANSI C only.) It's probably not useful
as a compiler front end; in particular, it accepts *exactly* the legal C
strings/numbers/etc. rather than accepting more general forms and giving
error messages for violations of the detailed rules. It is, however, of
some use for things like statistical analysis of C programs.
Henry Spencer @ U of Toronto Zoology
{allegra,ihnp4,decvax,pyramid}!utzoo!henry
----------------
%{
/*
* ctokens - print tokens of a C or C++ program
*
* Full ANSI C (draft of 1 Oct 1986) except: no trigraphs; copes with
* backslash-newline stripping only inside strings; does not understand
* the context-dependent rule that makes <bletch.h> a single token
* inside a #include.
*
* Except for newlines, any white-space character is printed as "\t".
* It would be more sensible to make the white-space expression [ \t\v\f]+
* instead of just [ \t\v\f], but our old lex has problems with that.
*
* Note that this program uses one (sigh) undocumented feature of Unix lex:
* the ability to override the choice of input stream by assigning to yyin.
* Avoiding this requires reimplementing lex's input functions, which is a
* pain because getc/ungetc isn't good enough.
*
* $Log$
*/
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>
/* STREQ - string equality: compares first characters, then strcmp(); evaluates each argument twice */
#define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0)
#ifndef lint
/* RCS $Header$ keyword string; left out when lint is defined */
static char RCSid[] = "$Header$";
#endif
/* debug level; main() increments it once for each -d option */
int debug = 0;
/* program name used in messages; main() sets it from mkprogname(argv[0]) */
char *progname;
/* declared here, defined elsewhere */
extern void error(), exit();
/* with UTZOOERR an external mkprogname() is used; otherwise mkprogname(a) is just (a) */
#ifdef UTZOOERR
extern char *mkprogname();
#else
#define mkprogname(a) (a)
#endif
/* PRINTIT - print the current token text (yytext) followed by a newline */
#define PRINTIT printf("%s\n", yytext)
int cflag = 0; /* C only: set by -C in main(); the "::" and "//" rules REJECT when it is nonzero */
%}
EXP ([eE][+-]?[0-9]+)
FS [flFL]
IS ([uU][lL]?|[lL][uU]?)
%%
[_a-zA-Z][_a-zA-Z0-9]* { PRINTIT; /* identifier; no separate keyword rule appears in this listing, so keywords presumably match here too */ }
[0-9]+"."[0-9]*{EXP}?{FS}? |
"."[0-9]+{EXP}?{FS}? |
[0-9]+{EXP}{FS}? |
[1-9][0-9]*{IS}? |
0[0-7]*{IS}? |
0[xX][0-9a-fA-F]+{IS}? { PRINTIT; /* number: the three floating forms, then decimal, octal and hex integers */ }
\'([^'\\\n]|\\(['"?\\abfnrtv]|[0-7]{1,3}|[xX][0-9a-fA-F]{1,3}))+\' {
	/* character constant: echoed exactly as matched */
	PRINTIT;
}
\"([^"\\\n]|\\(['"?\\abfnrtv\n]|[0-7]{1,3}|[xX][0-9a-fA-F]{1,3}))*\" {
	/*
	 * string literal: echo the matched text on one output line,
	 * dropping every backslash-newline pair found inside it
	 */
	register char *s = yytext;

	while (*s != '\0') {
		if (s[0] == '\\' && s[1] == '\n')
			s += 2;		/* skip the backslash and the newline */
		else
			putchar(*s++);
	}
	putchar('\n');
}
[-()&*+~!/%<>^|,.=;:{}?#] |
"[" |
"]" |
"->" |
"++" |
"--" |
"<<" |
">>" |
"<=" |
">=" |
"==" |
"!=" |
"&&" |
"||" |
"##" |
"..." |
[-*/%+&^|]"=" |
"<<=" |
">>=" { PRINTIT; /* misc. tokens: operators and punctuation, echoed as matched */ }
"::" {
/* printed as a single token unless -C set cflag, in which case this rule REJECTs the match */
if (cflag) {
REJECT;
} else
PRINTIT;
}
\n printf("\\n\n");
[ \t\v\f] printf("\\t\n");
"/*" {
	/*
	 * Old-style comment.  The comment text is not echoed; the output
	 * line holds the opening delimiter, one backslash-n marker for each
	 * of the first 10 newlines inside the comment (three dots follow
	 * the 10th), and the closing delimiter.
	 *
	 * End of input is recognized both as 0 (what Unix lex input()
	 * returns) and as EOF (what some flex versions return), so an
	 * unterminated comment cannot spin forever; it is reported on
	 * stderr and the program exits with a failure status.
	 */
	register int ch;
	register int nnl = 0;	/* newlines seen inside the comment so far */

	printf("/* ");
	for (;;) {
		ch = input();
		if (ch == '*') {
			ch = input();
			if (ch == '/')
				break;
			if (ch != '\0' && ch != EOF) {
				/* not the terminator: push it back, it may be another star */
				unput(ch);
				continue;
			}
			/* end of input right after a star: handled below */
		}
		if (ch == '\n') {
			nnl++;
			if (nnl <= 10)
				printf("\\n");
			if (nnl == 10)
				printf("...");
		} else if (ch == '\0' || ch == EOF) {
			fprintf(stderr, "unterminated comment!\n");
			exit(1);
		}
	}
	printf(" */\n");
}
"//" {
	/*
	 * C++ comment, rejected when -C set cflag.  Otherwise the comment
	 * marker is printed alone and the rest of the line is discarded;
	 * the newline is pushed back so the newline rule still reports it.
	 * End of input before the newline (0 or EOF from input()) is an
	 * error and exits with a failure status.
	 */
	register int ch;

	if (cflag) {
		REJECT;
	} else {
		printf("//\n");
		while ((ch = input()) != '\n') {
			if (ch == '\0' || ch == EOF) {
				fprintf(stderr, "unterminated comment!\n");
				exit(1);
			}
		}
		unput(ch);
	}
}
. printf("%c ???\n", yytext[0]);
%%
/*
 - main - parse arguments and handle options
 *
 * Options:
 *	-C	C only, no C++ (sets cflag)
 *	-d	debugging (increments debug)
 *
 * Each remaining argument is an input file, with "-" meaning standard
 * input; with no file arguments standard input is processed.  A bad option
 * prints a usage message and exits with status 2; otherwise the program
 * exits with status 0 once every input has been processed.
 */
int
main(argc, argv)
int argc;
char *argv[];
{
	int c;
	int errflg = 0;
	FILE *in;
	struct stat statbuf;
	extern int optind;
	extern int getopt();	/* declared explicitly; no header here provides it */
	extern FILE *efopen();
	void process();

	progname = mkprogname(argv[0]);

	while ((c = getopt(argc, argv, "dC")) != EOF) {
		switch (c) {
		case 'C':	/* C only, no C++. */
			cflag = 1;
			break;
		case 'd':	/* Debugging. */
			debug++;
			break;
		case '?':
		default:
			errflg++;
			break;
		}
	}
	if (errflg) {
		fprintf(stderr, "usage: %s [-C] [file] ...\n", progname);
		exit(2);
	}

	if (optind >= argc) {
		process(stdin, "stdin");
	} else {
		for (; optind < argc; optind++) {
			if (STREQ(argv[optind], "-")) {
				process(stdin, "-");
				continue;
			}
			/*
			 * NOTE(review): efopen() and error() are external;
			 * this code only makes sense if they do not return
			 * on failure -- confirm against their definitions.
			 */
			in = efopen(argv[optind], "r");
			if (fstat(fileno(in), &statbuf) < 0)
				error("can't fstat `%s'", argv[optind]);
			if ((statbuf.st_mode & S_IFMT) == S_IFDIR)
				error("`%s' is directory!", argv[optind]);
			process(in, argv[optind]);
			(void) fclose(in);
		}
	}
	exit(0);
}
/*
* process - process input file
*
* Makes `in' the scanner's input stream by assigning it to yyin, then runs
* yylex() once and discards its return value.  `inname' is accepted but is
* not used by this function.
*/
void
process(in, inname)
FILE *in;
char *inname;
{
/* the yyin override is the undocumented lex feature mentioned in the leading comment */
yyin = in;
(void) yylex();
}
--
Return to the
comp.compilers page.
Search the
comp.compilers archives again.