Re: Lex/Yacc inputs for C and C pre-processor

decvax!utzoo!henry
Thu, 6 Aug 87 05:18:33 edt

          From comp.compilers

Related articles
Lex/Yacc inputs for C and C pre-processor trwrb!mcgillis@sol.SPS.TRW.COM (1987-07-21)
Re: Lex/Yacc inputs for C and C pre-processor decvax!utzoo!henry (1987-08-06)
| List of all articles for this month |

From: decvax!utzoo!henry
Date: Thu, 6 Aug 87 05:18:33 edt
References: <625@ima.ISC.COM>

This isn't quite what was asked for, but still might be of general interest.
This is a lex program which tokenizes C source, with minor limitations as
described in the leading comment. (In fact it does C++, unless you give it
the -C option that restricts it to ANSI C only.) It's probably not useful
as a compiler front end; in particular, it accepts *exactly* the legal C
strings/numbers/etc. rather than accepting more general forms and giving
error messages for violations of the detailed rules. It is, however, of
some use for things like statistical analysis of C programs.


Henry Spencer @ U of Toronto Zoology
{allegra,ihnp4,decvax,pyramid}!utzoo!henry


----------------
%{
/*
  * ctokens - print tokens of a C or C++ program
  *
  * Full ANSI C (draft of 1 Oct 1986) except: no trigraphs; copes with
  * backslash-newline stripping only inside strings; does not understand
  * the context-dependent rule that makes <bletch.h> a single token
  * inside a #include.
  *
  * Except for newlines, any white-space character is printed as "\t".
  * It would be more sensible to make the white-space expression [ \t\v\f]+
  * instead of just [ \t\v\f], but our old lex has problems with that.
  *
  * Note that this program uses one (sigh) undocumented feature of Unix lex:
  * the ability to override the choice of input stream by assigning to yyin.
  * Avoiding this requires reimplementing lex's input functions, which is a
  * pain because getc/ungetc isn't good enough.
  *
  * $Log$
  */


#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <string.h>


/*
 * STREQ - nonzero when strings a and b are equal.  The first characters
 * are compared before strcmp() is called.  Each argument is evaluated
 * more than once, so pass expressions without side effects.
 */
#define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0)


#ifndef lint
/* RCS identification string; left out when lint is defined. */
static char RCSid[] = "$Header$";
#endif


int debug = 0; /* incremented once for every -d option seen by main() */
char *progname; /* set by main() from argv[0]; printed in the usage message */


/*
 * error() is declared here but not defined in this file.
 * NOTE(review): presumably it comes from a local library -- confirm.
 */
extern void error(), exit();
#ifdef UTZOOERR
extern char *mkprogname();
#else
/* Without UTZOOERR, mkprogname() just yields its argument unchanged. */
#define mkprogname(a) (a)
#endif


/* PRINTIT - print the text of the current token (yytext) on a line of its own. */
#define PRINTIT printf("%s\n", yytext)


int cflag = 0; /* C only: set by -C; the "::" and "//" rules REJECT when it is nonzero. */

/*
 * Lex definitions that follow the closing %} below:
 *   EXP  exponent part of a floating constant: e or E, an optional sign,
 *        then one or more decimal digits
 *   FS   floating suffix: one of f, l, F, L
 *   IS   integer suffix: u/U optionally followed by l/L, or l/L
 *        optionally followed by u/U
 */
%}


EXP ([eE][+-]?[0-9]+)
FS [flFL]
IS ([uU][lL]?|[lL][uU]?)


%%


[_a-zA-Z][_a-zA-Z0-9]* { PRINTIT; /* identifier: a letter or underscore, then any run of letters, digits and underscores */ }


[0-9]+"."[0-9]*{EXP}?{FS}? |
"."[0-9]+{EXP}?{FS}? |
[0-9]+{EXP}{FS}? |
[1-9][0-9]*{IS}? |
0[0-7]*{IS}? |
0[xX][0-9a-fA-F]+{IS}? { PRINTIT; /* number.  The alternatives are, in order:
  *   digits, a point, optional digits, optional exponent, optional FS
  *   a point, digits, optional exponent, optional FS
  *   digits with a mandatory exponent, optional FS
  *   decimal integer (first digit 1-9), optional IS
  *   0 followed by any octal digits (so a plain 0 matches here), optional IS
  *   0x or 0X followed by hex digits, optional IS
  * The trailing | on each pattern shares this one action among them all.
  */ }


\'([^'\\\n]|\\(['"?\\abfnrtv]|[0-7]{1,3}|[xX][0-9a-fA-F]{1,3}))+\' {
/*
 * Each element between the quotes is either an ordinary character
 * (anything but a single quote, backslash or newline) or a backslash
 * escape: one of ' " ? \ a b f n r t v, one to three octal digits,
 * or x/X followed by one to three hex digits.  The + requires at
 * least one element and also admits several.
 */
PRINTIT; /* character constant */
}


\"([^"\\\n]|\\(['"?\\abfnrtv\n]|[0-7]{1,3}|[xX][0-9a-fA-F]{1,3}))*\" {
/* string -- remove backslashed newlines */
register char *p;


for (p = yytext; *p != '\0'; p++)
if (*p == '\\' && *(p+1) == '\n')
p++;
else
putchar(*p);
putchar('\n');
}


[-()&*+~!/%<>^|,.=;:{}?#] |
"[" |
"]" |
"->" |
"++" |
"--" |
"<<" |
">>" |
"<=" |
">=" |
"==" |
"!=" |
"&&" |
"||" |
"##" |
"..." |
[-*/%+&^|]"=" |
"<<=" |
">>=" { PRINTIT; /* misc. tokens: the single-character operators and
  * punctuators in the first class, the brackets, the two- and
  * three-character operators listed one per line, and the compound
  * assignments formed from an operator character followed by =.
  * Every pattern shares this action through the trailing |.
  */ }
"::" {
if (cflag) {
REJECT;
} else
PRINTIT;
}


\n {
	/* A newline in the input is reported as the two characters \n. */
	printf("\\n\n");
}
[ \t\v\f] {
	/* Any other single white-space character is reported as \t. */
	printf("\\t\n");
}


"/*" {
register int ch;
register int nnl = 0;


printf("/* ");
for (;;) {
ch = input();
if (ch == '*') {
ch = input();
if (ch == '/')
break;
else
unput(ch);
} else if (ch == '\n') {
nnl++;
if (nnl <= 10)
printf("\\n");
if (nnl == 10)
printf("...");
} else if (ch == '\0') {
fprintf(stderr, "unterminated comment!\n");
exit(0);
}
}
printf(" */\n");
}


"//" {
register int ch;


if (cflag) {
REJECT;
} else {
printf("//\n");
while ((ch = input()) != '\n')
if (ch == '\0') {
fprintf(stderr, "unterminated comment!\n");
exit(0);
}
unput(ch);
}
}


. {
	/* A character no other rule accepts: echo it, flagged with ???. */
	printf("%c ???\n", *yytext);
}


%%


/*
 - main - parse arguments and handle options
 *
 * Options: -C sets cflag (C only, no C++); -d increments debug.
 * With no file operands standard input is scanned.  Otherwise each
 * operand is scanned in turn, "-" standing for standard input; an
 * operand that turns out to be a directory is reported through error().
 *
 * Does not return: a bad option prints the usage message and exits
 * with status 2, otherwise the program exits with status 0 once all
 * inputs have been processed.
 *
 * NOTE(review): efopen() and error() are not defined in this file;
 * presumably they report the failure and do not return -- confirm.
 */
int
main(argc, argv)
int argc;
char *argv[];
{
	int c;
	int errflg = 0;
	FILE *in;
	struct stat statbuf;
	extern int optind;
	extern char *optarg;
	extern int getopt();	/* declared explicitly; no header here supplies it */
	extern FILE *efopen();
	void process();


	progname = mkprogname(argv[0]);


	while ((c = getopt(argc, argv, "dC")) != EOF) {
		switch (c) {
		case 'C': /* C only, no C++. */
			cflag = 1;
			break;
		case 'd': /* Debugging. */
			debug++;
			break;
		case '?':
		default:
			errflg++;
			break;
		}
	}
	if (errflg) {
		fprintf(stderr, "usage: %s [-C] [file] ...\n", progname);
		exit(2);
	}


	if (optind >= argc) {
		process(stdin, "stdin");
	} else {
		for (; optind < argc; optind++) {
			if (STREQ(argv[optind], "-")) {
				process(stdin, "-");
			} else {
				in = efopen(argv[optind], "r");
				if (fstat(fileno(in), &statbuf) < 0)
					error("can't fstat `%s'", argv[optind]);
				/* a directory can be opened for reading but is not scannable text */
				if ((statbuf.st_mode & S_IFMT) == S_IFDIR)
					error("`%s' is directory!", argv[optind]);
				process(in, argv[optind]);
				(void) fclose(in);
			}
		}
	}
	exit(0);
}


/*
 * process - tokenize one input stream
 *
 * in      stream to scan; it is installed as the scanner's yyin
 * inname  name of that stream; not referenced in this routine
 *
 * The value returned by yylex() is discarded.
 */
void
process(in, inname)
FILE *in;
char *inname;
{
	(void) inname;	/* unused */

	yyin = in;
	(void) yylex();
}
--


Post a followup to this message

Return to the comp.compilers page.
Search the comp.compilers archives again.