Re: Better error messages with yyerror()

fjh@murlibobo.cs.mu.OZ.AU (Fergus Henderson)
30 May 1997 23:07:18 -0400

          From comp.compilers

Related articles
Better error messages with yyerror() ffdixon@InterNetivity.com (Frederick F. Dixon) (1997-05-27)
Re: Better error messages with yyerror() fjh@murlibobo.cs.mu.OZ.AU (1997-05-30)
Re: Better error messages with yyerror() alinares@sp-editores.es (Antonio Linares) (1997-05-30)
| List of all articles for this month |
From: fjh@murlibobo.cs.mu.OZ.AU (Fergus Henderson)
Newsgroups: comp.compilers
Date: 30 May 1997 23:07:18 -0400
Organization: Comp Sci, University of Melbourne
References: 97-05-297
Keywords: lex, yacc, errors

"Frederick F. Dixon" <ffdixon@InterNetivity.com> writes:


>I'm looking for a way to improve yyerror() so that my programs print out
>the full line in which an error occurred.


Here's one way of doing it: redefine input() to keep track of the
current line and column. This is not a very portable method. It
won't work for flex, for example. It might not work for different
versions of lex.


Anyway, here's the code.


/* lexical.l */


      /*
      This file provides the lexical analysis function, yylex(), which
      returns the next token in the input stream, and stores the
      appropriate semantic value corresponding to that token in yylval.
      It uses stdin as the input stream.


      It also provides yyerror(), which is used to report errors during
      parsing, and num_errors, which is a count of the number of such
      errors that have occurred.
      */


%{
#include <stdlib.h>
#include <string.h>


/* Entry points provided by this file: the scanner and the error reporter. */
int yylex(void);
void yyerror(const char *);


int num_errors; /* number of errors reported through yyerror() since the
last call to init_lexical() */


static char *error_line; /* heap buffer holding the text of the current line */
static int error_line_size; /* number of bytes allocated for error_line */
static int error_line_number; /* line number of the current line; in the code
shown here it is only ever set (to 1) by init_lexical() -- yyerror() reports
yylineno instead */
static int error_column; /* offset into error_line of next char in the
input stream */


/* Redefine input() so that the scanner reads its characters through
      get_next_char(), which buffers a whole line at a time.  That is what
      lets yyerror() print the complete offending line.
      NB. This will only work with lex, not with flex: the macro relies on
      names (yytchar, yysptr, yysbuf, U) that the lex-generated scanner
      is expected to define.


      The only change from the original definition is that getc(yyin) has
      been replaced by a call to get_next_char().


      Here is the original definition:
# define input() (((yytchar=yysptr>yysbuf?U(*--yysptr):getc(yyin))==10?(yylineno++,yytchar):yytchar)==EOF?0:yytchar)
*/


/* Declared ahead of the macro because input() expands to a call to it. */
static int get_next_char(void);


#undef input
#define input() (((yytchar=yysptr>yysbuf?U(*--yysptr):get_next_char())==10?(yylineno++,yytchar):yytchar)==EOF?0:yytchar)


%}


%%


/* INSERT YOUR LEXER HERE */


%%


/* Initialize global & static variables before starting a parse.
      This is done in a function, rather than with initializers on the
      declarations, so that multiple files can be parsed one after another.
      A line buffer left over from a previous call is released first, so
      calling this once per input file does not leak memory.
      Exits with EXIT_FAILURE if the line buffer cannot be allocated. */
void init_lexical(void) {
        /* error_line has static storage, so it is NULL on the very first
           call and free() is then a no-op. */
        free(error_line);

        error_line_size = 80;
        error_line = (char *) malloc(error_line_size);
        if (error_line == NULL) {
                perror("malloc failed");
                exit(EXIT_FAILURE);
        }
        error_line[0] = '\0';   /* empty line: first read triggers a refill */
        error_line_number = 1;
        error_column = 0;
        num_errors = 0;
}


/* Append the character c to error_line at offset error_column, then
      advance error_column.  The buffer is doubled whenever the write
      position has reached its end.
      Exits with EXIT_FAILURE if the buffer cannot be enlarged. */
static void store_next_char(int c) {
        /* Valid indices are 0 .. error_line_size - 1, so the buffer must grow
           as soon as error_column == error_line_size; testing with '>' would
           let one byte be written past the end of the allocation. */
        if (error_column >= error_line_size) {
                char *grown;

                error_line_size *= 2;
                grown = (char *) realloc(error_line, error_line_size);
                if (grown == NULL) {
                        perror("realloc failed");
                        exit(EXIT_FAILURE);
                }
                error_line = grown;
        }
        error_line[error_column++] = (char) c;
}


/* Refill error_line with the next line read from yyin.
      Tabs are expanded to spaces up to the next multiple-of-8 column.
      The newline is kept when the line ends with one, and the result is
      always NUL-terminated.  At end of input with nothing left to read,
      error_line becomes the empty string.
      On return error_column is 0, ready for reading the line back. */
static void get_next_line(void) {
        int ch;
        int pad;

        error_column = 0; /* writing begins at the start of the buffer */

        for (;;) {
                ch = getc(yyin);
                if (ch == EOF || ch == '\n') {
                        break;
                }
                if (ch != '\t') {
                        store_next_char(ch);
                        continue;
                }
                /* tab: fill with blanks up to the next tab stop */
                for (pad = 8 - (error_column % 8); pad > 0; pad--) {
                        store_next_char(' ');
                }
        }

        /* the loop ends only on EOF or '\n'; keep the newline if we saw one */
        if (ch == '\n') {
                store_next_char('\n');
        }
        store_next_char('\0'); /* terminate string */

        error_column = 0; /* reading begins at the start of the buffer */
}


/* Return the next character of the input, taken from error_line.
      When the current line is exhausted the next one is read in first.
      Returns EOF once no further input is available.
      Characters are returned as unsigned char values converted to int,
      the same convention as the getc() call this function replaces, so
      a byte such as 0xFF is never mistaken for EOF where char is signed. */
static int get_next_char(void) {
        if (error_line[error_column] == '\0') {
                get_next_line();
                /* still empty after a refill: the input is exhausted */
                if (error_line[error_column] == '\0') {
                        return EOF;
                }
        }
        return (unsigned char) error_line[error_column++];
}


/* Print error message, line number, and the contents of the offending
      line with the offending token "highlighted". */


void yyerror (const char *message) {
        int i, col;


        num_errors++;


        fprintf(stderr, "line %d: ", yylineno);
        if (!strcmp(message, "syntax error") || !strcmp(message, "parse error")) {
if (yytext[0] == '\0' && feof(yyin)) {
                        fprintf(stderr, "parse error at end-of-file\n");
} else {
                        fprintf(stderr, "parse error before token '%s'\n", yytext);
}
        } else {
                fprintf(stderr, "%s\n", message);
        }


        /* At this point, error_column will be either at the end of the offending
              token, or possibly further to the right, if look-ahead was required.
              Search back along the line to locate the start of the token. */


        col = error_column;
        while (strncmp(&error_line[col], yytext, yyleng) && col >= 0) {
col--;
        }


        /* Now print out the offending line, and highlight the offending token */


        if (col >= 0 && yyleng > 0 && yytext[0] != '\0') {
                fprintf(stderr, "%s", error_line);
                for (i = 0; i < col; i++) {
                        fprintf(stderr, "-");
                }
                for (i = 0; i < yyleng; i++) {
                        fprintf(stderr, "^");
                }
                fprintf(stderr, "\n\n");
        }
}


--
Fergus Henderson <fjh@cs.mu.oz.au>
WWW: <http://www.cs.mu.oz.au/~fjh>
PGP: finger fjh@128.250.37.3
--


Post a followup to this message

Return to the comp.compilers page.
Search the comp.compilers archives again.