Bison: syntax error at the end of parsing

855 views Asked by At

Hello, this is my Bison grammar file for a mini programming language:

    %{
 /* Prologue: C declarations used by the parser code and by main() below. */
 #include <stdlib.h>
 #include <stdio.h>
 #include <math.h>
 #include "projectbison.tab.h"

 /* Error-reporting function; its definition follows the grammar rules. */
 void yyerror(char const *);


 /* Input/output streams defined elsewhere (presumably by the flex scanner); main() assigns both. */
 extern FILE *yyin;
 extern FILE *yyout;

 /* NOTE(review): projectbison.tab.h normally declares yylval already - confirm this extra declaration is needed. */
 extern int yylval;
 extern int yyparse(void);
 /* Line counter defined in the scanner file (int n=1;) and incremented there for every "\n". */
 extern int n;
 /* Number of times yyerror() has been called. */
 int errNum = 0;
 /* Not referenced anywhere else in the code shown here. */
 int forNum = 0;
%}

/* Operator associativity/precedence declarations.
   NOTE(review): Bison gives operators declared on later lines higher
   precedence, so as ordered the comparison operators rank above
   MULT/DIV/MOD - confirm this is intended. The rules below also layer the
   expression grammar explicitly. */
%left PLUS MINUS
%left MULT DIV MOD
%nonassoc EQUAL NEQUAL LESS GREATER LEQUAL GEQUAL


/* Token declarations. The operator tokens listed above are declared a
   second time here. */
%token INTEGER BOOLEAN STRING VOID 
%token ID
%token AND 
%token BEGINP 
%token ENDP
%token EXTERN 
%token COMMA
%token EQ
%token RETURN1
%token IF1 ELSE1 WHILE1 FOR1 DO1 
%token LOR LAND LNOT
%token TRUE FALSE
%token EQUAL NEQUAL LESS GREATER LEQUAL GEQUAL
%token LB1 RB1             
%token LCB1 RCB1
%token SEMIC
%token NEWLINE
%token PLUS MINUS 
%token MULT DIV MOD
%token DIGIT STRING1
/* Parsing starts at the `program` rule. */
%start program

%%


/*50*/
/* A program is: zero or more extern prototypes, the program header,
   zero or more definitions, and finally the main command field. */
program : external-decl program-header defin-field command-field 
        ;
/* Possibly empty, left-recursive list of extern prototypes. */
external-decl : external-decl external-prototype 
              | 
              ;
/* Here the NEWLINE token itself is required, unlike the optional
   `newline` nonterminal used in most other rules. */
external-prototype : EXTERN prototype-func NEWLINE             
                   ;
/* Header of the form `void name()` followed by a mandatory NEWLINE. */
program-header : VOID ID LB1 RB1 NEWLINE    

              ;
/* Possibly empty, left-recursive list of definitions. */
defin-field : defin-field definition
             | 
             ;
definition : variable-defin
           | func-defin
           | prototype-func
           ;
/* Variable definition: type, identifier list, SEMIC, optional newline. */
variable-defin : data-type var-list SEMIC newline 

             ;
data-type : INTEGER               
          | BOOLEAN
          | STRING
          ;
/* One or more identifiers separated by COMMA (right-recursive via extra-ids). */
var-list : ID extra-ids 
         ;
extra-ids : COMMA var-list
      | 
      ;
/* Function definition: header, local definitions, then its command field. */
func-defin : func-header defin-field  command-field
            ;
/* Prototype: a function header followed by SEMIC. Because func-header
   ends with an optional newline, the SEMIC comes after that newline. */
prototype-func : func-header SEMIC   

              ;
func-header : data-type ID LB1 lists RB1 newline 
            ;
/* Optional formal parameter list. */
lists: list-typ-param
    | 
    ;
/* One or more formal parameters separated by COMMA. */
list-typ-param : typical-param typical-params 
             ;
typical-params : COMMA list-typ-param  
        | 
        ;
/* A formal parameter is written `type AND name`; the scanner returns AND
   for "&". */
typical-param : data-type AND ID
        ;
/* Command field: BEGINP, a list of commands, optional newline, ENDP,
   optional newline. Nothing is accepted after that final optional
   newline. */
command-field : BEGINP  commands newline ENDP newline
    ;
/* Possibly empty, left-recursive list; every command is preceded by an
   optional newline. */
commands : commands newline command 
    | 
    ;
/* Only simple commands are terminated by SEMIC. */
command : simple-command SEMIC 
        | struct-command
        | complex-command

       ;
/* Braces enclose exactly one command here, not a list of commands. */
complex-command : LCB1 newline command newline RCB1  
                ;
struct-command  : if-command
                | while-command
                | for-command
                ;
simple-command : assign
              | func-call
              | return-command
              | null-command
              ;
/* The else-clause has no empty alternative, so every IF1 needs an ELSE1. */
if-command : IF1 LB1 gen-expr RB1 newline command else-clause
          ;
else-clause: ELSE1 newline command   
            ;
/* NOTE(review): the body is written RCB1 command LCB1, i.e. "}" before
   "{" according to the scanner's token mapping; complex-command uses the
   opposite order - confirm which one is intended. */
while-command : WHILE1 LB1 gen-expr RB1 DO1 newline RCB1 command LCB1 

             ;
/* NOTE(review): same RCB1 ... LCB1 order as while-command - confirm. */
for-command : FOR1 LB1 conditions RB1 newline RCB1 command LCB1  

           ;
/* Three optional conditions, each followed by SEMIC (including the third). */
conditions : condition SEMIC condition SEMIC condition SEMIC
       ;
condition : gen-expr
      | 
      ;
assign : ID EQ gen-expr   
       ;
/* Function call with or without actual parameters. */
func-call  : ID LB1 real-params-list RB1
          | ID LB1 RB1
          ;
/* One or more actual parameters separated by COMMA. */
real-params-list : real-param real-params
                 ;
real-params : COMMA real-param real-params 
        | 
        ;
real-param : gen-expr
         ;
return-command : RETURN1 gen-expr
              ;
/* Empty command: with the `command` rule above, a lone SEMIC is a valid command. */
null-command : 
            ;
/* General expression: gen-term operands joined by LOR. The helper
   gen-terms is either empty or a complete gen-expr followed by LOR. */
gen-expr : gen-terms gen-term
         ;
gen-terms : gen-expr LOR    
      | 
          ;

/* gen-factor operands joined by LAND, built the same way as gen-expr. */
gen-term : gen-factors gen-factor
         ;
gen-factors : gen-term LAND  
        | 
        ;
/* Optional logical negation. */
gen-factor : LNOT first-gen-factor  
              |  first-gen-factor   
              ;
/* A simple expression, optionally followed by a single comparison. */
first-gen-factor : simple-expr comparison
                 | simple-expr
                 ;
comparison : compare-operator simple-expr
            ;
compare-operator : EQUAL         
                 | NEQUAL
                 | LESS
                 | GREATER
                 | LEQUAL
                 | GEQUAL
                 ;
/* simple-term operands joined by PLUS or MINUS. The helper expresion is
   either empty or a complete simple-expr followed by the operator. */
simple-expr : expresion simple-term
    ;
expresion : simple-expr PLUS    
    |simple-expr MINUS
    | 
    ;
/* simple-parag operands joined by MULT, DIV or MOD, built the same way. */
simple-term : mul-expr simple-parag
            ;
mul-expr: simple-term MULT
    | simple-term DIV
    | simple-term MOD
    | 
    ;
/* Operand with an optional leading MINUS. */
simple-parag : simple-prot-oros
             | MINUS simple-prot-oros
             ;
/* Primary operand: identifier, constant, function call, or a
   parenthesised general expression. */
simple-prot-oros : ID
                 | constant
                 | func-call
                 | LB1 gen-expr RB1
                 ;
constant : DIGIT
         | STRING1
         | TRUE
         | FALSE
         ;
/* Optional line break: at most one NEWLINE token. */
newline:NEWLINE
    | 
    ;



%%

/*
 * Error-reporting function declared in the prologue: writes the message
 * followed by a newline to stderr and bumps the global error counter.
 */
void yyerror(char const *msg) 
{
    fputs(msg, stderr);
    fputc('\n', stderr);
    ++errNum;
}
/*
 * Entry point: parses the file named by the first command-line argument,
 * or standard input when no argument is given, then prints the outcome
 * and the number of errors reported through yyerror().
 *
 * Returns EXIT_FAILURE when the named input file cannot be opened, and 0
 * otherwise (including when the parse itself failed, as before).
 */
int main(int argc, char **argv) 
{
    if (argc > 1) {
        yyin = fopen(argv[1], "r");
        if (yyin == NULL) {
            /* Report the failure instead of handing the scanner a NULL stream. */
            perror(argv[1]);
            return EXIT_FAILURE;
        }
    } else {
        yyin = stdin;
        /* yyout is redirected to the file "output" only when reading stdin. */
        yyout = fopen("output", "w");
        if (yyout == NULL) {
            /* Could not create the file: warn and keep writing to stdout. */
            perror("output");
            yyout = stdout;
        }
    }

    int a = yyparse();
    if (a == 0) {
        printf("Done parsing\n");
    } else {
        /* n is the scanner's line counter at the point where parsing stopped. */
        printf("Yparxei lathos sti grammi: %d\n", n);
    }

    printf("Estimated number of errors: %d\n", errNum);

    return 0;
}

For a simple input like this:

void main()
integer k;
boolean l;
begin
aek=32;
end

I get the following:

$ ./MyParser.exe file2.txt
void , id ,left bracket , right bracket
integer , id ,semicolon
boolean , id ,semicolon
BEGIN PROGRAM
id ,equals , digit ,semicolon
END PROGRAM
syntax error
Yparxei lathos sti grammi: 8
Estimated number of errors: 1

Whatever change I make to the input file, I get a syntax error at the end. Why do I get this, and what can I do about it? Thanks a lot in advance! Here is the flex file, in case someone needs it:

%{
/* Scanner for the grammar in projectbison: token codes come from the
   Bison-generated header. */
#include "projectbison.tab.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Current line number; starts at 1 and is incremented by the "\n" rule. */
int n=1;
%}
%option noyywrap 

/* Named patterns used by the rules below. */
digit   [0-9]+
id      [a-zA-Z][a-zA-Z0-9]*



%%

    /* Most rules print a trace of the token they matched and return its code. */
    /* No rule matches spaces or tabs, so they fall through to flex's default rule. */
"(" {printf("left bracket , "); return LB1;}
")" {printf("right bracket\n"); return RB1;}
"{" {printf("left curly bracket , "); return LCB1;}
"}" {printf("right curly bracket\n"); return RCB1;}
"==" {printf("isotita ,"); return EQUAL;}
"!=" {printf("diafora ,"); return NEQUAL;}
"<" {printf("less_than ,"); return LESS;}
">" {printf("greater_than ,"); return GREATER;}
"<=" {printf("less_eq ,"); return LEQUAL;}
">=" {printf("greater_eq ,"); return GEQUAL;}
"||" {printf("lor\n"); return LOR;}
"&&" {printf("land\n"); return LAND;}
"&" {printf("and ,"); return AND;}
"!" {printf("lnot ,"); return LNOT;}
"+" {printf("plus ,"); return PLUS; }
"-" {printf("minus ,"); return MINUS;}
"*" {printf("multiply ,"); return MULT;}
"/" {printf("division ,"); return DIV;}
"%" {printf("mod ,"); return MOD;}
";" {printf("semicolon \n"); return SEMIC;}
"=" {printf("equals , "); return EQ;}
"," {printf("comma ,"); return COMMA;}
"\n" {/* count the line, then hand the line break to the parser */ n++; return NEWLINE;}
void {printf("void ,"); return VOID;}
return {printf("return ,"); return RETURN1;}
extern {printf("extern\n"); return EXTERN;}
integer {printf("integer ,"); return INTEGER;}
boolean {printf("boolean ,"); return BOOLEAN;}
string {printf("string ,"); return STRING;}
begin {printf("BEGIN PROGRAM\n"); return BEGINP;}
end {printf("END PROGRAM\n"); return ENDP;}
for {printf("for\n"); return FOR1;}
true {printf("true ,"); return TRUE;}
false {printf("false ,"); return FALSE;}
if {printf("if\n"); return IF1; }
else {printf("else\n"); return ELSE1; }
while {printf("while\n"); return WHILE1;}
{id} {printf("id ,"); return ID;}
{digit}  {printf("digit ,"); return DIGIT;}
[a-zA-Z0-9]+  {/* NOTE(review): {id} and {digit} come first and win equal-length matches, so this presumably fires only for digit-led text containing letters - confirm */ return STRING1;}
` {/* intended as a catch-all. NOTE(review): the pattern as written is a literal backtick, so only that character reaches this rule - confirm */ printf("Mystery character %s\n", yytext); }
<<EOF>> { /* first end-of-file: return the character code '\n' (not the NEWLINE token); afterwards return 0 */ static int once = 0; return once++ ? 0 : '\n'; }

%%
1

There is 1 answer

0
rici On BEST ANSWER

Your scanner pretty well guarantees that two newline characters will be sent at the end of the input: one from the newline present in the input, and another one as a result of your trapping <<EOF>>. However, your grammar doesn't appear to accept unexpected newlines, so the second newline will trigger a syntax error.

The simplest solution would be to remove the <<EOF>> rule, since text files without a terminating newline are very rare, and it is entirely legitimate to consider them syntax errors. A more general solution would be to allow any number of newline characters to appear where a newline is expected, by defining something like:

newlines: '\n' | newlines '\n';

(Using actual characters for single-character tokens makes your grammar much more readable, and simplifies your scanner. But that's a side issue.)

You might also ask yourself whether you really need to enforce newline terminators, since your grammar seems to use ; as a statement terminator, making the newline redundant (aside from stylistic considerations). Removing newlines from the grammar (and ignoring them, as with other whitespace, in the scanner) will also simplify your code.