Hello this is my bison grammar file for a mini-programming language:
%{
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "projectbison.tab.h"
void yyerror(char const *);
extern FILE *yyin;
extern FILE *yyout;
extern int yylval;
extern int yyparse(void);
extern int n;
int errNum = 0;
int forNum = 0;
%}
%left PLUS MINUS
%left MULT DIV MOD
%nonassoc EQUAL NEQUAL LESS GREATER LEQUAL GEQUAL
%token INTEGER BOOLEAN STRING VOID
%token ID
%token AND
%token BEGINP
%token ENDP
%token EXTERN
%token COMMA
%token EQ
%token RETURN1
%token IF1 ELSE1 WHILE1 FOR1 DO1
%token LOR LAND LNOT
%token TRUE FALSE
%token EQUAL NEQUAL LESS GREATER LEQUAL GEQUAL
%token LB1 RB1
%token LCB1 RCB1
%token SEMIC
%token NEWLINE
%token PLUS MINUS
%token MULT DIV MOD
%token DIGIT STRING1
%start program
%%
/*50*/
program : external-decl program-header defin-field command-field
;
external-decl : external-decl external-prototype
|
;
external-prototype : EXTERN prototype-func NEWLINE
;
program-header : VOID ID LB1 RB1 NEWLINE
;
defin-field : defin-field definition
|
;
definition : variable-defin
| func-defin
| prototype-func
;
variable-defin : data-type var-list SEMIC newline
;
data-type : INTEGER
| BOOLEAN
| STRING
;
var-list : ID extra-ids
;
extra-ids : COMMA var-list
|
;
func-defin : func-header defin-field command-field
;
prototype-func : func-header SEMIC
;
func-header : data-type ID LB1 lists RB1 newline
;
lists: list-typ-param
|
;
list-typ-param : typical-param typical-params
;
typical-params : COMMA list-typ-param
|
;
typical-param : data-type AND ID
;
command-field : BEGINP commands newline ENDP newline
;
commands : commands newline command
|
;
command : simple-command SEMIC
| struct-command
| complex-command
;
complex-command : LCB1 newline command newline RCB1
;
struct-command : if-command
| while-command
| for-command
;
simple-command : assign
| func-call
| return-command
| null-command
;
if-command : IF1 LB1 gen-expr RB1 newline command else-clause
;
else-clause: ELSE1 newline command
;
while-command : WHILE1 LB1 gen-expr RB1 DO1 newline RCB1 command LCB1
;
for-command : FOR1 LB1 conditions RB1 newline RCB1 command LCB1
;
conditions : condition SEMIC condition SEMIC condition SEMIC
;
condition : gen-expr
|
;
assign : ID EQ gen-expr
;
func-call : ID LB1 real-params-list RB1
| ID LB1 RB1
;
real-params-list : real-param real-params
;
real-params : COMMA real-param real-params
|
;
real-param : gen-expr
;
return-command : RETURN1 gen-expr
;
null-command :
;
gen-expr : gen-terms gen-term
;
gen-terms : gen-expr LOR
|
;
gen-term : gen-factors gen-factor
;
gen-factors : gen-term LAND
|
;
gen-factor : LNOT first-gen-factor
| first-gen-factor
;
first-gen-factor : simple-expr comparison
| simple-expr
;
comparison : compare-operator simple-expr
;
compare-operator : EQUAL
| NEQUAL
| LESS
| GREATER
| LEQUAL
| GEQUAL
;
simple-expr : expresion simple-term
;
expresion : simple-expr PLUS
|simple-expr MINUS
|
;
simple-term : mul-expr simple-parag
;
mul-expr: simple-term MULT
| simple-term DIV
| simple-term MOD
|
;
simple-parag : simple-prot-oros
| MINUS simple-prot-oros
;
simple-prot-oros : ID
| constant
| func-call
| LB1 gen-expr RB1
;
constant : DIGIT
| STRING1
| TRUE
| FALSE
;
newline:NEWLINE
|
;
%%
void yyerror(char const *msg)
{
errNum++;
fprintf(stderr, "%s\n", msg);
}
int main(int argc, char **argv)
{
++argv;
--argc;
if ( argc > 0 )
{yyin= fopen( argv[0], "r" ); }
else
{yyin = stdin;
yyout = fopen ( "output", "w" );}
int a = yyparse();
if(a==0)
{printf("Done parsing\n");}
else
{printf("Yparxei lathos sti grammi: %d\n", n);}
printf("Estimated number of errors: %d\n", errNum);
return 0;
}
for a simple input like this :
void main()
integer k;
boolean l;
begin
aek=32;
end
i get the following :
$ ./MyParser.exe file2.txt
void , id ,left bracket , right bracket
integer , id ,semicolon
boolean , id ,semicolon
BEGIN PROGRAM
id ,equals , digit ,semicolon
END PROGRAM
syntax error
Yparxei lathos sti grammi: 8
Estimated number of errors: 1
And whatever change i make to the input file i get a syntax error at the end....Why do i get this and what can i do??thanks a lot in advance!here is the flex file just in case someone needs it :
%{
#include "projectbison.tab.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int n=1;
%}
%option noyywrap
digit [0-9]+
id [a-zA-Z][a-zA-Z0-9]*
%%
"(" {printf("left bracket , "); return LB1;}
")" {printf("right bracket\n"); return RB1;}
"{" {printf("left curly bracket , "); return LCB1;}
"}" {printf("right curly bracket\n"); return RCB1;}
"==" {printf("isotita ,"); return EQUAL;}
"!=" {printf("diafora ,"); return NEQUAL;}
"<" {printf("less_than ,"); return LESS;}
">" {printf("greater_than ,"); return GREATER;}
"<=" {printf("less_eq ,"); return LEQUAL;}
">=" {printf("greater_eq ,"); return GEQUAL;}
"||" {printf("lor\n"); return LOR;}
"&&" {printf("land\n"); return LAND;}
"&" {printf("and ,"); return AND;}
"!" {printf("lnot ,"); return LNOT;}
"+" {printf("plus ,"); return PLUS; }
"-" {printf("minus ,"); return MINUS;}
"*" {printf("multiply ,"); return MULT;}
"/" {printf("division ,"); return DIV;}
"%" {printf("mod ,"); return MOD;}
";" {printf("semicolon \n"); return SEMIC;}
"=" {printf("equals , "); return EQ;}
"," {printf("comma ,"); return COMMA;}
"\n" {n++; return NEWLINE;}
void {printf("void ,"); return VOID;}
return {printf("return ,"); return RETURN1;}
extern {printf("extern\n"); return EXTERN;}
integer {printf("integer ,"); return INTEGER;}
boolean {printf("boolean ,"); return BOOLEAN;}
string {printf("string ,"); return STRING;}
begin {printf("BEGIN PROGRAM\n"); return BEGINP;}
end {printf("END PROGRAM\n"); return ENDP;}
for {printf("for\n"); return FOR1;}
true {printf("true ,"); return TRUE;}
false {printf("false ,"); return FALSE;}
if {printf("if\n"); return IF1; }
else {printf("else\n"); return ELSE1; }
while {printf("while\n"); return WHILE1;}
{id} {printf("id ,"); return ID;}
{digit} {printf("digit ,"); return DIGIT;}
[a-zA-Z0-9]+ {return STRING1;}
` {/*catchcall*/ printf("Mystery character %s\n", yytext); }
<<EOF>> { static int once = 0; return once++ ? 0 : '\n'; }
%%
Your scanner pretty well guarantees that two newline characters will be sent at the end of the input: one from the newline present in the input, and another one as a result of your trapping
<<EOF>>
. However, your grammar doesn't appear to accept unexpected newlines, so the second newline will trigger a syntax error.The simplest solution would be to remove the
<<EOF>>
rule, since text files without a terminating newline are very rare, and it is entirely legitimate to consider them syntax errors. A more general solution would be to allow any number of newline characters to appear where a newline is expected, by defining something like:(Using actual characters for single-character tokens makes your grammar much more readable, and simplifies your scanner. But that's a side issue.)
You might also ask yourself whether you really need to enforce newline terminators, since your grammar seems to use
;
as a statement terminator, making the newline redundant (aside from stylistic considerations). Removing newlines from the grammar (and ignoring them, as with other whitespace, in the scanner) will also simplify your code.