Using multiple lex files and combining them into one scanner

51 views Asked by At

I have multiple lex files, namely keywords.l, identifier.l, and literals.l.

keywords.l:

%{
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/*#include "../yacc/grammer.tab.h"*/
%}

class "class"
from "from"
None "None"
continue "continue"
global "global"
pass "pass"
def "def"
if "if"
raise "raise"
del "del"
import "import"
return "return"
as "as"
elif "elif"
try "try"
assert "assert"
else "else"
while "while"
async "async"
except "except"
lambda "lambda"
with "with"
await "await"
finally "finally"
nonlocal "nonlocal"
yield "yield"
break "break"
for "for"

%%
{class} {printf("keyword ");}
{from} {printf("keyword ");}
{None} {printf("keyword ");}
{continue} {printf("keyword ");}
{global} {printf("keyword ");}
{pass} {printf("keyword ");}
{def} {printf("keyword ");}
{if} {printf("keyword ");}
{raise} {printf("keyword ");}
{del} {printf("keyword ");}
{import} {printf("keyword ");}
{return} {printf("keyword ");}
{as} {printf("keyword ");}
{elif} {printf("keyword ");}
{try} {printf("keyword ");}
{assert} {printf("keyword ");}
{else} {printf("keyword ");}
{while} {printf("keyword ");}
{async} {printf("keyword ");}
{except} {printf("keyword ");}
{lambda} {printf("keyword ");}
{with} {printf("keyword ");}
{await} {printf("keyword ");}
{finally} {printf("keyword ");}
{nonlocal} {printf("keyword ");}
{yield} {printf("keyword ");}
{break} {printf("keyword ");}
{for} {printf("keyword ");}

%%


/* End-of-input hook for the generated scanner: a nonzero result
   reports that there is no further input to switch to. */
int yywrap() {
  const int no_more_input = 1;
  return no_more_input;
}

identifier.l

%{
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include "../yacc/grammer.tab.h"

/* One row of the scanner's symbol table. */
typedef struct SymbolTable{
  char token[10];   /* token class, e.g. "ID" */
  char attr[100];   /* lexeme text (truncated to 99 characters) */
  char type[10];    /* type tag; empty string when none was supplied */
} SymbolTable;

/* Fixed-capacity table; STn is the number of rows currently in use. */
SymbolTable ST[1000];
int STn=0;

/* Print every row in use to stdout as "token : attr", one row per line. */
void printST(){
  int i;
  printf("\nPrinting Symbol Table ");
  for(i=0;i<STn;i++){
    printf("\n%s : %s",ST[i].token, ST[i].attr);
  }
}

/*
 * installID - look up identifier `a` in the symbol table, adding it if absent.
 *
 * a: identifier text (NUL-terminated).
 * t: type tag to store with a new row; NULL is accepted and stored as "".
 *
 * Returns the row index of the existing or newly added entry, or -1 when
 * `a` is NULL or the table is already full.
 *
 * All copies are bounded by the destination field size, so an over-long
 * lexeme or type tag is truncated instead of overrunning the row. A lexeme
 * longer than the attr field therefore never compares equal to its stored
 * (truncated) copy and gets a fresh row on every call.
 */
int installID(char *a, char *t){
  int i;

  if(a==NULL){
    return -1;
  }
  for(i=0;i<STn;i++){
    if((strcmp(ST[i].token, "ID")==0)&&(strcmp(ST[i].attr, a)==0)){
      return i;
    }
  }
  /* Refuse to write past the end of the fixed-size table. */
  if(STn >= (int)(sizeof ST / sizeof ST[0])){
    return -1;
  }
  snprintf(ST[STn].token, sizeof ST[STn].token, "%s", "ID");
  snprintf(ST[STn].attr, sizeof ST[STn].attr, "%s", a);
  /* The scanner rule passes no type, so tolerate NULL here. */
  snprintf(ST[STn].type, sizeof ST[STn].type, "%s", t ? t : "");
  return STn++;
}

%}

id [a-zA-Z_][a-zA-Z_0-9]*

%%

%union {
    char *lexeme;
    int value;
}

%token <lexeme> ID

{id} {
    /* Report the identifier, record it in the symbol table, and hand a
       private copy of its text to the parser together with the ID token. */
    printf("\nIdentifier : %s ", yytext);
    /* No type is known at scan time: pass an empty tag rather than NULL,
       because installID copies the tag with strcpy. */
    installID(yytext, "");
    /* yytext is reused by the scanner for the next match, so the parser
       needs its own copy of the lexeme. */
    yylval.lexeme = strdup(yytext);
    return ID;
}

%%

/* Invoked when the scanner runs out of input; answering 1 tells it
   that no additional input source follows. */
int yywrap() {
  const int finished = 1;
  return finished;
}

literals.l

%{
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include "../yacc/grammer.tab.h"

/* One row of the scanner's symbol table. */
typedef struct SymbolTable{
  char token[10];   /* token class, e.g. "LITERAL" */
  char attr[100];   /* lexeme text (truncated to 99 characters) */
  char type[10];    /* literal kind tag such as "INT" or "STR" */
}SymbolTable;

/* Fixed-capacity table; STn is the number of rows currently in use. */
SymbolTable ST[1000];
int STn=0;

/* Print every row in use to stdout as "token : attr", one row per line. */
void printST(){
  int i;
  printf("\nPrinting Symbol Table ");
  for(i=0;i<STn;i++){
    printf("\n%s : %s",ST[i].token, ST[i].attr);
  }
}

/*
 * installLit - look up literal text `b` in the symbol table, adding it if
 * absent.
 *
 * b: literal text exactly as scanned (NUL-terminated).
 * t: kind tag to store with a new row; NULL is accepted and stored as "".
 *
 * Returns the row index of the existing or newly added entry, or -1 when
 * `b` is NULL or the table is already full.
 *
 * Rows are stored with the token class "LITERAL", so the lookup must search
 * for that same class; searching for any other name never finds an existing
 * row and every occurrence of a literal would be added again.
 *
 * All copies are bounded by the destination field size, so an over-long
 * literal is truncated instead of overrunning the row. A literal longer than
 * the attr field never compares equal to its stored (truncated) copy and
 * gets a fresh row on every call.
 */
int installLit(char *b, char *t){
  int i;

  if(b==NULL){
    return -1;
  }
  for(i=0;i<STn;i++){
    if((strcmp(ST[i].token, "LITERAL")==0)&&(strcmp(ST[i].attr, b)==0)){
      return i;
    }
  }
  /* Refuse to write past the end of the fixed-size table. */
  if(STn >= (int)(sizeof ST / sizeof ST[0])){
    return -1;
  }
  snprintf(ST[STn].token, sizeof ST[STn].token, "%s", "LITERAL");
  snprintf(ST[STn].attr, sizeof ST[STn].attr, "%s", b);
  snprintf(ST[STn].type, sizeof ST[STn].type, "%s", t ? t : "");
  return STn++;
}

%}

int [-]?[0-9]+
float [-]?[0-9]+([.][0-9]+)?([Ee][+-]?[0-9]+)?
bool (True|False)
str (\"([^\"\\\\]|\\\\.)*\")|('([^'\\\\]|\\\\.)*')

%%
{int} {printf("\nInteger : %s ",yytext);
      /* value is an int, so it carries the literal's symbol-table index */
      yylval.value = installLit(yytext,"INT");
      return LITERAL;}

{str} {printf("String ");
      /* the literal's text stays in the symbol table; pass its index */
      yylval.value = installLit(yytext,"STR");
      return LITERAL;}

{float} {printf("Float ");
      /* the literal's text stays in the symbol table; pass its index */
      yylval.value = installLit(yytext,"FLOAT");
      return LITERAL;}

{bool} {printf("Boolean ");
      /* the literal's text stays in the symbol table; pass its index */
      yylval.value = installLit(yytext,"BOOL");
      return LITERAL;}

%union {
    char *lexeme;
    int value;
}

%token <value> LITERAL
%%

/* Scanner callback reached at end of input; the value 1 signals that
   scanning is complete and nothing more should be read. */
int yywrap() {
  const int at_end = 1;
  return at_end;
}

I want to combine these 3 lex files into a single file, similar to how we import packages or files in C, Python, etc.

I tried doing so

%{
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include "../yacc/grammer.tab.h";
#include "keywords.l"
#include "literals.l"
#include "identifier.l"
%}

%%

%union {
    char *lexeme;
    int value;
}

%token <lexeme> ID

%%

/* End-of-input callback: always report (with 1) that the scanner has
   no other input to continue with. */
int yywrap(){
  const int input_exhausted = 1;
  return input_exhausted;
}

But while importing, the imported files are considered as C files not a .l files.

flex tokens.l

gcc lex.yy.c
tokens.l:5:33: warning: extra tokens at end of #include directive
 #include "../yacc/grammer.tab.h";
                                 ^
In file included from tokens.l:6:0:
keywords.l:1:1: error: expected identifier or '(' before '%' token
 %{
 ^
keywords.l:8:7: error: expected '=', ',', ';', 'asm' or '__attribute__' before string constant
 class "class"
       ^
keywords.l:38:9: error: expected identifier or '(' before '{' token
 {class} {printf("keyword ");}
         ^
keywords.l:39:1: error: expected identifier or '(' before '{' token
 {from} {printf("keyword ");}
 ^
keywords.l:39:8: error: expected identifier or '(' before '{' token
 {from} {printf("keyword ");}
        ^
keywords.l:40:1: error: expected identifier or '(' before '{' token
 {None} {printf("keyword ");}
 ^
keywords.l:40:8: error: expected identifier or '(' before '{' token
 {None} {printf("keyword ");}
        ^
keywords.l:41:1: error: expected identifier or '(' before '{' token
 {continue} {printf("keyword ");}
 ^
keywords.l:41:12: error: expected identifier or '(' before '{' token
 {continue} {printf("keyword ");}
            ^
keywords.l:42:1: error: expected identifier or '(' before '{' token
 {global} {printf("keyword ");}
 ^
keywords.l:42:10: error: expected identifier or '(' before '{' token
 {global} {printf("keyword ");}
          ^
keywords.l:43:1: error: expected identifier or '(' before '{' token
 {pass} {printf("keyword ");}
 ^
keywords.l:43:8: error: expected identifier or '(' before '{' token
 {pass} {printf("keyword ");}
        ^
keywords.l:44:1: error: expected identifier or '(' before '{' token
 {def} {printf("keyword ");}
 ^
keywords.l:44:7: error: expected identifier or '(' before '{' token
 {def} {printf("keyword ");}
       ^
keywords.l:45:1: error: expected identifier or '(' before '{' token
 {if} {printf("keyword ");}
 ^
keywords.l:45:6: error: expected identifier or '(' before '{' token
 {if} {printf("keyword ");}
      ^
keywords.l:46:1: error: expected identifier or '(' before '{' token
 {raise} {printf("keyword ");}
 ^
keywords.l:46:9: error: expected identifier or '(' before '{' token
 {raise} {printf("keyword ");}
         ^
keywords.l:47:1: error: expected identifier or '(' before '{' token
 {del} {printf("keyword ");}
 ^
keywords.l:47:7: error: expected identifier or '(' before '{' token
 {del} {printf("keyword ");}
       ^
keywords.l:48:1: error: expected identifier or '(' before '{' token
 {import} {printf("keyword ");}
 ^
keywords.l:48:10: error: expected identifier or '(' before '{' token
 {import} {printf("keyword ");}
          ^
keywords.l:49:1: error: expected identifier or '(' before '{' token
 {return} {printf("keyword ");}
 ^
keywords.l:49:10: error: expected identifier or '(' before '{' token
 {return} {printf("keyword ");}
          ^
keywords.l:50:1: error: expected identifier or '(' before '{' token
 {as} {printf("keyword ");}
 ^
keywords.l:50:6: error: expected identifier or '(' before '{' token
 {as} {printf("keyword ");}
      ^
keywords.l:51:1: error: expected identifier or '(' before '{' token
 {elif} {printf("keyword ");}
 ^
keywords.l:51:8: error: expected identifier or '(' before '{' token
 {elif} {printf("keyword ");}
        ^
keywords.l:52:1: error: expected identifier or '(' before '{' token
 {try} {printf("keyword ");}
 ^
keywords.l:52:7: error: expected identifier or '(' before '{' token
 {try} {printf("keyword ");}
       ^
keywords.l:53:1: error: expected identifier or '(' before '{' token
 {assert} {printf("keyword ");}
 ^
keywords.l:53:10: error: expected identifier or '(' before '{' token
 {assert} {printf("keyword ");}
          ^
keywords.l:54:1: error: expected identifier or '(' before '{' token
 {else} {printf("keyword ");}
 ^
keywords.l:54:8: error: expected identifier or '(' before '{' token
 {else} {printf("keyword ");}
        ^
keywords.l:55:1: error: expected identifier or '(' before '{' token
 {while} {printf("keyword ");}
 ^
keywords.l:55:9: error: expected identifier or '(' before '{' token
 {while} {printf("keyword ");}
         ^
keywords.l:56:1: error: expected identifier or '(' before '{' token
 {async} {printf("keyword ");}
 ^
keywords.l:56:9: error: expected identifier or '(' before '{' token
 {async} {printf("keyword ");}
         ^
keywords.l:57:1: error: expected identifier or '(' before '{' token
 {except} {printf("keyword ");}
 ^
keywords.l:57:10: error: expected identifier or '(' before '{' token
 {except} {printf("keyword ");}
          ^
keywords.l:58:1: error: expected identifier or '(' before '{' token
 {lambda} {printf("keyword ");}
 ^
keywords.l:58:10: error: expected identifier or '(' before '{' token
 {lambda} {printf("keyword ");}
          ^
keywords.l:59:1: error: expected identifier or '(' before '{' token
 {with} {printf("keyword ");}
 ^
keywords.l:59:8: error: expected identifier or '(' before '{' token
 {with} {printf("keyword ");}
        ^
keywords.l:60:1: error: expected identifier or '(' before '{' token
 {await} {printf("keyword ");}
 ^
keywords.l:60:9: error: expected identifier or '(' before '{' token
 {await} {printf("keyword ");}
         ^
keywords.l:61:1: error: expected identifier or '(' before '{' token
 {finally} {printf("keyword ");}
 ^
keywords.l:61:11: error: expected identifier or '(' before '{' token
 {finally} {printf("keyword ");}
           ^
keywords.l:62:1: error: expected identifier or '(' before '{' token
 {nonlocal} {printf("keyword ");}
 ^
keywords.l:62:12: error: expected identifier or '(' before '{' token
 {nonlocal} {printf("keyword ");}
            ^
keywords.l:63:1: error: expected identifier or '(' before '{' token
 {yield} {printf("keyword ");}
 ^
keywords.l:63:9: error: expected identifier or '(' before '{' token
 {yield} {printf("keyword ");}
         ^
keywords.l:64:1: error: expected identifier or '(' before '{' token
 {break} {printf("keyword ");}
 ^
keywords.l:64:9: error: expected identifier or '(' before '{' token
 {break} {printf("keyword ");}
         ^
keywords.l:65:1: error: expected identifier or '(' before '{' token
 {for} {printf("keyword ");}
 ^
keywords.l:65:7: error: expected identifier or '(' before '{' token
 {for} {printf("keyword ");}
       ^
keywords.l:67:1: error: expected identifier or '(' before '%' token
 %%
 ^
In file included from tokens.l:7:0:
literals.l:1:1: error: expected identifier or '(' before '%' token
 %{
 ^
In file included from tokens.l:7:0:
literals.l:40:5: error: expected identifier or '(' before '[' token
 int [-]?[0-9]+
     ^
literals.l:43:1: error: stray '\' in program
 str (\"([^\"\\\\]|\\\\.)*\")|('([^'\\\\]|\\\\.)*')
 ^
literals.l:43:7: warning: missing terminating " character
 str (\"([^\"\\\\]|\\\\.)*\")|('([^'\\\\]|\\\\.)*')
       ^
literals.l:43:1: error: missing terminating " character
 str (\"([^\"\\\\]|\\\\.)*\")|('([^'\\\\]|\\\\.)*')
 ^
literals.l:46:7: error: expected identifier or '(' before '{' token
 {int} {printf("\nInteger : %s ",yytext);
       ^
literals.l:51:1: error: expected identifier or '(' before '{' token
 {str} {printf("String ");
 ^
literals.l:51:7: error: expected identifier or '(' before '{' token
 {str} {printf("String ");
       ^
literals.l:56:1: error: expected identifier or '(' before '{' token
 {float} {printf("Float ");
 ^
literals.l:56:9: error: expected identifier or '(' before '{' token
 {float} {printf("Float ");
         ^
literals.l:61:1: error: expected identifier or '(' before '{' token
 {bool} {printf("Boolean ");
 ^
literals.l:61:8: error: expected identifier or '(' before '{' token
 {bool} {printf("Boolean ");
        ^
literals.l:66:1: error: expected identifier or '(' before '%' token
 %union {
 ^
literals.l:71:1: error: expected identifier or '(' before '%' token
 %token <value> LITERAL
 ^
In file included from tokens.l:8:0:
identifier.l:1:1: error: expected identifier or '(' before '%' token
 %{
 ^
In file included from tokens.l:8:0:
identifier.l:40:5: error: 'a' undeclared here (not in a function)
 id [a-zA-Z_][a-zA-Z_0-9]*
     ^
identifier.l:40:7: error: 'zA' undeclared here (not in a function)
 id [a-zA-Z_][a-zA-Z_0-9]*
       ^
identifier.l:40:10: error: 'Z_' undeclared here (not in a function)
 id [a-zA-Z_][a-zA-Z_0-9]*
          ^
identifier.l:40:19: error: 'Z_0' undeclared here (not in a function)
 id [a-zA-Z_][a-zA-Z_0-9]*
                   ^
identifier.l:40:25: error: expected '=', ',', ';', 'asm' or '__attribute__' before '*' token
 id [a-zA-Z_][a-zA-Z_0-9]*
                         ^
identifier.l:49:1: error: expected identifier or '(' before '%' token
 %token <lexeme> ID
 ^
identifier.l:51:6: error: expected identifier or '(' before '{' token
 {id} {
      ^
identifier.l:58:1: error: expected identifier or '(' before '%' token
 %%
 ^
tokens.l: In function 'yylex':
tokens.l:18:1: error: expected expression before '<' token
 %token <lexeme> ID

Kindly help

1

There is 1 answer

2
John Bollinger On

Lex and Flex do not preprocess their inputs with the C preprocessor, nor do they provide any mechanism analogous to the preprocessor's #include directive or the (quite different) behavior of a Python import statement. Lex and its work-alikes require the entire scanner definition to be presented in a single input file.

I tried doing so

%{
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include "../yacc/grammer.tab.h";
#include "keywords.l"
#include "literals.l"
#include "identifier.l"
%}

But while importing, the imported files are considered as C files not a .l files.

C does not have a Pythonesque import feature. The C preprocessor has external-file inclusion capability, which has considerably different semantics.

Anyway, Lex is a C code generator. Anything inside a %{ ... %} block is carried through verbatim to the output C source file. Thus, your

#include "keywords.l"

etc. are processed during C compilation (not Lex scanner generation), according to the semantics of the C preprocessor. As a result, yes, the C compiler attempts to interpret the contents of those files as C source code.

Kindly help

Combine all your .l files into one. And as you do so, pay attention to the fact that the entries in the rules (middle) section of the input are order-sensitive. This will make a difference for you, because all of your keywords also satisfy the rule for an id.

Also be sure to remove the duplicate definitions of yywrap(). The overall scanner can have only one.