How to re-launch the scan in LEX

218 views Asked by At

I am trying to write a converter from Markdown syntax to LaTeX and vice versa, but I am facing a problem I have not been able to solve so far. Let's say we have the following text:

* item
* item
* _Italic_ item
* Item with __nested _italic_ text__

Right now, my lex program would do this to the text:

\begin{itemize}
    \item{item}
    \item{item}
    \item{_Italic_ item}
    \item{Item with __nested _italic_ text__}
\end{itemize}

And, if I run the program again on that output, I get:

\begin{itemize}
    \item{item}
    \item{item}
    \item{\textit{Italic} item}
    \item{Item with nested \textit{italic} text}}
\end{itemize}

This is the expected result, but the program is supposed to produce it in a single run.

I want to know if it is possible to tell Flex to run yylex() again on its own output. Reading the documentation, I found something about Reentrant C Scanners and Multiple Input Buffers, but I do not know whether either would solve my problem.

What is the best solution? Reentrant scanners, multiple input buffers, or something simpler?

I also thought about implementing the function yywrap to tell lex to launch the scanner again, but with no luck:

/*
 * End-of-input hook as attempted in the question.
 * On the call where first_run == 1 it points yyin at /tmp/out1 and yyout at
 * "salida", restarts the scanner on the new yyin and returns 0; on any other
 * call it returns 1. first_run is not declared in the code shown here.
 */
int yywrap(){
    if (first_run == 1){
        first_run++;
        /* NOTE(review): the previous yyin and yyout are replaced without
         * fclose(). main() opens /tmp/out1 for writing as yyout, so buffered
         * output may not have reached the file when it is reopened for
         * reading here -- confirm. */
        yyin = fopen ("/tmp/out1", "rt");
        yyout = fopen("salida", "wt");
        /* Both streams are checked only after both fopen() calls were made. */
        if (yyin == NULL) {
            printf ("El fichero no se puede abrir\n");
            exit (-1);
        }
        if (yyout == NULL) {
            printf ("El fichero no se puede abrir\n");
            exit (-1);
        }
        yyrestart(yyin);
        return 0;
    } else {
        return 1;
    }
}

Here is my code:

                     /*----- Declarations Section --------------*/ 

    %option case-insensitive
    %option debug
    %option verbose

    %{
    #include<stdio.h>
    #include<string.h>

    int from_italic_text = 0; /* Set to 1 once an italic span nested inside a bold span has been closed */
    int from_bold_text = 0; /* Set to 1 once a bold span nested inside an italic span has been closed */
    %}

    /* Primitives (named patterns). NOTE(review): "word" is not referenced by any rule shown in this file. */ 
    word    .+

    /* scstrong: strong-emphasis delimiters; scem: emphasis delimiters; list: bullet markers */
    scstrong    "__"|"**"
    scem    "_"|"*"
    list    ^"* "|"- "


    /* Start conditions used by the list rules and by the bold/italic nesting rules */
    %x IN_MARKDOWN_LIST
    %x BOLD_TEXT_NESTED_ITALIC ITALIC_TEXT
    %x BOLD_TEXT ITALIC_TEXT_NESTED_BOLD

    %%
                     /*----- Rules Section ----------------*/ 

    {list}  {BEGIN(IN_MARKDOWN_LIST);fprintf(yyout, "\\begin{itemize}\n");}

    <IN_MARKDOWN_LIST>{
        ^\n fprintf(yyout, "\\end{itemize}\n\n");BEGIN(INITIAL); /* An empty line ends the itemize environment (or there was no list at all): leave the list state */
        ^"* "|"- "  /* Drop the Markdown bullet marker; nothing is written */
        [^"*"\-\n]+ fprintf(yyout, "\t\\item{%s}\n", yytext);   /* The text of one list line becomes one \item; the match stops at a double quote, '*', '-' or newline */
        \n  yylineno++;BEGIN(IN_MARKDOWN_LIST); /* On a line break, bump the line counter and stay in IN_MARKDOWN_LIST to look for more items */
    }

    {scstrong}  {   BEGIN(BOLD_TEXT_NESTED_ITALIC); /* Start of a strong span: "__" or "**" */} 
    <BOLD_TEXT_NESTED_ITALIC>{
        "__"    fprintf(yyout, "}");BEGIN(INITIAL); // Closing "__": emit the closing brace and leave the bold state
        "_" BEGIN(ITALIC_TEXT);     // A nested italic element starts: switch to ITALIC_TEXT to process it
        [^_\n]* {   
            /* NOTE(review): from_italic_text is only ever set to 1 in the rules
               shown here and never reset, so later bold spans would also skip
               the opening \textbf -- confirm. */
            if (from_italic_text)
                fprintf(yyout, "%s", yytext); // Text following the nested italic: \textbf is already open
            else
                fprintf(yyout, "\\textbf{%s", yytext);
        }
        \n  BEGIN(INITIAL);
    }
    <ITALIC_TEXT>{
        [^_\n]* fprintf(yyout, "\\textit{%s", yytext);
        "_" fprintf(yyout, "}"); BEGIN(BOLD_TEXT_NESTED_ITALIC); from_italic_text = 1; /* Closing "_": emit the closing brace, go back to BOLD_TEXT_NESTED_ITALIC and set from_italic_text so the text that follows does not reopen \textbf */
    }

    {scem}  {   BEGIN(ITALIC_TEXT_NESTED_BOLD); /* Start of an emphasis span: "_" or "*" */} 
    <ITALIC_TEXT_NESTED_BOLD>{
        "_" fprintf(yyout, "}"); BEGIN(INITIAL); // Closing "_": emit the closing brace and leave the italic state
        "__"    BEGIN(BOLD_TEXT);   // A nested bold element starts: switch to BOLD_TEXT to process it
        [^_\n]* { 
            /* NOTE(review): from_bold_text is only ever set to 1 in the rules
               shown here and never reset -- confirm. */
            if (from_bold_text) 
                fprintf(yyout, "%s", yytext); // Text following the nested bold: \textit is already open
            else
                fprintf(yyout, "\\textit{%s", yytext);
        }
    \n  BEGIN(INITIAL);
    }
    <BOLD_TEXT>{
        [^_\n]* fprintf(yyout, "\\textbf{%s", yytext);
        "__"    fprintf(yyout, "}"); BEGIN(ITALIC_TEXT_NESTED_BOLD); from_bold_text = 1; /* Closing "__": emit the closing brace, go back to ITALIC_TEXT_NESTED_BOLD and set from_bold_text so the text that follows does not reopen \textit */
    }

    .|\n            {ECHO; /* anything not matched above is copied through unchanged */}

    %%   
                    /*----- Sección de Procedimientos --------*/ 

    /*
     * Entry point of the converter.
     *
     * Input:  the file named by argv[1] when exactly one argument is given,
     *         otherwise standard input.
     * Output: always written to /tmp/out1.
     * Exits with status -1 (after a diagnostic on stderr) when either file
     * cannot be opened; returns 0 once yylex() has finished.
     */
    int main (int argc, char *argv[]) {
        const char *out_path = "/tmp/out1";

        if (argc == 2) {
            yyin = fopen (argv[1], "rt");
            if (yyin == NULL) {
                fprintf (stderr, "El fichero %s no se puede abrir\n", argv[1]);
                exit (-1);
            }
        } else {
            yyin = stdin;
        }

        yyout = fopen (out_path, "wt");

        if (yyout == NULL) {
            /* Name the output file here: argv[1] is the input path, and it is
               NULL when the program was started without arguments. */
            fprintf (stderr, "El fichero %s no se puede abrir\n", out_path);
            exit (-1);
        }

        yylex ();

        return 0;
    }
2

There are 2 answers

0
Alejandro Alcalde On BEST ANSWER

I finally came up with a solution. I do not know if it is the best one, but it works. I implemented yywrap like this:

    /*
     * End-of-input hook that chains a second scanning pass.
     *
     * On the call where first_run == 1 it closes the streams of the first
     * pass, reopens the intermediate file /tmp/out as the new input and
     * "salida" as the new output, and returns 0 so that scanning continues on
     * the new yyin. On any other call it returns 1 to end scanning.
     * Exits with status -1 (diagnostic on stderr) if the intermediate file
     * could not be written out or either file cannot be opened.
     */
    int yywrap(){
        if (first_run != 1) {
            return 1;
        }
        first_run++;

        /* fclose() flushes the buffered output of the first pass; if it fails,
           /tmp/out is incomplete and must not be used as input. */
        if (fclose(yyout) != 0) {
            fprintf (stderr, "El fichero /tmp/out no se puede escribir\n");
            exit (-1);
        }
        fclose(yyin);

        /* Open and check the input first so that "salida" is not truncated
           when the intermediate file turns out to be unreadable. */
        yyin = fopen ("/tmp/out", "rt");
        if (yyin == NULL) {
            fprintf (stderr, "El fichero no se puede abrir\n");
            exit (-1);
        }

        yyout = fopen("salida", "wt");
        if (yyout == NULL) {
            fprintf (stderr, "El fichero no se puede abrir\n");
            exit (-1);
        }

        return 0;
    }

    /*
     * Driver sketch for the two-pass approach: the first pass reads argv[1]
     * and writes /tmp/out, the same path that yywrap() reopens as input.
     * The "..." comments mark code left out of the answer.
     */
    int main (int argc, char *argv[]) {
        yyin = fopen (argv[1], "rt");
        // ... (elided in the answer)
        yyout = fopen("/tmp/out", "wt");
        // ..... (elided in the answer)
        yylex();
    }
0
JJoao On

Instead of doing multiple traversals, you can do it in a single traversal using a start-condition stack:

%option stack 

and change BEGIN state with

yy_push_state(STATE)      // push the current state, then BEGIN STATE
yy_pop_state()            // instead of BEGIN INITIAL, "return" to the previous state

This way you can easily handle nested commands:

{list}      { /* entering a list: remember the current state (needs %option stack) */ yy_push_state(IN_MARKDOWN_LIST);fprintf(yyout, "\\begin{itemize}\n");}

<IN_MARKDOWN_LIST>{
    ^\n     { /* blank line ends the list: return to the previous state */ yy_pop_state();         fprintf(yyout, "\\end{itemize}\n\n"); }
    "__"    { /* bold opens inside the list */ yy_push_state(BOLD);    fprintf(yyout, "\\textbf{"); }
    "_"     { /* italic opens inside the list */ yy_push_state(ITALIC);  fprintf(yyout, "\\textit{"); }
    ...
}

<ITALIC>{
    "_"     { /* closing "_": pop back to the state that opened the italic span */ yy_pop_state();         fprintf(yyout, "}"); }
    ....
    .       { /* any other character is copied through */                         fprintf(yyout, "%s",yytext);}
}