Linked Questions

Popular Questions

how to create an AST (abstract syntax tree)

Asked by At

I am a student, and my actual project is to reproduce a shell like tcsh, I have a working basic shell but now i need to add redirections (‘>’, ‘<’, ‘»’, ‘«’), pipes ('|') and semicolons (';') for my shell. From my research I saw that an AST corresponded well to my expectations, I could execute command respecting priorities. Or if you know something easier to implement I'm interested.

I made a lexer who take a command line in parameter and cut it into tokens. for example :"echo ls >> cc | cat -e ; ls > cc; cat < cc" will become, (i put all the tokens in an array): {"echo ls", ">>", "cc", "|", "cat -e ", ";", "ls", ">", "cc", ";", "cat", "<", "cc"}. Now, from this array, i want to make an AST, but after my researches, i still not know how to make one.

Here is my program:

typedef enum{
    TOKEN_REDIR_L,
    TOKEN_DQ,
    TOKEN_REDIR_R,
    TOKEN_DB_REDIR_L,
    TOKEN_DB_REDIR_R,
    TOKEN_PIPE,
    TOKEN_SEMI,
    TOKEN_CMD
} e_token_type;

typedef struct s_token_name{
    char *name;
    e_token_type type;
    unsigned int size; 
} token;

typedef struct s_token_lexer{
    int size;
    token list[2048];
} token_lexer;

static const token tok_name[] =
{
    {">>", TOKEN_DB_REDIR_L, 2},
    {"<<", TOKEN_DB_REDIR_R, 2},
    {">", TOKEN_REDIR_L, 1},
    {"<", TOKEN_DB_REDIR_R, 1},
    {"|", TOKEN_PIPE, 1},
    {";", TOKEN_SEMI, 1},
    {NULL, 1, 0}
};

token search_token_type(const char* str)
{
    const token *tmp = tok_name;
    token not_found = {0, 0, 0};

    while (tmp->name){
        if (my_strncmp(str, tmp->name, tmp->size)){
            return (*tmp);
        }
        ++tmp;
    }
    return (not_found);
}

void add_to_lexer
(token_lexer *li, char const *str, int text_size, e_token_type type)
{
    token tok;
    int i = 0;
    while (*str == ' ' && *str != '\0'){
        str++; i++;
    }
    tok.name = my_strncpy(tok.name, str, text_size - i);
    tok.size = text_size;
    tok.type = type;
    li->list[li->size] = tok;
    li->size++;
}

token_lexer *make_token_list(char const *str)
{
    char const *prev = str;
    int size = 0;
    token curr;
    token_lexer *li = malloc(sizeof(token_lexer));
    li->size = 0;

    for (; *str; str++){
        curr = search_token_type(str);
        if (curr.name != 0 && !my_strcmp(prev, str)){
            str += curr.size;
            add_to_lexer(li, prev, str - prev - curr.size, TOKEN_CMD);
            prev = str;
        }if (curr.name != 0){
            li->list[li->size++] = curr;
            str += curr.size;
        }
    }add_to_lexer(li, prev, str - prev, TOKEN_CMD);
    return (li);
}

int main(int ac, char **av)
{
    char *str = "echo ls >> cc | cat -e  cc ; ls > cc; cat < cc";
    token_lexer *li = make_token_list(str);
    for (int i = 0; i < li->size; i++){
        printf("%s\n", li->list[i]);
    }
}

(PS: sorry if my english is bad)

Related Questions