Differentiating words in trie

116 views Asked by At

I have a word "all" in my trie and a word "alter" but "alt" is not a word in the trie. But when I check for "alt" it still returns true because is_word is true as "all" was a word. How am is supposed to work this error.

//Here's the code
typedef struct node{
  bool is_word;    
  struct node *children[27];
} node;

unsigned int wsize=0;
node * root;

bool check(const char* word)
{
    // TODO
    node *chrawler=root;   
    for(int i=0;i<strlen(word)-1;i++)
    {
        int t;
        if(word[i]>=65&&word[i]<=90)
        {        
            t=word[i]-'A';
        }
        else if(isalpha(word[i]))
            t=word[i]-'a';
        else
            t=26;

        if(chrawler->children[t]==NULL)
            return false;
        else
            chrawler=chrawler->children[t];
    }

    if(chrawler->is_word)
        return true;
    return false;    

}

// Load function
bool load(const char* dictionary)
{
    // TODO

    FILE *inptr=fopen(dictionary,"r");
    if(inptr==NULL)
    {
        return false;
    }

    node *new_node=malloc(sizeof(node));
    root=new_node;

    char * word=malloc((LENGTH+1)*sizeof(char));
    int index=0;
    for(int c=fgetc(inptr);c!=EOF;c=fgetc(inptr))
    {
        char ch=c;
        if(ch=='\n')
        { 
            word[index]='\0';
            index=0;
            node *chrawler=root;
            for(int i=1;i<strlen(word);i++)
            {
                    int t;
                    if(isalpha(word[i-1]))
                        t=word[i-1]-'a';
                    else
                        t=26;
                    if(chrawler->children[t]==NULL)
                    {
                        node *new_node=malloc(sizeof(node));
                        chrawler->children[t]=new_node;

                        chrawler=chrawler->children[t];
                    }
                    else
                        chrawler=chrawler->children[t];
            }
            chrawler->is_word=1;
            wsize++;

        }
        else
        {
            word[index]=ch;
            index++;
        }

    }

    return true;
}
1

There are 1 answers

2
Jonathan Leffler On BEST ANSWER

You need to ensure that all the pointers in a new node are null, as well as setting the is_word value to false. This is, perhaps, most easily done by using calloc() to allocate the space. Creating a function to allocate and error check the allocation of a node makes it easier. Similarly, you have two blocks of code mapping characters to trie indexes. You should use functions — even small ones — more generously.

The character-by-character input for a line of data is not really necessary, either; it would be better to use fgets() to read lines.

Adding these and sundry other changes (for example, local array word instead of dynamically allocated array — which wasn't freed; closing the file when finished; etc.) gives an MCVE (Minimal, Complete, Verifiable Example) like this:

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum { LENGTH = 256 };

// Here's the code
typedef struct node
{
    bool is_word;
    struct node *children[27];
} node;

unsigned int wsize = 0;
node *root;

static inline int map_char(unsigned char c)
{
    int t;
    if (isalpha(c))
        t = tolower(c) - 'a';
    else
        t = 26;
    return t;
}

static inline node *alloc_node(void)
{
    node *new_node = calloc(1, sizeof(node));
    if (new_node == 0)
    {
        fprintf(stderr, "Memory allocation failed in %s\n", __func__);
        exit(1);
    }
    return new_node;
}

static bool check(const char *word)
{
    node *chrawler = root;
    int len = strlen(word);
    for (int i = 0; i < len; i++)
    {
        int t = map_char(word[i]);
        if (chrawler->children[t] == NULL)
            return false;
        else
            chrawler = chrawler->children[t];
    }

    return chrawler->is_word;
}

// Load function
static bool load(const char *dictionary)
{
    FILE *inptr = fopen(dictionary, "r");
    if (inptr == NULL)
    {
        fprintf(stderr, "Failed to open file '%s' for reading\n", dictionary);
        return false;
    }

    root = alloc_node();

    char word[LENGTH];
    while (fgets(word, sizeof(word), inptr) != 0)
    {
        word[strcspn(word, "\n")] = '\0';
        printf("[%s]\n", word);
        node *chrawler = root;
        int len = strlen(word);
        for (int i = 0; i < len; i++)
        {
            int t = map_char(word[i]);
            //printf("t = %d (%c)\n", t, word[i]);
            if (chrawler->children[t] == NULL)
                chrawler->children[t] = alloc_node();
            chrawler = chrawler->children[t];
        }
        chrawler->is_word = 1;
        wsize++;
    }
    printf("%d words read from %s\n", wsize, dictionary);
    fclose(inptr);

    return true;
}

int main(void)
{
    const char *wordfile = "words.txt";
    if (load(wordfile))
    {
        char line[4096];
        while (fgets(line, sizeof(line), stdin) != 0)
        {
            line[strcspn(line, "\n")] = '\0';
            if (check(line))
                printf("[%s] is a word\n", line);
            else
                printf("[%s] is unknown\n", line);
        }
    }
    return 0;
}

There are other changes that should be made. For example, the wsize variable should be made non-global; it isn't really used outside the load() function. It's easily arguable that the root node should not be global either; the load() function should return the root node, and the check() function should be passed the root node. In general, global variables should be avoided when possible, and it is usually possible.

Given a file words.txt containing:

abelone
abyssinia
archimedes
brachiosaurus
triceratops
all
alter
asparagus
watchamacallit
a
abracadabra
abyss
ant

the output from a run of the program is:

[abelone]
[abyssinia]
[archimedes]
[brachiosaurus]
[triceratops]
[all]
[alter]
[asparagus]
[watchamacallit]
[a]
[abracadabra]
[abyss]
[ant]
13 words read from words.txt
a
[a] is a word
ab
[ab] is unknown
al
[al] is unknown
all
[all] is a word
alt
[alt] is unknown
alte
[alte] is unknown
alter
[alter] is a word
triceratops
[triceratops] is a word
brachiosaurus
[brachiosaurus] is a word
abys
[abys] is unknown
abbey
[abbey] is unknown
abyss
[abyss] is a word
ant
[ant] is a word
a
[a] is a word
archimedes
[archimedes] is a word