Sorry for the possibly long and dumb question, but I'm really stumped. I'm doing a task for the university. Its meaning is very simple. You need to implement a function that will change the "bad" phrases to "good". Input to the function is a text and a double array with good and bad words (in the left column the words that need to be replaced, and on the right column the words to be inserted instead of the bad words). The dictionary itself with bad and good words can have any size, but at the end there will always be a pair of NULL - NULL.
It is important to note that the program should not do anything to change the already replaced phrases. The line "termination specialist" contains the word "specialist", so the program must check to see if there are any words in the text that have already been replaced, so that the line "termination specialist" does not change into the line "termination person with certified level of knowledge". The check happens here.
The program must also make sure that the entered dictionary of good and bad words is correct, which means that a bad word cannot be the beginning of another bad word. This check happens in the function replaceInvalidity
Text and dictionary with words do not have to be meaningful. In the context of this task, it is simply a set of symbols, i.e. letters, numbers, symbols
I wrote a program that passes most of the tests, but for some reason at one of the tests it loops and exceeds the time limit (2 seconds). As a result, I get 0 points for the whole task.
I tried checking the memory with Valgrind, but it did not show any errors.
Full code:
#ifndef __PROGTEST__
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <assert.h>
#endif /* __PROGTEST__ */
int replaceInvalidity(const char * (*replace)[2])
{
int size = 0;
for (int i = 0; replace[i][0] != NULL; i++)
size++;
for (int i = 0; i < size - 1; i++)
{
for (int j = i + 1; j < size; j++)
{
if (strlen(replace[i][0]) >= strlen(replace[j][0]))
{
if (strstr(replace[i][0], replace[j][0]) == replace[i][0])
return 1;
}
else
{
if (strstr(replace[j][0], replace[i][0]) == replace[j][0])
return 1;
}
}
}
return 0;
}
char *newSpeak(const char *text, const char * (*replace)[2])
{
if (replaceInvalidity(replace))
{
return NULL;
}
int i = 0, k = 0, flag= 0, Nlen = 0, Olen = 0, length = 0;
char *result = (char *)malloc(sizeof(char));
length = strlen(text);
for (i = 0, k = 0; i < length; i++, k++)
{
flag = 0;
for (int j = 0; replace[j][1] != NULL; j++)
{
if (strstr(&text[i], replace[j][1]) == &text[i])
{
Nlen = strlen(replace[j][1]);
result = (char *)realloc(result, ((k + Nlen + 1) * sizeof(char)));
for (int l = k; l < k + Nlen; l++)
result[l] = replace[j][1][l-k];
i += Nlen - 1;
k += Nlen - 1;
flag = 1;
break;
}
}
if (flag) continue;
for (int j = 0; replace[j][0] != NULL; j++)
{
if (strstr(&text[i], replace[j][0]) == &text[i])
{
Olen = strlen(replace[j][0]);
Nlen = strlen(replace[j][1]);
result = (char *)realloc(result, ((k + Nlen + 1) * sizeof(char)));
for (int l = k; l < k + Nlen; l++)
result[l] = replace[j][1][l-k];
i += Olen - 1;
k += Nlen - 1;
flag = 1;
break;
}
}
if (flag) continue;
result = (char *)realloc(result, (k + 2) * sizeof(char));
result[k] = text[i];
}
result[k] = '\0';
return result;
}
#ifndef __PROGTEST__
int main(int argc, char * argv[])
{
char *res;
const char * d1[][2] = {
{ "murderer", "termination specialist" },
{ "failure", "non-traditional success" },
{ "specialist", "person with certified level of knowledge" },
{ "dumb", "cerebrally challenged" },
{ "teacher", "voluntary knowledge conveyor" },
{ "evil", "nicenest deprived" },
{ "incorrect answer", "alternative answer" },
{ "student", "client" },
{ NULL, NULL }
};
const char * d2[][2] = {
{ "fail", "suboptimal result" },
{ "failure", "non-traditional success" },
{ NULL, NULL }
};
res = newSpeak("dumb termination specialist.", d1);
assert(!strcmp(res, "cerebrally challenged termination specialist."));
free(res);
res = newSpeak("The student answered an incorrect answer.", d1);
assert(!strcmp(res, "The client answered an alternative answer."));
free(res);
res = newSpeak("He was dumb, his failure was expected.", d1);
assert(!strcmp(res, "He was cerebrally challenged, his non-traditional success was expected."));
free(res);
res = newSpeak("The evil teacher became a murderer.", d1);
assert(!strcmp(res, "The nicenest deprived voluntary knowledge conveyor became a termination specialist."));
free(res);
res = newSpeak("Devil's advocate.", d1);
assert(!strcmp(res, "Dnicenest deprived's advocate."));
free(res);
res = newSpeak("Hello.", d2);
assert(!res);
return EXIT_SUCCESS;
}
#endif /* __PROGTEST__ */
I was not able to reproduce the issue after adding the missing includes and combining your 3 snippets. As you phrase the question as a performance issue, I reworked your code to reduce run-time from 0.476 s to 0.275 s per 1e6 calls.
Instead of calling
strstr()per character of your inputtextfor a given bad word, only call it once + number of times a given bad word is found intext. Proceed processingtextafter the replacement. This should make make a significant difference for large input.Instead of using loops move data in your string use
memmove()which is highly optimized.Instead of calling
realloc()for each replacement when the size of the replacement changes.Removed
replaceInvalidity()as I think you were protecting yourself of the replacement string being substring of the input to avoid an infinite loop. The implementation below avoid that by only looking at replacements after the fact.result = realloc(result, ...)will leak memory on failure so handle the error by free'ing the original string and return NULL on error.strdup()error is handled similarly.Problem description does not match your test case, so I revised the test case. If this is not correct please clarify expected behavior (only replace at most 1 bad word?).