How to convert CSV decimals into C float values

171 views Asked by At

I need to convert a CSV file of decimal numbers into a C float variable. How can I do that?

data.csv

0.1,0.2,0.3,0.4,0.5
1.1,1.2,1.3,1.4,1.5
2.1,2.2,2.3,2.4,2.5
3.1,3.2,3.3,3.3,3.5
4.1,4.2,4.3,4.4,4.5

output:

------------START OF PROGRAM------------
Filesize: 99 Bytes

CSV_content =
[ 0.100000 0.200000 0.300000 0.400000 0.500000 ]
[ 1.100000 1.200000 1.300000 1.400000 1.500000 ]
[ 2.100000 2.200000 2.300000 2.400000 2.500000 ]
[ 3.100000 3.200000 3.300000 3.300000 3.500000 ]
[ 4.100000 4.200000 4.300000 4.400000 4.500000 ]

------------END OF PROGRAM------------
1

There is 1 answer

7
Self learning student On

This is my self-answer. My old answer post was deleted, so if you see comments about correcting things that have already been corrected (here or in my question post), that's why.

Latest version of the Code:

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>

// Size limits used while reading and splitting the file content.
// The integer limits are enumeration constants so that they are true integer
// constant expressions: `char buffer[MAX_LINE_SIZE]` is then an ordinary
// fixed-size array instead of a variable-length array.
enum
{
    MAX_NUMBER_OF_LINE = 128,         // upper bound on the number of pieces string_split stores
    MAX_LINE_SIZE = 256,              // buffer size in bytes for one line, terminator included
    MAX_LENGTH_OF_STRING_ELEMENT = 11 // for floats ("0.1234567" + '\0' = 10 characters). Increase for double.
};
const char NULL_TERMINATOR = '\0';
const char NEW_LINE_CHARARACTER = '\n';

// A list of C strings, as built by string_split and released by freeStringArray.
typedef struct
{
    int numberOfString; // number of entries in data
    char **data;        // stringArray->data[i] is a string.
} StringArray;

// A table of C strings, as built by getStringMatrix and released by freeStringMatrix.
// data[i][j] is the j-th string of the i-th row.
typedef struct
{
    char ***data; // data[i] is one row: an array of numberOfString string pointers
    int numberOfStringArray; // number of rows
    int numberOfString; // number of strings in each row
} StringMatrix;

/*
 * Reads a whole file into a newly allocated, NUL-terminated buffer.
 *
 * filename         path of the file to read (opened in binary mode).
 * printFileBool    when 1, prints every byte of the buffer (terminator
 *                  included) to stdout as a decimal number.
 * fileSize_output  receives the number of bytes read; may be NULL.
 *
 * Returns the buffer, which the caller must free(), or NULL after printing a
 * message on stderr when the file cannot be opened, measured, allocated for
 * or read. The file is closed on every path.
 */
char *readCSV_toString(char *filename, int printFileBool, unsigned long *fileSize_output)
{
    FILE *filePointer = fopen(filename, "rb");
    if (filePointer == NULL)
    {
        fprintf(stderr, "Can't open file '%s'\n", filename);
        return NULL;
    }

    // Measure the file: seek to the end, read the position, seek back.
    long endPosition = -1L;
    if (fseek(filePointer, 0L, SEEK_END) == 0)
    {
        endPosition = ftell(filePointer); // -1 on failure
    }
    if (endPosition < 0 || fseek(filePointer, 0L, SEEK_SET) != 0)
    {
        fprintf(stderr, "Can't determine size of file '%s'\n", filename);
        fclose(filePointer);
        return NULL;
    }
    unsigned long fileSize = (unsigned long)endPosition;

    char *fileContent = malloc(fileSize + 1); // +1 for the terminator
    if (fileContent == NULL)
    {
        fprintf(stderr, "Can't allocate memory for file %s\n", filename);
        fclose(filePointer);
        return NULL;
    }

    size_t bytesRead = fread(fileContent, 1, fileSize, filePointer);
    if (bytesRead != fileSize && ferror(filePointer))
    {
        fprintf(stderr, "Can't read file '%s'\n", filename);
        free(fileContent);
        fclose(filePointer);
        return NULL;
    }
    fclose(filePointer);

    // Terminate after the bytes that were really read, so a short read never
    // exposes uninitialised memory.
    fileSize = (unsigned long)bytesRead;
    fileContent[fileSize] = '\0';

    if (printFileBool == 1)
    {
        for (unsigned long i = 0; i <= fileSize; i++)
        {
            printf("%i ", fileContent[i]);
        }
    }

    if (fileSize_output != NULL)
    {
        *fileSize_output = fileSize;
    }
    return fileContent;
}

/*
 * Compares the substring stringInput[start_index, end_index_excluded) with
 * stringToBeCompared, as strcmp would on a copy of that substring.
 *
 * Returns 0 when they are equal, a negative value when the substring orders
 * before stringToBeCompared and a positive value when it orders after.
 * A NUL inside the range ends the substring early. An empty or inverted
 * range is treated as the empty string.
 *
 * The comparison is done in place: no temporary copy is allocated, so it
 * cannot fail on memory exhaustion.
 */
int substringCompare(char *stringInput, int start_index, int end_index_excluded, char *stringToBeCompared)
{
    int length = end_index_excluded - start_index;
    if (length < 0)
    {
        length = 0;
    }

    const unsigned char *left = (const unsigned char *)stringInput + start_index;
    const unsigned char *right = (const unsigned char *)stringToBeCompared;

    for (int i = 0; i < length; i++)
    {
        if (left[i] != right[i])
        {
            return (left[i] < right[i]) ? -1 : 1;
        }
        if (left[i] == '\0')
        {
            return 0; // both strings end here
        }
    }

    // The substring is exhausted: equal only if the other string ends too.
    return (right[length] == '\0') ? 0 : -1;
}

/*
 * Returns the number of elements that splitting `string` on the substring
 * `delimiter` produces (for "a,b,c,d": 3 commas, 4 elements).
 *
 * string      text to inspect.
 * stringSize  maximum number of bytes to inspect; scanning also stops at the
 *             first NUL, so a size larger than the text is harmless.
 * delimiter   separator, matched as a whole substring, left to right,
 *             without overlap.
 *
 * A delimiter that ends the text does not start a new element. An empty
 * delimiter, or a text shorter than the delimiter, yields 1. Returns 0 when
 * either pointer is NULL.
 */
int string_splitLength(char *string, unsigned long stringSize, char *delimiter)
{
    if (string == NULL || delimiter == NULL)
    {
        return 0;
    }

    size_t delimiterLength = strlen(delimiter);

    // Clamp the scan to the real text: never look past the terminator.
    const char *terminator = memchr(string, '\0', stringSize);
    size_t textLength = (terminator != NULL) ? (size_t)(terminator - string) : (size_t)stringSize;

    int count = 1;
    if (delimiterLength == 0 || textLength < delimiterLength)
    {
        return count;
    }

    size_t i = 0;
    while (i + delimiterLength <= textLength)
    {
        if (memcmp(string + i, delimiter, delimiterLength) == 0)
        {
            i += delimiterLength; // resume after the match: no overlapping hits
            if (i < textLength)
            {
                count++; // something follows, so a new element starts here
            }
        }
        else
        {
            i++;
        }
    }

    return count;
}

/*
 * Splits `string` on every occurrence of the substring `delimiter` and
 * returns the pieces as a newly allocated StringArray.
 *
 * string          text to split; it is only read, never modified.
 * delimiter       separator, matched as a whole substring (so "||" does not
 *                 split on a single '|'). An empty delimiter yields the whole
 *                 text as one piece.
 * maxElementSize  largest size in bytes of one piece, terminator included;
 *                 longer pieces are truncated to maxElementSize - 1
 *                 characters. Every piece is NUL-terminated.
 *
 * Empty pieces between two adjacent delimiters are kept as "". A delimiter
 * that ends the text does not produce a trailing empty piece. At most
 * MAX_NUMBER_OF_LINE pieces are produced; anything after that is ignored.
 *
 * Returns NULL on invalid arguments or when memory cannot be allocated.
 * The caller owns the result and releases it with freeStringArray.
 */
StringArray *string_split(char *string, char *delimiter, int maxElementSize)
{
    if (string == NULL || delimiter == NULL || maxElementSize <= 0)
    {
        return NULL;
    }

    const size_t delimiterLength = strlen(delimiter);
    const size_t longestPiece = (size_t)maxElementSize - 1;

    // Pass 1: count the pieces so the pointer array is sized exactly.
    int pieceCount = 1;
    if (delimiterLength > 0)
    {
        const char *cursor = string;
        while (pieceCount < MAX_NUMBER_OF_LINE)
        {
            const char *hit = strstr(cursor, delimiter);
            if (hit == NULL)
            {
                break;
            }
            cursor = hit + delimiterLength;
            if (*cursor == '\0')
            {
                break; // trailing delimiter: nothing follows it
            }
            pieceCount++;
        }
    }

    StringArray *stringArray = calloc(1, sizeof *stringArray);
    if (stringArray == NULL)
    {
        return NULL;
    }
    stringArray->data = calloc((size_t)pieceCount, sizeof *stringArray->data);
    if (stringArray->data == NULL)
    {
        free(stringArray);
        return NULL;
    }

    // Pass 2: copy each piece into its own allocation.
    const char *pieceStart = string;
    for (int i = 0; i < pieceCount; i++)
    {
        const char *pieceEnd = (delimiterLength > 0) ? strstr(pieceStart, delimiter) : NULL;
        size_t pieceLength = (pieceEnd != NULL) ? (size_t)(pieceEnd - pieceStart) : strlen(pieceStart);
        if (pieceLength > longestPiece)
        {
            pieceLength = longestPiece;
        }

        char *piece = malloc(pieceLength + 1);
        if (piece == NULL)
        {
            // Undo the pieces copied so far.
            while (i > 0)
            {
                free(stringArray->data[--i]);
            }
            free(stringArray->data);
            free(stringArray);
            return NULL;
        }
        memcpy(piece, pieceStart, pieceLength);
        piece[pieceLength] = '\0';
        stringArray->data[i] = piece;

        if (pieceEnd != NULL)
        {
            pieceStart = pieceEnd + delimiterLength;
        }
    }

    stringArray->numberOfString = pieceCount;
    return stringArray;
}

// Prints the length of the string array, then every string in single quotes,
// separated by commas, between square brackets on one line.
void printStringArray(StringArray *stringArray)
{
    const int count = stringArray->numberOfString;
    const int lastIndex = count - 1;

    printf("The string array with length %d is: \n", count);
    printf("[ ");

    int index = 0;
    while (index < stringArray->numberOfString)
    {
        printf("'%s'", stringArray->data[index]);
        if (index < lastIndex)
        {
            printf(","); // no comma after the final element
        }
        printf(" ");
        index++;
    }

    printf("]\n");
}

/*
 * Builds a StringMatrix by splitting every line of `stringArray_input` on
 * `delimiterElement`.
 *
 * stringArray_input  the lines; one matrix row is produced per line.
 * delimiterElement   separator between the elements of a line.
 * maxElementSize     currently unused: each line is split with MAX_LINE_SIZE
 *                    as the element limit. Kept so existing callers compile.
 *
 * The number of columns is the number of elements of the first line. A line
 * with fewer elements is padded with empty strings; extra elements of a
 * longer line are dropped. The element strings produced by string_split are
 * moved into the matrix, and the temporary StringArray of each line is
 * released before moving on, so nothing from the split is left allocated.
 *
 * Returns NULL when the input is NULL or empty, when a line is NULL, or when
 * an allocation fails. The caller releases the result with freeStringMatrix.
 */
StringMatrix *getStringMatrix(StringArray *stringArray_input, char *delimiterElement, int maxElementSize)
{
    (void)maxElementSize;

    if (stringArray_input == NULL || stringArray_input->data == NULL || stringArray_input->numberOfString <= 0)
    {
        return NULL;
    }

    const int rowCount = stringArray_input->numberOfString;

    StringMatrix *stringMatrix = calloc(1, sizeof *stringMatrix);
    if (stringMatrix == NULL)
    {
        return NULL;
    }
    // Zero-filled, so rows that were never built are NULL during cleanup.
    stringMatrix->data = calloc((size_t)rowCount, sizeof *stringMatrix->data);
    if (stringMatrix->data == NULL)
    {
        free(stringMatrix);
        return NULL;
    }
    stringMatrix->numberOfStringArray = rowCount;

    for (int i = 0; i < rowCount; i++)
    {
        StringArray *elementArray = NULL;
        if (stringArray_input->data[i] != NULL)
        {
            elementArray = string_split(stringArray_input->data[i], delimiterElement, MAX_LINE_SIZE);
        }
        if (elementArray == NULL)
        {
            goto fail;
        }

        if (i == 0)
        {
            stringMatrix->numberOfString = elementArray->numberOfString; // first line fixes the width
        }
        const int columnCount = stringMatrix->numberOfString;

        char **row = calloc(columnCount > 0 ? (size_t)columnCount : 1, sizeof *row);
        int rowIsComplete = (row != NULL);
        for (int j = 0; rowIsComplete && j < columnCount; j++)
        {
            if (j < elementArray->numberOfString && elementArray->data[j] != NULL)
            {
                row[j] = elementArray->data[j]; // ownership moves to the matrix
                elementArray->data[j] = NULL;
            }
            else
            {
                row[j] = calloc(1, sizeof(char)); // missing element becomes ""
                rowIsComplete = (row[j] != NULL);
            }
        }
        stringMatrix->data[i] = row;

        // Release what is left of the temporary split result.
        for (int j = 0; j < elementArray->numberOfString; j++)
        {
            free(elementArray->data[j]);
        }
        free(elementArray->data);
        free(elementArray);

        if (!rowIsComplete)
        {
            goto fail;
        }
    }

    return stringMatrix;

fail:
    for (int i = 0; i < rowCount; i++)
    {
        if (stringMatrix->data[i] == NULL)
        {
            continue;
        }
        for (int j = 0; j < stringMatrix->numberOfString; j++)
        {
            free(stringMatrix->data[i][j]);
        }
        free(stringMatrix->data[i]);
    }
    free(stringMatrix->data);
    free(stringMatrix);
    return NULL;
}

// Prints the matrix dimensions (strings per row, then number of rows),
// followed by one bracketed line per row with each string in single quotes.
void printStringMatrix(StringMatrix *stringMatrix)
{
    const int columnCount = stringMatrix->numberOfString;
    const int rowCount = stringMatrix->numberOfStringArray;

    printf("The string matrix is: %d %d\n", columnCount, rowCount);

    int row = 0;
    while (row < rowCount)
    {
        char **cells = stringMatrix->data[row];

        printf("[ ");
        int column = 0;
        while (column < columnCount)
        {
            printf("'%s' ", cells[column]);
            column++;
        }
        printf("]\n");

        row++;
    }

    printf("\n");
}

/*
 * Converts every string of `stringMatrix` to a float.
 *
 * stringMatrix  the strings to convert.
 * m_output      receives the number of columns (strings per row).
 * n_output      receives the number of rows.
 *
 * Returns a newly allocated row-major array of n * m floats: the value of
 * row i, column j is at index i * m + j. The caller must free() it.
 * Text that does not start with a number converts to 0, as do missing
 * (NULL) cells.
 *
 * Returns NULL, with both outputs set to 0, when the matrix is NULL or
 * empty or when the allocation fails.
 */
float *stringToFloat_matrix(StringMatrix *stringMatrix, int *m_output, int *n_output)
{
    if (m_output != NULL)
    {
        *m_output = 0;
    }
    if (n_output != NULL)
    {
        *n_output = 0;
    }

    if (stringMatrix == NULL || stringMatrix->data == NULL)
    {
        return NULL;
    }

    const int n = stringMatrix->numberOfStringArray; // vertical (list of list)
    const int m = stringMatrix->numberOfString;      // horizontal (last layer)
    if (n <= 0 || m <= 0)
    {
        return NULL;
    }

    float *array1D = calloc((size_t)n * (size_t)m, sizeof *array1D);
    if (array1D == NULL)
    {
        return NULL;
    }

    for (int i = 0; i < n; i++)
    {
        char **row = stringMatrix->data[i];
        if (row == NULL)
        {
            continue; // the row stays zero-filled
        }
        for (int j = 0; j < m; j++)
        {
            if (row[j] != NULL)
            {
                array1D[(size_t)i * (size_t)m + (size_t)j] = strtof(row[j], NULL);
            }
        }
    }

    if (m_output != NULL)
    {
        *m_output = m;
    }
    if (n_output != NULL)
    {
        *n_output = n;
    }
    return array1D;
}

// Releases a StringArray: every string, then the pointer array, then the
// struct itself. Like free(), passing NULL is a no-op.
// stringArray (-> numberOfString  , -> data -> [ p0, p1, p2, p3,... ] -> { string1, string2, string3,... } )
void freeStringArray(StringArray *stringArray)
{
    if (stringArray == NULL)
    {
        return;
    }

    if (stringArray->data != NULL)
    {
        for (int i = 0; i < stringArray->numberOfString; i++)
        {
            free(stringArray->data[i]);
        }
        free(stringArray->data);
    }
    free(stringArray);
}

// Releases a StringMatrix: every string of every row, each row, the row
// array, then the struct itself. Like free(), passing NULL is a no-op, and
// rows that were never allocated (NULL) are skipped.
void freeStringMatrix(StringMatrix *stringMatrix)
{
    if (stringMatrix == NULL)
    {
        return;
    }

    if (stringMatrix->data != NULL)
    {
        for (int i = 0; i < stringMatrix->numberOfStringArray; i++)
        {
            char **row = stringMatrix->data[i];
            if (row == NULL)
            {
                continue;
            }
            for (int j = 0; j < stringMatrix->numberOfString; j++)
            {
                free(row[j]);
            }
            free(row);
        }
        free(stringMatrix->data);
    }
    free(stringMatrix);
}

/*
 * Loads a delimited text file and converts it to a matrix of floats.
 *
 * filename              file to read.
 * delimiterLine         separator between lines.
 * delimiterElement      separator between the elements of one line.
 * printFileContentBool  forwarded to readCSV_toString (1 prints the raw bytes).
 * m_output              receives the number of columns.
 * n_output              receives the number of rows.
 *
 * Prints the file size and the intermediate string array and string matrix
 * to stdout.
 *
 * Returns a row-major array of n * m floats that the caller must free().
 * The function never returns NULL: if the file cannot be read, is empty, or
 * any intermediate step fails, it releases what it allocated and terminates
 * the process with exit(1), the same policy already used when the file
 * cannot be opened.
 */
float *openCSV_float(char *filename, char *delimiterLine, char *delimiterElement, int printFileContentBool, int *m_output, int *n_output)
{
    unsigned long fileSize = 0;
    char *fileContent = readCSV_toString(filename, printFileContentBool, &fileSize);
    if (fileContent == NULL)
        exit(1);
    printf("Filesize: %lu Bytes\n\n", fileSize);

    StringArray *stringArray = NULL;
    StringMatrix *stringMatrix = NULL;
    float *array1D = NULL;
    int m = 0;
    int n = 0;

    // Each stage runs only if the previous one produced a result.
    if (fileContent[0] != '\0')
    {
        stringArray = string_split(fileContent, delimiterLine, MAX_LINE_SIZE);
    }
    if (stringArray != NULL)
    {
        printStringArray(stringArray);
        printf("-------\n");
        stringMatrix = getStringMatrix(stringArray, delimiterElement, MAX_LENGTH_OF_STRING_ELEMENT);
    }
    if (stringMatrix != NULL)
    {
        printStringMatrix(stringMatrix);
        array1D = stringToFloat_matrix(stringMatrix, &m, &n);
    }

    // The intermediate representations are no longer needed, whatever the outcome.
    free(fileContent);
    if (stringArray != NULL)
    {
        freeStringArray(stringArray);
    }
    if (stringMatrix != NULL)
    {
        freeStringMatrix(stringMatrix);
    }

    if (array1D == NULL)
    {
        fprintf(stderr, "Can't convert the content of file '%s' to floats\n", filename);
        exit(1);
    }

    *m_output = m;
    *n_output = n;
    return array1D;
}

// Prints a row-major array of n rows by m columns, one bracketed line per
// row, each value formatted with %f.
void printMatrix_float(float *array1D, int m, int n)
{
    for (int row = 0; row < n; row++)
    {
        printf("[ ");
        for (int column = 0; column < m; column++)
        {
            // Row-major layout: element (row, column) sits at row * m + column.
            printf("%f ", array1D[(size_t)row * (size_t)m + (size_t)column]);
        }
        printf("]\n");
    }
}

// Loads "data2.csv" (lines separated by '\n', elements by "||"), prints the
// resulting float matrix, and releases it. Returns 1 if nothing was loaded.
int main(void)
{
    printf("\n\n------------START OF PROGRAM------------\n");

    int n = 0;
    int m = 0;
    int printFileContentBool = 0;

    char delimiterLine[2] = {NEW_LINE_CHARARACTER, NULL_TERMINATOR};
    char delimiterElement[] = "||";
    char filename[100] = "data2.csv";

    float *array1D = openCSV_float(filename, delimiterLine, delimiterElement, printFileContentBool, &m, &n);
    // 1kb of memory leak per file opening loop;
    if (array1D == NULL)
    {
        fprintf(stderr, "Can't load '%s'\n", filename);
        return 1;
    }

    printf("CSV_content =\n");
    printMatrix_float(array1D, m, n);

    free(array1D); // the caller owns the array returned by openCSV_float

    printf("\n------------END OF PROGRAM------------");
    return 0;
}

Data.csv (saved as data2.csv, the file name opened in main)

0.1||0.2||0.3||0.4||0.5
1.1||1.2||1.3||1.4||1.5
2.1||2.2||2.3||2.4||2.5
3.1||3.2||3.3||3.3||3.5

Output


------------START OF PROGRAM------------
Filesize: 95 Bytes

The string array with length 4 is:
[ '0.1||0.2||0.3||0.4||0.5', '1.1||1.2||1.3||1.4||1.5', '2.1||2.2||2.3||2.4||2.5', '3.1||3.2||3.3||3.3||3.5' ]
-------
The string matrix is: 5 4
[ '0.1' '0.2' '0.3' '0.4' '0.5' ]
[ '1.1' '1.2' '1.3' '1.4' '1.5' ]
[ '2.1' '2.2' '2.3' '2.4' '2.5' ]
[ '3.1' '3.2' '3.3' '3.3' '3.5' ]

CSV_content =
[ 0.100000 0.200000 0.300000 0.400000 0.500000 ]
[ 1.100000 1.200000 1.300000 1.400000 1.500000 ]
[ 2.100000 2.200000 2.300000 2.400000 2.500000 ]
[ 3.100000 3.200000 3.300000 3.300000 3.500000 ]

------------END OF PROGRAM------------