Sort a array of array of integers lexicographically

907 views Asked by At

I'm using qsort() to sort a 2D array of integers lexiographically, each row being one input, but having problems with pointer arithmetic when traversing a row in the array.

Idea is to concatenate each column in one row into a string, and strcmp with other strings computed similarly from other rows.

Input is restricted to positive integers only. Some sort examples below,
{1, 1, 10} before {1, 2, 0}
{3, 1, 5} before {3, 11, 5}
{1, 19, 2} before {1, 2, 1}

#define COL_SIZE 3  

int cmp_func (const void *a, const void *b) {
    char str1[100], str2[100], temp[10];
    int i;
    const int **ia = (const int **)a;
    const int **ib = (const int **)b;

    printf("(a: %p), (b: %p)\n", a, b);
    str1[0] = '\0';
    str2[0] = '\0';

    for (i = 0; i < COL_SIZE; i++) {
        printf("(ia+%d: %p), (ib+%d: %p)\n", i, (ia + i), i, (ib + i));
        sprintf(temp, "%d", (int)*(ia + i));
        strcat(str1, temp);

        sprintf(temp, "%d", (int)*(ib + i));
        strcat(str2, temp);
    }

    printf("a: %s, b:%s\n", str1, str2);
    return(strcmp(str1, str2));
}

int main (void) {
    int i, len;
    int A[][COL_SIZE] = {{1,2,3},
                         {1,1,5},
                         {1,0,1},
                         {1,2,1},
                         {1,2,0}};

    len = sizeof(A)/sizeof(A[0]);
    printf("sizeof(int): %ld, sizeof(int *): %ld\n", sizeof(int), sizeof(int *));

    for (i = 0; i < len; i++) {
        printf("Address of A[%d][0]: %p\n", i, A[i]);
    }

    qsort(A, len, COL_SIZE * sizeof(int *), cmp_func);
    return 0;
}

Below is the output:

sizeof(int): 4, sizeof(int *): 8  
Address of A[0][0]: 0x7fff58e9fb30  
Address of A[1][0]: 0x7fff58e9fb3c  
Address of A[2][0]: 0x7fff58e9fb48
Address of A[3][0]: 0x7fff58e9fb54  
Address of A[4][0]: 0x7fff58e9fb60  
(a: 0x7fff58e9fb30), (b: 0x7fff58e9fb48)  
(ia+0: 0x7fff58e9fb30), (ib+0: 0x7fff58e9fb48)  
(ia+1: 0x7fff58e9fb38), (ib+1: 0x7fff58e9fb50)  
(ia+2: 0x7fff58e9fb40), (ib+2: 0x7fff58e9fb58)  
a: 131, b:112  
(a: 0x7fff58e9fb48), (b: 0x7fff58e9fb60)  
(ia+0: 0x7fff58e9fb48), (ib+0: 0x7fff58e9fb60)  
(ia+1: 0x7fff58e9fb50), (ib+1: 0x7fff58e9fb68)  
(ia+2: 0x7fff58e9fb58), (ib+2: 0x7fff58e9fb70)  
Abort trap: 6

The pointer arithmetic for *(ia + 1) is causing the address in each iteration to jump by 8 (sizeof(int *)) from 0x7fff58e9fb30 to 0x7fff58e9fb38, whereas the next value in A[0] is stored at 0x7fff58e9fb34 (size of int).

How do I fix this so that I can get an offset of 4 instead of 8?

4

There are 4 answers

2
Jonathan Leffler On BEST ANSWER

You're sorting an array of arrays, not an array of pointers. Consequently, you need to clean up the code in the comparison function. Also, the current incarnation of the comparator converts all the numbers to strings before doing any comparisons. It is feasible and sensible to convert one number in each row at a time and stop when you find a difference, only converting the next number when the previous ones are the same.

This leads to the following code, which requires a C99 compiler or a C11 compiler with support for VLAs — variable length arrays — which are mandatory in C99 but optional in C11:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void dump_array(const char *tag, size_t n_rows, size_t n_cols, int A[n_rows][n_cols]);

enum { COLS  = 3 };

extern int cmp_func (const void *a, const void *b);

int cmp_func(const void *va, const void *vb)
{
    const int *a = *(const int (*)[COLS])va;
    const int *b = *(const int (*)[COLS])vb;

    for (int i = 0; i < COLS; i++)
    {
        char str1[20], str2[20];
        sprintf(str1, "%d", a[i]);
        sprintf(str2, "%d", b[i]);
        int rc = strcmp(str1, str2);
        if (rc != 0)
            return rc;
    }
    return 0;
}

int main(void)
{
    int A[][COLS] =
    {
        { 1, 91, 10 },
        { 1,  9,  9 },
        { 1,  9, 11 },
        { 1,  1,  5 },
        { 1,  9, 10 },
        { 1,  0,  1 },
        { 1,  2,  3 },
        { 1, 91, 10 },
        { 1, 19,  0 },
        { 1, 91,  0 },
        { 1,  2,  0 },
        { 1,  2,  1 },
    };
    enum { ROWS = sizeof(A) / sizeof(A[0]) };

    dump_array("Before", ROWS, COLS, A);
    qsort(A, ROWS, sizeof(A[0]), cmp_func);
    dump_array("After", ROWS, COLS, A);
    return 0;
}

static void dump_array(const char *tag, size_t n_rows, size_t n_cols, int A[n_rows][n_cols])
{
    printf("%s:\n", tag);
    for (size_t r = 0; r < n_rows; r++)
    {
        const char *pad = "{";
        for (size_t c = 0; c < n_cols; c++)
        {
            printf("%s%3d", pad, A[r][c]);
            pad = ",";
        }
        puts(" }");
    }
}

The pointers passed to cmp_func() are pointers to arrays of COLS ints, or int (*)[COLS]; the assignments to a and b generate an array int [COLS], which decays to an int *. I usually use v as a prefix to the const void * arguments to a comparator, and remove the v for the working variables in the function.

The test data there includes test values from the commentary, and the output is:

Before:
{  1, 91, 10 }
{  1,  9,  9 }
{  1,  9, 11 }
{  1,  1,  5 }
{  1,  9, 10 }
{  1,  0,  1 }
{  1,  2,  3 }
{  1, 91, 10 }
{  1, 19,  0 }
{  1, 91,  0 }
{  1,  2,  0 }
{  1,  2,  1 }
After:
{  1,  0,  1 }
{  1,  1,  5 }
{  1, 19,  0 }
{  1,  2,  0 }
{  1,  2,  1 }
{  1,  2,  3 }
{  1,  9, 10 }
{  1,  9, 11 }
{  1,  9,  9 }
{  1, 91,  0 }
{  1, 91, 10 }
{  1, 91, 10 }

This code is similar to the code by RoadRunner, but primarily differs in the simpler, more efficient comparison function cmp_func() which does not necessarily convert all the numbers in each row to strings each time two rows are compared. (In the sample data, the first value is always 1, so at least two pairs of numbers are converted. This is probably not normal, though.)

2
Vlad from Moscow On

I do not think that to concatenate integers that to compare rows is a good idea. In general the resulted strings can have different sizes. At least you have to add leading zeroes to each converted number that to align the strings and take into account signs of the integers.

Also you incorrectly dereference the void pointers.

And the call of the qsort is incorrect though it can work provided that sizeof( int * ) is equal to sizeof( int ). You should write

qsort(A, len, sizeof( int[COL_SIZE] ), cmp_func);

I would write the program the following way

#include <stdlib.h>
#include <stdio.h>

#define COL_SIZE 3

int cmp_rows(const void *a, const void *b)
{
    const int *left  = *(const int(*)[COL_SIZE])a;
    const int *right = *(const int(*)[COL_SIZE])b;

    for (size_t i = 0; i < COL_SIZE; i++)
    {
        if ( left[i] < right[i]) return -1;
        else if (right[i] < left[i]) return 1;
    }

    return 0;
}

int main( void )
{
    int A[][COL_SIZE] = 
    { 
        { 1,2,3 },
        { 1,1,5 },
        { 1,0,1 },
        { 1,2,1 },
        { 1,2,0 } 
    };

    qsort(A, sizeof(A) / sizeof(*A), sizeof(int[COL_SIZE]), cmp_rows);

    for (size_t i = 0; i < sizeof(A) / sizeof(*A); i++)
    {
        for (size_t j = 0; j < COL_SIZE; j++) printf("%d ", A[i][j]);
        printf("\n");
    }

    return 0;
}

The program output is

1 0 1
1 1 5
1 2 0
1 2 1
1 2 3
1
DarthNoodles On

The problem is here:

qsort(A, len, COL_SIZE * sizeof(int *), cmp_func);

You're telling qsort() to sort 'A', which has 'len'(5) items and each item is 3 *int's big. But, 'A' is made up of int's, not *int's.

qsort(A, len, COL_SIZE * sizeof(int), cmp_func);

Give that a try.

4
RoadRunner On

In these two lines:

const int **ia = (const int **)a;
const int **ib = (const int **)b;

This is incorrect, you only need one * pointer, as your are comparing elements in each row against other rows.

This needs to be changed to:

const int *ia = (const int *)a;
const int *ib = (const int *)b;

Furthermore, this line:

qsort(A, len, COL_SIZE * sizeof(int *), cmp_func);

needs to be changed to:

qsort(A, len, sizeof(*A), cmp_func);

The third parameter in qsort only requires a size of what it's comparing. In this case, size of each row, which can be expressed as sizeof(*A) or sizeof(A[0]).

Moreover, @Vlad from Moscow has a better approach which doesn't require concatenating strings.

If you still wish use a string approach, you can try this:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define STRSIZE 100
#define COL_SIZE 3

int cmp_func(const void *a, const void *b) {
    char str1[STRSIZE], str2[STRSIZE], temp[STRSIZE];
    size_t i;

    const int *row1 = (const int *)a;
    const int *row2 = (const int *)b;

    *str1 = '\0';
    *str2 = '\0';

    for (i = 0; i < COL_SIZE; i++) {
        sprintf(temp, "%d", row1[i]);
        strcat(str1, temp);

        sprintf(temp, "%d", row2[i]);
        strcat(str2, temp);
    }  

    return strcmp(str1, str2);

}

void print_array(int A[][COL_SIZE], size_t len) {
    size_t row, col;

    for (row = 0; row < len; row++) {
        for (col = 0; col < COL_SIZE; col++) {
            printf("%d ", A[row][col]);
        }
        printf("\n");
    }
}

int main(void) {
    size_t len;
    int A[][COL_SIZE] = {{1,2,0},
                         {1,19,0},
                         {1,19,0},
                         {1,19,0},
                         {1,2,0}};

    len = sizeof(A)/sizeof(*A);

    printf("\nBefore:\n");
    print_array(A, len);

    qsort(A, len, sizeof(*A), cmp_func);

    printf("\nAfter:\n");
    print_array(A, len);

    return 0;
}

Output:

Before:
1 2 0
1 19 0
1 19 0
1 19 0
1 2 0

After:
1 19 0
1 19 0
1 19 0
1 2 0
1 2 0