Heap Sort doesn't work properly

1.1k views Asked by At

I'm writing sorting application in C for my univesity, and I have a problem with one algorithm (heap sort). Here is my code:

void heap_sort(int *array, int size) {
    int temp;

    heap_build(array, size);

    for(int i = size; i>1; i--) {
        temp = array[i];
        array[i] = array[1];
        array[1] = temp;
        size--;
        heap_heapify(array, size, 1);
    }
}

void heap_build(int *array, int size) {
    for(int i = size / 2; i > 0; i--)
        heap_heapify(array, size, i);
}

void heap_heapify(int *array, int size, int i) {
    int largest, temp, 
        l = 2 * i, r = (2 * i) + 1;

    if(l <= size && array[l] > array[i])
        largest = l;
    else 
        largest = i;

    if(r <= size && array[r] > array[largest])
        largest = r;

    if(largest != i) {
        temp = array[largest];
        array[largest] = array[i];
        array[i] = temp;
        heap_heapify(array, size, largest);
    }
}

The result is for eg:

-22
-33686019 // the range is <-100, 100>
-71
-68
-59
-17
-8
43
59
82

How you can see, numbers are not sorted properly, and I have one wired number (always in array[1]).

1

There are 1 answers

5
Filipe Gonçalves On BEST ANSWER

In the comments, you mention that you're using array indexes in the interval 1..N for an array of size N+1, but the size you pass in is N+1. If that's true, you have an off-by-one error in max-heapify(): if size is N+1, the last position you can access is N, not N+1, so you must change the comparison to l < size (and similarly for r):

void heap_heapify(int *array, int size, int i) {
    int largest, temp, 
        l = 2 * i, r = (2 * i) + 1;

    if(l < size && array[l] > array[i])
        largest = l;
    else 
        largest = i;

    if(r < size && array[r] > array[largest])
        largest = r;

    if(largest != i) {
        temp = array[largest];
        array[largest] = array[i];
        array[i] = temp;
        heap_heapify(array, size, largest);
    }
}

Alternatively, if you want to keep your code as close as possible to CLRS, you can use <= as long as you pass N as the size rather than N+1 (so, you allocate an array of N+1 elements, but you pass N as the size, so things line up).

[Side note: it has always bugged me that CLRS uses arrays indexed from 1. This always causes trouble when writing real code based on the pseudocode there].

The same happens in heap_sort(), either you pass it N as size for an array of N+1 elements or you initialize i to size-1:

void heap_sort(int *array, int size) {
    int temp;

    heap_build(array, size);

    for(int i = size-1; i>1; i--) {
        temp = array[i];
        array[i] = array[1];
        array[1] = temp;
        size--;
        heap_heapify(array, size, 1);
    }
}

Here's a full program with the working code:

#include <stdio.h>

void heap_build(int *array, int size);
void heap_heapify(int *array, int size, int i);

void heap_sort(int *array, int size) {
    int temp;

    heap_build(array, size);

    for(int i = size-1; i>1; i--) {
        temp = array[i];
        array[i] = array[1];
        array[1] = temp;
        size--;
        heap_heapify(array, size, 1);
    }
}

void heap_build(int *array, int size) {
    for(int i = size / 2; i > 0; i--)
        heap_heapify(array, size, i);
}

void heap_heapify(int *array, int size, int i) {
    int largest, temp, 
        l = 2 * i, r = (2 * i) + 1;

    if(l < size && array[l] > array[i])
        largest = l;
    else 
        largest = i;

    if(r < size && array[r] > array[largest])
        largest = r;

    if(largest != i) {
        temp = array[largest];
        array[largest] = array[i];
        array[i] = temp;
        heap_heapify(array, size, largest);
    }
}

int main(void) {
    int arr[] = { 0, -22, 2, -33, 82, 71, 82, 0, -68, -59, -17, -8, 43, 59, -100 };

    heap_sort(arr, sizeof(arr)/sizeof(arr[0]));

    for (int i = 0; i < sizeof(arr)/sizeof(arr[0]); i++) {
        printf("%d\n", arr[i]);
    }

    return 0;
}

This prints:

0
-100
-68
-59
-33
-22
-17
-8
0
2
43
59
71
82
82

Note that the first element is never sorted; since you use indexes 1..N, you're basically ignoring element 0. A quick hack is to pass in a pointer to one element before the start of the array, but that is ugly, and UB (pointer arithmetic is valid only if the resulting pointer references an element in the array, or one past the end).

So I suggest refactoring the code and forget about 1-based indexing. This can be done by adjusting the formulas to calculate the left and right child of a node and adjusting the loop conditions:

#include <stdio.h>

void heap_build(int *array, int size);
void heap_heapify(int *array, int size, int i);

void heap_sort(int *array, int size) {
    int temp;

    heap_build(array, size);

    for(int i = size-1; i > 0; i--) {
        temp = array[i];
        array[i] = array[0];
        array[0] = temp;
        size--;
        heap_heapify(array, size, 0);
    }
}

void heap_build(int *array, int size) {
    for(int i = size/2; i >= 0; i--)
        heap_heapify(array, size, i);
}

void heap_heapify(int *array, int size, int i) {
    int largest, temp,
        l = i*2+1, r = l+1;

    if (l < size && array[l] > array[i])
        largest = l;
    else 
        largest = i;

    if (r < size && array[r] > array[largest])
        largest = r;

    if (largest != i) {
        temp = array[largest];
        array[largest] = array[i];
        array[i] = temp;
        heap_heapify(array, size, largest);
    }
}

int main(void) {
    int arr[] = { 0, -22, 2, -33, 82, 71, 82, 0, -68, -59, -17, -8, 43, 59, -100 };

    heap_sort(arr, sizeof(arr)/sizeof(arr[0]));

    for (int i = 0; i < sizeof(arr)/sizeof(arr[0]); i++) {
        printf("%d\n", arr[i]);
    }

    return 0;
}

The differences from the previous version are:

  1. In heap_sort, the loop condition turns into i > 0.
  2. In heap_build(), the loop condition turns into i >= 0.
  3. In heap_heapify(), the left child is 2*i+1 rather than 2*i, and r is 2*i+2.