How to sort and rank a vector in C++ (without using C++11)

5.5k views Asked by At

I am trying to construct a function take takes a vector, ranks it, sorts it and outputs the sorted and ranked vector with the original positioning of the values. For example: Input: [10,332,42,0.9,0] Output: [3, 5, 4, 2, 1]

I used this stack overflow question (specifically Marius' answer) as a reference guide, however I am stuck with my code now and do not understand where the issue is. I am running a C++03.

One of the errors I get is

error: invalid types ‘const float*[float]’ for array subscript’ for array subscript on my if statement.

//Rank the values in a vector
std::vector<float> rankSort(const float *v_temp, size_t size)
{
    vector <float> v_sort;
    //create a new array with increasing values from 0 to n-1
    for(unsigned i = 0; i < size; i++)
    {
        v_sort.push_back(i);
    }
    bool swapped = false;
    do
    {
        for(unsigned i = 0; i < size; i++)
        {
            if(v_temp[v_sort[i]] > v_temp[v_sort[i+1]]) //error line
            {
                float temp = v_sort[i];
                v_sort[i] = v_sort[i+1];
                v_sort[i+1] = temp;
                swapped = true;
            }
        }
    }
    while(swapped);
    return v_sort;
}

std::vector<float> rankSort(const std::vector<float> &v_temp)
{
    return rankSort(&v_temp[0], v_temp.size());
}
5

There are 5 answers

4
Jonathan Mee On BEST ANSWER

Your problem is a misconception on rankings. Array indices are of size_t not float, so you'll need to return a vector<size_t> not a vector<float>.

That said your sort is O(n2). If you're willing to use more memory we can get that time down to O(n log(n)):

vector<size_t> rankSort(const float* v_temp, const size_t size) {
    vector<pair<float, size_t> > v_sort(size);

    for (size_t i = 0U; i < size; ++i) {
        v_sort[i] = make_pair(v_temp[i], i);
    }

    sort(v_sort.begin(), v_sort.end());

    pair<double, size_t> rank;
    vector<size_t> result(size);

    for (size_t i = 0U; i < size; ++i) {
        if (v_sort[i].first != rank.first) {
            rank = make_pair(v_sort[i].first, i);
        }
        result[v_sort[i].second] = rank.second;
    }
    return result;
}

Live Example

EDIT:

Yeah this actually gets a little simpler when taking a vector<float> instead of a float[]:

vector<size_t> rankSort(const vector<float>& v_temp) {
    vector<pair<float, size_t> > v_sort(v_temp.size());

    for (size_t i = 0U; i < v_sort.size(); ++i) {
        v_sort[i] = make_pair(v_temp[i], i);
    }

    sort(v_sort.begin(), v_sort.end());

    pair<double, size_t> rank;
    vector<size_t> result(v_temp.size());

    for (size_t i = 0U; i < v_sort.size(); ++i) {
        if (v_sort[i].first != rank.first) {
            rank = make_pair(v_sort[i].first, i);
        }
        result[v_sort[i].second] = rank.second;
    }
    return result;
}

Live Example

7
Rerito On

I suggest you adopt a more robust solution by taking advantage of what you have in the STL. To do so, we will first make an "index vector", ie. a std::vector<std::size_t> vsuch that for any i, v[i] == i is true:

// I'm sure there's a more elegant solution to generate this vector
// But this will do
std::vector<std::size_t> make_index_vector(std::size_t n) {
    std::vector<std::size_t> result(n, 0);
    for (std::size_t i = 0; i < n; ++i) {
        result[i] = i;
    }
    return result;
}

Now all we have to do is to sort this vector according to a specific comparison function that will use the input vector. Furthermore, to allow for the most generic approach we will give the user the opportunity to use any comparison functor:

template <typename T, typename A, typename Cmp>
struct idx_compare {
    std::vector<T, A> const& v;
    Cmp& cmp;
    idx_compare(std::vector<T, A> const& vec, Cmp& comp) : v(vec), cmp(comp) {}

    bool operator()(std::size_t i, std::size_t j) {
        return cmp(v[i], v[j]);
    }
};

template <typename T, typename A, typename Cmp>
std::vector<std::size_t> sorted_index_vector(std::vector<T, A> const& vec, Cmp comp) {
    std::vector<std::size_t> index = make_index_vector(vec.size());
    std::sort(index.begin(), index.end(),
        idx_compare<T, A, Cmp>(vec, comp));

    return index;
}

In the sorted index vector, index[0] is the index of the lowest value in the input vector, index[1] the second lowest and so on. Therefore, we need one additional step to get the rank vector from this one:

std::vector<std::size_t> get_rank_vector(std::vector<std::size_t> const& index) {
    std::vector<std::size_t> rank(index.size());
    for (std::size_t i = 0; i < index.size(); ++i) {
        // We add 1 since you want your rank to start at 1 instead of 0
        // Just remove it if you want 0-based ranks
        rank[index[i]] = i + 1;
    }
    return rank;
}

Now we combine all the pieces together:

template <typename T, typename A, typename Cmp>
std::vector<std::size_t> make_rank_vector(
    std::vector<T, A> const& vec, Cmp comp) {
    return get_rank_vector(sorted_index_vector(vec, comp));
}

// I had to stop using default template parameters since early gcc version did not support it (4.3.6)
// So I simply made another overload to handle the basic usage.
template <typename T, typename A>
std::vector<std::size_t> make_rank_vector(
    std::vector<T, A> const& vec) {
    return make_rank_vector(vec, std::less<T>());
}

Result with [10, 332, 42, 0.9, 0]: [3, 5, 4, 2, 1]. You can find a Live Demo on gcc 4.3.6 to explicit this behavior.

9
alexeykuzmin0 On

v_sort[i] is a float (it's just an element of v_sort vector) while only integral types can be used as array subscripts.

Probably you meant v_sort as an array of indices, thus, you should declare it as std::vector<size_t> or std::vector<int> something like that.

UP: Also, given that you change the values of the array passed, it's not an elegant way of pass it by const reference.

To sum up, the following code compiles correctly on my machine:

std::vector<unsigned> rankSort(float *v_temp, size_t size)
{
    vector <unsigned> v_sort;
    //create a new array with increasing values from 0 to n-1
    for(unsigned i = 0; i < size; i++)
    {
        v_sort.push_back(i);
    }
    bool swapped = false;
    do
    {
        for(unsigned i = 0; i < size; i++)
        {
            if(v_temp[v_sort[i]] > v_temp[v_sort[i+1]]) //error line
            {
                unsigned temp = v_sort[i];
                v_sort[i] = v_sort[i+1];
                v_sort[i+1] = temp;
                swapped = true;
            }
        }
    }
    while(swapped);
    return v_sort;
}

std::vector<unsigned> rankSort(std::vector<float> &v_temp)
{
    return rankSort(&v_temp[0], v_temp.size());
}
0
drbombe On

Here is my codes using STL to achieve this in a concise way to get the rank.

template <typename T>
vector<size_t> calRank(const vector<T> & var) {
    vector<size_t> result(var.size(),0);
    //sorted index
    vector<size_t> indx(var.size());
    iota(indx.begin(),indx.end(),0);
    sort(indx.begin(),indx.end(),[&var](int i1, int i2){return var[i1]<var[i2];});
    //return ranking
    for(size_t iter=0;iter<var.size();++iter){
        result[indx[iter]]=iter+1;
    }
    return result;
}
4
Alexander Anikin On
//Rank the values in a vector
std::vector<size_t> rankSort(const std::vector<float> &v_temp)
{
    vector <size_t> v_sort;
    //create a new array with increasing values from 0 to size-1
    for(size_t i = 0; i < v_temp.size(); i++)
        v_sort.push_back(i);

    bool swapped = false;
    do
    {
        swapped = false; //it's important to reset swapped
        for(size_t i = 0; i < v_temp.size()-1; i++) // size-2 should be the last, since it is compared to next element (size-1)
            if(v_temp[v_sort[i]] > v_temp[v_sort[i+1]]) 
            {
                size_t temp = v_sort[i]; // we swap indexing array elements, not original array elements
                v_sort[i] = v_sort[i+1];
                v_sort[i+1] = temp;
                swapped = true;
            }
    }
    while(swapped);

    return v_sort;
}