Generic Cuda function - Template & Cuda - c++

840 views Asked by At

My goal is to make a generic CUDA kernel. My first step is to use templates in the function cudaMain (not yet in the kernel — that will be my second step). cudaMain is called from my C++ main() file, and the kernel is launched from cudaMain. This works fine as long as I don't use templates. As soon as I add the templates to the class and to cudaMain, I get this error: undefined reference to 'Cuda_class<int>::cudaMain(int, int, int*, int*, int*, int*, int*)'

Here's the code:

main.cpp:

#include "cuda_class.hpp"

// Create the int specialization of the templated wrapper class.
Cuda_class<int> p;
// Pass the number of trees, the size of the first tree, and the per-tree arrays plus the box.
// This call is what makes the linker look for a compiled Cuda_class<int>::cudaMain symbol.
p.cudaMain(trees.size(), trees[0].size(), treeArray_x, treeArray_y, treeArray_z, treeArray_ID, box);

cuda_class.hpp:

/**
 * Wrapper class exposing the entry point that main.cpp calls.
 *
 * T is the element type of the coordinate arrays and of the box; the ID
 * array is always int. Only the declaration of cudaMain lives here, so a
 * translation unit that includes this header cannot instantiate the member
 * on its own.
 */
template <typename T>
class Cuda_class {
public:
    /**
     * @param number_of_trees  number of trees
     * @param tree_size        number of entries per tree
     * @param treeArray_x      x values of all trees
     * @param treeArray_y      y values of all trees
     * @param treeArray_z      z values of all trees
     * @param treeArray_ID     IDs of all trees
     * @param box              box data (layout not visible here — TODO confirm)
     */
    void cudaMain(int number_of_trees,
                  int tree_size,
                  T treeArray_x[],
                  T treeArray_y[],
                  T treeArray_z[],
                  int treeArray_ID[],
                  T box[]);
};

cuda_class.cu:

#include "cuda_class.hpp"

/**
 * Kernel: every thread handles one tree.
 *
 * The tree is selected by threadIdx.x alone (blockIdx is not used), and the
 * thread's tree occupies the index range
 * [threadIdx.x * tree_size, threadIdx.x * tree_size + tree_size - 1].
 * That range is handed to traverseTree together with the arrays and the box.
 */
__global__
void insideBox(int *treeArray_x, int *treeArray_y, int *treeArray_z, int *treeArray_ID, int *box, int tree_size){
    // First index belonging to this thread's tree.
    int firstIndex = threadIdx.x * tree_size;
    // Last index (inclusive) belonging to this thread's tree.
    int lastIndex = firstIndex + tree_size - 1;

    traverseTree(treeArray_x, treeArray_y, treeArray_z, treeArray_ID, box,
                 1, firstIndex, lastIndex);
}
/**
 * Entry point: selects the device, prepares device buffers (elided in this
 * snippet) and launches the insideBox kernel.
 *
 * @param number_of_trees  number of trees
 * @param tree_size        number of entries per tree (forwarded to the kernel)
 * @param treeArray_x      x values of all trees
 * @param treeArray_y      y values of all trees
 * @param treeArray_z      z values of all trees
 * @param treeArray_ID     IDs of all trees
 * @param box              box data
 *
 * Because this member template is defined in a .cu file instead of the
 * header, other translation units cannot instantiate it themselves; see the
 * explicit instantiation below the definition.
 */
template <class T>
void Cuda_class<T>::cudaMain(int number_of_trees, int tree_size, T treeArray_x[], T treeArray_y[], T treeArray_z[], int treeArray_ID[], T box[]){

    cudaSetDevice(MYDEVICE);
    // do something allocate memory etc

    //launch kernel
    // NOTE(review): the launch is fixed at 1 block of 32 threads and the kernel
    // indexes trees by threadIdx.x, so number_of_trees does not influence the
    // launch — confirm that this is intended.
    insideBox<<<1,32>>>(d_treeArray_x, d_treeArray_y, d_treeArray_z, d_treeArray_ID, d_box, tree_size);
    //do some other stuff
}

// Explicit instantiation: forces the compiler to emit Cuda_class<int>::cudaMain
// in this translation unit so that main.cpp can link against it. Without it the
// linker reports "undefined reference to Cuda_class<int>::cudaMain(...)".
// Only int is instantiated because the insideBox kernel takes int* arguments.
template class Cuda_class<int>;
0

There are 0 answers