My goal is to write a generic CUDA kernel. As a first step, I am trying to use templates in the function cudaMain (not yet in the kernel itself - that will be my second step).
cudaMain is called from main() in my C++ source file, and the kernel is launched from cudaMain.
This works fine as long as I don't use templates. As soon as I add the template parameter to the class and to cudaMain, I get this linker error:
undefined reference to 'Cuda_class<int>::cudaMain(int, int, int*, int*, int*, int*, int*)'
Here's the code:
main.cpp:
#include "cuda_class.hpp"
// Use the class template with T = int. main.cpp only sees the *declaration* of
// cudaMain from cuda_class.hpp, so the compiler cannot instantiate the member
// here; it emits an external reference to Cuda_class<int>::cudaMain that must
// be satisfied by another object file at link time.
Cuda_class<int> p;
// Passes the number of entries in `trees`, the size of its first entry, and the
// flattened coordinate / ID / box arrays to the GPU entry point.
// NOTE(review): trees[0] presumes `trees` is non-empty - confirm at the caller.
p.cudaMain(trees.size(), trees[0].size(), treeArray_x, treeArray_y, treeArray_z, treeArray_ID, box);
cuda_class.hpp:
// Host-side wrapper whose single member, cudaMain, is the entry point that
// C++ callers use to start the GPU work.
//
// Only the declaration of cudaMain lives in this header. Because it is a member
// of a class template, every translation unit that calls it for some T needs
// either the definition to be visible or an explicit instantiation of
// Cuda_class<T> to exist in another object file.
template <typename T>
class Cuda_class {
public:
    // number_of_trees / tree_size : tree count and per-tree element count.
    // treeArray_x / _y / _z       : arrays with element type T.
    // treeArray_ID                : array of int (not dependent on T).
    // box                         : array with element type T.
    void cudaMain(int number_of_trees,
                  int tree_size,
                  T* treeArray_x,
                  T* treeArray_y,
                  T* treeArray_z,
                  int* treeArray_ID,
                  T* box);
};
cuda_class.cu:
#include "cuda_class.hpp"
// Kernel: every thread handles one tree, where tree t occupies the index range
// [t * tree_size, t * tree_size + tree_size - 1] of the flat arrays, and hands
// that range to traverseTree.
//
// Launch layout: only threadIdx.x is used to pick the tree, so the thread index
// within the block is the tree index.
// NOTE(review): there is no guard against the number of trees; the launch must
// not start more threads than there are trees, otherwise the surplus threads
// index past the data - confirm against the launch configuration.
// NOTE(review): all data pointers are int*, so this kernel is not yet generic
// over the element type.
__global__
void insideBox(int *treeArray_x, int *treeArray_y, int *treeArray_z, int *treeArray_ID, int *box, int tree_size){
    // First and last flat index belonging to this thread's tree.
    int firstIndex = threadIdx.x * tree_size;
    int lastIndex = firstIndex + tree_size - 1;

    // The literal 1 is forwarded unchanged; its meaning is defined by
    // traverseTree, which is not shown here.
    traverseTree(treeArray_x, treeArray_y, treeArray_z, treeArray_ID, box, 1, firstIndex, lastIndex);
}
// Host entry point: selects the device, prepares device memory (elided below),
// launches insideBox and then does the follow-up work (also elided).
//
// This definition is only visible inside cuda_class.cu. A member of a class
// template is compiled into an object file only for the T values that are
// instantiated in the same translation unit, so the explicit instantiation
// after this function is what makes Cuda_class<int>::cudaMain linkable from
// other files such as main.cpp.
template <class T>
void Cuda_class<T>::cudaMain(int number_of_trees, int tree_size, T treeArray_x[], T treeArray_y[], T treeArray_z[], int treeArray_ID[], T box[]){
    cudaSetDevice(MYDEVICE);
    // do something allocate memory etc
    //launch kernel
    // One block of 32 threads; insideBox maps each thread to one tree.
    // NOTE(review): the launch size is fixed and does not depend on
    // number_of_trees - confirm there are exactly 32 trees.
    insideBox<<<1,32>>>(d_treeArray_x, d_treeArray_y, d_treeArray_z, d_treeArray_ID, d_box, tree_size);
    //do some other stuff
}

// Explicit instantiation definition: forces the compiler to generate
// Cuda_class<int> (including cudaMain) in this object file. Without it nothing
// in cuda_class.cu uses Cuda_class<int>, no code is emitted, and the linker
// reports "undefined reference to Cuda_class<int>::cudaMain(...)".
// Add one such line for every other T that callers use.
// NOTE(review): insideBox currently takes int*, so other T values will
// presumably also require the kernel to become a template.
template class Cuda_class<int>;