For certain sets of float input values, cudnnSoftmaxForward produces NaN outputs when CUDNN_SOFTMAX_FAST is used instead of CUDNN_SOFTMAX_ACCURATE. Does anyone know why this happens? Is it a bug in the library?
#include <cuda_runtime.h>
#include <cudnn.h>
#include <stdio.h>

int main()
{
    cudnnHandle_t lib;
    cudnnCreate(&lib);

    // Ten input values, treated as one instance with ten channels
    int count = 10;
    size_t size = count * sizeof(float);
    float examples[] = {
        95.094505f,
        -600.288879f,
        85.621284f,
        72.220154f,
        70.099487f,
        43.734470f,
        69.538422f,
        69.705490f,
        20.752966f,
        81.020088f
    };

    // Copy the inputs to device memory
    float* cexamples;
    cudaMalloc(&cexamples, size);
    cudaMemcpy(cexamples, examples, size, cudaMemcpyKind::cudaMemcpyHostToDevice);

    // Describe the data as a 1 x count x 1 x 1 NCHW float tensor
    cudnnTensorDescriptor_t tExamples;
    cudnnCreateTensorDescriptor(&tExamples);
    cudnnSetTensor4dDescriptor(tExamples, cudnnTensorFormat_t::CUDNN_TENSOR_NCHW, cudnnDataType_t::CUDNN_DATA_FLOAT, 1, count, 1, 1);

    // Softmax in place over the instance: y = 1 * softmax(x) + 0 * y
    float one = 1;
    float zero = 0;
    cudnnSoftmaxForward(lib, cudnnSoftmaxAlgorithm_t::CUDNN_SOFTMAX_FAST, cudnnSoftmaxMode_t::CUDNN_SOFTMAX_MODE_INSTANCE, &one, tExamples, cexamples, &zero, tExamples, cexamples);

    // Copy the results back to the host and print them
    cudaMemcpy(examples, cexamples, size, cudaMemcpyKind::cudaMemcpyDeviceToHost);
    for (int i = 0; i < count; i++)
    {
        printf("\n");
        printf("%f ", examples[i]);
    }

    cudnnDestroyTensorDescriptor(tExamples);
    cudaFree(cexamples);
    cudnnDestroy(lib);
    return 0;
}
Here are the results using CUDNN_SOFTMAX_FAST:
Here are the results using CUDNN_SOFTMAX_ACCURATE:
I guess that your problem is caused by overflow, i.e. somewhere in the process you have a value that becomes too big to be represented as a float. CUDNN_SOFTMAX_FAST just runs without checking whether an overflow occurred. For example, exp(95.094505) is roughly 2e41, which is far beyond FLT_MAX (about 3.4e38), so naively exponentiating your largest input already overflows. CUDNN_SOFTMAX_ACCURATE, on the other hand, avoids this (by subtracting the maximum input from every value before exponentiating). cuDNN returns NaN on overflow (note that 'standard' C would probably not behave like this).
What I would suggest: stick with CUDNN_SOFTMAX_ACCURATE.
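To make the difference concrete, here is a minimal CPU sketch of the two approaches (this is only an illustration of the idea, not the actual cuDNN kernels). In single precision, expf() overflows to +inf for any argument above roughly 88.7, and inf divided by inf yields NaN; subtracting the maximum first keeps every exponent at or below exp(0) = 1, so nothing can overflow.

#include <math.h>
#include <stdio.h>

/* Naive softmax: exp(x[i]) / sum(exp(x[j])). expf() of your largest
   input (95.09...) is already +inf, and inf / inf comes out as NaN. */
void softmax_naive(const float* x, float* y, int n)
{
    float sum = 0.0f;
    for (int i = 0; i < n; i++) sum += expf(x[i]);
    for (int i = 0; i < n; i++) y[i] = expf(x[i]) / sum;
}

/* Max-subtracted softmax: subtract the maximum first, so the largest
   exponent is exp(0) = 1 and the sum can never overflow. */
void softmax_max_subtracted(const float* x, float* y, int n)
{
    float m = x[0];
    for (int i = 1; i < n; i++) m = fmaxf(m, x[i]);
    float sum = 0.0f;
    for (int i = 0; i < n; i++) sum += expf(x[i] - m);
    for (int i = 0; i < n; i++) y[i] = expf(x[i] - m) / sum;
}

int main()
{
    float x[] = { 95.094505f, -600.288879f, 85.621284f, 72.220154f, 70.099487f,
                  43.734470f, 69.538422f, 69.705490f, 20.752966f, 81.020088f };
    float a[10], b[10];
    softmax_naive(x, a, 10);
    softmax_max_subtracted(x, b, 10);
    for (int i = 0; i < 10; i++)
        printf("naive: %f   max-subtracted: %f\n", a[i], b[i]);
    return 0;
}

With your inputs the naive version prints NaN for the largest entry (and zeros elsewhere, since every finite numerator is divided by an infinite sum), while the max-subtracted version prints a proper probability distribution dominated by the 95.09 entry.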
Hope this helps
pltrdy