cuFFT R2C getting illegal memory access and out of bounds when using compute-sanitizer

56 views Asked by At

I'm involved in a large project, that involves FFT, so I'm porting it to cuFFT. I modified this code CUFFT R2C to use dynamic memory and splitting the code into functions, initialization of cuda-arrays and execution of the actual FFT.

I tried different data types for the d_idata & d_odata arrays, but nothing worked, I got the following errors:

GPUassert: an illegal memory access was encountered t734-cufft-R2C-functions.cu 53

or is out of bounds if I check it through compute-sanitizer.

Here is the code:

#include <cufft.h>
#include <stdio.h>

#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }

inline void gpuAssert(cudaError_t code, const char *file, int line, bool  abort=true)
{
   if (code != cudaSuccess) 
   {
        fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);            
        if (abort) exit(code);
   }
}

void Print2DComplex(int rows, int cols, cufftComplex *data, bool cufft_symmetry = false){

        int sym_cols = cols;
        if (cufft_symmetry) sym_cols = cols/2 + 1;
        printf("Real Part: \n");
        for (int i = 0; i < rows; i++){
          for (int j = 0; j < cols; j++)
            if (j>=sym_cols)
              printf("%f ", data[i*sym_cols+(cols-j)].x);
            else
              printf("%f ", data[i*sym_cols+j].x);
          printf("\n");}
        printf("Imag Part: \n");
        for (int i = 0; i < rows; i++){
          for (int j = 0; j < cols; j++)
            if (j>=sym_cols)
              printf("%f ", -data[i*sym_cols+(cols-j)].y); // complex (hermitian) symmetry
            else
              printf("%f ", data[i*sym_cols+j].y);
          printf("\n");}
}

void cudaarrays(cufftHandle *plan, cufftReal *d_idata, cufftComplex *h_odata, cufftComplex *d_odata, const int row, const int col, float *A)
{
        gpuErrchk(cudaMalloc((void**)&d_idata, sizeof(cufftComplex)*row*col));
        gpuErrchk(cudaMalloc((void**)&d_odata, sizeof(cufftComplex)*row*col));
        gpuErrchk(cudaMemset(d_idata, 0, sizeof(cufftComplex)*row*col));
        gpuErrchk(cudaMemset(d_odata, 0, sizeof(cufftComplex)*row*col));
        gpuErrchk(cudaMallocHost((void**)&h_odata, sizeof(cufftComplex)*row*col));
        gpuErrchk(cudaMemcpy(d_idata,A,sizeof(cufftReal)*row*col,cudaMemcpyHostToDevice));

        if ((cufftPlan2d(plan, row,col, CUFFT_R2C))!= CUFFT_SUCCESS) {printf("cufft plan error\n"); exit(-1);}
}

void exec(cufftHandle plan, cufftReal *d_idata, cufftComplex *h_odata, cufftComplex *d_odata, const int row, const int col)
{
        if ((cufftExecR2C(plan, (cufftReal*)d_idata, (cufftComplex*)d_odata))!=CUFFT_SUCCESS) {printf("cufft exec error\n"); exit(-1);}
        gpuErrchk(cudaDeviceSynchronize());
        gpuErrchk(cudaMemcpy(h_odata,d_odata,sizeof(cufftComplex)*row*col,cudaMemcpyDeviceToHost));
        gpuErrchk(cudaDeviceSynchronize());
}

int main()
{

        const int row = 4;
        const int col = 4;
/*
        double A[row][col] =
        {{ 1, 2, 3, 4},
         { 5, 6, 7, 8},
         { 9,10,11,12},
         {13,14,15,16}};
*/
        float *A = (float*)calloc(row*col , sizeof(float));
        int j=0;
        for(int i=1; i <= (row*col); i++)
           A[j++] = i;

        cufftHandle plan;
        cufftReal *d_idata=NULL;
        cufftComplex *h_odata=NULL, *d_odata=NULL;

        cudaarrays(&plan, d_idata, h_odata, d_odata, row, col, A);

        //bool symmetric_data = false;


        exec(plan, d_idata, h_odata, d_odata, row, col);
        //symmetric_data = true;

//        Print2DComplex(row,col,h_odata, symmetric_data);
        return 0;
}


I compile it as follows: nvcc t734-cufft-r2c.cu -o t734-cufft-r2c -l cufft and using an NVIDIA GeForce RTX 2060 device on

an the following drivers ** NVIDIA-SMI 470.199.02 Driver Version: 470.199.02 CUDA Version: 11.4 **

What am I missing or doing wrong?

0

There are 0 answers