Mandelbrot in OpenCL

630 views Asked by At

I have this Mandelbrot kernel written for an OpenCL program. As a test, I've decided to store the whole complex plane in flat one-dimensional arrays. My problem is that when I print the output, I get a list of 1s (the value the results array was initialized with) instead of the results of the kernel's work.

Where could the problem be?

    #include <iostream>

    #ifdef __APPLE__
    #include <OpenCL/opencl.h>
    #else
    #include <CL/cl.h>
    #endif

    int main(){
    using namespace std;
    int xPixel=100;
    int yPixel=100;
    float ics[xPixel];
    for(int i=0;i<xPixel;++i)
    ics[i]=-2+i*((float)4/xPixel);
  float ypsilon[yPixel];
  for(int i=0;i<yPixel;++i)
    ypsilon[i]=-2+i*((float)4/yPixel);
  int results[xPixel*yPixel];
  for(int i=0;i<xPixel*yPixel;++i)
    results[i]=1;

  cl_context context;
  cl_context_properties properties[3];
  cl_kernel kernel;
  cl_command_queue command_queue;
  cl_program program;
  cl_int err;
  cl_uint num_of_platforms=0;
  cl_platform_id platform_id;
  cl_device_id device_id;
  cl_uint num_of_devices=0;
  cl_mem memX, memY, memOutput;
  size_t global;

const char *KernelSource =
"__kernel void mandelbrot(__global float *ics, __global float *ypsilon, __global int *output){\n"\
"size_t id=get_global_id(0);\n"\
"int yPixel=100;\n"\
"for(int i=0;i<yPixel;i++){\n"\
"float x=0;\n"\
"float y=0;\n"\
"int counter=0;\n"\
"while(counter<1000){\n"\
"if(x*x+y*y>2*2){\n"\
"output[(id*yPixel)+i]=counter;\n"\
"break;\n"\
"}\n"\
"float xTemp=x*x-y*y+ics[id];\n"\
"y=2*x*y+ypsilon[i];\n"\
"x=xTemp;\n"\
"counter++;\n"\
"}\n"\
"}\n"\
"}\n";

  // retreives a list of platforms available
  if (clGetPlatformIDs(1, &platform_id, &num_of_platforms)!= CL_SUCCESS){
    cout<<"Unable to get platform_id\n"<<endl;;
    return 1;
  }

  // try to get a supported GPU device
  if (clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id,&num_of_devices) != CL_SUCCESS){
    cout<<"Unable to get device_id\n"<<endl;
    return 1;
  }

  //context properties list - nust be terminated with 0
  properties[0]=CL_CONTEXT_PLATFORM;
  properties[1]=(cl_context_properties)platform_id;
  properties[2]=0;

  //create a context with the GPU device
  context=clCreateContext(properties,1,&device_id,NULL,NULL,&err);

  //create a command queue using the context and device
  command_queue=clCreateCommandQueue(context,device_id,0,&err);

  //create a program from the kernel source code
  program=clCreateProgramWithSource(context,1,(const char**)&KernelSource,NULL,&err);

  //compile the program
  if(clBuildProgram(program,0,NULL,NULL,NULL,NULL)!=CL_SUCCESS){
    cout<<"Error building program"<<endl;
    return 1;
  }

  //specify which kernel from the program to execute
  kernel=clCreateKernel(program,"mandelbrot",&err);

  //create buffers for input and output
  memX=clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float)*xPixel,NULL,NULL);
  memY=clCreateBuffer(context,CL_MEM_READ_ONLY,sizeof(float)*yPixel,NULL,NULL);
  memOutput=clCreateBuffer(context,CL_MEM_WRITE_ONLY,sizeof(int)*(xPixel*yPixel),NULL,NULL);

  //load data into the input buffer
  clEnqueueWriteBuffer(command_queue,memX,CL_TRUE,0,sizeof(float)*xPixel,ics,0,NULL,NULL);
  clEnqueueWriteBuffer(command_queue,memY,CL_TRUE,0,sizeof(float)*yPixel,ypsilon,0,NULL,NULL);

  //set the argument list for the kernel command
  clSetKernelArg(kernel,0,sizeof(cl_mem),&memX);
  clSetKernelArg(kernel,1,sizeof(cl_mem),&memY);
  clSetKernelArg(kernel,2,sizeof(cl_mem),&memOutput);
  global=xPixel*yPixel;

  //enqueue the kernel command for execution
  clEnqueueNDRangeKernel(command_queue,kernel,1,NULL,&global,NULL,0,NULL,NULL);
  clFinish(command_queue);

  //copy the results from out of the output buffer
  clEnqueueReadBuffer(command_queue,memOutput,CL_TRUE,0,sizeof(int)*(xPixel*yPixel),results,0,NULL,NULL);

  //print output
   for(int i=0;i<xPixel;++i){
     for(int j=0;j<yPixel;++j){
       cout<<results[(i*yPixel)+j]<<" ";
     }
     cout<<endl;
   }

  //cleanup - release OpenCL resources
  clReleaseMemObject(memX);
  clReleaseMemObject(memY);
  clReleaseMemObject(memOutput);
  clReleaseProgram(program);
  clReleaseKernel(kernel);
  clReleaseCommandQueue(command_queue);
  clReleaseContext(context);
}
1

There is 1 answer

0
Dithermaster On

I'm not seeing the exact reason, but I do have a question: If you're running this on every element then what is the "i" looping over "yPixel" for? It seems like you're doing X*Y*Y work instead of X*Y work (your global size is X*Y then the kernel loops on Y again).

If you add "output[(id*yPixel)+i]=42" before the "i" loop then what does your output buffer hold? That will tell you if the problem lies in your kernel or your host code.

To help anyone else looking at this, I've reformatted the kernel code:

/*
 * Escape-time Mandelbrot kernel, one pixel per work-item.
 *
 * Must be launched as a 1-D range of (number of x samples) * yPixel
 * work-items. Work-item id maps to column id / yPixel and row id % yPixel,
 * so output[id] is the same element as output[column*yPixel + row].
 *
 * ics     - real parts of c, one per column
 * ypsilon - imaginary parts of c, one per row (yPixel entries)
 * output  - iteration count at which |z| exceeded 2, or maxIter if it never did
 */
__kernel void mandelbrot(__global float *ics, __global float *ypsilon, __global int *output)
{
  const int yPixel=100;   /* must match the host-side row count */
  const int maxIter=1000;

  size_t id=get_global_id(0);
  /* Split the flat id into column/row so ics and ypsilon are never read out of bounds. */
  size_t col=id/(size_t)yPixel;
  size_t row=id%(size_t)yPixel;

  float cx=ics[col];
  float cy=ypsilon[row];
  float x=0.0f;
  float y=0.0f;
  int counter=0;

  /* Iterate z = z*z + c until |z| > 2 or the iteration budget is spent. */
  while(counter<maxIter && x*x+y*y<=4.0f)
  {
    float xTemp=x*x-y*y+cx;
    y=2.0f*x*y+cy;
    x=xTemp;
    counter++;
  }

  /* Always store a value, so points inside the set do not leave the element unwritten. */
  output[id]=counter;
}