Barriers in OpenCL

288 views Asked by At

Each time I run this kernel, I get some different value. I want to sum up the res array after second loop computation (something like np.sum(res, axis = 0)). When I do np.sum outside the kernel I get the same value but inside the kernel it is not possible. Probably barrier is unable to fence the memory properly. Any help would be much appreciated.

kernelsource = """
__kernel void forceFinder(
const int N,
const int dim,
const float sigma,
__global float* datacl,
__constant float* poscl,
__global float* res)
{
    int i = get_global_id(0); // Global id
    float f_sum ;
    int k;
    float sigma2 = sigma * sigma;

    f_sum = 0;

    for (k = 0; k < dim; k++)
    {
        f_sum += pown((poscl[k] - datacl[i * dim + k]), 2);
    }
    for (k = 0; k < dim; k++)
    { 
        res[i * dim + k] =  (datacl[i * dim + k] - poscl[k]) * exp(-f_sum/sigma2)/sigma2;
    }
    barrier(CLK_GLOBAL_MEM_FENCE | CLK_LOCAL_MEM_FENCE);
    for(k=0; k<dim; k++)
    {
        res[k] += res[i*dim+k];
    }
 }
 """
0

There are 0 answers