Parallel Reduction with SYCL

96 views Asked by At

Hi, I'm trying to perform a parallel reduction with SYCL, but after every calculation it seems the result is never copied back from the device to the host. Attached is a snippet of my code:

int ddot (const int n, const double * const x, const double * const y, 
      double * const result, double & time_allreduce)
{   
    // Since pointer is being passed by value, i modified the value at that memory address directly
    // Initializing it to zero
    
    double value = 0.0;
    double * value_ptr = &value;
    


     // Create a queue for the chosen device -- GPU
    queue Q{};
    
    buffer Result(value_ptr, range<1> (1));
    buffer X(x, range<1> (n-1));
    buffer Y(y, range<1> (n-1)); 
        
    Q.submit([&](handler &h){

    accessor sum{Result, h};
    accessor xAcc{X, h};
    accessor yAcc{Y, h};
        
        h.parallel_for(
            // (Total work items across work groups, work item in each group
            nd_range<1>{range<1>(n), range<1>(n)}, 
            reduction(sum, plus<>()), 
            [=](nd_item<1> idx, auto& sum) {
            int i = idx.get_global_id(0);
            
            sum +=  xAcc[i] * yAcc[i];
            
            });
        });    
        Q.wait();
            
        return(0);  

I was expecting a non-zero result, given randomised x and y vectors, but I always get zero.

0

There are 0 answers