Using pointers in a SYCL class

111 views Asked by At

Is it possible to copy a class that contains pointers to its own member variables using SYCL and offload it to a graphics card? Basically, I am trying to reference members to avoid unnecessary memory usage. I managed to get my example to run locally on the host, but when offloading to a graphics card the following errors occurred:

PI CUDA ERROR:
        Value:           700
        Name:            CUDA_ERROR_ILLEGAL_ADDRESS
        Description:     an illegal memory access was encountered
        Function:        operator()
        Source Location: /root/intel-llvm-mirror/sycl/plugins/cuda/pi_cuda.cpp:2612

SYCL exception: Native API failed. Native API returns: -999 (Unknown PI error) -999 (Unknown PI error)

PI CUDA ERROR:
        Value:           700
        Name:            CUDA_ERROR_ILLEGAL_ADDRESS
        Description:     an illegal memory access was encountered
        Function:        wait
        Source Location: /root/intel-llvm-mirror/sycl/plugins/cuda/pi_cuda.cpp:653


PI CUDA ERROR:
        Value:           700
        Name:            CUDA_ERROR_ILLEGAL_ADDRESS
        Description:     an illegal memory access was encountered
        Function:        wait
        Source Location: /root/intel-llvm-mirror/sycl/plugins/cuda/pi_cuda.cpp:653


PI CUDA ERROR:
        Value:           700
        Name:            CUDA_ERROR_ILLEGAL_ADDRESS
        Description:     an illegal memory access was encountered
        Function:        get_next_transfer_stream
        Source Location: /root/intel-llvm-mirror/sycl/plugins/cuda/pi_cuda.cpp:513

I understand that this error is related to copying a memory pointer to another device, but given that I've provided an explicit copy constructor for the class, I would expect the copy to be a deep copy. Does SYCL fundamentally lack support for copying an object that contains pointers to a graphics card? I've attached my code below.

#include <array>
#include <iostream>
#include <sycl/sycl.hpp>
#include <vector>

using namespace sycl; // (optional) avoids the need for "sycl::" before SYCL names

/// Plain value holder used as the element type of the arrays in MyClass.
///
/// The copy constructor is defaulted instead of hand-written: it performs the
/// same memberwise copy, but keeps the type trivially copyable, which is the
/// property SYCL relies on when it moves objects to a device.
struct my1 {
    int value = 1;  // Used when no explicit value is supplied.

    // Default constructor: leaves `value` at its in-class default.
    my1() = default;

    // Copy constructor: memberwise copy of `value`.
    my1(const my1& other) = default;

    // Converting constructor. Deliberately not `explicit`: main() brace-initialises
    // std::array<my1, N> directly from a list of ints.
    my1(int v) : value(v) {}
};

/// Pair of non-owning pointers to two my1 objects.
///
/// Both pointers start out null so a default-constructed my2 never carries
/// indeterminate addresses; whoever creates one is expected to set them.
struct my2 {
    struct my1* a = nullptr;
    struct my1* b = nullptr;
};

template <std::size_t N>
class MyClass {
public:
    MyClass(std::array<my1, N> a, std::array<my1, N> b) {
        for (std::size_t i = 0; i < N; i++) {
            _a[i].value = a[i].value;
            _b[i].value = b[i].value;
            _c[i].a = &_a[i];
            _c[i].b = &_b[i];
        }
    }

    float addValue(int i) const {
        if (i >= N || i >= N)
            // throw std::runtime_error("Index out of bounds");
            return 0;
        return (_c[i].a)->value + (_c[i].b)->value;
    }

    void modifyAValue(int i, float a) {
        if (i >= N)
            return;
        _a[i].value = a;
    }

    void modifyBValue(int i, float b) {
        if (i >= N)
            return;
        _b[i].value = b;
    }

    MyClass(const MyClass& other) {
        _c = std::array<my2, N>();
        for (std::size_t i = 0; i < N; i++) {
            _a[i].value = other._a[i].value;
            _b[i].value = other._b[i].value;
            _c[i].a = &_a[i];
            _c[i].b = &_b[i];
        }
    }

private:
    std::array<my1, N> _a;
    std::array<my1, N> _b;
    std::array<my2, N> _c;
};

// Opts every MyClass<N> into SYCL's device-copyable trait by deriving the
// specialization from std::true_type.
// NOTE(review): this is a promise made by the author; nothing here verifies it.
// MyClass keeps pointers to its own members in _c, so confirm that a copy
// performed without running the copy constructor is really acceptable.
template <std::size_t N>
struct sycl::is_device_copyable<MyClass<N>> : std::true_type {};

/// Demo driver: builds a MyClass instance on the host, runs a kernel that calls
/// addValue() once per entry of `input`, then prints every result and their sum.
///
/// Returns 0 on success, or 1 if a sycl::exception or std::runtime_error is caught.
int main() {
    // Create a SYCL queue
    sycl::queue myQueue;

    // Create an instance of MyClass
    constexpr std::size_t kCount = 10;
    std::array<my1, kCount> a{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
    std::array<my1, kCount> b{10, 9, 8, 7, 6, 5, 4, 3, 2, 1};
    MyClass<kCount> myObject(a, b);

    std::vector<int> input = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
    std::vector<float> result(input.size());

    try {
        // The buffers are deliberately scoped to this block. While a buffer wraps
        // host memory, that memory is only guaranteed to be up to date once the
        // buffer has been destroyed, so `result` must not be read before the
        // closing brace below.
        sycl::buffer<float, 1> resultBuffer(result.data(), sycl::range<1>(result.size()));
        sycl::buffer<int, 1> inputBuffer(input.data(), sycl::range<1>(input.size()));
        sycl::buffer<MyClass<kCount>> buffer(&myObject, 1);

        // Submit a SYCL kernel
        myQueue.submit([&](sycl::handler& cgh) {
            auto resultAcc = resultBuffer.get_access<sycl::access::mode::write>(cgh);
            auto inputAcc = inputBuffer.get_access<sycl::access::mode::read>(cgh);
            auto myObjectAcc = buffer.get_access<sycl::access::mode::read>(cgh);

            cgh.parallel_for<class MyKernel>(
                sycl::range<1>(input.size()), [=](sycl::id<1> idx) {
                    // Call a member function on the instance of MyClass
                    resultAcc[idx] = myObjectAcc[0].addValue(inputAcc[idx]);
                });
        });

        // Wait for the kernel to complete
        myQueue.wait();
    } catch (const sycl::exception& e) {
        std::cerr << "SYCL exception: " << e.what() << std::endl;
        return 1;
    } catch (const std::runtime_error& e) {
        std::cerr << "Runtime error: " << e.what() << std::endl;
        return 1;
    }

    // Print the result (accumulated sum)
    float sum = 0.0f;
    for (const float val : result) {
        std::cout << val << '\n';
        sum += val;
    }
    std::cout << "Result: " << sum << std::endl;

    return 0;
}

In this example, I have a class that contains three arrays. Two of these arrays store information (my1), and the third one stores references to elements from the two information arrays (my2, which holds two pointers to my1 objects).

// Values for the first operand of each pair.
std::array<my1, N> _a;
// Values for the second operand of each pair.
std::array<my1, N> _b;
// Per-index pointer pairs: the constructors set _c[i].a = &_a[i] and _c[i].b = &_b[i].
std::array<my2, N> _c;

As I understand it, errors occur because the kernel is attempting to access host memory. This is unexpected, as the constructor should have reassigned the pointer value after the copy operations.

// Copy constructor (quoted fragment; its closing brace is not part of the excerpt).
// Copies the stored values element by element from `other`, then points each
// _c entry at this object's own _a/_b elements instead of at other's.
MyClass(const MyClass& other) {
    // Reset _c to value-initialized entries before re-pointing them below.
    _c = std::array<my2, N>();
    for (std::size_t i = 0; i < N; i++) 
    {
         _a[i].value = other._a[i].value;
         _b[i].value = other._b[i].value;
         // Take the addresses of this object's members, not other's.
         _c[i].a = &_a[i];
         _c[i].b = &_b[i];
    }

I wouldn't mind using USM to allocate device memory, but I would prefer to use pointers or references to access the member variables inside the class. This approach helps avoid unnecessary memory allocation by referencing device memory directly (in short, I'd like to use pointers or references as member variables to access device memory).

1

There is 1 answer

0
peizhao qiu On BEST ANSWER

After some research, I found that this cannot be done in SYCL without using USM or buffers. Below is example code for this question that uses USM.

/// USM-backed variant of MyClass.
///
/// Every my1/my2 element lives in its own sycl::malloc_shared allocation; `_a`,
/// `_b` and `_c` are shared-allocated arrays of pointers to those elements, and
/// each `_c[i]` points at the same my1 objects as `_a[i]` and `_b[i]`.
///
/// The object owns all of these allocations and releases them in its destructor.
/// Copy construction makes a deep copy; copy assignment is disabled.
template <std::size_t N>
class MyClass {
public:
    /// Allocates the storage on `queue` and fills it from `aValue` / `bValue`.
    /// `queue` is held by reference and must outlive this object.
    MyClass(const std::array<int, N>& aValue, const std::array<int, N>& bValue, sycl::queue& queue)
        : _queue(queue) {
        allocate();
        for (std::size_t i = 0; i < N; i++) {
            _a[i]->value = aValue[i];
            _b[i]->value = bValue[i];
        }
    }

    /// Deep copy: binds to the same queue as `other`, allocates fresh storage and
    /// copies the values. (Previously `_queue` was never bound and `_a`/`_b` were
    /// dereferenced without having been allocated.)
    MyClass(const MyClass& other) : _queue(other._queue) {
        allocate();
        for (std::size_t i = 0; i < N; i++) {
            _a[i]->value = other._a[i]->value;
            _b[i]->value = other._b[i]->value;
        }
    }

    // The reference member already rules out assignment; state it explicitly so
    // the ownership of the allocations is never silently shared.
    MyClass& operator=(const MyClass&) = delete;

    /// Returns the sum of the two values referenced by `_c[i]`, or 0 when `i`
    /// is outside [0, N).
    float addValue(int i) const {
        if (!inRange(i))
            // throw std::runtime_error("Index out of bounds");
            return 0;
        return static_cast<float>((_c[i]->a)->value + (_c[i]->b)->value);
    }

    /// Overwrites the i-th `a` value with `a` (truncated to int); ignored when
    /// `i` is out of range.
    void modifyAValue(int i, float a) {
        if (!inRange(i))
            return;
        _a[i]->value = static_cast<int>(a);
    }

    /// Overwrites the i-th `b` value with `b` (truncated to int); ignored when
    /// `i` is out of range.
    void modifyBValue(int i, float b) {
        if (!inRange(i))
            return;
        _b[i]->value = static_cast<int>(b);
    }

    /// Releases every element allocation, then the three pointer arrays.
    ~MyClass() {
        for (std::size_t i = 0; i < N; i++) {
            sycl::free(_a[i], _queue);
            sycl::free(_b[i], _queue);
            sycl::free(_c[i], _queue);
        }
        sycl::free(_a, _queue);
        sycl::free(_b, _queue);
        sycl::free(_c, _queue);
    }

private:
    // True when `i` is a valid index; the explicit sign check avoids relying on
    // the implicit int -> size_t conversion of negative values.
    static constexpr bool inRange(int i) {
        return i >= 0 && static_cast<std::size_t>(i) < N;
    }

    // Allocates the three pointer arrays and one element per slot, and links
    // each `_c[i]` to `_a[i]` / `_b[i]`. The my1 values are left for the caller
    // to fill in.
    void allocate() {
        _a = sycl::malloc_shared<my1*>(N, _queue);
        _b = sycl::malloc_shared<my1*>(N, _queue);
        _c = sycl::malloc_shared<my2*>(N, _queue);

        for (std::size_t i = 0; i < N; i++) {
            _a[i] = sycl::malloc_shared<my1>(1, _queue);
            _b[i] = sycl::malloc_shared<my1>(1, _queue);
            _c[i] = sycl::malloc_shared<my2>(1, _queue);
            _c[i]->a = _a[i];
            _c[i]->b = _b[i];
        }
    }

    my1** _a = nullptr;
    my1** _b = nullptr;
    my2** _c = nullptr;
    sycl::queue& _queue;
};