I'm trying to use the CUDA Driver API to copy data into a 2D array, in the program listed below, but am getting an "invalid value" error when I pass my copy parameters. What value in them is wrong?
#include <cuda.h>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <limits>
#include <numeric>
#include <string>
// Reports a fatal error and terminates the program.
//
// Writes `message`, followed by a newline, to standard error and then exits
// the process with EXIT_FAILURE. This function never returns to its caller.
[[noreturn]] void die_(const std::string& message)
{
    std::cerr << message;
    std::cerr << '\n';
    exit(EXIT_FAILURE);
}
// Terminates the program (via die_) if `status` is not CUDA_SUCCESS.
//
// Parameters:
//   status        - result code returned by a CUDA driver API call.
//   extra_message - caller-supplied context; printed before the driver's own
//                   description of the error, separated by ": ".
//
// Returns normally only when `status == CUDA_SUCCESS`.
void die_if_error(CUresult status, const std::string& extra_message) {
    if (status == CUDA_SUCCESS) {
        return;
    }
    // cuGetErrorString() can itself fail for a code it does not recognize, in
    // which case it leaves the output pointer null. Appending a null
    // `const char*` to a std::string is undefined behaviour, so fall back to a
    // fixed description instead.
    const char* error_string = nullptr;
    const CUresult lookup_status = cuGetErrorString(status, &error_string);
    if (lookup_status != CUDA_SUCCESS || error_string == nullptr) {
        error_string = "unrecognized CUDA error code";
    }
    die_(extra_message + ": " + error_string);
}
// Reinterprets a driver-API address value (CUdeviceptr) as a pointer to T.
// T defaults to void, yielding an untyped pointer. This is a plain
// reinterpret_cast: the address is not validated in any way.
template <typename T = void>
T* as_pointer(CUdeviceptr address) noexcept
{
    T* const typed_ptr = reinterpret_cast<T*>(address);
    return typed_ptr;
}
// Inverse of as_pointer(): reinterprets a raw pointer as the CUdeviceptr
// address value expected by driver API calls. Plain reinterpret_cast; the
// pointer is not validated in any way.
CUdeviceptr as_address(void* ptr) noexcept
{
    const CUdeviceptr address = reinterpret_cast<CUdeviceptr>(ptr);
    return address;
}
int main() {
CUresult status;
int device_id = 0;
status = cuInit(0);
die_if_error(status, "Initializing the CUDA driver");
CUcontext pctx;
status = cuDevicePrimaryCtxRetain(&pctx, device_id);
die_if_error(status, "Obtaining the primary device context");
cuCtxSetCurrent(pctx);
struct { unsigned width, height; } dims = { 3, 3 };
std::cout << "Creating a " << dims.width << " x " << dims.height << " CUDA array" << std::endl;
CUarray arr_handle;
{
CUDA_ARRAY_DESCRIPTOR array_descriptor;
array_descriptor.Width = dims.width;
array_descriptor.Height = dims.height;
array_descriptor.Format = CU_AD_FORMAT_FLOAT;
array_descriptor.NumChannels = 1;
status = cuArrayCreate(&arr_handle, &array_descriptor);
die_if_error(status, "Failed creating a 2D CUDA array");
}
auto arr_size = dims.width * dims.height;
CUdeviceptr dptr;
status = cuMemAllocManaged(&dptr, arr_size, CU_MEM_ATTACH_GLOBAL);
die_if_error(status, "Failed allocating managed memory");
float* ptr_in = as_pointer<float>(dptr);
std::iota(ptr_in, ptr_in + arr_size, 0);
CUmemorytype ptr_in_memory_type;
status = cuPointerGetAttribute(&ptr_in_memory_type, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, as_address(ptr_in));
if (not (ptr_in_memory_type == CU_MEMORYTYPE_UNIFIED or ptr_in_memory_type == CU_MEMORYTYPE_DEVICE)) {
die_("Unexpected memory type for ptr_in");
}
std::cout << "The memory type of ptr_in is " << (ptr_in_memory_type == CU_MEMORYTYPE_DEVICE ? "DEVICE" : "UNIFIED") << std::endl;
std::cout << "Will copy from ptr_in into a 2D CUDA array" << std::endl;
CUDA_MEMCPY2D cp;
{
// Source
cp.srcXInBytes = 0; cp.srcY = 0; // No offset
cp.srcMemoryType = ptr_in_memory_type;
cp.srcDevice = as_address(ptr_in);
// no extra source pitch
cp.srcPitch = dims.width * sizeof(float);
// Destination
cp.dstXInBytes = 0; cp.dstY = 0; // No destination offset
cp.dstMemoryType = CU_MEMORYTYPE_ARRAY;
cp.dstArray = arr_handle;
cp.WidthInBytes = dims.width * sizeof(float);
cp.Height = dims.height;
}
status = cuMemcpy2D(&cp);
die_if_error(status, "cuMemcpy2D failed");
cuMemFree(as_address(ptr_in));
}
Full output of this program:
Creating a 3 x 3 CUDA array
The memory type of ptr_in is DEVICE
Will copy from ptr_in into a 2D CUDA array
cuMemcpy2D failed: invalid argument
Additional information:
- CUDA toolkit version: 11.4
- NVIDIA driver version: 470.57.02
- OS distribution: Devuan Chimaera GNU/Linux
- GPU: GeForce 1050 TI Boost (Compute Capability 6.1)
- Host architecture: amd64
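The error is here:

    status = cuMemAllocManaged(&dptr, arr_size, CU_MEM_ATTACH_GLOBAL);

The size argument should be `arr_size*sizeof(float)`, not `arr_size`.
`cuMemAllocManaged()`, like `malloc()`, takes a size argument in bytes. This size needs to be consistent with (greater than or equal to) the size of the transfer implied by your `cuMemcpy2D` call; here that is `dims.height` rows of `dims.width * sizeof(float)` bytes each.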