In TFLite's GPU delegate, I tried to replace clEnqueueWriteBuffer and clEnqueueReadBuffer with clEnqueueMapBuffer and clEnqueueUnmapMemObject, i.e. to pass data by mapping the buffer instead of copying it. With clEnqueueWriteBuffer and clEnqueueReadBuffer the image is recognized correctly, but with clEnqueueMapBuffer and clEnqueueUnmapMemObject it is not. Why is that? Below is the original code followed by my modified version. What is wrong with it?
// Transfers tensor data between host memory and an OpenCL object.
//
// If `input_obj` holds CpuMemory, its bytes are uploaded into `output_obj`,
// which must be an OpenClTexture or an OpenClBuffer. Otherwise, if
// `output_obj` holds CpuMemory, `input_obj` (texture or buffer) is downloaded
// into it. Every other combination yields InternalError("Unexpected object").
absl::Status Convert(const TensorObject& input_obj,
                     const TensorObject& output_obj) override {
  const auto* host_src = absl::get_if<CpuMemory>(&input_obj);
  if (host_src != nullptr) {
    // Host -> device upload.
    if (const auto* dst_texture = absl::get_if<OpenClTexture>(&output_obj)) {
      const int3 extent(region_[0], region_[1], region_[2]);
      return queue_->EnqueueWriteImage(dst_texture->memobj, extent,
                                       host_src->data, async_);
    }
    // Trace marker: only reached when the destination is not a texture.
    std::cout << "write" << std::endl;
    if (const auto* dst_buffer = absl::get_if<OpenClBuffer>(&output_obj)) {
      return queue_->EnqueueWriteBuffer(dst_buffer->memobj,
                                        host_src->size_bytes, host_src->data,
                                        async_);
    }
    return absl::InternalError("Unexpected object");
  }

  const auto* host_dst = absl::get_if<CpuMemory>(&output_obj);
  if (host_dst == nullptr) {
    // Neither side is host memory; nothing this converter can do.
    return absl::InternalError("Unexpected object");
  }

  // Device -> host download.
  std::cout << "read" << std::endl;
  if (const auto* src_texture = absl::get_if<OpenClTexture>(&input_obj)) {
    const int3 extent(region_[0], region_[1], region_[2]);
    return queue_->EnqueueReadImage(src_texture->memobj, extent,
                                    host_dst->data, async_);
  }
  if (const auto* src_buffer = absl::get_if<OpenClBuffer>(&input_obj)) {
    return queue_->EnqueueReadBuffer(src_buffer->memobj, host_dst->size_bytes,
                                     host_dst->data, async_);
  }
  return absl::InternalError("Unexpected object");
}
// Uploads `size_in_bytes` bytes from host memory `data` into the OpenCL buffer
// `memory`, starting at offset 0.
//
// When `async` is false the call blocks until the copy has completed. When
// `async` is true the write is only enqueued, so the caller must keep `data`
// valid and unmodified until the command has finished on the queue.
//
// Returns UnknownError carrying the OpenCL error string if the enqueue fails.
absl::Status CLCommandQueue::EnqueueWriteBuffer(cl_mem memory,
                                                size_t size_in_bytes,
                                                const void* data, bool async) {
  // Honor the caller's `async` request instead of always blocking.
  const cl_bool blocking = async ? CL_FALSE : CL_TRUE;
  // The payload is deliberately not dumped here: reinterpreting `data` as a
  // double would read 8 bytes regardless of `size_in_bytes` and of the real
  // element type.
  const cl_int error_code =
      clEnqueueWriteBuffer(queue_, memory, blocking, /*offset=*/0,
                           size_in_bytes, data, 0, nullptr, nullptr);
  if (error_code != CL_SUCCESS) {
    return absl::UnknownError(
        absl::StrCat("Failed to upload data to GPU (clEnqueueWriteBuffer) - ",
                     CLErrorCodeToString(error_code)));
  }
  return absl::OkStatus();
}
// Downloads `size_in_bytes` bytes from the OpenCL buffer `memory` (starting at
// offset 0) into host memory `data`.
//
// When `async` is false the call blocks until `data` has been filled. When
// `async` is true the read is only enqueued, so `data` must not be consumed
// until the command has finished on the queue.
//
// Returns UnknownError carrying the OpenCL error string if the enqueue fails.
absl::Status CLCommandQueue::EnqueueReadBuffer(cl_mem memory,
                                               size_t size_in_bytes, void* data,
                                               bool async) {
  // Honor the caller's `async` request instead of always blocking.
  const cl_bool blocking = async ? CL_FALSE : CL_TRUE;
  // `data` is a destination buffer: it is not inspected before the read
  // (its contents are not yet defined) nor right after it (a non-blocking
  // read may still be in flight).
  const cl_int error_code =
      clEnqueueReadBuffer(queue_, memory, blocking, /*offset=*/0,
                          size_in_bytes, data, 0, nullptr, nullptr);
  std::cout << "size_in_bytes:" << size_in_bytes << std::endl;
  if (error_code != CL_SUCCESS) {
    return absl::UnknownError(
        absl::StrCat("Failed to read data from GPU (clEnqueueReadBuffer) - ",
                     CLErrorCodeToString(error_code)));
  }
  return absl::OkStatus();
}
// Transfers tensor data between host memory and an OpenCL object, using
// buffer mapping for OpenClBuffer objects.
//
// If `input_obj` holds CpuMemory, its bytes are uploaded into `output_obj`;
// otherwise, if `output_obj` holds CpuMemory, `input_obj` is downloaded into
// it. OpenClBuffer objects go through EnqueueMapBuffer / EnqueueUnMapBuffer.
// OpenClTexture objects are still handled with the image copy calls, so that
// switching buffers to mapping does not make texture-backed tensors fail with
// "Unexpected object".
//
// Returns InternalError("Unexpected object") for any other combination.
absl::Status Convert(const TensorObject& input_obj,
                     const TensorObject& output_obj) override {
  auto cpu_input = absl::get_if<CpuMemory>(&input_obj);
  auto cpu_output = absl::get_if<CpuMemory>(&output_obj);
  if (cpu_input) {
    // Host -> device.
    auto texture_output = absl::get_if<OpenClTexture>(&output_obj);
    if (texture_output) {
      return queue_->EnqueueWriteImage(
          texture_output->memobj, int3(region_[0], region_[1], region_[2]),
          cpu_input->data, async_);
    }
    auto buffer_output = absl::get_if<OpenClBuffer>(&output_obj);
    if (buffer_output) {
      std::cout << "map1" << std::endl;
      return queue_->EnqueueMapBuffer(buffer_output->memobj,
                                      cpu_input->size_bytes, cpu_input->data);
    }
  } else if (cpu_output) {
    // Device -> host.
    auto texture_input = absl::get_if<OpenClTexture>(&input_obj);
    if (texture_input) {
      return queue_->EnqueueReadImage(
          texture_input->memobj, int3(region_[0], region_[1], region_[2]),
          cpu_output->data, async_);
    }
    auto buffer_input = absl::get_if<OpenClBuffer>(&input_obj);
    if (buffer_input) {
      std::cout << "unmap1" << std::endl;
      return queue_->EnqueueUnMapBuffer(buffer_input->memobj,
                                        cpu_output->size_bytes,
                                        cpu_output->data);
    }
  }
  return absl::InternalError("Unexpected object");
}
// Uploads `size_in_bytes` bytes from host memory `data` into the OpenCL buffer
// `memory` by mapping the buffer for writing, copying into the mapped region
// and unmapping it again.
//
// The call is synchronous: the map is blocking and the function waits for the
// unmap command to finish before returning.
//
// Returns InternalError if mapping, unmapping or waiting for the unmap fails.
absl::Status CLCommandQueue::EnqueueMapBuffer(cl_mem memory,
                                              size_t size_in_bytes,
                                              const void* data) {
  std::cout << "map" << std::endl;
  // The payload is deliberately not dumped here: reinterpreting `data` as a
  // double would read 8 bytes regardless of `size_in_bytes`.

  // Blocking map, so the returned pointer is usable as soon as the call
  // returns and no map event is needed. The error code is captured so that a
  // failed map is never mistaken for a usable pointer.
  cl_int err = CL_SUCCESS;
  void* mapped_ptr =
      clEnqueueMapBuffer(queue_, memory, CL_TRUE, CL_MAP_WRITE, /*offset=*/0,
                         size_in_bytes, 0, nullptr, nullptr, &err);
  if (mapped_ptr == nullptr || err != CL_SUCCESS) {
    return absl::InternalError("Failed to map output buffer");
  }

  // Copy the host data into the mapped region.
  memcpy(mapped_ptr, data, size_in_bytes);

  // Release the mapping with clEnqueueUnmapMemObject.
  cl_event unmap_event = nullptr;
  err = clEnqueueUnmapMemObject(queue_, memory, mapped_ptr, 0, nullptr,
                                &unmap_event);
  if (err != CL_SUCCESS) {
    return absl::InternalError("Failed to unmap output buffer");
  }

  // Wait for the unmap to complete, then release the event on every path so
  // it is not leaked when the wait fails.
  err = clWaitForEvents(1, &unmap_event);
  clReleaseEvent(unmap_event);
  if (err != CL_SUCCESS) {
    return absl::InternalError("Failed to wait for unmap event");
  }
  return absl::OkStatus();
}
// Downloads `size_in_bytes` bytes from the OpenCL buffer `memory` into host
// memory `data` by mapping the buffer for reading, copying out of the mapped
// region and unmapping it again.
//
// The call is synchronous: the map is blocking and the function waits for the
// unmap command to finish before returning.
//
// Returns InternalError if mapping, unmapping or waiting for the unmap fails.
absl::Status CLCommandQueue::EnqueueUnMapBuffer(cl_mem memory,
                                                size_t size_in_bytes,
                                                void* data) {
  std::cout << "unmap" << std::endl;
  // `data` is a destination buffer, so it is not inspected before it has been
  // filled.

  // Blocking map, so the returned pointer is usable as soon as the call
  // returns and no map event is needed. The error code is captured so that a
  // failed map is never mistaken for a usable pointer.
  cl_int err = CL_SUCCESS;
  void* mapped_ptr =
      clEnqueueMapBuffer(queue_, memory, CL_TRUE, CL_MAP_READ, /*offset=*/0,
                         size_in_bytes, 0, nullptr, nullptr, &err);
  if (mapped_ptr == nullptr || err != CL_SUCCESS) {
    return absl::InternalError("Failed to map input buffer");
  }

  // Copy the device data out of the mapped region.
  memcpy(data, mapped_ptr, size_in_bytes);

  // Release the mapping with clEnqueueUnmapMemObject.
  cl_event unmap_event = nullptr;
  err = clEnqueueUnmapMemObject(queue_, memory, mapped_ptr, 0, nullptr,
                                &unmap_event);
  if (err != CL_SUCCESS) {
    return absl::InternalError("Failed to unmap input buffer");
  }

  // Wait for the unmap to complete, then release the event on every path so
  // it is not leaked when the wait fails.
  err = clWaitForEvents(1, &unmap_event);
  clReleaseEvent(unmap_event);
  std::cout << "size_in_bytes:" << size_in_bytes << std::endl;
  if (err != CL_SUCCESS) {
    return absl::InternalError("Failed to wait for unmap event");
  }
  return absl::OkStatus();
}
I would like to pass data through TFLite's GPU delegate by mapping buffers with clEnqueueMapBuffer and still have images recognized correctly.