I want to use pcl::gpu::Octree to store an obstacle map and to retrieve the distance from a given point to its closest obstacle. I've modified an official test example as follows:
/**
 * Compares a batched nearest-neighbour query (k = 1) on pcl::gpu::Octree with
 * the same queries issued one by one to the host-side
 * pcl::octree::OctreePointCloudSearch, and prints the neighbour found by each.
 *
 * Both searches are wrapped in a pcl::ScopeTime scope. Each timed scope
 * contains only the search call(s); results are printed after the timers have
 * gone out of scope so that console I/O does not distort the measurement.
 *
 * @param argc unused
 * @param argv unused
 * @return 0 (implicit)
 */
int main(int argc, char** argv)
{
    using namespace pcl::gpu;

    // Configure the synthetic data generator.
    pcl::gpu::DataGenerator data;
    data.data_size = 871000;
    data.tests_num = 2;
    data.cube_size = 1024.f;
    data.max_radius = data.cube_size / 30.f;
    data.shared_radius = data.cube_size / 30.f;
    data.printParams();

    const float host_octree_resolution = 25.f;
    const int k = 1; // only this is supported

    // generate
    data();

    // prepare device cloud
    pcl::gpu::Octree::PointCloud cloud_device;
    cloud_device.upload(data.points);

    // prepare host cloud: one unorganised row holding a converted copy of every generated point
    pcl::PointCloud<pcl::PointXYZ>::Ptr cloud_host(new pcl::PointCloud<pcl::PointXYZ>);
    cloud_host->width = data.points.size();
    cloud_host->height = 1;
    cloud_host->points.resize(cloud_host->width * cloud_host->height);
    std::transform(data.points.cbegin(), data.points.cend(), cloud_host->begin(),
                   DataGenerator::ConvPoint<pcl::PointXYZ>());

    // gpu build
    pcl::gpu::Octree octree_device;
    octree_device.setCloud(cloud_device);
    octree_device.build();

    // build host octree
    pcl::octree::OctreePointCloudSearch<pcl::PointXYZ> octree_host(host_octree_resolution);
    octree_host.setInputCloud(cloud_host);
    octree_host.addPointsFromInputCloud();

    // upload queries
    pcl::gpu::Octree::Queries queries_device;
    queries_device.upload(data.queries);

    // prepare output buffers on host (one index list and one distance list per query)
    std::vector<pcl::Indices> result_host(data.tests_num);
    std::vector<std::vector<float> > dists_host(data.tests_num);
    for (std::size_t i = 0; i < data.tests_num; ++i)
    {
        result_host[i].reserve(k);
        dists_host[i].reserve(k);
    }

    // prepare output buffers on device
    pcl::gpu::NeighborIndices result_device;
    pcl::gpu::Octree::ResultSqrDists result_sqr_distances;

    // search GPU shared
    // NOTE(review): only data.tests_num (= 2) queries are batched here; the GPU
    // figure is presumably dominated by fixed per-call overhead at this batch
    // size -- confirm by profiling with a much larger query batch.
    {
        pcl::ScopeTime time("1nn-gpu");
        octree_device.nearestKSearchBatch(queries_device, k, result_device, result_sqr_distances);
    }

    // Copy the GPU results back to the host (outside of the timed scope).
    std::vector<int> downloaded;
    result_device.data.download(downloaded);
    std::vector<float> downloaded_sqr_dists;
    result_sqr_distances.download(downloaded_sqr_dists);

    // search CPU: time the queries only, mirroring what the GPU scope measures
    {
        pcl::ScopeTime time("1nn-cpu");
        for (std::size_t i = 0; i < data.tests_num; ++i)
        {
            octree_host.nearestKSearch(data.queries[i], k, result_host[i], dists_host[i]);
        }
    }

    // Report both results per query. This happens after both timers have
    // finished, and uses '\n' rather than std::endl to avoid a flush per line.
    for (std::size_t i = 0; i < data.tests_num; ++i)
    {
        for (std::size_t j = 0; j < result_host[i].size(); ++j)
        {
            const auto& cpu_point = (*cloud_host)[result_host[i].at(j)];
            // With k == 1 there is one GPU result per query, stored at index i.
            const auto& gpu_point = (*cloud_host)[downloaded[i]];

            std::cout << " cpu " << cpu_point.x << " " << cpu_point.y << " " << cpu_point.z
                      << " (squared distance: " << dists_host[i].at(j) << ")" << '\n';
            std::cout << " gpu " << gpu_point.x << " " << gpu_point.y << " " << gpu_point.z
                      << " (squared distance: " << downloaded_sqr_dists[i] << ")" << '\n';
        }
    }
}
Output:
1nn-gpu took 0.103565ms.
1nn-cpu took 0.057062ms.
Problem: Since the map is huge, my objective is to store the map as a kdtree so that, whenever the host needs the free distance at a given position, the GPU provides it. However, the CPU is always faster than the GPU. Am I using the wrong approach, or can I not use the GPU in this case?