Kitti dataset get rgb data for each point in points clouds from corresponding image

88 views Asked by At

I working with the Kitti dataset, I am trying to fuse points cloud and image on early stage for 3d object detection model, so each point in the points cloud has a foloving feature vector [x, y, z, r] where x,y,z coordinates and r is intensity. I need to expand this vector by adding R, G, B data from the corresponding image, so I will get vector [x, y, z, r, R, G, B] for each point. Can someone share the function (Python or another language) on how to do that properly?

So I get that I need to use calibration file to project on image plain and then gets RGB data, but have no clue how to do that, and also the problem how to filter point cloud to be in range of image view. I have found this code

# https://github.com/azureology/kitti-velo2cam/blob/master/proj_velo2cam.py
name = ''
img = ''
binary =''
with open(f'./testing/calib/{name}.txt','r') as f:
    calib = f.readlines()
# P2 (3 x 4) for left eye
P2 = np.array([float(x) for x in calib[2].strip('\n').split(' ')[1:]]).reshape(3, 4)
R0_rect = np.array([float(x) for x in calib[4].strip('\n').split(' ')[1:]]).reshape(3, 3)
# Add a 1 in bottom-right, reshape to 4 x 4
R0_rect = np.insert(R0_rect, 3, values=[0, 0, 0], axis=0)
R0_rect = np.insert(R0_rect, 3, values=[0, 0, 0, 1], axis=1)
Tr_velo_to_cam = np.array([float(x) for x in calib[5].strip('\n').split(' ')[1:]]).reshape(3, 4)
Tr_velo_to_cam = np.insert(Tr_velo_to_cam, 3, values=[0, 0, 0, 1], axis=0)

# read raw data from binary
scan = np.fromfile(binary, dtype=np.float32).reshape((-1, 4))
points = scan[:, 0:3]  # lidar xyz (front, left, up)
# TODO: use fov filter?
velo = np.insert(points, 3, 1, axis=1).T
velo = np.delete(velo, np.where(velo[0, :] < 0), axis=1)
cam = P2.dot(R0_rect.dot(Tr_velo_to_cam.dot(velo)))
cam = np.delete(cam, np.where(cam[2, :] < 0), axis=1)
# get u,v,z
cam[:2] /= cam[2, :]
# do projection staff
plt.figure(figsize=(12, 5), dpi=96, tight_layout=True)
png = mpimg.imread(img)
IMG_H, IMG_W, _ = png.shape
# restrict canvas in range
plt.axis([0, IMG_W, IMG_H, 0])
plt.imshow(png)
# filter point out of canvas
u, v, z = cam
u_out = np.logical_or(u < 0, u > IMG_W)
v_out = np.logical_or(v < 0, v > IMG_H)
outlier = np.logical_or(u_out, v_out)
cam = np.delete(cam, np.where(outlier), axis=1)
# generate color map from depth
u, v, z = cam
plt.scatter([u], [v], c=[z], cmap='rainbow_r', alpha=0.5, s=2)
plt.title(name)
plt.savefig(f'{name}.png', bbox_inches='tight')
plt.show()
1

There are 1 answers

0
Den Mor On BEST ANSWER
R0_rect = np.array([float(x) for x in calib[4].strip('\n').split(' ')[1:]]).reshape(3, 3)
# Add a 1 in bottom-right, reshape to 4 x 4
R0_rect = np.insert(R0_rect, 3, values=[0, 0, 0], axis=0)
R0_rect = np.insert(R0_rect, 3, values=[0, 0, 0, 1], axis=1)
Tr_velo_to_cam = np.array([float(x) for x in calib[5].strip('\n').split(' ')[1:]]).reshape(3, 4)
Tr_velo_to_cam = np.insert(Tr_velo_to_cam, 3, values=[0, 0, 0, 1], axis=0)

# read raw data from binary
scan = np.fromfile(binary, dtype=np.float32).reshape((-1, 4))
points = scan[:, 0:3]  # lidar xyz (front, left, up)
# TODO: use fov filter?
velo = np.insert(points, 3, 1, axis=1).T
velo = np.delete(velo, np.where(velo[0, :] < 0), axis=1)
cam = P2.dot(R0_rect.dot(Tr_velo_to_cam.dot(velo)))
cam = np.delete(cam, np.where(cam[2, :] < 0), axis=1)
# get u,v,z
cam[:2] /= cam[2, :]
# do projection staff
plt.figure(figsize=(12, 5), dpi=96, tight_layout=True)
png = mpimg.imread(img)
IMG_H, IMG_W, _ = png.shape
# restrict canvas in range
plt.axis([0, IMG_W, IMG_H, 0])
# plt.imshow(png)
# filter point out of canvas
u, v, z = cam
u_out = np.logical_or(u < 0, u > IMG_W)
v_out = np.logical_or(v < 0, v > IMG_H)
outlier = np.logical_or(u_out, v_out)
cam = np.delete(cam, np.where(outlier), axis=1)
# generate color map from depth
u, v, z = cam

# Adding rgb data
rgb_values = []
for u_coord, v_coord in zip(u, v):
    u_int, v_int = int(round(u_coord)), int(round(v_coord))
    if 0 <= u_int < IMG_W and 0 <= v_int < IMG_H:

        rgb_value = png[v_int, u_int]
        rgb_values.append(rgb_value)
    else:
        rgb_values.append((0, 0, 0))

plt.scatter([u], [v],  c=rgb_values, alpha=0.5, s=2)
plt.title(name)
plt.savefig(f'{name}.png', bbox_inches='tight')
plt.show()



import open3d as o3d

# Create Open3D point cloud
point_cloud = o3d.geometry.PointCloud()
point_cloud.points = o3d.utility.Vector3dVector(scan[:, :3])
point_cloud.colors = o3d.utility.Vector3dVector(rgb_values)

# Visualize the point cloud
o3d.visualization.draw_geometries([point_cloud], window_name='Point Cloud with RGB Data')