Kinect V2 depth image produces strange point cloud coordinates when using getPointXYZ() from pylibfreenect2?

2.3k views Asked by At

I am trying to generate the real-world coordinates from my MS Kinect V2.

I have managed to piece together a pyqt + opengl scatter plot and show the depth data from the Kinect using pylibfreenect2.

I noticed immediately that the depth data was not the same as point cloud data. Notice that my room's ceiling is very distorted (what should be a flat ceiling begins to resemble a hockey stick graph).

Result of plotting the depth frame enter image description here

After some reading and digging through source files I managed to find a function which seemed very promising.

getPointXYZ - Construct a 3-D point in a point cloud.

As it only works on one pixel at a time I wrote a simple nested for loop. In the code below you should see the lines:

# Pre-allocate one (x, y, z) row per depth pixel.
out = np.zeros((d.shape[0]*d.shape[1], 3))  # shape = (217088, 3)
for row in range(d.shape[0]):
    for col in range(d.shape[1]):
        # Convert one depth pixel to a real-world coordinate.
        world = registration.getPointXYZ(undistorted, row, col)
        # NOTE(review): row + col is identical for many different pixels
        # (e.g. (0, 1) and (1, 0)), so results overwrite one another here.
        out[row + col] = world

Result of coordinates from getPointXYZ() enter image description here

Not sure what's going on there. It looks more like a straight line, and sometimes it resembles a rectangle; it's very flat (yet it sits at arbitrary angles in all three dimensions). When I move my hand in front of the sensor I can see some points move around, but no discernible shapes are visible. It appears that all points are being crammed together.

The following is a Python script that will show a pyQt application window containing an openGL scatter plot. Frames are received from the Kinect sensor through pylibfreenect2 and the scatter plot's points are generated by iterating over each row and column of the depth data and sending it through getPointXYZ (This is really slow and doesn't work...).

# coding: utf-8

# An example using startStreams
from pyqtgraph.Qt import QtCore, QtGui
import pyqtgraph.opengl as gl

import numpy as np
import cv2
import sys
from pylibfreenect2 import Freenect2, SyncMultiFrameListener
from pylibfreenect2 import FrameType, Registration, Frame, libfreenect2

# Look for a connected Kinect; exit with status 1 if none is found.
fn = Freenect2()
num_devices = fn.enumerateDevices()
if num_devices == 0:
    print("No device connected!")
    sys.exit(1)

# Open the device at index 0, addressed by its serial number.
serial = fn.getDeviceSerialNumber(0)
device = fn.openDevice(serial)

# Build the frame-type bitmask (color + IR + depth) for the listener.
types = 0
types |= FrameType.Color
types |= (FrameType.Ir | FrameType.Depth)
listener = SyncMultiFrameListener(types)

# Register listeners
device.setColorFrameListener(listener)
device.setIrAndDepthFrameListener(listener)

device.start()

# NOTE: must be called after device.start()
registration = Registration(device.getIrCameraParams(),
                            device.getColorCameraParams())

# Output buffers that registration.apply() fills on every update() call.
# Presumably (width, height, bytes per pixel) - confirm against the Frame API.
undistorted = Frame(512, 424, 4)
registered = Frame(512, 424, 4)


# Qt app: an OpenGL view widget with a reference grid.
app = QtGui.QApplication([])
w = gl.GLViewWidget()
w.show()
g = gl.GLGridItem()
w.addItem(g)

# Initialize the scatter plot with a single placeholder point at the origin;
# update() replaces it through sp2.setData().
pos = np.zeros((1,3))

sp2 = gl.GLScatterPlotItem(pos=pos)
w.addItem(sp2)


def update():
    """Fetch the next set of Kinect frames and redraw the scatter plot.

    Waits on the listener for new frames, runs ``registration.apply`` to fill
    ``undistorted`` and ``registered``, converts each depth pixel with
    ``registration.getPointXYZ`` and hands the resulting array to ``sp2``.
    The frames are released at the end of the call.
    """
    frames = listener.waitForNewFrame()

    ir = frames["ir"]
    color = frames["color"]
    depth = frames["depth"]

    d = depth.asarray()

    registration.apply(color, depth, undistorted, registered)

    # There are 3 methods for generating the points data below; the first two
    # are disabled by wrapping them in string literals, the last one is active.
    # First will generate points using depth data only.
    # Second will generate colored points and pointcloud xyz coordinates.
    # Third is simply the pointcloud xyz coordinates without the color mapping.

    """
    #Format depth data to be displayed
    m, n = d.shape
    R, C = np.mgrid[:m, :n]
    out = np.column_stack((d.ravel() / 4500, C.ravel()/m, (-R.ravel()/n)+1))
    """

    """
    #Format undistorted and regisered data to real-world coordinates with mapped colors (dont forget color=out_col in setData)
    out = np.zeros((d.shape[0]*d.shape[1], 3)) #shape = (217088, 3)
    out_col = np.zeros((d.shape[0]*d.shape[1], 3)) #shape = (217088, 3)
    for row in range(d.shape[0]):
        for col in range(d.shape[1]):
            world = registration.getPointXYZRGB(undistorted, registered, row, col)
            out[row + col] = world[0:3]
            out_col[row + col] = np.array(world[3:6]) / 255

    """

    # Format undistorted data to real-world coordinates
    # One (x, y, z) row is pre-allocated per depth pixel.
    out = np.zeros((d.shape[0]*d.shape[1], 3))  # shape = (217088, 3)
    for row in range(d.shape[0]):
        for col in range(d.shape[1]):
            world = registration.getPointXYZ(undistorted, row, col)
            # NOTE(review): row + col is identical for many different pixels
            # (e.g. (0, 1) and (1, 0)), so points overwrite one another and
            # indices above (rows - 1) + (cols - 1) are never written.
            out[row + col] = world


    # Replace the plotted points with the freshly computed ones.
    sp2.setData(pos=out, size=2)

    # Hand the frames back to the listener once they are no longer needed.
    listener.release(frames)

# Drive update() from a Qt timer that fires every 50 ms.
t = QtCore.QTimer()
t.timeout.connect(update)
t.start(50)


## Start Qt event loop unless running in interactive mode.
if __name__ == '__main__':
    import sys
    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()

# Reached once the event loop has returned (or was never started):
# stop streaming, close the device and exit with status 0.
device.stop()
device.close()

sys.exit(0)

I am unsure what I should do next in order to get the actual point cloud coordinate data.

Does anyone have any suggestions as to what I'm doing wrong?

My operating system is Ubuntu 16.0.4 with Python 3.5

Thanks.

1

There is 1 answer

0
Logic1 On

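The answer was actually to resolve a mistake I made in those nested loops: the flat output array was not being indexed correctly. Because row + col gives the same value for many different pixels (for example, pixels (0, 1) and (1, 0) both map to index 1), points kept overwriting each other and most of the array was never filled. The index has to be the row-major offset of the pixel instead: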

# From (row + col is not unique per pixel):
out[row + col]
# To (row-major flat index, unique for every (row, col) pair):
out[row * n_columns + col]

Vertices are now accurately positioned in 3D space and all looks good!

enter image description here

Here's the revised and fully functional code:

# coding: utf-8

# An example using startStreams
from pyqtgraph.Qt import QtCore, QtGui
import pyqtgraph.opengl as gl

import numpy as np
import cv2
import sys
from pylibfreenect2 import Freenect2, SyncMultiFrameListener
from pylibfreenect2 import FrameType, Registration, Frame, libfreenect2

# Look for a connected Kinect; exit with status 1 if none is found.
fn = Freenect2()
num_devices = fn.enumerateDevices()
if num_devices == 0:
    print("No device connected!")
    sys.exit(1)

# Open the device at index 0, addressed by its serial number.
serial = fn.getDeviceSerialNumber(0)
device = fn.openDevice(serial)

# Build the frame-type bitmask (color + IR + depth) for the listener.
types = 0
types |= FrameType.Color
types |= (FrameType.Ir | FrameType.Depth)
listener = SyncMultiFrameListener(types)

# Register listeners
device.setColorFrameListener(listener)
device.setIrAndDepthFrameListener(listener)

device.start()

# NOTE: must be called after device.start()
registration = Registration(device.getIrCameraParams(),
                            device.getColorCameraParams())

# Output buffers that registration.apply() fills on every update() call.
# Presumably (width, height, bytes per pixel) - confirm against the Frame API.
undistorted = Frame(512, 424, 4)
registered = Frame(512, 424, 4)


# Qt app: an OpenGL view widget with a reference grid.
app = QtGui.QApplication([])
w = gl.GLViewWidget()
w.show()
g = gl.GLGridItem()
w.addItem(g)

# Initialize the scatter plot with a single placeholder point at the origin;
# update() replaces it through sp2.setData().
pos = np.zeros((1,3))

sp2 = gl.GLScatterPlotItem(pos=pos)
w.addItem(sp2)

def update():
    """Fetch the next set of Kinect frames and redraw the colored point cloud.

    Waits on the listener for new frames, runs ``registration.apply`` to fill
    the module-level ``undistorted`` and ``registered`` buffers, converts
    every depth pixel to an (X, Y, Z) point plus its color with
    ``registration.getPointXYZRGB`` and passes both arrays to ``sp2``.

    The frames are always released back to the listener, even when the
    registration step raises.
    """
    frames = listener.waitForNewFrame()
    try:
        color = frames["color"]
        depth = frames["depth"]

        # Only the image dimensions are needed from the depth frame; read
        # them while the frame is still held by this call.
        n_rows, n_columns = depth.asarray().shape[:2]

        registration.apply(color, depth, undistorted, registered)
    finally:
        listener.release(frames)

    # Alternative 1 - plot the raw depth image instead of real-world points
    # (needs d = depth.asarray() taken before the frames are released):
    #     m, n = d.shape
    #     R, C = np.mgrid[:m, :n]
    #     out = np.column_stack((d.ravel() / 4500, C.ravel()/m, (-R.ravel()/n)+1))
    #
    # Alternative 2 - real-world coordinates without color mapping: replace
    # the call in the loop below with
    #     X, Y, Z = registration.getPointXYZ(undistorted, row, col)
    # and drop the color handling.

    # Real-world coordinates with mapped colors.
    n_points = n_rows * n_columns
    out = np.zeros((n_points, 3), dtype=np.float64)
    # RGBA per point; alpha stays at 1.0 (opaque) so the array has the
    # (N, 4) layout GLScatterPlotItem documents for per-point colors.
    colors = np.ones((n_points, 4), dtype=np.float64)
    for row in range(n_rows):
        # Row-major flat index: every (row, col) pair gets its own slot.
        base = row * n_columns
        for col in range(n_columns):
            # The color components are unpacked in blue, green, red order.
            X, Y, Z, B, G, R = registration.getPointXYZRGB(undistorted, registered, row, col)
            out[base + col] = (X, Y, Z)
            colors[base + col, :3] = (R, G, B)

    # Scale the 0-255 color components to 0.0-1.0 in one vectorized step.
    colors[:, :3] /= 255.0

    sp2.setData(pos=out, color=colors, size=2)



# Drive update() from a Qt timer that fires every 50 ms.
t = QtCore.QTimer()
t.timeout.connect(update)
t.start(50)


## Start Qt event loop unless running in interactive mode.
if __name__ == '__main__':
    import sys
    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        QtGui.QApplication.instance().exec_()

# Reached once the event loop has returned (or was never started):
# stop streaming, close the device and exit with status 0.
device.stop()
device.close()

sys.exit(0)

[EDIT]

Please see This Post for additional information