I'm trying to implement hand detection using the MediaPipe libraries. For now, I'm detecting hands frame by frame on my RealSense stream using the code below.
Now I want to use running_mode=VIDEO or running_mode=LIVE_STREAM to get better detection and tracking, but I've found no example or documentation for these options. How can I implement this?
import cv2
import numpy as np
import os
import pyrealsense2 as rs
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
def hand_detection_realsense_video():
    # Create a HandLandmarker object.
    VisionRunningMode = mp.tasks.vision.RunningMode
    options = vision.HandLandmarkerOptions(
        base_options=python.BaseOptions(model_asset_path=model_path_full),
        running_mode=VisionRunningMode.IMAGE,
        num_hands=2,
        min_hand_detection_confidence=0.5,
        min_hand_presence_confidence=0.5,
        min_tracking_confidence=0.5)
    detector = vision.HandLandmarker.create_from_options(options)
    # Configure depth and color streams
    pipeline = rs.pipeline()
    config = rs.config()

    # Get device product line for setting a supporting resolution
    pipeline_wrapper = rs.pipeline_wrapper(pipeline)
    pipeline_profile = config.resolve(pipeline_wrapper)
    device = pipeline_profile.get_device()
    device_product_line = str(device.get_info(rs.camera_info.product_line))

    config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
    config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)

    # Start streaming
    pipeline.start(config)

    try:
        while True:
            # Wait for a coherent pair of frames: depth and color
            frames = pipeline.wait_for_frames()
            depth_frame = frames.get_depth_frame()
            color_frame = frames.get_color_frame()
            if not depth_frame or not color_frame:
                continue

            # Convert images to numpy arrays
            color_image = np.asanyarray(color_frame.get_data())
            # Hand detection: MediaPipe expects an RGB image, the RealSense color stream is BGR
            rgb_image = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_image)
            detection_result = detector.detect(mp_image)
            # draw_landmarks_on_image is the drawing helper from the MediaPipe hand landmarker example
            annotated_image = draw_landmarks_on_image(mp_image.numpy_view(), detection_result)

            # Show the raw and annotated frames side by side (convert back to BGR for OpenCV)
            images = np.hstack((color_image, cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR)))
            cv2.namedWindow('RealSense', cv2.WINDOW_AUTOSIZE)
            cv2.imshow('RealSense', images)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Stop streaming
        pipeline.stop()
I've finally got it working: you need to pass result_callback, pointing to your display method, when creating the detector. The display method is slightly different since it now receives a MediaPipe Image object rather than a numpy array. Here is my code, hoping it will help someone else.
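A minimal sketch of this setup, assuming the same model_path_full and draw_landmarks_on_image helper as in the question (only the color stream is kept for brevity, and the latest_annotated variable and display_result callback name are just illustrative):

import time
import cv2
import numpy as np
import pyrealsense2 as rs
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

VisionRunningMode = mp.tasks.vision.RunningMode

latest_annotated = None  # written by the callback, read by the display loop

def display_result(result, output_image: mp.Image, timestamp_ms: int):
    # Called asynchronously by the detector; it receives an mp.Image, not a numpy array.
    global latest_annotated
    latest_annotated = draw_landmarks_on_image(output_image.numpy_view(), result)

def hand_detection_realsense_live_stream():
    options = vision.HandLandmarkerOptions(
        base_options=python.BaseOptions(model_asset_path=model_path_full),
        running_mode=VisionRunningMode.LIVE_STREAM,
        num_hands=2,
        min_hand_detection_confidence=0.5,
        min_hand_presence_confidence=0.5,
        min_tracking_confidence=0.5,
        result_callback=display_result)
    detector = vision.HandLandmarker.create_from_options(options)

    pipeline = rs.pipeline()
    config = rs.config()
    config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
    pipeline.start(config)

    try:
        while True:
            frames = pipeline.wait_for_frames()
            color_frame = frames.get_color_frame()
            if not color_frame:
                continue
            color_image = np.asanyarray(color_frame.get_data())
            rgb_image = cv2.cvtColor(color_image, cv2.COLOR_BGR2RGB)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_image)

            # detect_async returns immediately; the result arrives later in display_result.
            # Timestamps must increase monotonically between calls.
            detector.detect_async(mp_image, int(time.time() * 1000))

            if latest_annotated is not None:
                cv2.imshow('RealSense', cv2.cvtColor(latest_annotated, cv2.COLOR_RGB2BGR))
            else:
                cv2.imshow('RealSense', color_image)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        pipeline.stop()
        detector.close()

Calling cv2.imshow directly from the callback thread can make OpenCV windows misbehave, which is why this sketch stores the annotated frame and displays it from the main loop instead.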
This also works with body detection by modifying the detector and the display method.
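For instance, a sketch of the swap to the pose landmarker, where pose_model_path is a placeholder for a downloaded pose_landmarker .task file:

# Same pattern as the hand detector: only the options and landmarker classes change,
# and the display callback draws pose landmarks instead of hand landmarks.
pose_options = vision.PoseLandmarkerOptions(
    base_options=python.BaseOptions(model_asset_path=pose_model_path),  # placeholder path
    running_mode=VisionRunningMode.LIVE_STREAM,
    num_poses=1,
    result_callback=display_result)
pose_detector = vision.PoseLandmarker.create_from_options(pose_options)
# Used exactly like the hand version inside the capture loop:
# pose_detector.detect_async(mp_image, int(time.time() * 1000))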