Latency in AVCaptureDepthDataOutput data

I have a ViewController that implements both the AVCaptureVideoDataOutputSampleBufferDelegate and AVCaptureDepthDataOutputDelegate protocols. I want to collect both video and depth data: the video data is used to perform Vision ML requests, and the depth data is used to calculate the distance from the camera to a specific point in the frame.

extension MainRecognizerViewController: AVCaptureVideoDataOutputSampleBufferDelegate {
    func captureOutput(_ output: AVCaptureOutput, 
                       didOutput sampleBuffer: CMSampleBuffer,
                       from connection: AVCaptureConnection) {
        DispatchQueue.main.async {
            self.captureSessionManager.manageFlashlight(for: sampleBuffer, force: nil)
        }
        
        guard let cvPixelBuffer = sampleBuffer.convertToPixelBuffer() else {
            return
        }
        
        let exifOrientation = exifOrientationFromDeviceOrientation()
        let handler = VNImageRequestHandler(cvPixelBuffer: cvPixelBuffer,
                                            orientation: exifOrientation)
        
        let objectsRecognitionRequest = prepareVisionRequestForObjectsRecognition(
            pixelBuffer: cvPixelBuffer
        )
        
        // Run the Vision requests off the delegate queue.
        DispatchQueue.global().async {
            try? handler.perform([objectsRecognitionRequest])
            try? handler.perform(self.roadLightsRecognizerRequests)
            try? handler.perform(self.pedestrianCrossingRecognizerRequests)
        }
    }
}

and

extension MainRecognizerViewController: AVCaptureDepthDataOutputDelegate {
    func depthDataOutput(_ output: AVCaptureDepthDataOutput, 
                         didOutput depthData: AVDepthData,
                         timestamp: CMTime,
                         connection: AVCaptureConnection) {
        // Reset the accumulators once the previous measurement loop has finished.
        if depthMeasurementsLeftInLoop == 0 {
            depthMeasurementsCumul = 0.0
            depthMeasurementMin = 9999.9
            depthMeasurementMax = 0.0
            depthMeasurementsLeftInLoop = depthMeasurementRepeats
        }
        
        if depthMeasurementsLeftInLoop > 0 {
            let convertedDepthData: AVDepthData = depthData.converting(
                toDepthDataType: kCVPixelFormatType_DepthFloat16
            )
            let depthFrame = convertedDepthData.depthDataMap
            // Sample the depth value at the center of the depth map.
            let depthPoint = CGPoint(x: CGFloat(CVPixelBufferGetWidth(depthFrame)) / 2,
                                     y: CGFloat(CVPixelBufferGetHeight(depthFrame)) / 2)
            let depthVal = getDepthValueFromFrame(fromFrame: depthFrame,
                                                  atPoint: depthPoint)
            print(depthVal)
            print(depthVal)
            
            // Depth is in metres; convert to centimetres.
            let measurement = depthVal * 100
            
            depthMeasurementsCumul += measurement
            
            if measurement > depthMeasurementMax {
                depthMeasurementMax = measurement
            }
            
            if measurement < depthMeasurementMin {
                depthMeasurementMin = measurement
            }
            
            depthMeasurementsLeftInLoop -= 1
            
            //            let printStr = String(format: "Measurement %d: %.2f cm",
            //                depthMeasurementRepeats - depthMeasurementsLeftInLoop, measurement)
            
            DispatchQueue.main.async { [weak self] in
                self?.distanceMeasurerViewModel?.distanceString = String(format: "%.2f", measurement)
            }
        }
    }
}
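
For context, getDepthValueFromFrame(fromFrame:atPoint:) is a custom helper that the question does not show. Purely as an illustration (the real implementation may differ), a minimal sketch of such a helper for a kCVPixelFormatType_DepthFloat16 buffer could look like this:

import CoreGraphics
import CoreVideo

// Hypothetical sketch: reads the Float16 depth value (in metres) at a point.
// Assumes the buffer uses kCVPixelFormatType_DepthFloat16, matching the
// conversion performed in depthDataOutput(_:didOutput:timestamp:connection:).
func getDepthValueFromFrame(fromFrame buffer: CVPixelBuffer,
                            atPoint point: CGPoint) -> Float {
    CVPixelBufferLockBaseAddress(buffer, .readOnly)
    defer { CVPixelBufferUnlockBaseAddress(buffer, .readOnly) }
    
    guard let baseAddress = CVPixelBufferGetBaseAddress(buffer) else {
        return 0
    }
    // Step to the requested row, then index into it as 16-bit floats.
    let bytesPerRow = CVPixelBufferGetBytesPerRow(buffer)
    let rowStart = baseAddress + Int(point.y) * bytesPerRow
    let depth = rowStart.assumingMemoryBound(to: Float16.self)[Int(point.x)]
    return Float(depth)
}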

I perform the whole camera setup in the CaptureSessionManager class:

import AVFoundation

final class CaptureSessionManager: CaptureSessionManaging {
    @Inject private var flashlightManager: FlashlightManaging
    
    private let captureSessionQueue = DispatchQueue(label: "captureSessionQueue")
    private let captureSessionDataOutputQueue = DispatchQueue(
        label: "captureSessionVideoDataOutput",
        qos: .userInitiated,
        attributes: [],
        autoreleaseFrequency: .workItem
    )
    
    private var sampleBufferOutput: AVCaptureVideoDataOutput = AVCaptureVideoDataOutput()
    private var sampleBufferDelegate: AVCaptureVideoDataOutputSampleBufferDelegate?
    private var depthDataOutput: AVCaptureDepthDataOutput = AVCaptureDepthDataOutput()
    private var depthDataOutputDelegate: AVCaptureDepthDataOutputDelegate?
    var cameraMode: CameraMode?
    private var desiredFrameRate: Double?
    
    private var videoDevice: AVCaptureDevice? = AVCaptureDevice.default(
        .builtInLiDARDepthCamera,
        for: .video,
        position: .back
    )
    
    var bufferSize: CGSize = .zero
    
    var captureSession: AVCaptureSession!
    
    func setUp(with sampleBufferDelegate: AVCaptureVideoDataOutputSampleBufferDelegate,
               and depthDataOutputDelegate: AVCaptureDepthDataOutputDelegate,
               for cameraMode: CameraMode,
               cameraPosition: AVCaptureDevice.Position,
               desiredFrameRate: Double,
               completion: @escaping () -> ()) {
        stopCaptureSession()
        
        self.sampleBufferDelegate = sampleBufferDelegate
        self.depthDataOutputDelegate = depthDataOutputDelegate
        self.cameraMode = cameraMode
        self.desiredFrameRate = desiredFrameRate
        
        authorizeCaptureSession {
            completion()
        }
    }
    
    func manageFlashlight(for sampleBuffer: CMSampleBuffer?, 
                          force torchMode: AVCaptureDevice.TorchMode?) {
        flashlightManager.manageFlashlight(for: sampleBuffer,
                                           and: self.videoDevice,
                                           force: torchMode)
    }
    
    private func authorizeCaptureSession(completion: @escaping () -> ()) {
        switch AVCaptureDevice.authorizationStatus(for: .video) {
        case .authorized:
            setupCaptureSession {
                completion()
            }
        case .notDetermined:
            AVCaptureDevice.requestAccess(for: .video) { [weak self] granted in
                if granted {
                    self?.setupCaptureSession {
                        completion()
                    }
                }
            }
        default:
            return
        }
    }
    
    private func setupCaptureSession(completion: @escaping () -> ()) {
        captureSessionQueue.async { [unowned self] in
            var captureSession: AVCaptureSession = AVCaptureSession()
            captureSession.beginConfiguration()
            
            guard let videoDevice = videoDevice else {
                return
            }
            
            do {
                let captureDeviceInput = try AVCaptureDeviceInput(device: videoDevice)
                guard captureSession.canAddInput(captureDeviceInput) else {
                    return
                }
                captureSession.addInput(captureDeviceInput)
            } catch {
                return
            }
            
            let sessionPreset: SessionPreset = .hd1280x720
            
            guard let videoSetupedCaptureSession: AVCaptureSession = setupCaptureSessionForVideo(
                captureSession: captureSession,
                sessionPreset: sessionPreset
            ) else {
                return
            }
            
            guard let depthAndVideoSetupedCaptureSession = setupCaptureSessionForDepth(
                captureSession: videoSetupedCaptureSession
            ) else {
                return
            }
            
            captureSession = depthAndVideoSetupedCaptureSession
            captureSession.sessionPreset = sessionPreset.preset
            captureSession.commitConfiguration()
            
            self.captureSession = captureSession
            self.startCaptureSession()
            completion()
        }
    }
    
    private func setupCaptureSessionForVideo(captureSession: AVCaptureSession,
                                             sessionPreset: SessionPreset) -> AVCaptureSession? {
        let captureSessionVideoOutput: AVCaptureVideoDataOutput = AVCaptureVideoDataOutput()
        captureSessionVideoOutput.videoSettings = [
            kCVPixelBufferPixelFormatTypeKey as String: NSNumber(
                value: kCMPixelFormat_32BGRA
            )
        ]
        captureSessionVideoOutput.alwaysDiscardsLateVideoFrames = true
        captureSessionVideoOutput.setSampleBufferDelegate(
            self.sampleBufferDelegate,
            queue: captureSessionDataOutputQueue
        )
        
        guard let videoDevice = videoDevice else {
            return nil
        }
        
        var formatToSet: AVCaptureDevice.Format = videoDevice.formats[0]
        
        guard let desiredFrameRate = desiredFrameRate else {
            return nil
        }
        
        for format in videoDevice.formats.reversed() {
            let ranges = format.videoSupportedFrameRateRanges
            let frameRates = ranges[0]

            if desiredFrameRate <= frameRates.maxFrameRate,
               format.formatDescription.dimensions.width == sessionPreset.formatWidth,
               format.formatDescription.dimensions.height == sessionPreset.formatHeight {
                formatToSet = format
                break
            }
        }
        
        do {
            try videoDevice.lockForConfiguration()
            
            if videoDevice.hasTorch {
                self.manageFlashlight(for: nil, force: .auto)
            }
            
            videoDevice.activeFormat = formatToSet
            
            // Read the dimensions after switching formats so bufferSize
            // reflects the format that is actually active.
            let dimensions = CMVideoFormatDescriptionGetDimensions(videoDevice.activeFormat.formatDescription)
            bufferSize.width = CGFloat(dimensions.width)
            bufferSize.height = CGFloat(dimensions.height)

            let timescale = CMTimeScale(desiredFrameRate)
            if videoDevice.activeFormat.videoSupportedFrameRateRanges[0].maxFrameRate >= desiredFrameRate {
                videoDevice.activeVideoMinFrameDuration = CMTime(value: 1, timescale: timescale)
                videoDevice.activeVideoMaxFrameDuration = CMTime(value: 1, timescale: timescale)
            }
            
            videoDevice.unlockForConfiguration()
        } catch {
            return nil
        }
        
        guard captureSession.canAddOutput(captureSessionVideoOutput) else {
            return nil
        }
        
        captureSession.addOutput(captureSessionVideoOutput)
        
        // The video connection exists only after the output has been added,
        // so configure it after addOutput.
        let captureConnection = captureSessionVideoOutput.connection(with: .video)
        captureConnection?.isEnabled = true
        
        if let cameraMode = self.cameraMode,
           CameraMode.modesWithPortraitVideoConnection.contains(cameraMode) {
            captureConnection?.videoOrientation = .portrait
        }
        
        return captureSession
    }
    
    private func setupCaptureSessionForDepth(captureSession: AVCaptureSession) -> AVCaptureSession? {
        guard let depthDataOutputDelegate = depthDataOutputDelegate else {
            return nil
        }
        
        if captureSession.canAddOutput(depthDataOutput) {
            captureSession.addOutput(depthDataOutput)
            depthDataOutput.isFilteringEnabled = false
        } else {
            return nil
        }
        
        if let connection = depthDataOutput.connection(with: .depthData) {
            connection.isEnabled = true
            depthDataOutput.setDelegate(
                depthDataOutputDelegate,
                callbackQueue: captureSessionDataOutputQueue
            )
        } else {
            return nil
        }
        
        guard let videoDevice = videoDevice else {
            return nil
        }
        
        let availableFormats = videoDevice.activeFormat.supportedDepthDataFormats
        let availableHdepFormats = availableFormats.filter { f in
            CMFormatDescriptionGetMediaSubType(f.formatDescription) == kCVPixelFormatType_DepthFloat16
        }
        let selectedFormat = availableHdepFormats.max { lower, higher in
            CMVideoFormatDescriptionGetDimensions(lower.formatDescription).width
                < CMVideoFormatDescriptionGetDimensions(higher.formatDescription).width
        }
        
        do {
            try videoDevice.lockForConfiguration()
            videoDevice.activeDepthDataFormat = selectedFormat
            videoDevice.unlockForConfiguration()
        } catch {
            return nil
        }
        
        return captureSession
    }
    
    func startCaptureSession() {
        self.captureSession?.startRunning()
    }
    
    func stopCaptureSession() {
        self.captureSession?.stopRunning()
    }
}

The problem is that I receive depth data updates too slowly: captureOutput from AVCaptureVideoDataOutputSampleBufferDelegate fires much more frequently than depthDataOutput from AVCaptureDepthDataOutputDelegate.

What might be the cause?

Answer (accepted, by Vader20FF):

I managed to solve it. The cause of the problem was that a single queue in CaptureSessionManager was used for both the video data output and the depth data output. Because both outputs delivered their callbacks to the same serial queue, the frequent video callbacks kept the queue busy, and depth data that could not be delivered in time was dropped, which made the depth updates appear slow.

I added a second queue dedicated to depth data:

private let captureSessionDepthDataOutputQueue = DispatchQueue(
    label: "captureSessionDepthDataOutput",
    qos: .userInitiated,
    attributes: [],
    autoreleaseFrequency: .workItem
)

and set it as the depth output's callback queue:

depthDataOutput.setDelegate(
    depthDataOutputDelegate,
    callbackQueue: captureSessionDepthDataOutputQueue
)
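
With the two outputs on separate serial queues, the heavy Vision work driven by the video callbacks can no longer block the depth callbacks. For completeness, the delegate assignments then look like this (same identifiers as in the code above):

// Video frames stay on the original queue...
captureSessionVideoOutput.setSampleBufferDelegate(
    self.sampleBufferDelegate,
    queue: captureSessionDataOutputQueue
)

// ...while depth data gets its own queue, so depth callbacks are no
// longer stuck behind Vision work on the video queue.
depthDataOutput.setDelegate(
    depthDataOutputDelegate,
    callbackQueue: captureSessionDepthDataOutputQueue
)

This matters because AVCaptureDepthDataOutput discards depth data that cannot be delivered in time by default (alwaysDiscardsLateDepthData is true), so a blocked shared queue shows up as infrequent depth updates rather than as a growing backlog.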