Detect text only inside a rectangle, not the whole screen, with Vision (iOS, Swift)

3.5k views Asked by At

I have text detection working using Vision, but it detects text across the whole screen. Is there a way to have it detect text only in a certain area, for example a rectangle I have drawn in the middle of the screen?

So the whole screen shows the camera feed, but boxes are drawn around text only when it enters a rectangle in the centre.

Photo of how I want it

Below is the code of my text detect and the cameraPreviewLayer.

If this is possible, will I need two camera preview layers?

/// Creates the camera preview layer for `captureSession` and installs it as the
/// back-most sublayer of the view's layer.
func setupPreviewLayer() {
    // Configure a non-optional local so nothing has to be force-unwrapped on insertion.
    let previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
    cameraPreviewlayer = previewLayer
    previewLayer.videoGravity = .resizeAspectFill
    previewLayer.connection?.videoOrientation = .portrait
    // A sublayer's frame is expressed in its superlayer's coordinate space, so size it
    // from the view's bounds; `view.frame` is in the superview's space and can carry a
    // non-zero origin that would offset the preview.
    previewLayer.frame = view.bounds
    view.layer.insertSublayer(previewLayer, at: 0)
}

/// Builds the Vision text-rectangle request and stores it in `requests`.
///
/// Results are delivered to `detectTextHandler(request:error:)`.
func startTextDetection() {
    // The request is retained through `requests`, so its completion handler must not
    // retain `self` in turn; passing the bound method `self.detectTextHandler` would
    // create a reference cycle.
    let textRequest = VNDetectTextRectanglesRequest { [weak self] request, error in
        self?.detectTextHandler(request: request, error: error)
    }
    // Ask Vision for per-character boxes as well; the highlight methods rely on them.
    textRequest.reportCharacterBoxes = true
    requests = [textRequest]
}

/// Completion handler for the text-rectangle request: clears the previous highlights
/// and draws a box around every detected word and each of its characters.
///
/// - Parameters:
///   - request: The finished Vision request; its `results` are read here.
///   - error: The error reported by Vision, if any.
func detectTextHandler(request: VNRequest, error: Error?) {
    // Surface the failure instead of silently dropping it.
    if let error = error {
        print("text detection failed: \(error)")
    }

    guard let observations = request.results else {
        print("no result")
        return
    }

    // Keep only text observations; anything else is skipped.
    let textObservations = observations.compactMap { $0 as? VNTextObservation }

    // Layer mutations have to happen on the main queue.
    DispatchQueue.main.async { [weak self] in
        guard let self = self, let previewLayer = self.cameraPreviewlayer else {
            return
        }

        // Remove every sublayer except the first one. `dropFirst()` is safe when the
        // array is empty, whereas `removeSubrange(1...)` traps in that case.
        previewLayer.sublayers?.dropFirst().forEach { $0.removeFromSuperlayer() }

        for observation in textObservations {
            self.highlightWord(box: observation)

            for characterBox in observation.characterBoxes ?? [] {
                self.highlightLetters(box: characterBox)
            }
        }
    }
}

/// Draws a red outline around the union of all character boxes of a word.
///
/// Does nothing when the observation has no character boxes or when the preview
/// layer has not been created yet.
///
/// - Parameter box: The text observation whose `characterBoxes` delimit the word.
func highlightWord(box: VNTextObservation) {
    // `min()`/`max()` return nil for an empty array, so an observation without
    // character boxes exits here instead of producing a frame from sentinel values.
    guard let boxes = box.characterBoxes,
          let left = boxes.map({ $0.bottomLeft.x }).min(),
          let right = boxes.map({ $0.bottomRight.x }).max(),
          let bottom = boxes.map({ $0.bottomRight.y }).min(),
          let top = boxes.map({ $0.topRight.y }).max(),
          let previewLayer = cameraPreviewlayer else {
        return
    }

    let layerSize = previewLayer.frame.size

    // Scale the box coordinates by the layer size. The y axis is flipped
    // (`1 - top`) so that the top edge of the word becomes the frame's origin.
    let wordFrame = CGRect(x: left * layerSize.width,
                           y: (1 - top) * layerSize.height,
                           width: (right - left) * layerSize.width,
                           height: (top - bottom) * layerSize.height)

    let outline = CALayer()
    outline.frame = wordFrame
    outline.borderWidth = 2.0
    outline.borderColor = UIColor.red.cgColor

    previewLayer.addSublayer(outline)
}

/// Draws a blue outline around a single character box.
///
/// Does nothing when the preview layer has not been created yet.
///
/// - Parameter box: The rectangle observation for one character.
func highlightLetters(box: VNRectangleObservation) {
    // Unwrap the layer once instead of force-unwrapping it for every dimension.
    guard let previewLayer = cameraPreviewlayer else {
        return
    }

    let layerSize = previewLayer.frame.size

    // Scale the box coordinates by the layer size. The y axis is flipped
    // (`1 - topLeft.y`) so that the top-left corner becomes the frame's origin.
    let letterFrame = CGRect(x: box.topLeft.x * layerSize.width,
                             y: (1 - box.topLeft.y) * layerSize.height,
                             width: (box.topRight.x - box.bottomLeft.x) * layerSize.width,
                             height: (box.topLeft.y - box.bottomLeft.y) * layerSize.height)

    let outline = CALayer()
    outline.frame = letterFrame
    outline.borderWidth = 1.0
    outline.borderColor = UIColor.blue.cgColor

    previewLayer.addSublayer(outline)
}
1

There is 1 answer

3
Tony Merritt On

I figured this out by adding:

// Frame of the detected box, built from the coordinates computed just above.
let wordRect = CGRect(x: xCord, y: yCord, width: width, height: height)
    guard regionOfInterest.contains(wordRect.origin) else { return } // only draw a box if the origin of the word box is within the regionOfInterest

before the

 // Existing drawing code: outline the box with a 1-point blue border and add it to the preview layer.
 let outline = CALayer()
outline.frame = CGRect(x: xCord, y: yCord, width: width, height: height)
outline.borderWidth = 1.0
outline.borderColor = UIColor.blue.cgColor

cameraPreviewlayer?.addSublayer(outline)

This way the drawing code only runs for words inside the rectangle. I set the rectangle to the frame of a UIView with the size of the area I wanted text to be shown in.