Recognize handwritten digits using Tesseract?

1.2k views Asked by At

I have extracted this roll number section from the OMR sheet (image: Roll Number Field).

Now, my main task is to recognize the handwritten digits and return the roll number value as text in Python. I tried using pytesseract, but it is not giving proper results.

Here is my sample code

import cv2
import pytesseract

def _erase_lines(binary, targets, kernel_size):
    """Black out long straight strokes of one orientation.

    ``binary`` is opened morphologically with a rectangular kernel of
    ``kernel_size`` (width, height), so only strokes that the kernel fits
    inside survive. The contours of those survivors are then painted black
    (thickness 5) on every image in ``targets``.
    """
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
    detected = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=2)
    found = cv2.findContours(detected, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns either (contours, hierarchy) or
    # (image, contours, hierarchy) depending on the OpenCV version.
    contours = found[0] if len(found) == 2 else found[1]
    for contour in contours:
        for target in targets:
            cv2.drawContours(target, [contour], -1, (0, 0, 0), 5)


def recognize_digit(image, *, show_preview=True):
    """Return the digits Tesseract reads from ``image``.

    The image is contrast-equalized (CLAHE), binarized with Otsu's method,
    stripped of long horizontal and vertical strokes (form grid lines), and
    then passed to Tesseract as a single text line restricted to ``0-9``.

    Args:
        image: BGR image, or an already-grayscale 2-D image, as a NumPy
            array. It is not modified.
        show_preview: When true (the default), display the image that is
            sent to Tesseract in a window and block until a key is pressed.
            Pass ``False`` for batch or headless use.

    Returns:
        The recognized text with surrounding whitespace stripped.
    """
    # Preprocess the image for OCR
    gray = image if image.ndim == 2 else cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    clahe = cv2.createCLAHE(clipLimit=0.9, tileGridSize=(8, 8))
    equalized = clahe.apply(gray)
    # Inverted Otsu threshold: ink becomes white (255) on a black background,
    # which is the polarity the morphological line detection below needs.
    threshold = cv2.threshold(equalized, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    # NOTE(review): h=0 is a zero filter strength, so this call presumably
    # leaves the image unchanged; raise h if real denoising is wanted.
    denoised = cv2.fastNlMeansDenoising(threshold, h=0)

    # Remove horizontal lines, then vertical ones. Each pass also erases the
    # lines from ``threshold`` so the next detection starts from a clean mask.
    _erase_lines(threshold, (denoised, threshold), (24, 1))
    _erase_lines(threshold, (denoised, threshold), (1, 25))

    # Tesseract 4+ expects dark text on a light background, so flip the
    # white-on-black mask back before recognition.
    ocr_input = cv2.bitwise_not(denoised)

    if show_preview:
        cv2.imshow('X', ocr_input)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    # Apply OCR using Tesseract
    config = "--psm 7 --oem 3 -c tessedit_char_whitelist=0123456789"
    text = pytesseract.image_to_string(ocr_input, config=config)

    return text.strip()

# Read the image
image_path = 'aligned_image.jpeg'
image = cv2.imread(image_path)
# cv2.imread reports a missing or unreadable file by returning None instead
# of raising, so fail here with a clear message rather than on the slice below.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Define the region of interest (coordinates of the desired region)
x, y, w, h = 4, 4, 800, 100

# Crop the image to the defined region
roi = image[y:y+h, x:x+w]

# Recognize the handwritten digit within the cropped region
recognized_digit = recognize_digit(roi)

# Print the recognized digit
print("Recognized digit:", recognized_digit)

How can I get a proper solution for this?

1

There is 1 answer

0
Hissaan Ali On

Pytesseract doesn't work very well on handwritten data, especially when the handwriting varies. You can improve the results somewhat with image-processing techniques (for example, Otsu thresholding to remove noise, resizing the image so that the details are visible, and then sharpening it), but the improvement won't be large.

If you want better results with a free tool, I'd advise using PaddleOCR, which performs significantly better than Pytesseract when it comes to detection accuracy.

Here's some sample code that uses their pretrained weights to run detection:

from paddleocr import PaddleOCR,draw_ocr
from PIL import Image

# Paddleocr supports Chinese, English, French, German, Korean and Japanese.
# You can set the parameter `lang` as `ch`, `en`, `fr`, `german`, `korean`, `japan`
# to switch the language model in order.
ocr = PaddleOCR(use_angle_cls=True, lang='en') # need to run only once to download and load model into memory
img_path = './imgs_en/img_12.jpg'
result = ocr.ocr(img_path, cls=True)

# `result` holds one entry per input image.
# NOTE(review): an entry can apparently be None when no text is detected in
# that image (confirm against the installed PaddleOCR version), so treat a
# missing entry as "no lines" instead of iterating over None.
for res in result:
    for line in res or []:
        print(line)


# draw result
result = result[0] if result else None
if not result:
    # Nothing to draw; skip the visualisation rather than crash on None.
    print("No text detected in", img_path)
else:
    image = Image.open(img_path).convert('RGB')
    # Each line is indexed as [box, (text, score)].
    boxes = [line[0] for line in result]
    txts = [line[1][0] for line in result]
    scores = [line[1][1] for line in result]
    im_show = draw_ocr(image, boxes, txts, scores, font_path='./fonts/simfang.ttf')
    im_show = Image.fromarray(im_show)
    im_show.save('result.jpg')

For further details, see their official documentation, which covers fine-tuning, inference, and best practices: https://github.com/PaddlePaddle/PaddleOCR