I am trying to apply OCR using OpenCV and Python-tesseract (pytesseract) to convert the following image to text:
import cv2
import pytesseract
import argparse
import numpy as np
if __name__ == "__main__":
    # Parse the input image path from the command line.
    parser = argparse.ArgumentParser(description="Process images for OCR")
    parser.add_argument("input_file", help="Input image file path")
    args = parser.parse_args()

    # Read the input image. imread returns None on failure instead of raising,
    # so check explicitly before touching the array.
    image = cv2.imread(args.input_file)
    if image is None:
        raise SystemExit(f"Could not read image: {args.input_file}")

    # OpenCV loads images as BGR; convert to RGB for the processing pipeline.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Keep a true copy to draw results on. Plain assignment would only alias
    # the array, and the rectangles/text drawn later mutate it in place.
    orig_image = image.copy()

    # Contrast normalization: stretch intensities to the full 0-255 range.
    # Passing None as dst lets OpenCV allocate a correctly-shaped output.
    image = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX)
    # Remove noise while preserving edges.
    image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 15)
    # The array is RGB at this point, so use the RGB->gray conversion.
    # (BGR2GRAY would apply the luma weights to the wrong channels.)
    image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    # Otsu binarization; threshold() returns (retval, image) -> keep the image.
    image = cv2.threshold(image, 150, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    image = cv2.bitwise_not(image)

    # Run tesseract. Config variables such as tessedit_char_blacklist must be
    # passed with "-c"; without it the option is silently ignored.
    config = r'--psm 6 --oem 3 -l eng -c tessedit_char_blacklist=,;:'
    result = pytesseract.image_to_data(
        image, config=config, output_type=pytesseract.Output.DICT
    )

    text_results = result['text']
    bounding_boxes = list(zip(result['left'], result['top'],
                              result['width'], result['height']))
    # Deduplicate identical (text, box) pairs.
    unique_results = list(set(zip(text_results, bounding_boxes)))

    # Punctuation/garbage tokens to discard from the OCR output.
    # A set gives O(1) membership tests and drops the duplicate entries
    # the original list carried.
    char_list = {'-', '}', ',', '—', 'nnn', '#', ':', '=', '——', '*', '!',
                 '°', '——=', ';', '+', '©'}
    ocr_results = [element for element in unique_results
                   if element[0].strip() and element[0] not in char_list]
    print(len(ocr_results))

    # Draw each detection box and its recognized text on the display copy.
    for text, (x, y, w, h) in ocr_results:
        cv2.rectangle(orig_image, (x, y), (x + w, y + h), (255, 0, 255), 2)
        cv2.putText(orig_image, text, (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # orig_image is RGB, but imshow expects BGR -- convert back so the
    # displayed colors are not channel-swapped.
    cv2.namedWindow("image", cv2.WINDOW_NORMAL)
    cv2.imshow("image", cv2.cvtColor(orig_image, cv2.COLOR_RGB2BGR))
    cv2.waitKey(0)
    cv2.destroyAllWindows()
The OCR output is attached below; I see a lot of scope for improvement in the results.
The attached image is a sample taken from Google just to illustrate the problem. The original images I am working with are at 300 DPI and of good quality. The main question is how to optimize pytesseract OCR to detect the text in schematics of embedded microcontroller boards.

Could you try PaddleOCR? I tested it on the PCB image.