I'm using Chaquopy within the Android code of a Flutter project to run a Python script that uses some TensorFlow Lite models.
Here's the Python script:
from io import BytesIO
import base64
import tensorflow as tf
from skimage import io
from imutils.object_detection import non_max_suppression
import numpy as np
import math
import time
import cv2
import string
from os.path import dirname, join
def preprocess_east(image: np.ndarray):
    """Resize and mean-normalise an image for the EAST text detector.

    :param image: source image whose first two axes are (height, width).
        The mean subtraction assumes three colour channels, presumably in
        BGR order as produced by ``cv2.imdecode`` -- TODO confirm.
    :returns: tuple ``(input_image, blob, rW, rH)`` where ``input_image`` is
        the argument itself (not a copy), ``blob`` is the resized float32
        image with a leading batch axis, and ``rW`` / ``rH`` are the
        source-to-resized width / height ratios
    """
    input_image = image
    (H, W) = input_image.shape[:2]
    (newW, newH) = (416, 640)
    # Ratios used by the caller to map detections back onto the source image.
    rW = W / float(newW)
    rH = H / float(newH)
    image = cv2.resize(input_image, (newW, newH))
    image = image.astype("float32")
    # Per-channel means; the listed order is reversed before subtraction.
    mean = np.array([123.68, 116.779, 103.939][::-1], dtype="float32")
    image -= mean
    image = np.expand_dims(image, 0)
    return input_image, image, rW, rH
def run_east_tflite(input_data):
    """Run the EAST TFLite detector on one preprocessed input.

    The model file ``east_float_640.tflite`` is loaded from the directory
    containing this module on every call.

    :param input_data: array written unchanged into the model's first input
        tensor
    :returns: tuple ``(scores, geometry)`` holding copies of the model's
        first and second output tensors
    """
    model_path = join(dirname(__file__), "east_float_640.tflite")
    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    interpreter.set_tensor(input_details[0]["index"], input_data)
    interpreter.invoke()
    # get_tensor() returns copies. tensor(idx)() would hand out views into
    # buffers owned by the interpreter, which must not be held once the
    # interpreter goes out of scope at the end of this function.
    scores = interpreter.get_tensor(output_details[0]["index"])
    geometry = interpreter.get_tensor(output_details[1]["index"])
    return scores, geometry
def postprocess_east(scores, geometry, rW, rH, orig, min_confidence=0.5):
    """Decode EAST outputs into boxes, draw them, and return their corners.

    :param scores: 4-D score tensor with the channel axis last
    :param geometry: 4-D geometry tensor with the channel axis last; five
        channels are read (four distances and one angle)
    :param rW: factor applied to x coordinates to map them onto ``orig``
    :param rH: factor applied to y coordinates to map them onto ``orig``
    :param orig: image the detection rectangles are drawn on, in place
    :param min_confidence: cells scoring below this value are ignored
    :returns: tuple ``(orig, crops)`` where ``crops`` is a list of
        ``[[startX, startY], [endX, endY]]`` corner pairs already scaled by
        ``rW`` / ``rH``
    """
    # Move the channel axis to position 1 so rows can be indexed per channel.
    scores = np.transpose(scores, (0, 3, 1, 2))
    geometry = np.transpose(geometry, (0, 3, 1, 2))
    (numRows, numCols) = scores.shape[2:4]
    rects = []
    confidences = []
    for y in range(numRows):
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]
        for x in range(numCols):
            if scoresData[x] < min_confidence:
                continue
            # Grid cell -> pixel offset; the factor 4.0 is presumably the
            # model's output stride -- TODO confirm.
            (offsetX, offsetY) = (x * 4.0, y * 4.0)
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]
            endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
            endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            startX = int(endX - w)
            startY = int(endY - h)
            rects.append((startX, startY, endX, endY))
            confidences.append(scoresData[x])
    # Collapse overlapping candidates into a single box each.
    boxes = non_max_suppression(np.array(rects), probs=confidences)
    crops = []
    for startX, startY, endX, endY in boxes:
        # Rescale from the resized network input back to the source image.
        startX = int(startX * rW)
        startY = int(startY * rH)
        endX = int(endX * rW)
        endY = int(endY * rH)
        cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 0, 255), 3)
        crops.append([[startX, startY], [endX, endY]])
    return orig, crops
def preprocess_ocr(image):
    """Turn a colour crop into the float32 tensor fed to the OCR model.

    The crop is converted with ``cv2.COLOR_BGR2GRAY``, resized with
    ``dsize=(200, 31)``, wrapped in a leading and a trailing singleton axis,
    and scaled by dividing by 255.

    :param image: colour image accepted by ``cv2.cvtColor``
    :returns: float32 array with one leading and one trailing singleton axis
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    resized = cv2.resize(gray, (200, 31))
    # Add both singleton axes around the 2-D grayscale image in one step.
    batch = resized[np.newaxis, :, :, np.newaxis]
    return batch.astype("float32") / 255
def run_tflite_ocr(input_data, interpreter=None):
    """Run the OCR TFLite model on one preprocessed crop.

    :param input_data: array written unchanged into the model's first input
        tensor
    :param interpreter: optional, already-allocated ``tf.lite.Interpreter``
        to reuse; when omitted, ``keras_ocr_float16_ctc.tflite`` is loaded
        from this module's directory and allocated for this call only.
        Passing one avoids reloading the model for every crop.
    :returns: copy of the model's first output tensor
    """
    if interpreter is None:
        model_path = join(dirname(__file__), "keras_ocr_float16_ctc.tflite")
        interpreter = tf.lite.Interpreter(model_path=model_path)
        interpreter.allocate_tensors()
    # Get input and output tensors.
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    interpreter.set_tensor(input_details[0]["index"], input_data)
    interpreter.invoke()
    return interpreter.get_tensor(output_details[0]["index"])
# Characters the OCR model can emit, indexed by class id: digits, then a-z.
alphabets = string.digits + string.ascii_lowercase
# Class id one past the last character; skipped when decoding.
blank_index = len(alphabets)


def postprocess_ocr(output, greedy=True):
    """Map the first row of class ids in ``output`` to a string.

    Ids equal to ``blank_index`` or ``-1`` are dropped; every other id is
    looked up in ``alphabets``.

    :param output: indexable whose first element is an iterable of class ids
    :param greedy: accepted for interface compatibility; not used here
    :returns: decoded text
    """
    skipped = (blank_index, -1)
    chars = []
    for class_id in output[0]:
        if class_id in skipped:
            continue
        chars.append(alphabets[class_id])
    return "".join(chars)
def run_ocr(img_bytes: bytes, detector="east", greedy=True):
    """Detect text regions, recognise each one, and return the annotated image.

    :param img_bytes: encoded image bytes, decoded with ``read_image``
    :param detector: label used only in the timing message
    :param greedy: forwarded to ``postprocess_ocr``
    :returns: the annotated image as ``ndarray.tobytes()``.
        NOTE(review): this is the raw pixel buffer, not an encoded image
        file, so the caller needs the shape to rebuild it -- confirm this is
        what the Android side expects.
    """
    nd_array = read_image(img_bytes)
    start_time = time.time()
    input_image, preprocessed_image, rW, rH = preprocess_east(nd_array)
    # postprocess_east and cv2.putText draw directly on the image array.
    # Crops must come from an unmodified copy so the OCR model never sees
    # the red rectangles or labels drawn for earlier boxes.
    clean_image = input_image.copy()
    scores, geometry = run_east_tflite(preprocessed_image)
    output, crops = postprocess_east(scores, geometry, rW, rH, input_image)
    font_scale = 1
    thickness = 2
    (h, w) = clean_image.shape[:2]
    for box in crops:
        # Clamp the box to the image bounds before slicing.
        xMin = max(0, box[0][0])
        yMin = max(0, box[0][1])
        xMax = min(w, box[1][0])
        yMax = min(h, box[1][1])
        if xMin >= xMax or yMin >= yMax:
            # Empty crop after clamping: cv2 cannot process it, so skip it.
            continue
        cropped_image = clean_image[yMin:yMax, xMin:xMax, :]
        processed_image = preprocess_ocr(cropped_image)
        ocr_output = run_tflite_ocr(processed_image)
        final_output = postprocess_ocr(ocr_output, greedy)
        # Label is anchored 10 px above the box's unclamped top-left corner.
        cv2.putText(
            output,
            final_output,
            (box[0][0], box[0][1] - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            font_scale,
            (0, 0, 255),
            thickness,
        )
    print(
        f"Time taken to run OCR Model with {detector} detector and KERAS OCR is",
        time.time() - start_time,
    )
    return output.tobytes()
def image_to_byte_array(image_path: str) -> bytes:
    """Read the file at ``image_path`` in binary mode and return its contents.

    :param image_path: path of the file to read
    :returns: the file's raw bytes
    :raises OSError: if the file cannot be opened or read
    """
    with open(image_path, "rb") as image:
        # read() on a binary-mode file already returns ``bytes``.
        return image.read()
def read_image(content: bytes) -> np.ndarray:
    """
    Image bytes to OpenCV image

    :param content: Image bytes
    :returns OpenCV image
    :raises TypeError: If content is not bytes
    :raises ValueError: If content is empty or does not represent an image
    """
    if not isinstance(content, bytes):
        raise TypeError(f"Expected 'content' to be bytes, received: {type(content)}")
    if not content:
        # Reject empty input here so it surfaces as the documented
        # ValueError instead of being handed to cv2.imdecode.
        raise ValueError("Expected 'content' to be image bytes")
    image = cv2.imdecode(np.frombuffer(content, dtype=np.uint8), cv2.IMREAD_COLOR)
    if image is None:
        raise ValueError("Expected 'content' to be image bytes")
    return image
# image_path = r"/Users/josegeorges/Desktop/puro-labels/train/yes/label_1.jpg"
# img_bytes = image_to_byte_array(image_path)
# final_image = run_ocr(img_bytes, detector="east", greedy=True)
def call_ocr_from_android(img_bytes: bytearray):
    """Entry point for the Android side: run OCR on a mutable byte buffer.

    :param img_bytes: image data as a ``bytearray``; converted to ``bytes``
        before being passed on
    :returns: whatever ``run_ocr`` returns for the "east" detector with
        greedy decoding
    """
    immutable_bytes = bytes(img_bytes)
    return run_ocr(img_bytes=immutable_bytes, detector="east", greedy=True)
# dst_folder = "./"
# out_file_name = "out_image.png"
# # Save the image in JPG format
# cv2.imwrite(os.path.join(dst_folder, out_file_name), final_image)
Here are the packages installed through Gradle:
install "numpy"
install "opencv-python"
install "imutils"
install "scikit-image"
install "tensorflow"
I'm currently running into the following exception when trying to load the keras_ocr_float16_ctc.tflite interpreter:
Regular TensorFlow ops are not supported by this interpreter. Make sure you apply/link the Flex delegate before inference.Node number 192 (FlexCTCGreedyDecoder) failed to prepare.
From what I've read in the TF docs, I should have the select ops available since I'm installing the TensorFlow pip package, but that doesn't seem to be the case. I also thought I needed to follow the Android instructions to add the org.tensorflow:tensorflow-lite-select-tf-ops:0.0.0-nightly-SNAPSHOT dependency, but that doesn't seem to work either.
What can I do to run this Select Op(s) model on Android using chaquopy?
Unfortunately it says here:
But Chaquopy's TensorFlow build is currently at version 2.1.
As for the build.gradle dependencies block, that will only affect the TensorFlow Java API, not the Python API. So the best options I can think of are:
Use the tflite-runtime pip package instead. Chaquopy currently provides this at version 2.5, so it may have some additional operators.