I've successfully trained a YOLOv8 model on my custom dataset:
from ultralytics import YOLO
# Start from the pretrained YOLOv8-small checkpoint (recommended for training).
model = YOLO('yolov8s.pt')

# Fine-tune on the custom dataset; the settings are collected in one mapping
# and forwarded to train() as keyword arguments.
train_settings = {
    'name': 'MyModel',
    'data': '/root/work/images/data.yaml',
    'epochs': 100,
    'imgsz': 640,
}
model.train(**train_settings)
and if I try to find some objects inside an image I get the right results:
python3 yolo_test.py
cat
[ 344 808 572 1208]
[ 458 1008 227 400]
[ 0.84057]
This is the content of yolo_test.py:
from ultralytics import YOLO
from PIL import Image
import requests
from io import BytesIO
import cv2
import numpy as np
# Run the trained model on an image URL, print every detection, and save a
# crop of each detection whose confidence exceeds CROP_CONFIDENCE.
CROP_CONFIDENCE = .7

model = YOLO('/root/work/MyModel/weights/best.pt')
inputs = ['https://example.com/cat.jpg']
results = model(inputs)

for result in results:
    boxes = result.boxes  # Boxes object for bbox outputs
    # Other views available on `boxes`: xyxy, xywh, xyxyn, xywhn (N, 4),
    # conf, cls (N,), and data (raw tensor, N x 6).

    # The original script cropped from an undefined `img`. Build the PIL image
    # from the frame the model actually ran on, so the box coordinates line up.
    # NOTE(review): assumes result.orig_img is a BGR ndarray, as documented
    # for Ultralytics Results - confirm for the installed version.
    img = Image.fromarray(cv2.cvtColor(result.orig_img, cv2.COLOR_BGR2RGB))

    for box in boxes:
        # .cpu() is a no-op on CPU tensors and makes .numpy() safe on GPU runs.
        r = box.xyxy[0].cpu().numpy().astype(int)
        r_xywh = box.xywh[0].cpu().numpy().astype(int)  # pixel units, not normalised
        print(result.names[int(box.cls[0])])
        print(r)
        print(r_xywh)
        confidence = box.conf.cpu().numpy()
        print(confidence)
        if confidence[0] > CROP_CONFIDENCE:
            # PIL expects (left, upper, right, lower) as plain ints.
            area_to_crop = tuple(int(v) for v in r)
            cropped_img = img.crop(area_to_crop)
            # Every qualifying box writes to the same path, so only the last
            # crop survives (unchanged from the original script).
            cropped_img.save('/root/work/cropped_img.jpg')
Now I've exported the model to ONNX format:
from ultralytics import YOLO
from PIL import Image
import requests
from io import BytesIO
import cv2
import numpy as np
# Re-open the best checkpoint from the training run.
trained_weights = '/root/work/MyModel/weights/best.pt'
model = YOLO(trained_weights)

# Write the ONNX export using opset 12.
export_settings = {'format': 'onnx', 'opset': 12}
model.export(**export_settings)
and I've tried to use OpenCV (cv2.dnn) for the object detection. This is my 'predict_cv2.py':
import cv2
import numpy as np
import requests
from PIL import Image
# Run the exported ONNX model through OpenCV's DNN module: fetch the image,
# build the input blob, decode each prediction row into (class, score, box),
# apply non-maximum suppression, then print and draw the surviving detections.

# Load Model
net = cv2.dnn.readNet('/root/work/MyModel/weights/best.onnx')
# net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
# net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)
INPUT_WIDTH = 640
INPUT_HEIGHT = 640
# NOTE(review): this list must be in the same order as `names` in the
# training data.yaml (index 0 first). The Ultralytics run reports "cat" for
# the image that lands on index 0 here, so the order is probably reversed -
# confirm against data.yaml / model.names.
CLASESS_YOLO = ['dog','cat']

inputs = ['https://example.com/cat.jpg']
response = requests.get(inputs[0], timeout=30)
response.raise_for_status()
# Force a 3-channel BGR decode: the "unchanged" flag (-1) can yield grayscale
# or BGRA images, which break the shape unpacking and the blob below.
cv2_img = cv2.imdecode(np.frombuffer(response.content, dtype=np.uint8), cv2.IMREAD_COLOR)
if cv2_img is None:
    raise ValueError('could not decode image from ' + inputs[0])

blob = cv2.dnn.blobFromImage(cv2_img, 1/255.0, (INPUT_WIDTH, INPUT_HEIGHT), swapRB=True, crop=False)
net.setInput(blob)
preds = net.forward()
# Swap the last two axes so that each entry of preds[0] is one prediction row.
preds = preds.transpose((0, 2, 1))

# Extract output detection
class_ids, confs, boxes = list(), list(), list()
image_height, image_width = cv2_img.shape[:2]
# The blob stretches the image to the network size (crop=False), so each axis
# is scaled back to original pixels independently.
x_factor = image_width / INPUT_WIDTH
y_factor = image_height / INPUT_HEIGHT
min_conf = .3

for row in preds[0]:
    # row[0:4] is the box (centre x, centre y, width, height); everything
    # after that is treated as one score per class.
    classes_score = row[4:]
    class_id = int(np.argmax(classes_score))
    # Store the score of the winning class. The original stored row[4], which
    # is always the class-0 score, so NMSBoxes (which thresholds on these
    # scores) discarded every detection whose best class was not class 0.
    conf = float(classes_score[class_id])
    if conf <= min_conf:
        continue
    confs.append(conf)
    label = CLASESS_YOLO[class_id]
    print(class_id)
    class_ids.append(label)
    # Convert the centre-based box to top-left / size in original pixels.
    x, y, w, h = row[0].item(), row[1].item(), row[2].item(), row[3].item()
    left = int((x - 0.5 * w) * x_factor)
    top = int((y - 0.5 * h) * y_factor)
    width = int(w * x_factor)
    height = int(h * y_factor)
    boxes.append(np.array([left, top, width, height]))

r_class_ids, r_confs, r_boxes = list(), list(), list()
indexes = cv2.dnn.NMSBoxes(boxes, confs, min_conf, min_conf-0.1)
# Older OpenCV builds return an (N, 1) array and newer ones a flat array (or
# an empty tuple when nothing survives); normalise to a flat list of ints.
indexes = [int(i) for i in np.array(indexes).flatten()]
for i in indexes:
    r_class_ids.append(class_ids[i])
    r_confs.append(confs[i])
    r_boxes.append(boxes[i])
print(r_class_ids)
print(r_confs)
print(r_boxes)

for i in indexes:
    # Plain Python ints keep cv2.rectangle happy across OpenCV versions.
    left, top, width, height = (int(v) for v in boxes[i])
    cv2.rectangle(cv2_img, (left, top), (left + width, top + height), (0,255,0), 3)

result_image = Image.fromarray(cv2.cvtColor(cv2_img, cv2.COLOR_BGR2RGB))
result_image.save('/root/work/image_cv2.jpg')
The r_boxes and r_confs values, and even the saved image '/root/work/image_cv2.jpg', all look right: the object is located correctly. But the detected class index ('class_id = max_idx[1]') is always 0, so every detection is labelled 'dog'.
This is the result of 'predict_cv2.py':
['dog']
[0.8799546]
[array([364, 802, 206, 416])]
Can someone help me? What am I doing wrong?
Thank you!