Linked Questions

Popular Questions

I've successfully trained my custom dataset with YOLOV8:

from ultralytics import YOLO

# Load a model

model = YOLO('yolov8s.pt')  # load a pretrained model (recommended for training)

# Train the model

model.train(
    name='MyModel',
    data='/root/work/images/data.yaml',
    epochs=100,
    imgsz=640
)

and if I try to find some objects inside an image I get the right results:

python3 yolo_test.py

cat
[ 344  808  572 1208]
[ 458 1008  227  400]
[    0.84057]

This is the content of yolo_test.py:

from ultralytics import YOLO
from PIL import Image
import requests
from io import BytesIO
import cv2
import numpy as np

model = YOLO('/root/work/MyModel/weights/best.pt')

inputs = ['https://example.com/cat.jpg']

results = model(inputs)
for result in results:
    boxes = result.boxes  # Boxes object for bbox outputs
    masks = result.masks  # Masks object for segmentation masks outputs
    probs = result.probs  # Class probabilities for classification outputs
    
    # print(boxes.xyxy)  # box with xyxy format, (N, 4)
    # print(boxes.xywh)  # box with xywh format, (N, 4)
    # print(boxes.xyxyn)  # box with xyxy format but normalized, (N, 4)
    # print(boxes.xywhn)  # box with xywh format but normalized, (N, 4)
    # print(boxes.conf)  # confidence score, (N, 1)
    # print(boxes.cls)  # cls, (N, 1)
    # print(boxes.data)  # raw bboxes tensor, (N, 6) or boxes.boxes
    
    for box in boxes:
        r = box.xyxy[0].numpy().astype(int)
        r_normal = box.xywh[0].numpy().astype(int)
        print(result.names[int(box.cls[0])])
        print(r)
        print(r_normal)
        confidence = box.conf.numpy()
        print(confidence)
        if confidence > .7:
            area_to_crop = (r[0], r[1], r[2], r[3])
            cropped_img  = img.crop(area_to_crop)
            cropped_img.save('/root/work/cropped_img.jpg')

Now I've exported the model on onnx format:

from ultralytics import YOLO
from PIL import Image
import requests
from io import BytesIO
import cv2
import numpy as np

model = YOLO('/root/work/MyModel/weights/best.pt')
# Export the model
model.export(format='onnx', opset=12)

and I've tried to use opencv for the object detection. This is my 'predict_cv2.py':

import cv2
import numpy as np
import requests
from PIL import Image

# Load Model
net = cv2.dnn.readNet('/root/work/MyModel/weights/best.onnx')

# net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
# net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)

INPUT_WIDTH = 640
INPUT_HEIGHT = 640
# SCORE_THRESHOLD = 0.2
# NMS_THRESHOLD = 0.4
# CONFIDENCE_THRESHOLD = 0.4

CLASESS_YOLO = ['dog','cat']


inputs = ['https://example.com/cat.jpg']

response = requests.get(inputs[0])
cv2_img = cv2.imdecode(np.array(bytearray(response.content), dtype=np.uint8), -1)
blob = cv2.dnn.blobFromImage(cv2_img, 1/255.0, (INPUT_WIDTH, INPUT_HEIGHT), swapRB=True, crop=False)
net.setInput(blob)
preds = net.forward()

preds = preds.transpose((0, 2, 1))

# Extract output detection
class_ids, confs, boxes = list(), list(), list()

image_height, image_width, _ = cv2_img.shape
x_factor = image_width / INPUT_WIDTH
y_factor = image_height / INPUT_HEIGHT

min_conf = .3

rows = preds[0].shape[0]

for i in range(rows):
    row = preds[0][i]
    conf = row[4]
    
    classes_score = row[4:]
    _,_,_, max_idx = cv2.minMaxLoc(classes_score)
    class_id = max_idx[1] 

    if (classes_score[class_id] > min_conf):
        confs.append(conf)
        label = CLASESS_YOLO[int(class_id)]
        print(class_id)
        class_ids.append(label)
        #extract boxes
        x, y, w, h = row[0].item(), row[1].item(), row[2].item(), row[3].item() 
        left = int((x - 0.5 * w) * x_factor)
        top = int((y - 0.5 * h) * y_factor)
        width = int(w * x_factor)
        height = int(h * y_factor)
        box = np.array([left, top, width, height])
        boxes.append(box)
        
r_class_ids, r_confs, r_boxes = list(), list(), list()

indexes = cv2.dnn.NMSBoxes(boxes, confs, min_conf, min_conf-0.1) 

for i in indexes:
    r_class_ids.append(class_ids[i])
    r_confs.append(confs[i])
    r_boxes.append(boxes[i])

print(r_class_ids)
print(r_confs)
print(r_boxes)

for i in indexes:
    box = boxes[i]
    left = box[0]
    top = box[1]
    width = box[2]
    height = box[3]
    
    cv2.rectangle(cv2_img, (left, top), (left + width, top + height), (0,255,0), 3)
result_image = Image.fromarray(cv2.cvtColor(cv2_img, cv2.COLOR_BGR2RGB))
result_image.save('/root/work/image_cv2.jpg')

All r_boxes, r_confs and even the saved image '/root/work/image_cv2.jpg' are right. The object is detected correctly. But the detected 'class_id' -> 'class_id = max_idx[1]' is always the same '0'.

This is the result of 'predict_cv2.py':

['dog']
[0.8799546]
[array([364, 802, 206, 416])]

Can someone help me? What am I doing wrong?

Thank you!

Related Questions