I've been trying to give my model image and bounding box from the dataset, but it keeps make an error. Is it because of the dict? I'm new to deep learning please help. Here's my code:
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import v2
import torch
import os
import matplotlib.pyplot as plt
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from PIL import Image
from matplotlib.patches import Rectangle
from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
# Define the path to your image directory
PATH = "/content/drive/MyDrive/data/training_images"
device = "cuda" if torch.cuda.is_available() else "cpu"
label_df = pd.read_csv("/content/drive/MyDrive/data/train_solution_bounding_boxes.csv")
Images = []
Image_path = []
xmax = []
xmin = []
ymax = []
ymin = []
column = label_df.columns
for idx, row in label_df.iterrows():
Images.append(row['image'])
xmin.append(row['xmin']) # Extract xmin
ymin.append(row['ymin']) # Extract ymin
xmax.append(row['xmax']) # Extract xmax
ymax.append(row['ymax']) # Extract ymax
for img in Images:
Image_path.append(os.path.join(PATH,img))
# Define transformations for image preprocessing
Transform = v2.Compose([
v2.ToImage(),
v2.ToDtype(dtype=torch.float32, scale=True)
])
# Define a custom dataset for car detection
class CarDetectionDataset(Dataset):
def __init__(self, csv_file, Transformer):
self.csv = csv_file
self.Transformer = Transformer
def __len__(self):
return len(self.csv['image'])
def __getitem__(self, idx):
# Load an image from the dataset
Img = Image.open(Image_path[idx])
Img = self.Transformer(Img).to(device)
# Extract bounding boxes for the current image
target = {
"labels" :[],
"boxes":[]
}
bboxes = self.csv[self.csv["image"]==Images[idx]]
image, xmax, xmin, ymax, ymin = bboxes['image'], bboxes["xmax"], bboxes["xmin"], bboxes["ymax"], bboxes["ymin"]
for i in range(len(xmax)):
target["boxes"].append([xmin.iloc[i],ymin.iloc[i],xmax.iloc[i],ymax.iloc[i]])
target["labels"].append(1)
return Img, target
# Load your dataset
Dataset = CarDetectionDataset(label_df, Transform)
# Create a data loader for the dataset
Dataloader = DataLoader(dataset=Dataset, shuffle=True, batch_size=1)
# Load the Faster R-CNN model
model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
# Configure the model's box predictor for car detection
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 1) # 1 class for "Car"
for Image , bbox in Dataloader:
plt.imshow(Image.squeeze().permute(1,2,0))
print(bbox)
preds = model(Image,bbox)
break
Here's the output when I print bbox:
{
'labels': [tensor([1])],
'boxes': [
[
tensor([52.8278], dtype=torch.float64),
tensor([195.3491], dtype=torch.float64),
tensor([122.7757], dtype=torch.float64),
tensor([226.6490], dtype=torch.float64)
]
]
}
Here's the error :
TypeError: string indices must be integers
I tried searching from google and ask ChatGPT and change the bbox like this 'preds = model(Image,[bbox])`