RuntimeError: Given groups=1, weight of size [64, 1, 3, 3], expected input[1, 3, 416, 416] to have 1 channels, but got 3 channels instead

22 views Asked by At

I have folder called preprocessed_data_png where the npy file of the images and annotations are. When I try to train the model I get the below error.

RuntimeError: Given groups=1, weight of size [32, 3, 3, 3], expected input[32, 416, 416, 3] to have 3 channels, but got 416 channels instead

Convert DICOM to PNG


def preprocess_data(scan_dir, annotation_dir, output_dir):
    dict = getUID_path(scan_dir)
    annotation_paths = [os.path.join(annotation_dir, f) for f in os.listdir(annotation_dir) if f.endswith('.xml')]
    dicom_paths = []
    dicom_names = []
    for full_path in annotation_paths:
      base_path = os.path.basename(full_path)
      dcm_path, dcm_name = dict[base_path[:-4]]
      _, ext = os.path.splitext(dcm_name)
      if ext in ['.dcm']:
        dicom_names.append(dcm_name)
      dicom_paths.append(dcm_path)
      


    for dcm_path, dcm_name, annotation_path in zip(dicom_paths,dicom_names,annotation_paths):
      im = pydicom.dcmread(dcm_path)
      im = im.pixel_array.astype(float)
      rescaled_image = (np.maximum(im,0)/im.max())*255
      final_image = np.uint8(rescaled_image)
      final_image = Image.fromarray(final_image)
      name_without_extension, ext = os.path.splitext(dcm_name)
      if ext.lower() == '.dcm':
          dcm_name = name_without_extension
      # final_image.save(output_dir,dcm_name+'.png')
      # Save the image in the specified folder location with the correct format
      final_image.save(os.path.join(output_dir, dcm_name+'.png'), format='PNG')

      # Copy annotation file to output directory with the same name as DICOM image
      copyfile(annotation_path, os.path.join(output_dir, dcm_name+'.xml'))


# Preprocess data
scan_dir = "/Dataset/Scans/Lung_Dx-B0001"
annotation_dir = "/Dataset/Annotations/B0001"
# Ensure the output directory exists, create it if not
output_dir = "/Dataset/preprocessed_png/"
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
preprocess_data(scan_dir, annotation_dir, output_dir)

Preprocess Code

import os
import cv2
import numpy as np
from xml.etree import ElementTree as ET

def read_png_file(png_path):
    # Read PNG image using OpenCV
    image = cv2.imread(png_path, cv2.IMREAD_GRAYSCALE)
    return image

def parse_annotation(annotation_file):
    tree = ET.parse(annotation_file)
    root = tree.getroot()

    bounding_boxes = []

    for obj in root.findall('object'):
        xmin = int(obj.find('bndbox').find('xmin').text)
        ymin = int(obj.find('bndbox').find('ymin').text)
        xmax = int(obj.find('bndbox').find('xmax').text)
        ymax = int(obj.find('bndbox').find('ymax').text)

        bounding_boxes.append([xmin, ymin, xmax, ymax])

    return np.array(bounding_boxes)

def resize_and_normalize(scan, annotation):
    # Resize scan to 416x416 and normalize pixel values to [0, 1]
    resized_scan = cv2.resize(scan, (416, 416))
    normalized_scan = resized_scan / 255.0

    # Normalize bounding box coordinates
    normalized_annotation = annotation / np.array([scan.shape[1], scan.shape[0], scan.shape[1], scan.shape[0]])
    
    return normalized_scan, normalized_annotation

def convert_to_yolo_labels(annotations):
    yolo_labels = []
    for annotation in annotations:
        x_center = (annotation[0] + annotation[2]) / 2.0
        y_center = (annotation[1] + annotation[3]) / 2.0
        width = annotation[2] - annotation[0]
        height = annotation[3] - annotation[1]
        yolo_label = [0, x_center, y_center, width, height]
        yolo_labels.append(yolo_label)
    return np.array(yolo_labels)

def preprocess_data(scan_dir, output_dir):
    for filename in os.listdir(scan_dir):
        if filename.endswith('.png'):
            base_name = os.path.splitext(filename)[0]
            png_path = os.path.join(scan_dir, filename)
            annotation_path = os.path.join(scan_dir, base_name + '.xml')

            # Read PNG scan
            scan = read_png_file(png_path)

            # Parse annotation XML file
            annotations = parse_annotation(annotation_path)

            # Resize and normalize scan and bounding boxes
            resized_scan, resized_annotations = resize_and_normalize(scan, annotations)

            # Convert to YOLO-style labels
            yolo_labels = convert_to_yolo_labels(resized_annotations)

            # Save preprocessed data
            np.save(os.path.join(output_dir, f"{base_name}_scan.npy"), resized_scan)
            np.save(os.path.join(output_dir, f"{base_name}_labels.npy"), yolo_labels)

# Set directories
scan_dir = "/Dataset/preprocessed_png" // folder where png and xml files are
output_dir = "/Dataset/preprocessed_data_png"

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Preprocess data
preprocess_data(scan_dir, output_dir)

Model Code

class YOLOv7(nn.Module):
    def __init__(self, num_classes):
        super(YOLOv7, self).__init__()
        self.num_classes = num_classes
        
        # Define convolutional layers for feature extraction
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        
        # Define fully connected layers for classification and bounding box regression
        self.fc1 = nn.Linear(128 * 64 * 64, 1024)
        self.fc2 = nn.Linear(1024, 256)
        self.fc3 = nn.Linear(256, num_classes + 5)  # 5 for bounding box coordinates
        
    def forward(self, x):
        # Feature extraction
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, kernel_size=2, stride=2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, kernel_size=2, stride=2)
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(x, kernel_size=2, stride=2)
        
        # Flatten the feature map
        x = x.view(-1, 128 * 64 * 64)
        
        # Fully connected layers for classification and bounding box regression
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        
        return x

main.py

# Define the dataset class to load preprocessed data
class CustomDataset(Dataset):
    def __init__(self, data_dir):
        self.data_dir = data_dir
        self.image_files = [f for f in os.listdir(data_dir) if f.endswith('_scan.npy')]
        self.annotation_files = [f for f in os.listdir(data_dir) if f.endswith('_labels.npy')]
        
    def __len__(self):
        return len(self.image_files)
    
    def __getitem__(self, idx):
        image_file = os.path.join(self.data_dir, self.image_files[idx])
        annotation_file = os.path.join(self.data_dir, self.annotation_files[idx])
        
        image = np.load(image_file)
        annotation = np.load(annotation_file)

         # Convert grayscale image to 3 channels (if needed)
       
        if len(image.shape) == 2:
            image = np.stack((image,) * 3, axis=-1)
        print(image.shape)
        # Convert to tensor
        image = torch.from_numpy(image).float()
        annotation = torch.from_numpy(annotation).float()
        
        return image, annotation

# Define training parameters
batch_size = 32
num_classes = 1
lr = 0.001
num_epochs = 10

# Create dataset and dataloader
train_dataset = CustomDataset("/content/drive/MyDrive/Dataset/preprocessed_data_png/")
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Initialize model
model = YOLOv7(num_classes)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Training loop
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    for images, targets in train_loader:

        # Forward pass
        outputs = model(images)
        
        # Compute loss
        loss = criterion(outputs, targets)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}")

    # Evaluation
    model.eval()
    eval_loss = 0.0
    eval_accuracy = 0.0
    y_true = []
    y_pred = []
    with torch.no_grad():
        for images, targets in train_loader:
            outputs = model(images)
            eval_loss += criterion(outputs, targets).item()
            
            # Convert outputs and targets to numpy arrays
            outputs_np = outputs.detach().cpu().numpy().round()
            targets_np = targets.detach().cpu().numpy().round()
            
            # Flatten arrays
            outputs_flat = outputs_np.flatten()
            targets_flat = targets_np.flatten()
            
            # Calculate accuracy
            eval_accuracy += calculate_accuracy(outputs_flat, targets_flat)
            
            # Collect true and predicted labels for F1 score calculation
            y_true.extend(targets_flat)
            y_pred.extend(outputs_flat)
    
    eval_loss /= len(train_loader)
    eval_accuracy /= len(train_loader)
    f1 = f1_score(y_true, y_pred)
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Evaluation Loss: {eval_loss:.4f}, Accuracy: {eval_accuracy:.4f}, F1 Score: {f1:.4f}")

I basically have DICOM files and xml annotation. When I tried with the dicom file got the same error. Then converted the files into png and made the .npy in the main dataset folder but still getting the same error. Trying to run on YOLOv7

0

There are 0 answers