How to get updated bounding box coordinates after image augmentation using albumentations?


I'm trying to expand my dataset using an image augmentation package called albumentations. I have 1145 images and their corresponding annotations labelled in YOLO format. My problem is that the annotation files are not updated after the augmentation techniques are applied: even though resizing and rescaling are performed on the original image, the output annotation file contains the same annotations as the original one. Can anyone help me get through this, or point out what I am doing wrong? I've provided the code snippet below:

import os
import cv2
import albumentations as A

# Define the input and output directories
input_image_dir = r'C:\Users\LEVEL51PC\Desktop\image_dataset\images'
input_label_dir = r'C:\Users\LEVEL51PC\Desktop\image_dataset\labels'
output_image_dir = r'C:\Users\LEVEL51PC\Desktop\augmented_dataset\images'
output_label_dir = r'C:\Users\LEVEL51PC\Desktop\augmented_dataset\labels'

# Create output directories if they don't exist
os.makedirs(output_image_dir, exist_ok=True)
os.makedirs(output_label_dir, exist_ok=True)

# Define augmentation transformations and bbox_params
bbox_params = A.BboxParams(
    format='yolo',
    label_fields=['labels']
)

# Define a list of augmentation techniques
augmentation_techniques = [
    A.Resize(width=512, height=512),  # Resize images
    A.RandomScale(scale_limit=(0.5, 2.0), p=0.5),  # Scaling
    A.RandomRain(p=0.5),  # Random rain
    A.ImageCompression(quality_lower=50, quality_upper=100, p=0.5),
]

# List all image files in the input directory
image_files = os.listdir(input_image_dir)

for image_file in image_files:
    # Load the image
    image_path = os.path.join(input_image_dir, image_file)
    image = cv2.imread(image_path)

    # Load the corresponding YOLO annotation file
    annotation_file = os.path.splitext(image_file)[0] + '.txt'
    annotation_path = os.path.join(input_label_dir, annotation_file)

    # Read the YOLO annotation
    with open(annotation_path, 'r') as f:
        lines = f.readlines()

    # Extract YOLO bounding box information
    boxes = []
    for line in lines:
        class_id, x_center, y_center, width, height = map(float, line.strip().split())
        boxes.append([class_id, x_center, y_center, width, height])

    # Apply each augmentation technique to the image separately
    for i, technique in enumerate(augmentation_techniques):
        augmented = technique(image=image, bboxes=boxes, labels=[0] * len(boxes))

        # Get the augmented image and updated bounding boxes
        augmented_image = augmented['image']
        augmented_boxes = augmented['bboxes']

        # Create a unique name for each augmented file based on the image filename
        augmented_filename = f"{os.path.splitext(image_file)[0]}_{i}_aug"

        # Save the augmented image
        output_image_path = os.path.join(output_image_dir, f"{augmented_filename}.jpg")
        cv2.imwrite(output_image_path, augmented_image)

        # Create a unique name for each augmented annotation file
        augmented_annotation_path = os.path.join(output_label_dir, f"{augmented_filename}.txt")
        with open(augmented_annotation_path, 'w') as f:
            for augmented_box in augmented_boxes:
                class_id, x_center, y_center, width, height = augmented_box
                f.write(f"{int(class_id)} {x_center} {y_center} {width} {height}\n")

print("Augmentation Complete")


1 Answer

Jagadish Subramani

First of all, 'bbox_params' is defined but never passed to the augmentation pipeline, so the bounding boxes are never transformed.

According to the Albumentations documentation, we need to pass an instance of A.BboxParams to the Compose pipeline so that the bounding boxes are transformed along with the image.

Since you are applying each transformation as a separate augmentation, your list of augmentation techniques should look something like this:

# Define a list of augmentation techniques
augmentation_techniques = [
    A.Compose([A.Resize(width=512, height=512)], bbox_params=bbox_params),  # Resize images
    A.Compose([A.RandomScale(scale_limit=(0.5, 2.0), p=0.5)], bbox_params=bbox_params),  # Scaling
    A.Compose([A.RandomRain(p=0.5)], bbox_params=bbox_params),  # Random rain
    A.Compose([A.ImageCompression(quality_lower=50, quality_upper=100, p=0.5)], bbox_params=bbox_params)
]
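
Each entry is now a composed pipeline, so calling it returns the transformed bounding boxes along with the image. Below is a minimal sketch of how the loop might call one of these pipelines; it assumes the class id is passed through the 'labels' field named in label_fields, and that each bbox handed to Albumentations contains only the four YOLO-normalized values (rather than the class id as the first element, as in your current boxes list).

# Minimal sketch (assumptions: class ids travel via the 'labels' field, and each
# bbox holds only the four YOLO-normalized values)
yolo_boxes = [box[1:] for box in boxes]      # (x_center, y_center, width, height)
class_ids = [int(box[0]) for box in boxes]   # class ids go through label_fields

for i, technique in enumerate(augmentation_techniques):
    augmented = technique(image=image, bboxes=yolo_boxes, labels=class_ids)

    augmented_image = augmented['image']
    augmented_boxes = augmented['bboxes']    # coordinates updated by the transform
    augmented_labels = augmented['labels']   # class ids, aligned with augmented_boxes

When writing the YOLO lines back out, the class id would then come from augmented_labels rather than from the box itself. Also note that, depending on the BboxParams settings (for example min_visibility), boxes pushed outside the frame by a transform may be dropped, so the returned lists can be shorter than the inputs.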