Finding horizontal images with size 400 x 400 using Python

43 views Asked by At

I have a folder that contains a lot of images, a mixture of horizontal and vertical ones. I'm trying to filter out the images that are horizontal, i.e. that have a lot of white space at the top and bottom of the image. Here are examples:

enter image description here

I also have other images that look like this:

enter image description here

All the images are the same size, 400x400. I'm trying to filter the images based on the amount of white space at the top and bottom of the image.

Here is what I have tried:

from PIL import Image
import os
import shutil

def has_white_borders(image_path, threshold_top_bottom=0.2):
    """Report whether both the top and the bottom strip of an image are mostly white.

    The strips are the top and bottom 5% of the image's rows (at least one
    row each). A pixel counts as white when its grayscale ("L") value is
    greater than 200.

    Args:
        image_path: Path of the image file, opened with PIL's ``Image.open``.
        threshold_top_bottom: Fraction of white pixels that each strip must
            strictly exceed for the image to qualify.

    Returns:
        True when the white-pixel ratio of the top strip and of the bottom
        strip both exceed ``threshold_top_bottom``; False otherwise
        (including for an image with zero width or height).
    """

    def count_white(strip):
        # Map every grayscale value to 1 (white) or 0, then add them up.
        binary = strip.convert("L").point(lambda p: 1 if p > 200 else 0)
        return sum(binary.getdata())

    # The context manager releases the file handle as soon as the image has
    # been measured, so scanning a large folder does not pile up open files.
    with Image.open(image_path) as img:
        width, height = img.size
        if width == 0 or height == 0:
            return False

        # 5% of the height, but never zero rows: for images shorter than
        # 20 px int(height * 0.05) is 0, which would divide by zero below.
        rows_to_check = max(1, int(height * 0.05))
        strip_pixels = width * rows_to_check

        top_rows = img.crop((0, 0, width, rows_to_check))
        bottom_rows = img.crop((0, height - rows_to_check, width, height))

        top_ratio = count_white(top_rows) / strip_pixels
        bottom_ratio = count_white(bottom_rows) / strip_pixels

    # Both strips must clear the threshold; one white edge is not enough.
    return top_ratio > threshold_top_bottom and bottom_ratio > threshold_top_bottom

def find_images_with_white_borders(input_folder, output_folder):
    """Copy the images in ``input_folder`` that pass ``has_white_borders`` to ``output_folder``.

    Only directory entries whose lower-cased name ends in one of the known
    image extensions are examined. The output folder is created if missing.

    Args:
        input_folder: Folder whose entries are screened.
        output_folder: Folder that receives a copy of each matching image.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff')

    # Screen every candidate first; nothing is created or copied until the
    # whole folder has been examined.
    matches = []
    for entry in os.listdir(input_folder):
        if not entry.lower().endswith(image_suffixes):
            continue
        if has_white_borders(os.path.join(input_folder, entry)):
            matches.append(entry)

    os.makedirs(output_folder, exist_ok=True)

    # copy2 carries the file metadata along with the contents.
    for entry in matches:
        shutil.copy2(
            os.path.join(input_folder, entry),
            os.path.join(output_folder, entry),
        )

if __name__ == "__main__":
    # Placeholders: the folder to scan and the folder that receives the
    # matching copies. Both must be replaced with real paths.
    input_image_folder = r'folder/path'
    output_image_folder = r'folder/path'

    # Screen the input folder and copy every image whose top and bottom
    # strips are mostly white.
    find_images_with_white_borders(
        input_folder=input_image_folder,
        output_folder=output_image_folder,
    )

    print(f"Images with more white borders on top and bottom copied to: {output_image_folder}")

When I run the code, it still shows me images with very little to no white border on the top and bottom, or it shows me images with a white border on the sides. I want to find the images that are horizontal, like the first image.

enter image description here

1

There is 1 answer

0
Michael Hodel On

You could check whether the first row and last row are both all white:

from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

def is_hori(img, white_level=255):
    """Report whether the first and last rows of an image array are entirely white.

    Args:
        img: Array-like image indexed by row first, whose rows support
            ``.min()`` (e.g. a NumPy array built from a PIL image).
        white_level: Smallest value that still counts as white. The default
            of 255 demands pure white for 8-bit data; a lower value (e.g. 245)
            tolerates near-white pixels such as JPEG compression artifacts.

    Returns:
        True when every value in the first row and in the last row is at
        least ``white_level``; False otherwise, including for an empty array.
    """
    # An empty array has no rows to inspect (and .min() would raise on it).
    if getattr(img, "size", None) == 0:
        return False
    # The minimum of a row reaches white_level only if all its values do.
    return bool(img[0].min() >= white_level and img[-1].min() >= white_level)


# Sample files, looked up one directory above the working directory.
fns = ['img1.jpeg', 'img2.jpeg', 'img3.png']
imgs = [np.asarray(Image.open(f'../{fn}')) for fn in fns]

# Show the images side by side (hstack needs arrays of matching height and
# channel count), with one is_hori verdict per image in the title.
plt.imshow(np.hstack(imgs))
plt.title((' ' * 32).join(map(str, map(is_hori, imgs))))
plt.axis('off')
plt.tight_layout()

output