Tiles perspective in OpenCV Python

import requests
import io
import base64
from PIL import Image
import cv2
import numpy as np

API_URL = "https://api-inference.huggingface.co/models/facebook/maskformer-swin-large-ade"
headers = {"Authorization": "Bearer hf_xxx"}  # your Hugging Face API token

def query(filename):
    with open(filename, "rb") as f:
        data = f.read()
    response = requests.post(API_URL, headers=headers, data=data)
    return response.json()

image_path = "img3.jpg"

# Query the segmentation API; the response is a list of dicts with a 'label' and a base64-encoded 'mask'
output = query(image_path)
api_response = output

# Load the original image
original_image = Image.open(image_path).convert("RGBA")

# Initialize an empty mask for the floor
floor_mask = Image.new("L", original_image.size, 0)

# Process each item in the response
for item in api_response:
    label = item['label']
    if label == 'floor':
        # Decode the base64 mask
        mask_data = base64.b64decode(item['mask'])
        mask_image = Image.open(io.BytesIO(mask_data)).resize(original_image.size, Image.BILINEAR).convert("L")
        
        # Combine this mask with our floor mask
        floor_mask = Image.composite(mask_image, floor_mask, mask_image)

# Use the floor mask to extract the floor area from the original image
original_image_np = np.array(original_image.convert("RGB"))
floor_mask_np = np.array(floor_mask)

# Ensure the mask is binary (255 where the mask is, 0 elsewhere)
_, floor_mask_binary = cv2.threshold(floor_mask_np, 127, 255, cv2.THRESH_BINARY)

# Extract the floor area
floor_area = cv2.bitwise_and(original_image_np, original_image_np, mask=floor_mask_binary)

# Convert back to PIL for display or further processing
floor_area_image = Image.fromarray(floor_area)

# Find contours of the floor region on the binary mask
contours, _ = cv2.findContours(floor_mask_binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# Initialize a list to store extreme points
extreme_points = []

# Iterate through each contour
for contour in contours:
    # Find the extreme points of this contour
    leftmost = tuple(contour[contour[:, :, 0].argmin()][0])
    rightmost = tuple(contour[contour[:, :, 0].argmax()][0])
    topmost = tuple(contour[contour[:, :, 1].argmin()][0])
    bottommost = tuple(contour[contour[:, :, 1].argmax()][0])

    # Add the extreme points to the list
    extreme_points.append((leftmost, rightmost, topmost, bottommost))

# Draw contours and extreme points on the floor_area image (optional)
contour_image = floor_area.copy()
for contour in contours:
    cv2.drawContours(contour_image, [contour], -1, (0, 255, 0), 2)

for points in extreme_points:
    for point in points:
        cv2.circle(contour_image, point, 5, (0, 0, 255), -1)  # contour_image is RGB, so this draws blue

# Convert back to PIL for display or further processing
contour_image_pil = Image.fromarray(contour_image)

# Display or save the result
contour_image_pil.show()  # Or use .save() to save the resulting image

# Extreme points are stored in the 'extreme_points' list
print("Extreme Points:", extreme_points)


def create_tiled_texture(tile_img, target_size):
    """Tile tile_img to cover target_size, given as (height, width)."""
    small_tile = cv2.resize(tile_img, (100, 200), interpolation=cv2.INTER_AREA)
    h, w = target_size
    reps = (h // small_tile.shape[0] + 1, w // small_tile.shape[1] + 1, 1)
    return np.tile(small_tile, reps)[:h, :w]

# Load the floor and tile images
original_image = cv2.imread('img3.jpg')  # Update with actual path
tile_image = cv2.imread('tile16', cv2.IMREAD_COLOR)  # Update with actual path; force 3 channels to match the photo
# Reuse the API response from the query above
api_response = output
for item in api_response:
    label = item['label']
    if label == 'floor':
        mask_data = base64.b64decode(item['mask'])
        mask_image = Image.open(io.BytesIO(mask_data))
        mask_image = mask_image.resize(original_image.shape[1::-1], Image.BILINEAR).convert('L')
        mask = np.array(mask_image)
        # Threshold to a clean binary mask (bilinear resizing leaves soft edge values)
        _, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        floor_contour = max(contours, key=cv2.contourArea)
        tiled_texture = create_tiled_texture(tile_image, original_image.shape[:2])
        pts1 = np.float32([[0, 0], [tiled_texture.shape[1], 0], [tiled_texture.shape[1], tiled_texture.shape[0]], [0, tiled_texture.shape[0]]])
        pts2 = np.float32(cv2.boxPoints(cv2.minAreaRect(floor_contour)))
        matrix = cv2.getPerspectiveTransform(pts1, pts2)
        tiled_perspective = cv2.warpPerspective(tiled_texture, matrix, (original_image.shape[1], original_image.shape[0]))
        # Apply the tile texture to the masked floor area directly
        masked_floor = np.where(mask[:, :, None] == 255, tiled_perspective, original_image)
# Convert from OpenCV's BGR to RGB for PIL
final_image_rgb = cv2.cvtColor(masked_floor, cv2.COLOR_BGR2RGB)
final_image_pil = Image.fromarray(final_image_rgb)
# Display the image using PIL
final_image_pil.show()  # Or use .save() to save the resulting image

  1. Detecting the Floor Area:
  • The code sends the input image to the Hugging Face inference API, which runs the "maskformer-swin-large-ade" semantic-segmentation model. The model segments the image and labels its regions, including the floor.
  • From the API response, the code extracts the base64-encoded mask that corresponds to the floor area (first sketch after this list).
  2. Finding Extreme Points of the Floor Contour:
  • Once the floor mask is obtained, the code converts it to binary form (255 for floor pixels, 0 elsewhere).
  • It then uses OpenCV's contour detection to find the contours of the floor area; contours are the boundaries of the detected regions.
  • For each contour, the code records the extreme points (leftmost, rightmost, topmost, bottommost) that bound the floor area (second sketch below).
  3. Applying Perspective Transformation:
  • The code builds a perspective transformation matrix that maps the four corners of the flat tiled texture onto the corners of the floor contour's minimum-area rectangle (cv2.minAreaRect / cv2.boxPoints), not onto the extreme points themselves.
  • The perspective transformation warps the texture so that it follows the apparent perspective of the floor, making the tiles look like part of the original scene (third sketch below).
  4. Applying the Tile Texture:
  • Using the transformation matrix, the code warps the tile texture image to fit the floor area.
  • The warped texture then replaces the original pixels inside the floor mask (fourth sketch below).
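
Here is a minimal sketch of step 1, assuming the API returns a list of dicts with a 'label' key and a base64-encoded PNG under 'mask' (the format the code above already relies on); floor_mask_from_response is a hypothetical helper name:

import base64
import io

import cv2
import numpy as np
from PIL import Image

def floor_mask_from_response(api_response, size_wh):
    """Union of all 'floor' masks in the response, as a binary uint8 array."""
    mask = np.zeros(size_wh[::-1], dtype=np.uint8)  # (height, width)
    for item in api_response:
        if item['label'] == 'floor':
            png = Image.open(io.BytesIO(base64.b64decode(item['mask'])))
            part = np.array(png.resize(size_wh, Image.BILINEAR).convert('L'))
            mask = np.maximum(mask, part)  # merge overlapping floor segments
    _, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
    return mask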
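
For step 2, a self-contained sketch of how the extreme points fall out of a contour; mask is the binary array from the sketch above:

def extreme_points(contour):
    """Leftmost, rightmost, topmost and bottommost points of a contour."""
    leftmost = tuple(contour[contour[:, :, 0].argmin()][0])
    rightmost = tuple(contour[contour[:, :, 0].argmax()][0])
    topmost = tuple(contour[contour[:, :, 1].argmin()][0])
    bottommost = tuple(contour[contour[:, :, 1].argmax()][0])
    return leftmost, rightmost, topmost, bottommost

contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
floor_contour = max(contours, key=cv2.contourArea)  # keep the largest floor region
print(extreme_points(floor_contour))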
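
For step 3, a sketch of the transform with the corner order made explicit (my code above passes the cv2.boxPoints output directly). cv2.boxPoints returns the rectangle's corners starting from a position that depends on the rectangle's angle, so sorting both quadrilaterals into a fixed top-left, top-right, bottom-right, bottom-left order (one common convention) avoids mapping the texture rotated or mirrored; texture and image stand in for the tiled texture and the original photo:

def order_corners(pts):
    """Sort 4 points into top-left, top-right, bottom-right, bottom-left order."""
    s = pts.sum(axis=1)              # x + y is smallest at TL, largest at BR
    d = np.diff(pts, axis=1)[:, 0]   # y - x is smallest at TR, largest at BL
    return np.float32([pts[s.argmin()], pts[d.argmin()],
                       pts[s.argmax()], pts[d.argmax()]])

h, w = texture.shape[:2]
src = np.float32([[0, 0], [w, 0], [w, h], [0, h]])  # texture corners, TL TR BR BL
dst = order_corners(cv2.boxPoints(cv2.minAreaRect(floor_contour)))
matrix = cv2.getPerspectiveTransform(src, dst)
warped = cv2.warpPerspective(texture, matrix, (image.shape[1], image.shape[0]))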
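
Finally, step 4 as a sketch: composite the warped texture into the photo only where the binary mask is set (mask, warped and image as in the earlier sketches):

floor_pixels = mask > 0                         # boolean floor mask
composite = image.copy()
composite[floor_pixels] = warped[floor_pixels]  # swap in the warped tiles
# Equivalent one-liner: np.where(mask[:, :, None] > 0, warped, image)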

This is my current output:

[my output image]

But I want the result to look like this:

[desired output image]
