from typing import List
from collections import Counter

import numpy as np
import torch
import torchvision.transforms.functional as F
from PIL import Image, ImageDraw


def calculate_bounding_box(points) -> List[float]:
    """
    Calculate the axis-aligned bounding box of a polygon.

    Args:
        points (list of float): Flat list of x, y coordinates defining the polygon vertices.

    Returns:
        list: [min_x, min_y, max_x, max_y] defining the bounding box.
    """
    if not points or len(points) % 2 != 0:
        raise ValueError("The list of points must be non-empty and have an even number of elements")
    x_coords = points[0::2]
    y_coords = points[1::2]
    min_x = min(x_coords)
    max_x = max(x_coords)
    min_y = min(y_coords)
    max_y = max(y_coords)
    return [min_x, min_y, max_x, max_y]
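
# Example (hypothetical values): for the flat triangle [10, 20, 30, 5, 15, 40],
# the x-coordinates are (10, 30, 15) and the y-coordinates are (20, 5, 40), so
# calculate_bounding_box([10, 20, 30, 5, 15, 40]) returns [10, 5, 30, 40].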


def find_mode_color(image: Image.Image):
    """
    Identify the most frequent (mode) color in a PIL image.

    Parameters:
        image (PIL.Image): The input image.

    Returns:
        tuple: The mode color in the image as an (R, G, B) tuple.
    """
    # Convert image to RGB mode if it's not already
    image = image.convert('RGB')
    # Get the list of pixels
    pixels = list(image.getdata())
    # Use Counter to count the frequency of each color
    counter = Counter(pixels)
    # Find the most common color
    mode_color = counter.most_common(1)[0][0]
    return mode_color


def separate_foreground_background(image):
    """
    Separate a Pillow image into foreground and background using the mode color
    and Euclidean distance thresholding.

    Parameters:
        image (PIL.Image): The input image.

    Returns:
        tuple: (foreground_image, fg_color), where foreground_image is an RGBA
        PIL image whose detected foreground pixels are painted white, and
        fg_color is the mode color of the detected foreground before repainting.
    """
    # Convert image to RGBA mode to handle transparency
    image = image.convert('RGBA')
    pixels = np.array(image)
    # Calculate the Euclidean distance of each pixel to the mode color
    background_color = find_mode_color(image)
    print("Background color:", background_color)
    mode_color_array = np.array(background_color)
    distances = np.linalg.norm(pixels[:, :, :3] - mode_color_array, axis=2)
    # Determine the threshold distance for clustering
    threshold_distance = np.mean(distances)
    print("Threshold distance:", threshold_distance)
    # Create masks for foreground and background
    foreground_mask = distances > threshold_distance
    background_mask = distances <= threshold_distance
    # Create empty arrays for the new images
    foreground_image = np.zeros_like(pixels)
    background_image = np.zeros_like(pixels)
    # Copy the pixels to the new images based on the masks
    foreground_image[foreground_mask] = pixels[foreground_mask]
    background_image[background_mask] = pixels[background_mask]
    # Find the mode color of the foreground before repainting it
    fg_color = find_mode_color(Image.fromarray(foreground_image, 'RGBA'))
    # Paint fully opaque foreground pixels white, then make the whole image opaque
    opaque_mask = foreground_image[:, :, 3] == 255
    foreground_image[opaque_mask, :3] = [255, 255, 255]
    foreground_image[:, :, 3] = 255
    # Convert back to PIL images
    foreground_image = Image.fromarray(foreground_image, 'RGBA')
    background_image = Image.fromarray(background_image, 'RGBA')
    return foreground_image, fg_color
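
# Standalone usage sketch (assuming a hypothetical input file "label.png"):
#
#   crop = Image.open("label.png")
#   fg_image, fg_color = separate_foreground_background(crop)
#   fg_image.save("label_foreground.png")
#
# The threshold is simply the mean distance to the mode color, so the split
# works best when the background is large and fairly uniform.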


def crop_polygon(image, points):
    """
    Crop a Pillow image to the axis-aligned bounding box of a polygon.

    Args:
        image (PIL.Image): The input image.
        points (list of float): Flat list of x, y coordinates defining the polygon vertices.

    Returns:
        PIL.Image: The image cropped to the polygon's bounding box.
    """
    x_min, y_min, x_max, y_max = calculate_bounding_box(points)
    image_crop = image.crop((x_min, y_min, x_max, y_max))
    return image_crop


def mask_polygon(image, points):
    """
    Mask a polygon region in a Pillow image.

    Draws the polygon as a white region on a black 'L'-mode mask the size of the
    image, then composites the original image over the mask so that only the
    polygon interior keeps its original pixels.

    Args:
        image (PIL.Image): The input image.
        points (list of float): Flat list of x, y coordinates defining the polygon vertices.

    Returns:
        PIL.Image: RGBA image containing the original pixels inside the polygon.
    """
    if not points or len(points) % 2 != 0:
        raise ValueError("The list of points must be non-empty and have an even number of elements")
    # Create a mask
    mask = Image.new('L', image.size, 0)
    draw = ImageDraw.Draw(mask)
    flat_points = (np.array(points) * 1.0).tolist()
    draw.polygon(flat_points, fill="white")
    # Apply the mask to the image
    masked_image = Image.composite(image.convert("RGBA"), mask.convert("RGBA"), mask)
    return masked_image
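
# Usage sketch (hypothetical polygon over a hypothetical file "page.png"):
#
#   img = Image.open("page.png")
#   polygon = [120.0, 40.0, 300.0, 40.0, 300.0, 180.0, 120.0, 180.0]
#   masked = mask_polygon(img, polygon)     # original pixels only inside the polygon
#   patch = crop_polygon(masked, polygon)   # tight crop around the polygon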


class ForegroundMask:
    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "image": ("IMAGE",),
                "json_data": ("JSON",),
            },
        }

    RETURN_TYPES = ("IMAGE",)
    FUNCTION = "main"
    CATEGORY = "tensorops"

    def main(self, image: torch.Tensor, json_data: list):
        print("items", json_data)
        items = [item for item in json_data]
        # Convert the [B, H, W, C] input tensor to [B, C, H, W] so the first
        # batch element can be turned into a PIL image.
        image = image.permute(0, 3, 1, 2)
        image_pil = F.to_pil_image(image[0])
        full_image = Image.new("RGBA", image_pil.size, (0, 0, 0, 255))
        for item in items:
            points = item["polygon"]
            print("polygon", points)
            masked_image = mask_polygon(image_pil, points)
            masked_image_crop = crop_polygon(image_pil, points)
            fg_image, fg_color = separate_foreground_background(masked_image_crop)
            x_min, y_min, x_max, y_max = calculate_bounding_box(points)
            # Paste each extracted foreground patch back at its bounding-box origin
            full_image.paste(fg_image, (int(x_min), int(y_min)))
        # Convert back to a [1, H, W, C] float tensor to match the input layout
        out_image = F.to_tensor(full_image).unsqueeze(0).permute(0, 2, 3, 1)
        return (out_image,)
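

# The INPUT_TYPES/RETURN_TYPES/FUNCTION/CATEGORY attributes follow the ComfyUI
# custom-node convention. A minimal registration sketch, assuming this module is
# loaded as a ComfyUI custom node (the mapping names below are illustrative):
NODE_CLASS_MAPPINGS = {
    "ForegroundMask": ForegroundMask,
}
NODE_DISPLAY_NAME_MAPPINGS = {
    "ForegroundMask": "Foreground Mask",
}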