differential-diffusion

Configuration error

App Files Files Community

differential-diffusion / app.py

cocktailpeanut

update

0354639 almost 2 years ago

raw

history blame contribute delete

6.42 kB

	import gradio as gr
	import torch
	from torchvision import transforms
	from SDXL.diff_pipe import StableDiffusionXLDiffImg2ImgPipeline
	from diffusers import DPMSolverMultistepScheduler

	# DepthAnything
	import cv2
	import numpy as np
	import os
	from PIL import Image
	import torch.nn.functional as F
	from torchvision.transforms import Compose
	import tempfile
	from gradio_imageslider import ImageSlider
	from depth_anything.depth_anything.dpt import DepthAnything
	from depth_anything.depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet

	NUM_INFERENCE_STEPS = 50

	# Choose the compute device together with a dtype that device can run.
	# Half precision is kept for CUDA only. MPS already used float32, and the CPU
	# fallback now does too: float16 pipelines are not expected to run on CPU
	# (diffusers warns about exactly this combination).
	if torch.cuda.is_available():
	    DEVICE = "cuda"
	    dtype = torch.float16
	elif torch.backends.mps.is_available():
	    DEVICE = "mps"
	    dtype = torch.float32
	else:
	    DEVICE = "cpu"
	    dtype = torch.float32

	# Encoder tag spliced into the Depth Anything checkpoint name below.
	encoder = 'vitl'

	# Depth estimator: load the checkpoint, move it to the selected device and
	# switch it to eval mode.
	model = DepthAnything.from_pretrained(f"LiheYoung/depth_anything_{encoder}14")
	model = model.to(DEVICE)
	model = model.eval()

	# SDXL base pipeline (differential img2img variant).
	base = StableDiffusionXLDiffImg2ImgPipeline.from_pretrained(
	    "stabilityai/stable-diffusion-xl-base-1.0",
	    torch_dtype=dtype,
	    variant="fp16",
	    use_safetensors=True,
	)

	# SDXL refiner pipeline; it reuses the base pipeline's second text encoder
	# and VAE instead of loading its own copies.
	refiner = StableDiffusionXLDiffImg2ImgPipeline.from_pretrained(
	    "stabilityai/stable-diffusion-xl-refiner-1.0",
	    torch_dtype=dtype,
	    variant="fp16",
	    use_safetensors=True,
	    text_encoder_2=base.text_encoder_2,
	    vae=base.vae,
	)

	# Swap both pipelines onto DPM-Solver multistep schedulers. Both are built
	# from the base pipeline's scheduler config; the second line reads the config
	# of the scheduler installed by the first, so the order matters.
	base.scheduler = DPMSolverMultistepScheduler.from_config(base.scheduler.config)
	refiner.scheduler = DPMSolverMultistepScheduler.from_config(base.scheduler.config)














	# DepthAnything

	# Preprocessing applied to an image before it is fed to the depth model:
	# resize, normalise, then convert to the network's input layout.
	transform = Compose(
	    [
	        # Target 518x518 with aspect ratio kept, 'lower_bound' sizing and
	        # sides forced to multiples of 14.
	        Resize(
	            width=518, height=518,
	            resize_target=False,
	            keep_aspect_ratio=True,
	            ensure_multiple_of=14,
	            resize_method='lower_bound',
	            image_interpolation_method=cv2.INTER_CUBIC,
	        ),
	        # Per-channel mean/std; these look like the usual ImageNet statistics
	        # (assumption, not stated in this file).
	        NormalizeImage(
	            mean=[0.485, 0.456, 0.406],
	            std=[0.229, 0.224, 0.225],
	        ),
	        PrepareForNet(),
	    ]
	)

	def predict_depth(model, image):
	    """Call ``model`` on ``image`` with gradient tracking disabled.

	    Args:
	        model: Callable taking ``image`` as its single argument.
	        image: Input forwarded unchanged to ``model``.

	    Returns:
	        Whatever ``model(image)`` returns.
	    """
	    with torch.no_grad():
	        prediction = model(image)
	    return prediction

	def depthify(image):
	    """Estimate a depth map for ``image`` with the module-level depth model.

	    Args:
	        image: Image array exposing ``.copy()`` and ``.shape`` whose first two
	            axes are height and width (the NumPy image passed in by ``run``).

	    Returns:
	        A list ``[(original_image, colored_depth), path, raw_depth]``:
	        ``original_image`` is an untouched copy of the input,
	        ``colored_depth`` is the min-max normalised depth rendered with the
	        INFERNO colour map (channel order reversed after colouring),
	        ``path`` is the PNG file ``raw_depth`` was saved to, and
	        ``raw_depth`` is a PIL image of the un-normalised depth cast to uint8.
	    """
	    original_image = image.copy()
	    h, w = image.shape[:2]

	    # NOTE(review): gr.Image normally delivers RGB arrays, so this BGR->RGB
	    # swap may actually hand BGR to the model -- confirm against the caller.
	    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0
	    image = transform({'image': image})['image']
	    image = torch.from_numpy(image).unsqueeze(0).to(DEVICE)

	    depth = predict_depth(model, image)
	    # Resize the prediction back to the input resolution.
	    depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]

	    # NOTE(review): the raw prediction is cast to uint8 without scaling, so
	    # values outside 0..255 will not survive the cast -- confirm the model's
	    # output range is what the downstream change map expects.
	    raw_depth = Image.fromarray(depth.cpu().numpy().astype('uint8'))

	    # Reserve a unique path and close the handle immediately; the original left
	    # the descriptor open for the lifetime of the process. delete=False keeps
	    # the file on disk because its path is returned to the caller.
	    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
	        tmp_path = tmp.name
	    raw_depth.save(tmp_path)

	    # Min-max normalise to 0..255 for display. A constant depth map has zero
	    # range; emit zeros instead of dividing by zero and casting NaN to uint8.
	    depth_min = depth.min()
	    depth_range = depth.max() - depth_min
	    if depth_range > 0:
	        depth = (depth - depth_min) / depth_range * 255.0
	    else:
	        depth = torch.zeros_like(depth)
	    depth = depth.cpu().numpy().astype(np.uint8)

	    colored_depth = cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)[:, :, ::-1]
	    return [(original_image, colored_depth), tmp_path, raw_depth]






	# DifferentialDiffusion

	def preprocess_image(image_array):
	    """Turn an image array into a batched tensor on ``DEVICE`` scaled to [-1, 1].

	    Args:
	        image_array: Array accepted by ``PIL.Image.fromarray``.

	    Returns:
	        Tensor with a leading batch axis of size 1, centre-cropped so that
	        height and width are multiples of 64.
	    """
	    rgb = Image.fromarray(image_array).convert("RGB")

	    # PIL reports size as (width, height); CenterCrop expects (height, width).
	    width, height = rgb.size
	    crop = transforms.CenterCrop((height // 64 * 64, width // 64 * 64))

	    tensor = transforms.ToTensor()(crop(rgb))
	    # Shift the [0, 1] range produced by ToTensor to [-1, 1].
	    tensor = tensor * 2 - 1
	    return tensor.unsqueeze(0).to(DEVICE)


	def preprocess_map(map):
	    """Turn a PIL change map into a single-channel tensor on ``DEVICE``.

	    Args:
	        map: PIL image (it must support ``.convert`` and ``.size``).

	    Returns:
	        Tensor of the greyscale map, centre-cropped so that height and width
	        are multiples of 64. No batch axis is added.
	    """
	    grey = map.convert("L")

	    # PIL reports size as (width, height); CenterCrop expects (height, width).
	    width, height = grey.size
	    cropped = transforms.CenterCrop((height // 64 * 64, width // 64 * 64))(grey)

	    return transforms.ToTensor()(cropped).to(DEVICE)


	def inference(
	    image,
	    map,
	    guidance_scale,
	    prompt,
	    negative_prompt,
	    steps,
	    denoising_start,
	    denoising_end
	):
	    """Run the base pipeline and then the refiner over ``image`` guided by ``map``.

	    Args:
	        image: Input image array (see ``preprocess_image``).
	        map: PIL change map (see ``preprocess_map``).
	        guidance_scale: Forwarded to both pipelines as ``guidance_scale``.
	        prompt: Text prompt forwarded to both pipelines.
	        negative_prompt: Negative prompt forwarded to both pipelines.
	        steps: Number of inference steps; coerced to ``int`` before use.
	        denoising_start: Forwarded to the refiner as ``denoising_start``.
	        denoising_end: Forwarded to the base pipeline as ``denoising_end``.

	    Returns:
	        The first image produced by the refiner pipeline.

	    Raises:
	        gr.Error: If ``image``, ``map`` or ``steps`` is missing.
	    """
	    validate_inputs(image, map)

	    # The steps widget is a gr.Number: it can deliver a float, and the Clear
	    # button resets it to None. The pipelines need a real integer step count.
	    if steps is None:
	        raise gr.Error("Missing steps")
	    steps = int(steps)

	    image = preprocess_image(image)
	    map = preprocess_map(map)

	    # First pass: base model, stopped at ``denoising_end`` and returning latents.
	    latents = base.to(DEVICE)(
	        prompt=prompt,
	        original_image=image,
	        image=image,
	        strength=1,
	        guidance_scale=guidance_scale,
	        num_images_per_prompt=1,
	        negative_prompt=negative_prompt,
	        map=map,
	        num_inference_steps=steps,
	        denoising_end=denoising_end,
	        output_type="latent"
	    ).images

	    # Second pass: refiner, fed the base latents and started at ``denoising_start``.
	    return refiner.to(DEVICE)(
	        prompt=prompt,
	        original_image=image,
	        image=latents,
	        strength=1,
	        guidance_scale=guidance_scale,
	        num_images_per_prompt=1,
	        negative_prompt=negative_prompt,
	        map=map,
	        num_inference_steps=steps,
	        denoising_start=denoising_start
	    ).images[0]


	def validate_inputs(image, map):
	    """Raise ``gr.Error`` when either input is ``None``.

	    ``image`` is checked first, so a call with both missing reports the image.
	    Falsy values other than ``None`` are accepted.
	    """
	    checks = (
	        (image, "Missing image"),
	        (map, "Missing map"),
	    )
	    for value, message in checks:
	        if value is None:
	            raise gr.Error(message)


	def run(image, gs, prompt, neg_prompt, steps, denoising_start, denoising_end):
	    """Build a depth map for ``image`` and use it as the change map for inference.

	    Args:
	        image: Input image array from the UI, or ``None`` if nothing was given.
	        gs: Guidance scale, passed to ``inference``.
	        prompt: Text prompt, passed to ``inference``.
	        neg_prompt: Negative prompt, passed to ``inference``.
	        steps: Number of inference steps, passed to ``inference``.
	        denoising_start: Passed to ``inference``.
	        denoising_end: Passed to ``inference``.

	    Returns:
	        Tuple ``(raw_depth, edited_image)``: the depth image used as the change
	        map and the result returned by ``inference``.

	    Raises:
	        gr.Error: If ``image`` is ``None``.
	    """
	    # depthify() dereferences the image immediately, so reject a missing image
	    # here; otherwise the user gets an AttributeError and never reaches the
	    # "Missing image" check inside inference().
	    if image is None:
	        raise gr.Error("Missing image")

	    (original_image, colored_depth), name, raw_depth = depthify(image)
	    # Debug trace of the intermediate results.
	    print(f"original_image={original_image} colored_depth={colored_depth}, name={name}, raw_depth={raw_depth}")

	    edited = inference(original_image, raw_depth, gs, prompt, neg_prompt, steps, denoising_start, denoising_end)
	    return raw_depth, edited

	# Gradio UI: input widgets, a Run button wired to run(), and two output images.
	# NOTE(review): the indentation of this section was flattened in this copy of
	# the file. The nesting of the `with` blocks below must be restored before the
	# module can run; presumably Blocks > Row > (input Column, output Column), with
	# the event wiring directly under Blocks -- confirm against the original layout.
	with gr.Blocks() as demo:
	with gr.Row():
	# Input side: image, generation settings, prompts and buttons.
	with gr.Column():
	with gr.Row():
	input_image = gr.Image(label="Input Image")
	# change_map = gr.Image(label="Change Map", type="pil")
	gs = gr.Slider(0, 28, value=7.5, label="Guidance Scale")
	steps = gr.Number(value=50, label="Steps")
	denoising_start = gr.Slider(0, 1, value=0.8, label="Denoising Start")
	denoising_end = gr.Slider(0, 1, value=0.8, label="Denoising End")
	prompt = gr.Textbox(label="Prompt")
	neg_prompt = gr.Textbox(label="Negative Prompt")
	with gr.Row():
	# clr_btn=gr.ClearButton(components=[input_image, change_map, gs, prompt, neg_prompt])
	# The clear button resets every input widget listed here.
	clr_btn=gr.ClearButton(components=[input_image, gs, prompt, neg_prompt, steps, denoising_start, denoising_end])
	run_btn = gr.Button("Run",variant="primary")

	# Output side: the edited image and the change map that run() returns.
	with gr.Column():
	output = gr.Image(label="Output Image")
	change_map = gr.Image(label="Change Map")
	# run() returns (raw_depth, edited_image), matching the order of `outputs`.
	run_btn.click(
	run,
	#inference,
	inputs=[input_image, gs, prompt, neg_prompt, steps, denoising_start, denoising_end],
	outputs=[change_map, output]
	)
	# Also register the output image with the clear button.
	clr_btn.add(output)
	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	demo.launch()