File size: 3,335 Bytes
ee650ee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
---
license: apache-2.0
base_model: runwayml/stable-diffusion-v1-5
tags:
- text-to-image
- diffusion-models
- stable-diffusion
- diffusers
- image-generation
- fast-sampling
library_name: diffusers
pipeline_tag: text-to-image
---
# Image Diffusion Preview with Consistency Solver (Google DeepMind)
[paper](https://arxiv.org/abs/2512.13592) [code](https://github.com/G-U-N/consolver) [huggingface](https://huggingface.co/papers/2512.13592) [model](https://huggingface.co/wangfuyun/consolver)
# Quick Start
```python
import torch
from diffusers import StableDiffusionPipeline, DDIMScheduler
from scheduler_ppo import PPOScheduler # Provided in this repo
from huggingface_hub import hf_hub_download
# Fetch the trained factor_net checkpoint from the Hugging Face Hub.
factor_net_path = hf_hub_download(repo_id="wangfuyun/consolver", filename="model.ckpt")

# Base model and sampling settings shared by both runs below.
model_id = "runwayml/stable-diffusion-v1-5"
prompt = "an astronaut riding a horse on the moon, highly detailed, 8k"
num_inference_steps = 8
guidance_scale = 3.0
seed = 43
height, width = 512, 512
def load_pipeline(scheduler_type="ddim", device="cuda"):
    """Build a StableDiffusionPipeline for `model_id` with the chosen scheduler.

    Args:
        scheduler_type: "ppo" selects the PPOScheduler provided in this repo
            and loads the downloaded factor_net weights into it. Any other
            value falls back to the DDIM scheduler of the base model.
        device: Torch device the pipeline (and the factor_net, when used) is
            moved to. Defaults to "cuda".

    Returns:
        The pipeline, moved to `device`, with the safety checker disabled.
    """
    use_ppo = scheduler_type == "ppo"

    if use_ppo:
        scheduler = PPOScheduler(
            beta_end=0.012,
            beta_schedule="scaled_linear",
            beta_start=0.00085,
            num_train_timesteps=1000,
            steps_offset=1,
            timestep_spacing="trailing",
            order_dim=4,
            scaler_dim=0,
            use_conv=False,
            factor_net_kwargs=dict(embedding_dim=64, hidden_dim=256, num_actions=11),
        )
    else:
        scheduler = DDIMScheduler.from_pretrained(
            model_id, subfolder="scheduler", timestep_spacing="trailing"
        )

    pipe = StableDiffusionPipeline.from_pretrained(
        model_id,
        scheduler=scheduler,
        safety_checker=None,
        # torch_dtype=torch.float16,  # Uncomment for GPU memory savings
    ).to(device)

    if use_ppo and factor_net_path:
        # NOTE(review): torch.load unpickles the file. The checkpoint is
        # downloaded from the Hub, so only load it from a source you trust;
        # consider weights_only=True if the file is a plain state dict.
        weight = torch.load(factor_net_path, map_location="cpu")
        pipe.scheduler.factor_net.load_state_dict(weight)
        pipe.scheduler.factor_net.to(device)

    return pipe
# Each run gets a freshly seeded generator so both samplers start from the
# same initial noise. Reusing one generator would hand the second run an
# already-advanced RNG state and make the comparison unfair.

# DDIM baseline (8 steps)
pipe_ddim = load_pipeline("ddim")
generator = torch.Generator("cuda").manual_seed(seed)
image_ddim = pipe_ddim(
    prompt,
    num_inference_steps=num_inference_steps,
    guidance_scale=guidance_scale,
    generator=generator,
    height=height,
    width=width,
).images[0]
image_ddim.save("ddim_result.jpg")

# ConSolver (8 steps)
pipe_consolver = load_pipeline("ppo")
generator = torch.Generator("cuda").manual_seed(seed)
image_consolver = pipe_consolver(
    prompt,
    num_inference_steps=num_inference_steps,
    guidance_scale=guidance_scale,
    generator=generator,
    height=height,
    width=width,
).images[0]
image_consolver.save("consolver_result.jpg")
```
<div align="center">
<table>
<tr>
<td align="center">
<img src="https://github.com/user-attachments/assets/35f5f99a-ca5f-4919-82cf-04a67a2dbe13" alt="DDIM" width="80%" />
</td>
<td align="center">
<img src="https://github.com/user-attachments/assets/6428a663-b488-4ecc-b79c-4fcb431d5630" alt="Consistency Solver" width="80%" />
</td>
</tr>
<tr>
<td align="center">
<em>DDIM</em>
</td>
<td align="center">
<em>ConsistencySolver</em>
</td>
</tr>
</table>
</div>
|