|
69 | 69 | Examples:
70 | 70 | ```py
71 | 71 | # !pip install controlnet_aux
72 | | - >>> from diffusers import ( |
73 | | - ...     StableDiffusionXLControlNetUnionImg2ImgPipeline,
74 | | - ...     ControlNetUnionModel,
75 | | - ...     AutoencoderKL,
76 | | - ... ) |
77 | | - >>> from diffusers.models.controlnets import ControlNetUnionInputProMax |
78 | | - >>> from diffusers.utils import load_image |
79 | | - >>> import torch |
80 | | - >>> from PIL import Image |
81 | | - >>> import numpy as np |
82 | | - >>> prompt = "A cat" |
83 | | - >>> # download an image |
84 | | - >>> image = load_image( |
85 | | - ... "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/kandinsky/cat.png" |
86 | | - ... ) |
87 | | - >>> # initialize the models and pipeline |
88 | | - >>> controlnet = ControlNetUnionModel.from_pretrained( |
89 | | - ... "brad-twinkl/controlnet-union-sdxl-1.0-promax", torch_dtype=torch.float16 |
90 | | - ... ) |
91 | | - >>> vae = AutoencoderKL.from_pretrained( |
92 | | - ... "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16 |
93 | | - ... ) |
94 | | - >>> pipe = StableDiffusionXLControlNetUnionImg2ImgPipeline.from_pretrained( |
95 | | - ... "stabilityai/stable-diffusion-xl-base-1.0", |
96 | | - ... controlnet=controlnet, |
97 | | - ... vae=vae, |
98 | | - ... torch_dtype=torch.float16, |
99 | | - ... ).to("cuda") |
100 | | - >>> # `enable_model_cpu_offload` is not recommended due to multiple generations |
101 | | - >>> height = image.height |
102 | | - >>> width = image.width |
103 | | - >>> ratio = np.sqrt(1024.0 * 1024.0 / (width * height)) |
104 | | - >>> # 3 * 3 upscale correspond to 16 * 3 multiply, 2 * 2 correspond to 16 * 2 multiply and so on. |
105 | | - >>> scale_image_factor = 3 |
106 | | - >>> base_factor = 16 |
107 | | - >>> factor = scale_image_factor * base_factor |
108 | | - >>> W, H = int(width * ratio) // factor * factor, int(height * ratio) // factor * factor |
109 | | - >>> image = image.resize((W, H)) |
110 | | - >>> target_width = W // scale_image_factor |
111 | | - >>> target_height = H // scale_image_factor |
112 | | - >>> images = [] |
113 | | - >>> crops_coords_list = [ |
114 | | - ...     (0, 0),
115 | | - ...     (0, width // 2),
116 | | - ...     (height // 2, 0),
117 | | - ...     (width // 2, height // 2),
118 | | - ...     0,
119 | | - ...     0,
120 | | - ...     0,
121 | | - ...     0,
122 | | - ...     0,
123 | | - ... ] |
124 | | - >>> for i in range(scale_image_factor): |
125 | | - ...     for j in range(scale_image_factor):
126 | | - ...         left = j * target_width
127 | | - ...         top = i * target_height
128 | | - ...         right = left + target_width
129 | | - ...         bottom = top + target_height
130 | | - ...         cropped_image = image.crop((left, top, right, bottom))
131 | | - ...         cropped_image = cropped_image.resize((W, H))
132 | | - ...         images.append(cropped_image)
133 | | - >>> # set ControlNetUnion input |
134 | | - >>> result_images = [] |
135 | | - >>> for sub_img, crops_coords in zip(images, crops_coords_list): |
136 | | - ...     union_input = ControlNetUnionInputProMax(
137 | | - ...         tile=sub_img,
138 | | - ...     )
139 | | - ...     new_width, new_height = W, H
140 | | - ...     out = pipe(
141 | | - ...         prompt=[prompt] * 1,
142 | | - ...         image=sub_img,
143 | | - ...         control_image_list=union_input,
144 | | - ...         width=new_width,
145 | | - ...         height=new_height,
146 | | - ...         num_inference_steps=30,
147 | | - ...         crops_coords_top_left=(W, H),
148 | | - ...         target_size=(W, H),
149 | | - ...         original_size=(W * 2, H * 2),
150 | | - ...     )
151 | | - ...     result_images.append(out.images[0])
152 | | - >>> new_im = Image.new( |
153 | | - ... "RGB", (new_width * scale_image_factor, new_height * scale_image_factor) |
154 | | - ... ) |
155 | | - >>> new_im.paste(result_images[0], (0, 0)) |
156 | | - >>> new_im.paste(result_images[1], (new_width, 0)) |
157 | | - >>> new_im.paste(result_images[2], (new_width * 2, 0)) |
158 | | - >>> new_im.paste(result_images[3], (0, new_height)) |
159 | | - >>> new_im.paste(result_images[4], (new_width, new_height)) |
160 | | - >>> new_im.paste(result_images[5], (new_width * 2, new_height)) |
161 | | - >>> new_im.paste(result_images[6], (0, new_height * 2)) |
162 | | - >>> new_im.paste(result_images[7], (new_width, new_height * 2)) |
163 | | - >>> new_im.paste(result_images[8], (new_width * 2, new_height * 2)) |
| 72 | + from diffusers import ( |
| 73 | +     StableDiffusionXLControlNetUnionImg2ImgPipeline,
| 74 | +     ControlNetUnionModel,
| 75 | +     AutoencoderKL,
| 76 | + ) |
| 77 | + from diffusers.models.controlnets import ControlNetUnionInputProMax |
| 78 | + from diffusers.utils import load_image |
| 79 | + import torch |
| 80 | + from PIL import Image |
| 81 | + import numpy as np |
| 82 | +
| 83 | + prompt = "A cat" |
| 84 | + # download an image |
| 85 | + image = load_image( |
| 86 | + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/kandinsky/cat.png" |
| 87 | + ) |
| 88 | + # initialize the models and pipeline |
| 89 | + controlnet = ControlNetUnionModel.from_pretrained( |
| 90 | + "brad-twinkl/controlnet-union-sdxl-1.0-promax", torch_dtype=torch.float16 |
| 91 | + ) |
| 92 | + vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16) |
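| | + # (the fp16-fix VAE above is commonly used because the stock SDXL VAE can produce NaNs, i.e. black images, in float16)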
| 93 | + pipe = StableDiffusionXLControlNetUnionImg2ImgPipeline.from_pretrained( |
| 94 | + "stabilityai/stable-diffusion-xl-base-1.0", |
| 95 | + controlnet=controlnet, |
| 96 | + vae=vae, |
| 97 | + torch_dtype=torch.float16, |
| 98 | + ).to("cuda") |
| 99 | + # `enable_model_cpu_offload` is not recommended due to multiple generations |
| 100 | + height = image.height |
| 101 | + width = image.width |
| 102 | + ratio = np.sqrt(1024.0 * 1024.0 / (width * height)) |
| 103 | + # A 3 * 3 upscale needs dimensions divisible by 16 * 3, a 2 * 2 upscale by 16 * 2, and so on.
| 104 | + scale_image_factor = 3 |
| 105 | + base_factor = 16 |
| 106 | + factor = scale_image_factor * base_factor |
| 107 | + W, H = int(width * ratio) // factor * factor, int(height * ratio) // factor * factor |
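| | + # e.g. (hypothetical numbers) a 768 * 512 input gives ratio ≈ 1.633, so
| | + # int(width * ratio) = 1254 and int(height * ratio) = 836; flooring both to
| | + # multiples of factor = 48 yields W, H = 1248, 816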
| 108 | + image = image.resize((W, H)) |
| 109 | + target_width = W // scale_image_factor |
| 110 | + target_height = H // scale_image_factor |
| 111 | + images = [] |
| 112 | + crops_coords_list = [ |
| 113 | +     (0, 0),
| 114 | +     (0, width // 2),
| 115 | +     (height // 2, 0),
| 116 | +     (width // 2, height // 2),
| 117 | +     0,
| 118 | +     0,
| 119 | +     0,
| 120 | +     0,
| 121 | +     0,
| 122 | + ] |
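| | + # note: only the first four entries are coordinate tuples; the trailing zeros
| | + # are placeholders so the list zips against all nine tiles, and crops_coords
| | + # is never actually used inside the generation loop below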
| 123 | + for i in range(scale_image_factor): |
| 124 | +     for j in range(scale_image_factor):
| 125 | +         left = j * target_width
| 126 | +         top = i * target_height
| 127 | +         right = left + target_width
| 128 | +         bottom = top + target_height
| 129 | +         cropped_image = image.crop((left, top, right, bottom))
| 130 | +         cropped_image = cropped_image.resize((W, H))
| 131 | +         images.append(cropped_image)
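| | + # the two loops above cut the resized image into a 3 * 3 grid of tiles and
| | + # resize each tile back up to (W, H), so every tile is refined at full resolution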
| 132 | + # set ControlNetUnion input |
| 133 | + result_images = [] |
| 134 | + for sub_img, crops_coords in zip(images, crops_coords_list): |
| 135 | +     union_input = ControlNetUnionInputProMax(
| 136 | +         tile=sub_img,
| 137 | +     )
| 138 | +     new_width, new_height = W, H
| 139 | +     out = pipe(
| 140 | +         prompt=[prompt] * 1,
| 141 | +         image=sub_img,
| 142 | +         control_image_list=union_input,
| 143 | +         width=new_width,
| 144 | +         height=new_height,
| 145 | +         num_inference_steps=30,
| 146 | +         crops_coords_top_left=(W, H),
| 147 | +         target_size=(W, H),
| 148 | +         original_size=(W * 2, H * 2),
| 149 | +     )
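| | +     # crops_coords_top_left, target_size and original_size are SDXL
| | +     # micro-conditioning inputs; an original_size larger than target_size
| | +     # typically nudges the model toward the fine detail of a downscaled
| | +     # high-resolution image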
| 150 | +     result_images.append(out.images[0])
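| | + # stitch the nine refined tiles back into one 3x-upscaled image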
| 151 | + new_im = Image.new("RGB", (new_width * scale_image_factor, new_height * scale_image_factor)) |
| 152 | + new_im.paste(result_images[0], (0, 0)) |
| 153 | + new_im.paste(result_images[1], (new_width, 0)) |
| 154 | + new_im.paste(result_images[2], (new_width * 2, 0)) |
| 155 | + new_im.paste(result_images[3], (0, new_height)) |
| 156 | + new_im.paste(result_images[4], (new_width, new_height)) |
| 157 | + new_im.paste(result_images[5], (new_width * 2, new_height)) |
| 158 | + new_im.paste(result_images[6], (0, new_height * 2)) |
| 159 | + new_im.paste(result_images[7], (new_width, new_height * 2)) |
| 160 | + new_im.paste(result_images[8], (new_width * 2, new_height * 2)) |
164 | 161 | ```
165 | 162 | """
166 | 163 |
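The paste sequence above hard-codes the 3 * 3 layout. As a minimal sketch (not part of the diff; `stitch_tiles` is a hypothetical helper), the same stitching generalizes to any `scale_image_factor`:

```py
from PIL import Image


def stitch_tiles(tiles, tile_width, tile_height, grid):
    # paste a row-major list of grid * grid equally sized tiles onto one canvas
    canvas = Image.new("RGB", (tile_width * grid, tile_height * grid))
    for idx, tile in enumerate(tiles):
        row, col = divmod(idx, grid)
        canvas.paste(tile, (col * tile_width, row * tile_height))
    return canvas


# equivalent to the nine explicit paste calls in the example:
# new_im = stitch_tiles(result_images, new_width, new_height, scale_image_factor)
```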
|
|