|
46 | 46 | >>> from diffusers import CogView4ControlPipeline |
47 | 47 |
|
48 | 48 | >>> pipe = CogView4ControlPipeline.from_pretrained("THUDM/CogView4-6B-Control", torch_dtype=torch.bfloat16) |
49 | | - >>> pipe.to("cuda") |
50 | | -
|
51 | | - >>> prompt = "A photo of an astronaut riding a horse on mars" |
52 | | - >>> image = pipe(prompt).images[0] |
53 | | - >>> image.save("output.png") |
| 49 | + >>> control_image = load_image( |
| 50 | + ... "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd_controlnet/bird_canny.png" |
| 51 | + ... ) |
| 52 | + >>> prompt = "A bird in space" |
| 53 | + >>> image = pipe( |
| 54 | + ... prompt, control_image=control_image, height=1024, width=1024, guidance_scale=3.5 |
| 55 | + ... ).images[0] |
| 56 | + >>> image.save("cogview4-control.png") |
54 | 57 | ``` |
55 | 58 | """ |
56 | 59 |
|
57 | | - |
| 60 | +# Copied from diffusers.pipelines.cogview4.pipeline_cogview4.calculate_shift |
58 | 61 | def calculate_shift( |
59 | 62 | image_seq_len, |
60 | 63 | base_seq_len: int = 256, |
@@ -175,6 +178,7 @@ def __init__( |
175 | 178 | self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8 |
176 | 179 | self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) |
177 | 180 |
|
| 181 | + # Copied from diffusers.pipelines.cogview4.pipeline_cogview4.CogView4Pipeline._get_glm_embeds |
178 | 182 | def _get_glm_embeds( |
179 | 183 | self, |
180 | 184 | prompt: Union[str, List[str]] = None, |
@@ -341,7 +345,7 @@ def prepare_image( |
341 | 345 | # image batch size is the same as prompt batch size |
342 | 346 | repeat_by = num_images_per_prompt |
343 | 347 |
|
344 | | - image = image.repeat_interleave(repeat_by, dim=0) |
| 348 | + image = image.repeat_interleave(repeat_by, dim=0, output_size=image.shape[0] * repeat_by) |
345 | 349 |
|
346 | 350 | image = image.to(device=device, dtype=dtype) |
347 | 351 |
|
|
0 commit comments