
Commit fdc1a60: Merge branch 'dev'
2 parents: 1e3b700 + bc0f09a

13 files changed: +441, -41 lines

README.md

Lines changed: 70 additions & 2 deletions

````diff
@@ -375,7 +375,7 @@ stable_diffusion = StableDiffusion(
 )
 output = stable_diffusion.generate_image(
     prompt="make the cat blue",
-    images=["input.png"],
+    ref_images=["input.png"],
     cfg_scale=1.0,  # a cfg_scale of 1 is recommended for FLUX
 )
 ```
@@ -480,7 +480,6 @@ output = stable_diffusion.generate_image(
 You can use [PhotoMaker](https://github.com/TencentARC/PhotoMaker) to personalize generated images with your own ID.

 **NOTE**, currently PhotoMaker **ONLY** works with **SDXL** (any SDXL model files will work).
-The VAE in SDXL encounters NaN issues. You can find a fixed VAE here: [SDXL VAE FP16 Fix](https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/blob/main/sdxl_vae.safetensors).

 Download PhotoMaker model file (in safetensor format) [here](https://huggingface.co/bssrdf/PhotoMaker). The official release of the model file (in .bin format) does not work with `stablediffusion.cpp`.

@@ -535,6 +534,75 @@ An `id_embeds.bin` file will be generated in `input_images_dir`.

 ---

+### <u>QWEN Image</u>
+
+Download the weights from the links below:
+
+- Download `Qwen Image`
+  - safetensors: https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/tree/main/split_files/diffusion_models
+  - gguf: https://huggingface.co/QuantStack/Qwen-Image-GGUF/tree/main
+- Download `vae`
+  - safetensors: https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/tree/main/split_files/vae
+- Download `qwen_2.5_vl 7b`
+  - safetensors: https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/tree/main/split_files/text_encoders
+  - gguf: https://huggingface.co/mradermacher/Qwen2.5-VL-7B-Instruct-GGUF/tree/main
+
+```python
+from stable_diffusion_cpp import StableDiffusion
+
+stable_diffusion = StableDiffusion(
+    diffusion_model_path="../models/qwen-image-Q8_0.gguf",
+    qwen2vl_path="../models/Qwen2.5-VL-7B-Instruct.Q8_0.gguf",
+    vae_path="../models/qwen_image_vae.safetensors",
+    offload_params_to_cpu=True,
+    flow_shift=3,
+)
+
+output = stable_diffusion.generate_image(
+    prompt='一个穿着"QWEN"标志的T恤的中国美女正拿着黑色的马克笔面相镜头微笑。她身后的玻璃板上手写体写着 “一、Qwen-Image的技术路线: 探索视觉生成基础模型的极限,开创理解与生成一体化的未来。二、Qwen-Image的模型特色:1、复杂文字渲染。支持中英渲染、自动布局; 2、精准图像编辑。支持文字编辑、物体增减、风格变换。三、Qwen-Image的未来愿景:赋能专业内容创作、助力生成式AI发展。”',
+    cfg_scale=2.5,
+    sample_method='euler',
+)
+```
+
+#### <u>QWEN Image Edit</u>
+
+Download the weights from the links below:
+
+- Download `Qwen Image Edit`
+  - Qwen Image Edit
+    - safetensors: https://huggingface.co/Comfy-Org/Qwen-Image-Edit_ComfyUI/tree/main/split_files/diffusion_models
+    - gguf: https://huggingface.co/QuantStack/Qwen-Image-Edit-GGUF/tree/main
+  - Qwen Image Edit 2509
+    - safetensors: https://huggingface.co/Comfy-Org/Qwen-Image-Edit_ComfyUI/tree/main/split_files/diffusion_models
+    - gguf: https://huggingface.co/QuantStack/Qwen-Image-Edit-2509-GGUF/tree/main
+- Download `vae`
+  - safetensors: https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/tree/main/split_files/vae
+- Download `qwen_2.5_vl 7b`
+  - safetensors: https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/tree/main/split_files/text_encoders
+  - gguf: https://huggingface.co/mradermacher/Qwen2.5-VL-7B-Instruct-GGUF/tree/main
+
+```python
+from stable_diffusion_cpp import StableDiffusion
+
+stable_diffusion = StableDiffusion(
+    diffusion_model_path="../models/Qwen_Image_Edit-Q8_0.gguf",
+    qwen2vl_path="../models/Qwen2.5-VL-7B-Instruct.Q8_0.gguf",
+    vae_path="../models/qwen_image_vae.safetensors",
+    offload_params_to_cpu=True,
+    flow_shift=3,
+)
+
+output = stable_diffusion.generate_image(
+    prompt="make the cat blue",
+    ref_images=["input.png"],
+    cfg_scale=2.5,
+    sample_method='euler',
+)
+```
+
+---
+
 ### <u>Wan Video Generation</u>

 See [stable-diffusion.cpp Wan download weights](https://github.com/leejet/stable-diffusion.cpp/blob/master/docs/wan.md#download-weights) for a complete list of Wan models.
````
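
Both new QWEN examples in the diff above stop at the `generate_image()` call. As a rough usage sketch that is not part of this commit, and assuming `generate_image()` returns a list of PIL images as the binding's other examples suggest (the output filename here is made up), the result could be saved like this:

```python
# Hedged sketch, not part of the commit: persist the images returned by the
# Qwen Image example above. Assumes generate_image() returns a list of
# PIL.Image objects; the filename pattern is arbitrary.
for i, image in enumerate(output):
    image.save(f"qwen_image_{i}.png")
```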

stable_diffusion_cpp/__init__.py

Lines changed: 1 addition & 1 deletion

````diff
@@ -4,4 +4,4 @@

 # isort: on

-__version__ = "0.3.5"
+__version__ = "0.3.6"
````

stable_diffusion_cpp/_internals.py

Lines changed: 8 additions & 0 deletions

````diff
@@ -23,6 +23,8 @@ def __init__(
     clip_g_path: str,
     clip_vision_path: str,
     t5xxl_path: str,
+    qwen2vl_path: str,
+    qwen2vl_vision_path: str,
     diffusion_model_path: str,
     high_noise_diffusion_model_path: str,
     vae_path: str,
@@ -35,13 +37,15 @@
     n_threads: int,
     wtype: int,
     rng_type: int,
+    prediction: int,
     offload_params_to_cpu: bool,
     keep_clip_on_cpu: bool,
     keep_control_net_on_cpu: bool,
     keep_vae_on_cpu: bool,
     diffusion_flash_attn: bool,
     diffusion_conv_direct: bool,
     vae_conv_direct: bool,
+    force_sdxl_vae_conv_scale: bool,
     chroma_use_dit_mask: bool,
     chroma_use_t5_mask: bool,
     chroma_t5_mask_pad: int,
@@ -56,6 +60,8 @@
     clip_g_path=clip_g_path.encode("utf-8"),
     clip_vision_path=clip_vision_path.encode("utf-8"),
     t5xxl_path=t5xxl_path.encode("utf-8"),
+    qwen2vl_path=qwen2vl_path.encode("utf-8"),
+    qwen2vl_vision_path=qwen2vl_vision_path.encode("utf-8"),
     diffusion_model_path=diffusion_model_path.encode("utf-8"),
     high_noise_diffusion_model_path=high_noise_diffusion_model_path.encode("utf-8"),
     vae_path=vae_path.encode("utf-8"),
@@ -69,13 +75,15 @@
     n_threads=n_threads,
     wtype=wtype,
     rng_type=rng_type,
+    prediction=prediction,
     offload_params_to_cpu=offload_params_to_cpu,
     keep_clip_on_cpu=keep_clip_on_cpu,
     keep_control_net_on_cpu=keep_control_net_on_cpu,
     keep_vae_on_cpu=keep_vae_on_cpu,
     diffusion_flash_attn=diffusion_flash_attn,
     diffusion_conv_direct=diffusion_conv_direct,
     vae_conv_direct=vae_conv_direct,
+    force_sdxl_vae_conv_scale=force_sdxl_vae_conv_scale,
     chroma_use_dit_mask=chroma_use_dit_mask,
     chroma_use_t5_mask=chroma_use_t5_mask,
     chroma_t5_mask_pad=chroma_t5_mask_pad,
````
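
A rough, self-contained sketch of the pattern the new `_internals.py` fields follow, using a made-up struct and function name rather than the library's real ones: each optional model path is encoded to UTF-8 bytes before being stored in the ctypes parameter struct, which is why omitted paths need to default to empty strings rather than `None`.

```python
# Illustrative sketch only; the real parameter struct in stable-diffusion.cpp has
# many more fields and may differ in layout. This shows just the pass-through
# pattern used for the new qwen2vl_path / qwen2vl_vision_path parameters.
import ctypes


class _FakeCtxParams(ctypes.Structure):  # hypothetical, trimmed-down struct
    _fields_ = [
        ("qwen2vl_path", ctypes.c_char_p),
        ("qwen2vl_vision_path", ctypes.c_char_p),
    ]


def build_params(qwen2vl_path: str = "", qwen2vl_vision_path: str = "") -> _FakeCtxParams:
    # Paths are encoded to UTF-8 bytes before crossing the ctypes boundary;
    # passing None here would raise AttributeError on .encode(), hence the "" defaults.
    return _FakeCtxParams(
        qwen2vl_path=qwen2vl_path.encode("utf-8"),
        qwen2vl_vision_path=qwen2vl_vision_path.encode("utf-8"),
    )


params = build_params(qwen2vl_path="../models/Qwen2.5-VL-7B-Instruct.Q8_0.gguf")
print(params.qwen2vl_path)  # b'../models/Qwen2.5-VL-7B-Instruct.Q8_0.gguf'
```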

stable_diffusion_cpp/_logger.py

Lines changed: 1 addition & 1 deletion

````diff
@@ -27,7 +27,7 @@ def sd_log_callback(
     data: ctypes.c_void_p,
 ):
     if logger.level <= SD_LOG_LEVEL_TO_LOGGING_LEVEL[level]:
-        print(text.decode("utf-8"), end="", flush=True, file=sys.stderr)
+        print(text.decode("utf-8", errors="replace"), end="", flush=True, file=sys.stderr)


 stable_diffusion_cpp.sd_set_log_callback(sd_log_callback, ctypes.c_void_p(0))
````
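
For context on the `_logger.py` change above: the callback receives the C library's log text as raw bytes, which are not guaranteed to be valid UTF-8 (for example, a multi-byte character can be cut off at a buffer boundary). A minimal standalone illustration with a made-up byte string:

```python
# Minimal illustration of the errors="replace" fix; the byte string is made up.
raw = b"sampling step 3/20 \xe2\x96"  # hypothetical log chunk truncated mid UTF-8 character

try:
    raw.decode("utf-8")  # old behaviour: strict decoding raises and breaks the log callback
except UnicodeDecodeError as err:
    print(f"strict decode failed: {err}")

# New behaviour: malformed bytes become U+FFFD replacement characters instead of raising.
print(raw.decode("utf-8", errors="replace"))
```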
