
Commit a198c6e

Merge branch 'dev'
2 parents fdc1a60 + 9cdf41b commit a198c6e

26 files changed: +326 -55 lines changed

README.md

Lines changed: 28 additions & 7 deletions
@@ -131,7 +131,7 @@ CMAKE_ARGS="-DSD_VULKAN=ON" pip install stable-diffusion-cpp-python
 <details>
 <summary>Using SYCL</summary>
 
-Using SYCL runs the computation on an Intel GPU. Please make sure you have installed the related driver and [Intel® oneAPI Base toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html) before starting. For more details refer to [llama.cpp SYCL backend](https://github.com/ggerganov/llama.cpp/blob/master/docs/backend/SYCL.md#linux).
+Using SYCL runs the computation on an Intel GPU. Please make sure you have installed the related driver and [Intel® oneAPI Base toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html) before starting. For more details refer to [llama.cpp SYCL backend](https://github.com/ggml-org/llama.cpp/blob/master/docs/backend/SYCL.md#linux).
 
 ```bash
 # Export relevant ENV variables
@@ -268,11 +268,15 @@ Below is a short example demonstrating how to use the high-level API to generate
 ### <u>Text to Image</u>
 
 ```python
+from PIL import Image
 from stable_diffusion_cpp import StableDiffusion
 
-def callback(step: int, steps: int, time: float):
+def progress_callback(step: int, steps: int, time: float):
     print("Completed step: {} of {}".format(step, steps))
 
+def preview_callback(step: int, images: list[Image.Image], is_noisy: bool):
+    images[0].save(f"{PREVIEW_OUTPUT_DIR}/{step}.png")
+
 stable_diffusion = StableDiffusion(
     model_path="../models/v1-5-pruned-emaonly.safetensors",
     # wtype="default", # Weight type (e.g. "q8_0", "f16", etc) (The "default" setting is automatically applied and determines the weight type of a model file)
@@ -281,8 +285,11 @@ output = stable_diffusion.generate_image(
     prompt="a lovely cat",
     width=512,
     height=512,
-    progress_callback=callback,
+    progress_callback=progress_callback,
     # seed=1337, # Uncomment to set a specific seed (use -1 for a random seed)
+    preview_method="proj",
+    preview_interval=2, # Call every 2 steps
+    preview_callback=preview_callback,
 )
 output[0].save("output.png") # Output returned as list of PIL Images
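The snippet above references `PREVIEW_OUTPUT_DIR` without defining it. A minimal sketch of one way to set it up (illustrative only, not part of this commit; the directory name and the decision to save every preview are assumptions):

```python
# Hypothetical companion to the README example: define the preview output
# directory the callback writes into and make sure it exists.
from pathlib import Path

from PIL import Image

PREVIEW_OUTPUT_DIR = Path("previews")
PREVIEW_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

def preview_callback(step: int, images: list[Image.Image], is_noisy: bool) -> None:
    # is_noisy is passed through by the backend; this sketch simply saves every preview.
    images[0].save(PREVIEW_OUTPUT_DIR / f"{step}.png")
```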
@@ -388,6 +395,12 @@ Download the weights from the links below:
 - Otherwise, download chroma's safetensors from [lodestones/Chroma1-Flash](https://huggingface.co/lodestones/Chroma1-Flash), [lodestones/Chroma1-Base](https://huggingface.co/lodestones/Chroma1-Base) or [lodestones/Chroma1-HD](https://huggingface.co/lodestones/Chroma1-HD) ([lodestones/Chroma](https://huggingface.co/lodestones/Chroma) is DEPRECATED)
 - The `vae` and `t5xxl` models are the same as for FLUX image generation linked above (`clip_l` not required).
 
+or Chroma Radiance models from:
+
+- safetensors: https://huggingface.co/lodestones/Chroma1-Radiance/tree/main
+- gguf: https://huggingface.co/silveroxides/Chroma1-Radiance-GGUF/tree/main
+- t5xxl: https://huggingface.co/comfyanonymous/flux_text_encoders/blob/main/t5xxl_fp16.safetensors
+
 ```python
 from stable_diffusion_cpp import StableDiffusion
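For orientation, a minimal sketch of how the Chroma Radiance weights listed above might be wired together (illustrative only, not part of this commit; `diffusion_model_path` and `t5xxl_path` are assumed from the project's FLUX examples, and the local file names are placeholders):

```python
from stable_diffusion_cpp import StableDiffusion

# Assumed layout: a Chroma1-Radiance GGUF plus the t5xxl text encoder linked above.
stable_diffusion = StableDiffusion(
    diffusion_model_path="../models/chroma1-radiance.gguf",  # hypothetical local file name
    t5xxl_path="../models/t5xxl_fp16.safetensors",
)
output = stable_diffusion.generate_image(prompt="a lovely cat")
output[0].save("output.png")
```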
@@ -407,6 +420,12 @@ output = stable_diffusion.generate_image(
 
 ---
 
+### <u>Some SD1.x and SDXL distilled models</u>
+
+See [docs/distilled_sd.md](./docs/distilled_sd.md) for instructions on using distilled SD models.
+
+---
+
 ### <u>SD3.5 Image Generation</u>
 
 Download the weights from the links below:
@@ -712,17 +731,19 @@ stable_diffusion.convert(
 
 ---
 
-### <u>Listing GGML model and RNG types, schedulers and sample methods</u>
+### <u>Listing GGML model/prediction/RNG types, sample/preview methods and schedulers</u>
 
-Access the GGML model and RNG types, schedulers, and sample methods via the following maps:
+Access the GGML model/prediction/RNG types, sample/preview methods and schedulers via the following maps:
 
 ```python
-from stable_diffusion_cpp import GGML_TYPE_MAP, RNG_TYPE_MAP, SCHEDULER_MAP, SAMPLE_METHOD_MAP
+from stable_diffusion_cpp import GGML_TYPE_MAP, RNG_TYPE_MAP, SCHEDULER_MAP, SAMPLE_METHOD_MAP, PREDICTION_MAP, PREVIEW_MAP
 
 print("GGML model types:", list(GGML_TYPE_MAP))
 print("RNG types:", list(RNG_TYPE_MAP))
 print("Schedulers:", list(SCHEDULER_MAP))
 print("Sample methods:", list(SAMPLE_METHOD_MAP))
+print("Prediction types:", list(PREDICTION_MAP))
+print("Preview methods:", list(PREVIEW_MAP))
 ```
 
 ---
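Since these maps are importable dictionaries keyed by option name, they can also be used to validate user input before any model is loaded. A minimal sketch (illustrative only; the specific key strings such as "euler_a" are assumptions, not guaranteed names):

```python
from stable_diffusion_cpp import PREVIEW_MAP, SAMPLE_METHOD_MAP

def check_option(name: str, options: dict, kind: str) -> str:
    # Membership in the map is enough to fail fast on a typo.
    if name not in options:
        raise ValueError(f"Unknown {kind} {name!r}; choose one of {sorted(options)}")
    return name

sample_method = check_option("euler_a", SAMPLE_METHOD_MAP, "sample method")
preview_method = check_option("proj", PREVIEW_MAP, "preview method")
```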
@@ -778,7 +799,7 @@ Now you can make changes to the code within the `stable_diffusion_cpp` directory
 
 - [stable-diffusion.cpp](https://github.com/leejet/stable-diffusion.cpp)
 - [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)
-- [llama.cpp](https://github.com/ggerganov/llama.cpp)
+- [llama.cpp](https://github.com/ggml-org/llama.cpp)
 - [whisper-cpp-python](https://github.com/carloscdias/whisper-cpp-python)
 - [Golang stable-diffusion](https://github.com/seasonjs/stable-diffusion)
 - [StableDiffusion.NET](https://github.com/DarthAffe/StableDiffusion.NET)

docs/distilled_sd.md

Lines changed: 99 additions & 0 deletions
@@ -0,0 +1,99 @@
# Running distilled models: SSD1B and SDx.x with tiny U-Nets

## Preface

These models feature a reduced U-Net architecture. Unlike standard SDXL models, the SSD-1B U-Net contains only one middle block and fewer attention layers in its up- and down-blocks, resulting in significantly smaller file sizes. Using these models can reduce inference time by more than 33%. For more details, refer to Segmind's paper: https://arxiv.org/abs/2401.02677v1.

Similarly, SD1.x- and SD2.x-style models with a tiny U-Net consist of only 6 U-Net blocks, leading to very small files and time savings of up to 50%. For more information, see the paper: https://arxiv.org/pdf/2305.15798.pdf.

## SSD1B

Note that not all of these models follow the standard parameter naming conventions. However, several useful SSD-1B models are available online, such as:

* https://huggingface.co/segmind/SSD-1B/resolve/main/SSD-1B-A1111.safetensors
* https://huggingface.co/hassenhamdi/SSD-1B-fp8_e4m3fn/resolve/main/SSD-1B_fp8_e4m3fn.safetensors

Useful LoRAs are also available:

* https://huggingface.co/seungminh/lora-swarovski-SSD-1B/resolve/main/pytorch_lora_weights.safetensors
* https://huggingface.co/kylielee505/mylcmlorassd/resolve/main/pytorch_lora_weights.safetensors

These files can be used out-of-the-box, unlike the models described in the next section.
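For example, a minimal sketch of running one of the SSD-1B checkpoints above with stable-diffusion-cpp-python (illustrative only; the local paths are placeholders, and `lora_model_dir` plus the `<lora:...>` prompt syntax are assumed from the project README rather than confirmed here):

```python
from stable_diffusion_cpp import StableDiffusion

stable_diffusion = StableDiffusion(
    model_path="./SSD-1B-A1111.safetensors",
    # lora_model_dir="./loras",  # assumed: directory holding pytorch_lora_weights.safetensors, if using a LoRA
)
output = stable_diffusion.generate_image(
    prompt="a lovely cat",  # a LoRA could be applied with e.g. "<lora:pytorch_lora_weights:0.8>" appended
    width=1024,
    height=1024,
)
output[0].save("ssd1b_output.png")
```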

## SD1.x, SD2.x with tiny U-Nets

These models require conversion before use. You will need a Python script provided by the diffusers team, available on GitHub:

* https://raw.githubusercontent.com/huggingface/diffusers/refs/heads/main/scripts/convert_diffusers_to_original_stable_diffusion.py

### SD2.x

NotaAI provides the following model online:

* https://huggingface.co/nota-ai/bk-sdm-v2-tiny

Creating a .safetensors file involves two steps. First, run this short Python script to download the model from Hugging Face:

```python
from diffusers import StableDiffusionPipeline
pipe = StableDiffusionPipeline.from_pretrained("nota-ai/bk-sdm-v2-tiny", cache_dir="./")
```

Second, create the .safetensors file by running:

```bash
python convert_diffusers_to_original_stable_diffusion.py \
  --model_path models--nota-ai--bk-sdm-v2-tiny/snapshots/68277af553777858cd47e133f92e4db47321bc74 \
  --checkpoint_path bk-sdm-v2-tiny.safetensors --half --use_safetensors
```

This will generate the file **bk-sdm-v2-tiny.safetensors**, which is now ready for use with sd.cpp.
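To close the loop, a minimal sketch of loading the converted file with stable-diffusion-cpp-python (illustrative only; paths and prompt are placeholders):

```python
from stable_diffusion_cpp import StableDiffusion

# Load the converted tiny SD2.x checkpoint and generate a test image.
stable_diffusion = StableDiffusion(model_path="./bk-sdm-v2-tiny.safetensors")
output = stable_diffusion.generate_image(prompt="a lovely cat", width=512, height=512)
output[0].save("bk_sdm_v2_tiny_output.png")
```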

### SD1.x

Several Tiny SD 1.x models are available online, such as:

* https://huggingface.co/segmind/tiny-sd
* https://huggingface.co/segmind/portrait-finetuned
* https://huggingface.co/nota-ai/bk-sdm-tiny

These models also require conversion, partly because some tensors are stored in a non-contiguous manner. To create a usable checkpoint file, follow these two steps:

##### Download and prepare the model using Python, for example this way:

```python
import torch
from diffusers import StableDiffusionPipeline
pipe = StableDiffusionPipeline.from_pretrained("segmind/tiny-sd")
unet = pipe.unet
for param in unet.parameters():
    param.data = param.data.contiguous()  # <- important here
pipe.save_pretrained("segmindtiny-sd", safe_serialization=True)
```

##### Run the conversion script:

```bash
python convert_diffusers_to_original_stable_diffusion.py \
  --model_path ./segmindtiny-sd \
  --checkpoint_path ./segmind_tiny-sd.ckpt --half
```

The file **segmind_tiny-sd.ckpt** will be generated and is now ready for use with sd.cpp. You can follow a similar process for the other models mentioned above.

### Another available .ckpt file

* https://huggingface.co/ClashSAN/small-sd/resolve/main/tinySDdistilled.ckpt

To use this file, you must first adjust its non-contiguous tensors:

```python
import torch
ckpt = torch.load("tinySDdistilled.ckpt", map_location=torch.device('cpu'))
for key, value in ckpt['state_dict'].items():
    if isinstance(value, torch.Tensor):
        ckpt['state_dict'][key] = value.contiguous()
torch.save(ckpt, "tinySDdistilled_fixed.ckpt")
```
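The same fix applies to any .ckpt whose state_dict contains non-contiguous tensors, so it can be wrapped in a small reusable helper (a minimal sketch, not part of the committed doc; file names are placeholders):

```python
import torch

def make_state_dict_contiguous(in_path: str, out_path: str) -> None:
    """Rewrite a checkpoint so every tensor in its state_dict is contiguous."""
    ckpt = torch.load(in_path, map_location=torch.device("cpu"))
    for key, value in ckpt["state_dict"].items():
        if isinstance(value, torch.Tensor):
            ckpt["state_dict"][key] = value.contiguous()
    torch.save(ckpt, out_path)

make_state_dict_contiguous("tinySDdistilled.ckpt", "tinySDdistilled_fixed.ckpt")
```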

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -59,7 +59,7 @@ line-length = 130
 
 [tool.isort]
 profile = "black"
-known_local_folder = ["stable_diffusion_cpp"]
+known_local_folder = ["stable_diffusion_cpp", "tests"]
 remove_redundant_aliases = true
 length_sort = true

stable_diffusion_cpp/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -4,4 +4,4 @@
 
 # isort: on
 
-__version__ = "0.3.6"
+__version__ = "0.3.7"

stable_diffusion_cpp/_internals.py

Lines changed: 2 additions & 0 deletions
@@ -43,6 +43,7 @@ def __init__(
         keep_control_net_on_cpu: bool,
         keep_vae_on_cpu: bool,
         diffusion_flash_attn: bool,
+        tae_preview_only: bool,
         diffusion_conv_direct: bool,
         vae_conv_direct: bool,
         force_sdxl_vae_conv_scale: bool,
@@ -81,6 +82,7 @@ def __init__(
             keep_control_net_on_cpu=keep_control_net_on_cpu,
             keep_vae_on_cpu=keep_vae_on_cpu,
             diffusion_flash_attn=diffusion_flash_attn,
+            tae_preview_only=tae_preview_only,
             diffusion_conv_direct=diffusion_conv_direct,
             vae_conv_direct=vae_conv_direct,
             force_sdxl_vae_conv_scale=force_sdxl_vae_conv_scale,
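For context, a minimal sketch of how this new flag might be driven from the high-level API (illustrative only; it assumes the public `StableDiffusion` constructor forwards `tae_preview_only` together with a `taesd_path` argument, neither of which is shown in this diff):

```python
from stable_diffusion_cpp import StableDiffusion

stable_diffusion = StableDiffusion(
    model_path="../models/v1-5-pruned-emaonly.safetensors",
    taesd_path="../models/taesd.safetensors",  # assumed parameter name for the TAESD decoder
    tae_preview_only=True,  # assumed meaning: use TAESD for previews only, full VAE for the final image
)
```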
