feat: support for sd3.5

william-murray1204 · william-murray1204 · commit 5e73e5a059d7 · 2024-10-29T00:38:35.000+11:00
diff --git a/README.md b/README.md
@@ -259,6 +259,33 @@ Note that:
 - Only the Flux-dev q8_0 will work with LoRAs.
 - You can download FLUX LoRA models from https://huggingface.co/XLabs-AI/flux-lora-collection/tree/main (you must use a comfy converted version!!!).
 
+### SD3.5 Image Generation
+
+Download the weights from the links below:
+
+- Download sd3.5_large from https://huggingface.co/stabilityai/stable-diffusion-3.5-large/blob/main/sd3.5_large.safetensors
+- Download clip_g from https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/clip_g.safetensors
+- Download clip_l from https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/clip_l.safetensors
+- Download t5xxl from https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/t5xxl_fp16.safetensors
+
+```python
+from stable_diffusion_cpp import StableDiffusion
+
+stable_diffusion = StableDiffusion(
+    model_path="../models/sd3.5_large.safetensors",
+    clip_l_path="../models/clip_l.safetensors",
+    clip_g_path="../models/clip_g.safetensors",
+    t5xxl_path="../models/t5xxl_fp16.safetensors",
+)
+output = stable_diffusion.txt_to_img(
+      prompt="a lovely cat holding a sign says 'Stable diffusion 3.5 Large'",
+      height=1024,
+      width=1024,
+      cfg_scale=4.5,
+      sample_method="euler",
+)
+```
+
 ### Other High-level API Examples
 
 Other examples for the high-level API (such as image to image, upscaling and model conversion) can be found in the [tests](tests) directory.
@@ -293,8 +320,6 @@ img = sd_cpp.upscale(
       image_bytes,
       upscale_factor,
 ) # Upscale the image
-
-sd_cpp.free_image(c_image)
 ```
 
 ## Development
diff --git a/stable_diffusion_cpp/_internals.py b/stable_diffusion_cpp/_internals.py
@@ -21,6 +21,7 @@ def __init__(
         self,
         model_path: str,
         clip_l_path: str,
+        clip_g_path: str,
         t5xxl_path: str,
         diffusion_model_path: str,
         vae_path: str,
@@ -43,6 +44,7 @@ def __init__(
     ):
         self.model_path = model_path
         self.clip_l_path = clip_l_path
+        self.clip_g_path = clip_g_path
         self.t5xxl_path = t5xxl_path
         self.diffusion_model_path = diffusion_model_path
         self.vae_path = vae_path
@@ -84,6 +86,7 @@ def __init__(
                 self.model = sd_cpp.new_sd_ctx(
                     self.model_path.encode("utf-8"),
                     self.clip_l_path.encode("utf-8"),
+                    self.clip_g_path.encode("utf-8"),
                     self.t5xxl_path.encode("utf-8"),
                     self.diffusion_model_path.encode("utf-8"),
                     self.vae_path.encode("utf-8"),
diff --git a/stable_diffusion_cpp/stable_diffusion.py b/stable_diffusion_cpp/stable_diffusion.py
@@ -22,6 +22,7 @@ def __init__(
         self,
         model_path: str = "",
         clip_l_path: str = "",
+        clip_g_path: str = "",
         t5xxl_path: str = "",
         diffusion_model_path: str = "",
         vae_path: str = "",
@@ -88,6 +89,7 @@ def __init__(
         # Params
         self.model_path = model_path
         self.clip_l_path = clip_l_path
+        self.clip_g_path = clip_g_path
         self.t5xxl_path = t5xxl_path
         self.diffusion_model_path = diffusion_model_path
         self.vae_path = vae_path
@@ -126,6 +128,7 @@ def __init__(
                 _StableDiffusionModel(
                     self.model_path,
                     self.clip_l_path,
+                    self.clip_g_path,
                     self.t5xxl_path,
                     self.diffusion_model_path,
                     self.vae_path,
diff --git a/stable_diffusion_cpp/stable_diffusion_cpp.py b/stable_diffusion_cpp/stable_diffusion_cpp.py
@@ -301,6 +301,7 @@ class GGMLType(IntEnum):
     [
         ctypes.c_char_p,  # model_path
         ctypes.c_char_p,  # clip_l_path
+        ctypes.c_char_p,  # clip_g_path
         ctypes.c_char_p,  # t5xxl_path
         ctypes.c_char_p,  # diffusion_model_path
         ctypes.c_char_p,  # vae_path
@@ -325,6 +326,7 @@ class GGMLType(IntEnum):
 def new_sd_ctx(
     model_path: bytes,
     clip_l_path: bytes,
+    clip_g_path: bytes,
     t5xxl_path: bytes,
     diffusion_model_path: bytes,
     vae_path: bytes,
diff --git a/tests/test_sd3.py b/tests/test_sd3.py
@@ -0,0 +1,42 @@
+import os
+import traceback
+from stable_diffusion_cpp import StableDiffusion
+
+MODEL_PATH = "C:\\stable-diffusion\\sd3.5\\sd3.5_large-q4_k_5_0.gguf"
+CLIP_L_PATH = "C:\\stable-diffusion\\sd3.5\\clip_l.safetensors"
+CLIP_G_PATH = "C:\\stable-diffusion\\sd3.5\\clip_g.safetensors"
+T5XXL_PATH = "C:\\stable-diffusion\\sd3.5\\t5xxl_fp16.safetensors"
+
+stable_diffusion = StableDiffusion(
+    model_path=MODEL_PATH,
+    clip_l_path=CLIP_L_PATH,
+    clip_g_path=CLIP_G_PATH,
+    t5xxl_path=T5XXL_PATH,
+)
+
+
+def callback(step: int, steps: int, time: float):
+    print("Completed step: {} of {}".format(step, steps))
+
+
+try:
+    # Generate images
+    images = stable_diffusion.txt_to_img(
+        prompt="a lovely cat holding a sign says 'Stable diffusion 3.5 Large'",
+        height=832,
+        width=832,
+        cfg_scale=4.5,
+        sample_method="euler",
+    )
+
+    OUTPUT_DIR = "tests/outputs"
+    if not os.path.exists(OUTPUT_DIR):
+        os.makedirs(OUTPUT_DIR)
+
+    # Save images
+    for i, image in enumerate(images):
+        image.save(f"{OUTPUT_DIR}/sd3_{i}.png")
+
+except Exception as e:
+    traceback.print_exc()
+    print("Test - sd3 failed: ", e)
diff --git a/vendor/stable-diffusion.cpp b/vendor/stable-diffusion.cpp
@@ -1 +1 @@
-Subproject commit 14206fd48832ab600d9db75f15acb5062ae2c296
+Subproject commit ac54e0076052a196b7df961eb1f792c9ff4d7f22