Skip to content

Commit 66b1db1

Browse files
sync: update submodule (inpainting support)
1 parent 2b5dfaf commit 66b1db1

File tree

9 files changed

+193
-52
lines changed

9 files changed

+193
-52
lines changed

README.md

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,12 @@ All `stable-diffusion.cpp` cmake build options can be set via the `CMAKE_ARGS` e
4242

4343
```bash
4444
# Linux and Mac
45-
CMAKE_ARGS="-DSD_CUBLAS=ON" pip install stable-diffusion-cpp-python
45+
CMAKE_ARGS="-DSD_CUDA=ON" pip install stable-diffusion-cpp-python
4646
```
4747

4848
```powershell
4949
# Windows
50-
$env:CMAKE_ARGS="-DSD_CUBLAS=ON"
50+
$env:CMAKE_ARGS="-DSD_CUDA=ON"
5151
pip install stable-diffusion-cpp-python
5252
```
5353

@@ -60,13 +60,13 @@ They can also be set via `pip install -C / --config-settings` command and saved
6060

6161
```bash
6262
pip install --upgrade pip # ensure pip is up to date
63-
pip install stable-diffusion-cpp-python -C cmake.args="-DSD_CUBLAS=ON"
63+
pip install stable-diffusion-cpp-python -C cmake.args="-DSD_CUDA=ON"
6464
```
6565

6666
```txt
6767
# requirements.txt
6868
69-
stable-diffusion-cpp-python -C cmake.args="-DSD_CUBLAS=ON"
69+
stable-diffusion-cpp-python -C cmake.args="-DSD_CUDA=ON"
7070
```
7171

7272
</details>
@@ -75,16 +75,16 @@ stable-diffusion-cpp-python -C cmake.args="-DSD_CUBLAS=ON"
7575

7676
Below are some common backends, their build commands and any additional environment variables required.
7777

78-
<!-- CUBLAS -->
78+
<!-- CUDA -->
7979
<details>
80-
<summary>Using CUBLAS (CUDA)</summary>
80+
<summary>Using CUDA (CUBLAS)</summary>
8181

8282
This provides BLAS acceleration using the CUDA cores of your Nvidia GPU. Make sure you have the CUDA toolkit installed. You can download it from your Linux distro's package manager (e.g. `apt install nvidia-cuda-toolkit`) or from here: [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads). You can check your installed CUDA toolkit version by running `nvcc --version`.
8383

8484
- It is recommended you have at least 4 GB of VRAM.
8585

8686
```bash
87-
CMAKE_ARGS="-DSD_CUBLAS=ON" pip install stable-diffusion-cpp-python
87+
CMAKE_ARGS="-DSD_CUDA=ON" pip install stable-diffusion-cpp-python
8888
```
8989

9090
</details>
@@ -148,7 +148,7 @@ CMAKE_ARGS="-DSD_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML
148148
<details>
149149
<summary>Using Flash Attention</summary>
150150

151-
Enabling flash attention reduces memory usage by at least 400 MB. At the moment, it is not supported when CUBLAS is enabled because the kernel implementation is missing.
151+
Enabling flash attention reduces memory usage by at least 400 MB. At the moment, it is not supported when CUDA (CUBLAS) is enabled because the kernel implementation is missing.
152152

153153
```bash
154154
CMAKE_ARGS="-DSD_FLASH_ATTN=ON" pip install stable-diffusion-cpp-python
@@ -166,6 +166,19 @@ CMAKE_ARGS="-DGGML_OPENBLAS=ON" pip install stable-diffusion-cpp-python
166166

167167
</details>
168168

169+
<!-- MUSA -->
170+
171+
<details>
172+
<summary>Using MUSA</summary>
173+
174+
This provides BLAS acceleration using the MUSA cores of your Moore Threads GPU. Make sure to have the MUSA toolkit installed.
175+
176+
```bash
177+
CMAKE_ARGS="-DCMAKE_C_COMPILER=/usr/local/musa/bin/clang -DCMAKE_CXX_COMPILER=/usr/local/musa/bin/clang++ -DSD_MUSA=ON -DCMAKE_BUILD_TYPE=Release" pip install stable-diffusion-cpp-python
178+
```
179+
180+
</details>
181+
169182
### Upgrading and Reinstalling
170183

171184
To upgrade and rebuild `stable-diffusion-cpp-python` add `--upgrade --force-reinstall --no-cache-dir` flags to the `pip install` command to ensure the package is rebuilt from source.
@@ -299,7 +312,23 @@ stable_diffusion = StableDiffusion(model_path="../models/v1-5-pruned-emaonly.saf
299312

300313
output = stable_diffusion.img_to_img(
301314
prompt="blue eyes",
302-
image=INPUT_IMAGE,
315+
image=INPUT_IMAGE, # Note: The input image will be automatically resized to match the width and height arguments (default: 512x512)
316+
strength=0.4,
317+
)
318+
```
319+
320+
### Inpainting
321+
322+
```python
323+
from stable_diffusion_cpp import StableDiffusion
324+
325+
# Note: Inpainting with a base model gives poor results. A model fine-tuned for inpainting is recommended.
326+
stable_diffusion = StableDiffusion(model_path="../models/v1-5-pruned-emaonly.safetensors")
327+
328+
output = stable_diffusion.img_to_img(
329+
prompt="blue eyes",
330+
image="../input.png",
331+
mask_image="../mask.png", # A grayscale image where 0 is masked and 255 is unmasked
303332
strength=0.4,
304333
)
305334
```

assets/mask.png

3.05 KB
Loading

stable_diffusion_cpp/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@
44

55
# isort: on
66

7-
__version__ = "0.2.2"
7+
__version__ = "0.2.3"

stable_diffusion_cpp/stable_diffusion.py

Lines changed: 68 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,9 @@ def __init__(
3333
vae_decode_only: bool = False,
3434
vae_tiling: bool = False,
3535
n_threads: int = -1,
36-
wtype: Union[str, GGMLType, int, float, None] = "default",
37-
rng_type: Union[str, RNGType, int, float, None] = "cuda",
38-
schedule: Union[str, Schedule, int, float, None] = "default",
36+
wtype: Optional[Union[str, GGMLType, int, float]] = "default",
37+
rng_type: Optional[Union[str, RNGType, int, float]] = "cuda",
38+
schedule: Optional[Union[str, Schedule, int, float]] = "default",
3939
keep_clip_on_cpu: bool = False,
4040
keep_control_net_cpu: bool = False,
4141
keep_vae_on_cpu: bool = False,
@@ -189,7 +189,7 @@ def txt_to_img(
189189
guidance: float = 3.5,
190190
width: int = 512,
191191
height: int = 512,
192-
sample_method: Union[str, SampleMethod, int, float, None] = "euler_a",
192+
sample_method: Optional[Union[str, SampleMethod, int, float]] = "euler_a",
193193
sample_steps: int = 20,
194194
seed: int = 42,
195195
batch_count: int = 1,
@@ -315,13 +315,14 @@ def img_to_img(
315315
self,
316316
image: Union[Image.Image, str],
317317
prompt: str,
318+
mask_image: Optional[Union[Image.Image, str]] = None,
318319
negative_prompt: str = "",
319320
clip_skip: int = -1,
320321
cfg_scale: float = 7.0,
321322
guidance: float = 3.5,
322323
width: int = 512,
323324
height: int = 512,
324-
sample_method: Union[str, SampleMethod, int, float, None] = "euler_a",
325+
sample_method: Optional[Union[str, SampleMethod, int, float]] = "euler_a",
325326
sample_steps: int = 20,
326327
strength: float = 0.75,
327328
seed: int = 42,
@@ -344,6 +345,7 @@ def img_to_img(
344345
Args:
345346
image: The input image path or Pillow Image to direct the generation.
346347
prompt: The prompt to render.
348+
mask_image: The inpainting mask image path or Pillow Image.
347349
negative_prompt: The negative prompt.
348350
clip_skip: Ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer.
349351
cfg_scale: Unconditional guidance scale.
@@ -414,8 +416,25 @@ def sd_progress_callback(
414416
# Resize the input image
415417
image = self._resize_image(image, width, height) # Input image and generated image must have the same size
416418

417-
# Convert the image to a byte array
419+
def _create_blank_mask_image(width: int, height: int):
420+
"""Create a blank white mask image in c_uint8 format."""
421+
mask_image_buffer = (ctypes.c_uint8 * (width * height))(*[255] * (width * height))
422+
return mask_image_buffer
423+
424+
# Convert the image and mask image to a byte array
418425
image_pointer = self._image_to_sd_image_t_p(image)
426+
if mask_image:
427+
# Resize the mask image (however the mask should ideally already be the same size as the input image)
428+
mask_image = self._resize_image(mask_image, width, height)
429+
mask_image_pointer = self._image_to_sd_image_t_p(mask_image, channel=1)
430+
else:
431+
# Create a blank white mask image
432+
mask_image_pointer = self._c_uint8_to_sd_image_t_p(
433+
image=_create_blank_mask_image(width, height),
434+
width=width,
435+
height=height,
436+
channel=1,
437+
)
419438

420439
# Convert skip_layers to a ctypes array
421440
skip_layers_array = (ctypes.c_int * len(skip_layers))(*skip_layers)
@@ -426,6 +445,7 @@ def sd_progress_callback(
426445
c_images = sd_cpp.img2img(
427446
self.model,
428447
image_pointer,
448+
mask_image_pointer,
429449
prompt.encode("utf-8"),
430450
negative_prompt.encode("utf-8"),
431451
clip_skip,
@@ -466,7 +486,7 @@ def img_to_vid(
466486
augmentation_level: float = 0.0,
467487
min_cfg: float = 1.0,
468488
cfg_scale: float = 7.0,
469-
sample_method: Union[str, SampleMethod, int, float, None] = "euler_a",
489+
sample_method: Optional[Union[str, SampleMethod, int, float]] = "euler_a",
470490
sample_steps: int = 20,
471491
strength: float = 0.75,
472492
seed: int = 42,
@@ -661,7 +681,6 @@ def sd_progress_callback(
661681
# ==================== Upscale images ====================
662682

663683
upscaled_images = []
664-
665684
for image in images:
666685

667686
# Convert the image to a byte array
@@ -698,19 +717,24 @@ def _resize_image(self, image: Union[Image.Image, str], width: int, height: int)
698717
def _format_image(
699718
self,
700719
image: Union[Image.Image, str],
720+
channel: int = 3,
701721
) -> Image.Image:
702-
"""Convert an image path or Pillow Image to a Pillow Image of RGBA format."""
722+
"""Convert an image path or Pillow Image to a Pillow Image of RGB or grayscale (inpainting masks) format."""
703723
# Convert image path to image if str
704724
if isinstance(image, str):
705725
image = Image.open(image)
706726

707-
# Convert any non RGBA to RGBA
708-
if image.format != "PNG":
709-
image = image.convert("RGBA")
727+
if channel == 1:
728+
# Grayscale the image if channel is 1
729+
image = image.convert("L")
730+
else:
731+
# Convert any non RGBA to RGBA
732+
if image.format != "PNG":
733+
image = image.convert("RGBA")
710734

711-
# Ensure the image is in RGB mode
712-
if image.mode != "RGB":
713-
image = image.convert("RGB")
735+
# Ensure the image is in RGB mode
736+
if image.mode != "RGB":
737+
image = image.convert("RGB")
714738

715739
return image, image.width, image.height
716740

@@ -741,14 +765,12 @@ def _format_control_cond(
741765

742766
# ============= Image to C uint8 pointer =============
743767

744-
def _cast_image(self, image: Union[Image.Image, str]):
768+
def _cast_image(self, image: Union[Image.Image, str], channel: int = 3):
745769
"""Cast a PIL Image to a C uint8 pointer."""
746-
747-
image, width, height = self._format_image(image)
770+
image, width, height = self._format_image(image, channel)
748771

749772
# Convert the PIL Image to a byte array
750773
image_bytes = image.tobytes()
751-
752774
data = ctypes.cast(
753775
(ctypes.c_byte * len(image_bytes))(*image_bytes),
754776
ctypes.POINTER(ctypes.c_uint8),
@@ -757,8 +779,8 @@ def _cast_image(self, image: Union[Image.Image, str]):
757779

758780
# ============= Image to C sd_image_t =============
759781

760-
def _c_uint8_to_sd_image_t_p(self, image: ctypes.c_uint8, width, height, channel: int = 3):
761-
# Create a new C sd_image_t
782+
def _c_uint8_to_sd_image_t_p(self, image: ctypes.c_uint8, width: int, height: int, channel: int = 3) -> sd_cpp.sd_image_t:
783+
"""Convert a C uint8 pointer to a C sd_image_t."""
762784
c_image = sd_cpp.sd_image_t(
763785
width=width,
764786
height=height,
@@ -767,21 +789,18 @@ def _c_uint8_to_sd_image_t_p(self, image: ctypes.c_uint8, width, height, channel
767789
)
768790
return c_image
769791

770-
def _image_to_sd_image_t_p(self, image: Union[Image.Image, str]):
792+
def _image_to_sd_image_t_p(self, image: Union[Image.Image, str], channel: int = 3) -> sd_cpp.sd_image_t:
771793
"""Convert a PIL Image or image path to a C sd_image_t."""
772-
773-
data, width, height = self._cast_image(image)
774-
775-
# Create a new C sd_image_t
776-
c_image = self._c_uint8_to_sd_image_t_p(data, width, height)
794+
data, width, height = self._cast_image(image, channel)
795+
c_image = self._c_uint8_to_sd_image_t_p(data, width, height, channel)
777796
return c_image
778797

779798
# ============= C sd_image_t to Image =============
780799

781-
def _c_array_to_bytes(self, c_array, buffer_size: int):
800+
def _c_array_to_bytes(self, c_array, buffer_size: int) -> bytes:
782801
return bytearray(ctypes.cast(c_array, ctypes.POINTER(ctypes.c_byte * buffer_size)).contents)
783802

784-
def _dereference_sd_image_t_p(self, c_image: sd_cpp.sd_image_t):
803+
def _dereference_sd_image_t_p(self, c_image: sd_cpp.sd_image_t) -> Dict:
785804
"""Dereference a C sd_image_t pointer to a Python dictionary with height, width, channel and data (bytes)."""
786805

787806
# Calculate the size of the data buffer
@@ -795,7 +814,7 @@ def _dereference_sd_image_t_p(self, c_image: sd_cpp.sd_image_t):
795814
}
796815
return image
797816

798-
def _image_slice(self, c_images: sd_cpp.sd_image_t, count: int, upscale_factor: int):
817+
def _image_slice(self, c_images: sd_cpp.sd_image_t, count: int, upscale_factor: int) -> List[Dict]:
799818
"""Slice a C array of images."""
800819
image_array = ctypes.cast(c_images, ctypes.POINTER(sd_cpp.sd_image_t * count)).contents
801820

@@ -821,7 +840,7 @@ def _image_slice(self, c_images: sd_cpp.sd_image_t, count: int, upscale_factor:
821840
# Return the list of images
822841
return images
823842

824-
def _sd_image_t_p_to_images(self, c_images: sd_cpp.sd_image_t, count: int, upscale_factor: int):
843+
def _sd_image_t_p_to_images(self, c_images: sd_cpp.sd_image_t, count: int, upscale_factor: int) -> List[Image.Image]:
825844
"""Convert C sd_image_t_p images to a Python list of images."""
826845

827846
# Convert C array to Python list of images
@@ -836,20 +855,30 @@ def _sd_image_t_p_to_images(self, c_images: sd_cpp.sd_image_t, count: int, upsca
836855

837856
# ============= Bytes to Image =============
838857

839-
def _bytes_to_image(self, byte_data: bytes, width: int, height: int):
858+
def _bytes_to_image(self, byte_data: bytes, width: int, height: int, channel: int = 3) -> Image.Image:
840859
"""Convert a byte array to a PIL Image."""
860+
# Initialize the image with RGBA mode
841861
image = Image.new("RGBA", (width, height))
842862

843863
for y in range(height):
844864
for x in range(width):
845-
idx = (y * width + x) * 3
846-
image.putpixel(
847-
(x, y),
848-
(byte_data[idx], byte_data[idx + 1], byte_data[idx + 2], 255),
849-
)
865+
idx = (y * width + x) * channel
866+
# Dynamically create the color tuple
867+
color = tuple(byte_data[idx + i] if idx + i < len(byte_data) else 0 for i in range(channel))
868+
if channel == 1: # Grayscale
869+
color = (color[0],) * 3 + (255,) # Convert to (R, G, B, A)
870+
elif channel == 3: # RGB
871+
color = color + (255,) # Add alpha channel
872+
elif channel == 4: # RGBA
873+
pass # Use color as is
874+
else:
875+
raise ValueError(f"Unsupported channel value: {channel}")
876+
# Set the pixel
877+
image.putpixel((x, y), color)
878+
850879
return image
851880

852-
def __setstate__(self, state):
881+
def __setstate__(self, state) -> None:
853882
self.__init__(**state)
854883

855884
def close(self) -> None:
@@ -865,7 +894,7 @@ def __del__(self) -> None:
865894
# ============================================
866895

867896

868-
def validate_dimensions(dimension: int | float, attribute_name: str) -> int:
897+
def validate_dimensions(dimension: Union[int, float], attribute_name: str) -> int:
869898
"""Dimensions must be a multiple of 64 otherwise a GGML_ASSERT error is encountered."""
870899
dimension = int(dimension)
871900
if dimension <= 0 or dimension % 64 != 0:

stable_diffusion_cpp/stable_diffusion_cpp.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,8 @@ class GGMLType(IntEnum):
253253
SD_TYPE_F16 = 1
254254
SD_TYPE_Q4_0 = 2
255255
SD_TYPE_Q4_1 = 3
256+
# SD_TYPE_Q4_2 = 4 support has been removed
257+
# SD_TYPE_Q4_3 = 5 support has been removed
256258
SD_TYPE_Q5_0 = 6
257259
SD_TYPE_Q5_1 = 7
258260
SD_TYPE_Q8_0 = 8
@@ -444,12 +446,13 @@ def txt2img(
444446
# ------------ img2img ------------
445447

446448

447-
# SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx, sd_image_t init_image, const char* prompt, const char* negative_prompt, int clip_skip, float cfg_scale, float guidance, int width, int height, enum sample_method_t sample_method, int sample_steps, float strength, int64_t seed, int batch_count, const sd_image_t* control_cond, float control_strength, float style_strength, bool normalize_input, const char* input_id_images_path, int* skip_layers, size_t skip_layers_count, float slg_scale, float skip_layer_start, float skip_layer_end);
449+
# SD_API sd_image_t* img2img(sd_ctx_t* sd_ctx, sd_image_t init_image, sd_image_t mask_image, const char* prompt, const char* negative_prompt, int clip_skip, float cfg_scale, float guidance, int width, int height, enum sample_method_t sample_method, int sample_steps, float strength, int64_t seed, int batch_count, const sd_image_t* control_cond, float control_strength, float style_strength, bool normalize_input, const char* input_id_images_path, int* skip_layers, size_t skip_layers_count, float slg_scale, float skip_layer_start, float skip_layer_end);
448450
@ctypes_function(
449451
"img2img",
450452
[
451453
sd_ctx_t_p_ctypes, # sd_ctx
452454
sd_image_t, # init_image
455+
sd_image_t, # mask_image
453456
ctypes.c_char_p, # prompt
454457
ctypes.c_char_p, # negative_prompt
455458
ctypes.c_int, # clip_skip
@@ -478,6 +481,7 @@ def txt2img(
478481
def img2img(
479482
sd_ctx: sd_ctx_t_p,
480483
init_image: sd_image_t,
484+
mask_image: sd_image_t,
481485
prompt: bytes,
482486
negative_prompt: bytes,
483487
clip_skip: int,

0 commit comments

Comments
 (0)