Skip to content

Commit 4e75562

Browse files
authored
Merge pull request #18 from ShyVortex/dev-sd3.5
Add: stable-diffusion-3.5-turbo pipeline integration
2 parents 049c9cb + 40619f8 commit 4e75562

File tree

8 files changed

+243
-13
lines changed

8 files changed

+243
-13
lines changed

.idea/inspectionProfiles/Project_Default.xml

Lines changed: 57 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

README.md

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ The last two are the essential part of the project and they act as GUI for the P
1717
In order to compile and run the software, it is required that you have the following prerequisites:
1818
- Open Java Development Kit (OpenJDK) 17 or above
1919
- Apache Maven (at least version 3.6.3 is recommended)
20+
- Hugging Face CLI
2021

2122
You also MUST install a Python virtual environment in your home directory, inside a folder named 'venv',
2223
with the packages listed in *requirements*.
@@ -51,14 +52,20 @@ Then, if you also want a runnable .jar archive, type:
5152
```
5253
With these commands, a new folder named 'target' is created containing the compiled project as well as the executable file.
5354

54-
## Unlock Stable Diffusion 3
55-
The newest generative model is currently gated, so first you need to sign up [here](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers).
55+
## Usage with Stable Diffusion 3 and 3.5
56+
The newest generative models are currently gated, so first you need to sign up [here](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers)
57+
and [here](https://huggingface.co/stabilityai/stable-diffusion-3.5-large-turbo).
5658
Proceed to generate a [token](https://huggingface.co/settings/tokens) under your account settings which you will use to login with:
5759
```shell
5860
huggingface-cli login
5961
```
6062
Enter your credentials first, then the token when it's needed.
6163

64+
It is recommended to pre-download the quantized T5 text encoder used by the Stable Diffusion 3.5 pipeline to avoid long waiting times while using the app:
65+
```shell
66+
huggingface-cli download diffusers/t5-nf4
67+
```
68+
6269
## Screenshots
6370
### Home
6471
![home-view](https://github.com/user-attachments/assets/50052e5a-c8a4-4eaa-b39f-ae537c81fb9f)
@@ -84,6 +91,7 @@ The project utilizes Stable Diffusion's generative AI pipelines for image genera
8491
+ [stable-diffusion-2-1](https://huggingface.co/stabilityai/stable-diffusion-2-1)
8592
+ [stable-diffusion-2-1-base](https://huggingface.co/stabilityai/stable-diffusion-2-1-base)
8693
+ [stable-diffusion-3-medium](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers)
94+
+ [stable-diffusion-3.5-large-turbo](https://huggingface.co/stabilityai/stable-diffusion-3.5-large-turbo)
8795
+ [sd-x2-latent-upscaler](https://huggingface.co/stabilityai/sd-x2-latent-upscaler)
8896
+ [pixel-art-style](https://huggingface.co/kohbanye/pixel-art-style)
8997
+ [pixel-art-xl](https://huggingface.co/nerijs/pixel-art-xl)

requirements.txt

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
accelerate==0.26.1
22
certifi==2024.7.4
33
charset-normalizer==3.3.2
4-
diffusers==0.29.2
4+
diffusers==0.32.2
55
filelock==3.13.1
66
fsspec==2023.12.2
77
huggingface-hub==0.24.3
@@ -16,7 +16,7 @@ nvidia-cublas-cu12==12.1.3.1
1616
nvidia-cuda-cupti-cu12==12.1.105
1717
nvidia-cuda-nvrtc-cu12==12.1.105
1818
nvidia-cuda-runtime-cu12==12.1.105
19-
nvidia-cudnn-cu12==8.9.2.26
19+
nvidia-cudnn-cu12==9.1.0.70
2020
nvidia-cufft-cu12==11.0.2.54
2121
nvidia-curand-cu12==10.3.2.106
2222
nvidia-cusolver-cu12==11.4.5.107
@@ -34,13 +34,17 @@ regex==2023.12.25
3434
requests==2.32.2
3535
safetensors==0.4.1
3636
sympy==1.12
37-
tokenizers==0.15.0
37+
tokenizers==0.21.0
3838
torch==2.4.0
39-
torchvision==0.16.2
39+
torchvision==0.19.0
4040
tqdm==4.66.3
4141
transformers==4.48.0
42-
triton==2.1.0
42+
triton==3.0.0
4343
typing_extensions==4.9.0
4444
urllib3==2.2.2
4545
zipp==3.19.1
4646
sentencepiece==0.2.0
47+
bitsandbytes==0.45.4
48+
GPUtil==1.4.0
49+
scipy==1.11.2
50+
protobuf==6.30.2

src/main/java/it/unimol/diffusiontool/application/DiffusionApplication.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ public User getUser() {
5555
}
5656

5757
private void setVersion() {
58-
this.version = "1.1.0";
58+
this.version = "1.2.0";
5959
}
6060

6161
public void setRootNode(Parent rootNode) {

src/main/java/it/unimol/diffusiontool/controller/DiffusionController.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,7 @@ private void initGenerateView() {
254254
styleComboBox.getItems().addAll(
255255
"Stable Diffusion 2.1",
256256
"Stable Diffusion 3",
257+
"Stable Diffusion 3.5",
257258
"Pixel Art"
258259
);
259260
styleComboBox.setPromptText(styleComboBox.getItems().get(0));
@@ -1149,6 +1150,7 @@ public String callPyScript(String prompt, String tags, String date, String path)
11491150
String inputLine;
11501151
while ((inputLine = inputbufferedReader.readLine()) != null) {
11511152
output.append(inputLine).append("\n");
1153+
System.out.println(inputLine.trim());
11521154
}
11531155
}
11541156

@@ -1225,6 +1227,8 @@ public File findPyScript() {
12251227
fileName = includeUpscaling ? "generate_upscale.py" : "generate_sd2-1.py";
12261228
else if (styleComboBox.getValue().equals("Stable Diffusion 3"))
12271229
fileName = "generate_sd3.py";
1230+
else if (styleComboBox.getValue().equals("Stable Diffusion 3.5"))
1231+
fileName = "generate_sd3-5.py";
12281232
else
12291233
fileName = "generate_pixart.py";
12301234
break;
@@ -1343,7 +1347,7 @@ public void throwGenericAlert() {
13431347
genAlert.setHeaderText("ERROR: Upscaling Failure");
13441348
genAlert.setContentText("Something went wrong in the image upscaling. Please retry");
13451349
}
1346-
genAlert.showAndWait();
1350+
Platform.runLater(genAlert::showAndWait);
13471351
}
13481352

13491353
private int checkAvailableSpace() {

src/main/python/it/unimol/diffusiontool/generate_sd2-1.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
def main():
1111
# Check if the correct number of command-line arguments is provided
1212
if len(sys.argv) != 4:
13-
print("Usage: python generate_sd2-1.py <prompt> <tags> <date>")
13+
print("Usage: python generate_sd2-1.py <prompt> <tags> <date>", file=sys.stderr)
1414
sys.exit(1)
1515

1616
# Get the prompt and date from the command-line arguments passed from Java
@@ -29,7 +29,7 @@ def main():
2929
# Process the prompt and set the output path
3030
with torch.cuda.amp.autocast():
3131
image = pipe(prompt=prompt, negative_prompt=tags, num_inference_steps=25).images[0]
32-
output_folder = os.path.abspath("result/generated/sd2-1")
32+
output_folder = os.path.abspath("result/generated/general/sd2-1")
3333
output_filename = f"generated_image_{date}.png"
3434
output_filepath = os.path.join(output_folder, output_filename)
3535

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
import sys
2+
from diffusers import BitsAndBytesConfig, SD3Transformer2DModel
3+
from diffusers import StableDiffusion3Pipeline
4+
from transformers import T5EncoderModel
5+
import torch
6+
import GPUtil
7+
import os
8+
import base64
9+
10+
def use_full_model(prompt, tags, date):
    """Generate an image with the non-quantized SD 3.5 Large Turbo pipeline.

    The pipeline is loaded in bfloat16 and moved to the CUDA device. The
    resulting image is written to ``result/generated/general/sd3-5`` and
    then returned as base64 text.

    Args:
        prompt: Text prompt describing the image to generate.
        tags: Text passed to the pipeline as the negative prompt.
        date: String embedded in the output file name.

    Returns:
        The contents of the saved PNG file as a base64-encoded string.
    """
    pipe = StableDiffusion3Pipeline.from_pretrained(
        "stabilityai/stable-diffusion-3.5-large-turbo",
        torch_dtype=torch.bfloat16
    )
    pipe = pipe.to("cuda")

    # Progress messages go to stderr; presumably stdout is reserved for the
    # base64 payload that main() prints for the calling process.
    print("Model loaded successfully. Generating image...", file=sys.stderr)

    # NOTE(review): with guidance_scale=0.0 classifier-free guidance is
    # presumably disabled, so negative_prompt likely has no effect - confirm
    # against the diffusers StableDiffusion3Pipeline documentation.
    image = pipe(
        prompt=prompt,
        negative_prompt=tags,
        num_inference_steps=4,
        guidance_scale=0.0,
    ).images[0]

    print("Image generation completed. Saving the image...", file=sys.stderr)

    output_folder = os.path.abspath("result/generated/general/sd3-5")
    output_filename = f"generated_image_{date}.png"
    output_filepath = os.path.join(output_folder, output_filename)

    # Create the output folder if needed; exist_ok avoids the race between
    # checking for the folder and creating it.
    os.makedirs(output_folder, exist_ok=True)
    image.save(output_filepath)

    print(f"Image saved to {output_filepath}", file=sys.stderr)

    # Encode the saved image as a base64 string
    with open(output_filepath, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode('utf-8')

    return encoded_image
44+
45+
def use_quantized_model(prompt, tags, date):
    """Generate an image with the NF4-quantized SD 3.5 Large Turbo pipeline.

    The transformer is loaded with a 4-bit NF4 bitsandbytes configuration and
    the third text encoder is replaced by the ``diffusers/t5-nf4`` T5 encoder.
    Model CPU offload is enabled on the pipeline before generation. The
    resulting image is written to ``result/generated/general/sd3-5`` and then
    returned as base64 text.

    Args:
        prompt: Text prompt describing the image to generate.
        tags: Text passed to the pipeline as the negative prompt.
        date: String embedded in the output file name.

    Returns:
        The contents of the saved PNG file as a base64-encoded string.
    """
    model_id = "stabilityai/stable-diffusion-3.5-large-turbo"

    nf4_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16
    )
    model_nf4 = SD3Transformer2DModel.from_pretrained(
        model_id,
        subfolder="transformer",
        quantization_config=nf4_config,
        torch_dtype=torch.bfloat16
    )

    # Progress messages go to stderr; presumably stdout is reserved for the
    # base64 payload that main() prints for the calling process.
    print("Quantized model loaded successfully.", file=sys.stderr)

    t5_nf4 = T5EncoderModel.from_pretrained("diffusers/t5-nf4", torch_dtype=torch.bfloat16)

    print("T5 Encoder loaded successfully.", file=sys.stderr)

    pipeline = StableDiffusion3Pipeline.from_pretrained(
        model_id,
        transformer=model_nf4,
        text_encoder_3=t5_nf4,
        torch_dtype=torch.bfloat16
    )
    pipeline.enable_model_cpu_offload()

    print("Pipeline initialized. Generating image...", file=sys.stderr)

    # NOTE(review): with guidance_scale=0.0 classifier-free guidance is
    # presumably disabled, so negative_prompt likely has no effect - confirm
    # against the diffusers StableDiffusion3Pipeline documentation.
    image = pipeline(
        prompt=prompt,
        negative_prompt=tags,
        num_inference_steps=4,
        guidance_scale=0.0,
        max_sequence_length=512,
    ).images[0]

    print("Image generation complete. Saving...", file=sys.stderr)

    output_folder = os.path.abspath("result/generated/general/sd3-5")
    output_filename = f"generated_image_{date}.png"
    output_filepath = os.path.join(output_folder, output_filename)

    # Create the output folder if needed; exist_ok avoids the race between
    # checking for the folder and creating it.
    os.makedirs(output_folder, exist_ok=True)
    image.save(output_filepath)

    print(f"Image saved to {output_filepath}", file=sys.stderr)

    # Encode the saved image as a base64 string
    with open(output_filepath, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode('utf-8')

    return encoded_image
102+
103+
def get_gpu_memory():
    """Return the free memory of the GPU that has the most memory available.

    Returns:
        The largest ``memoryFree`` value (reported in MB) among the GPUs
        listed by ``GPUtil.getGPUs()``, or ``None`` when no GPU is detected.
    """
    gpus = GPUtil.getGPUs()

    if not gpus:
        print("No GPUs detected.", file=sys.stderr)
        return None

    # Return the amount of free memory, not the GPU id: main() compares the
    # result against a memory threshold, and a GPU id (0, 1, ...) would never
    # reach it.
    free_memory = max(gpu.memoryFree for gpu in gpus)

    print(f"Detected GPU with {free_memory} MB free memory.", file=sys.stderr)
    return free_memory
117+
118+
def main():
    """Parse the command-line arguments, generate an image and print it.

    Expects exactly three arguments after the script name: the prompt, the
    tags (used as negative prompt) and a date string. The full model is used
    only when get_gpu_memory() returns a value of at least 8192; in every
    other case, including when it returns None, the quantized model is used.
    The base64-encoded image is printed to stdout.
    """
    # Exactly three arguments must follow the script name
    cli_args = sys.argv[1:]
    if len(cli_args) != 3:
        print("Usage: python generate_sd3-5.py <prompt> <tags> <date>", file=sys.stderr)
        sys.exit(1)

    # Arguments are passed from Java in this order
    prompt, tags, date = cli_args

    # Decide which model to use from the value reported by get_gpu_memory()
    vram = get_gpu_memory()
    if vram is not None and vram >= 8192:
        print("Running the full model...", file=sys.stderr)
        generate = use_full_model
    else:
        print("Running the quantized model...", file=sys.stderr)
        generate = use_quantized_model

    image = generate(prompt=prompt, tags=tags, date=date)

    # Print image as string
    print(image)


if __name__ == "__main__":
    main()

src/main/python/it/unimol/diffusiontool/generate_sd3.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
def main():
1111
# Check if the correct number of command-line arguments is provided
1212
if len(sys.argv) != 4:
13-
print("Usage: python generate_sd3.py <prompt> <tags> <date>")
13+
print("Usage: python generate_sd3.py <prompt> <tags> <date>", file=sys.stderr)
1414
sys.exit(1)
1515

1616
# Get the prompt and date from the command-line arguments passed from Java
@@ -41,7 +41,7 @@ def main():
4141
num_inference_steps=25,
4242
guidance_scale=6.5
4343
).images[0]
44-
output_folder = os.path.abspath("result/generated/sd3")
44+
output_folder = os.path.abspath("result/generated/general/sd3")
4545
output_filename = f"generated_image_{date}.png"
4646
output_filepath = os.path.join(output_folder, output_filename)
4747

0 commit comments

Comments
 (0)