Merge pull request #18 from ShyVortex/dev-sd3.5

ShyVortex · web-flow · commit 4e75562dbb00 · 2025-04-17T11:53:59.000+02:00
Add: stable-diffusion-3.5-turbo pipeline integration
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
diff --git a/README.md b/README.md
@@ -17,6 +17,7 @@ The last two are the essential part of the project and they act as GUI for the P
 In order to compile and run the software, it is required that you have the following prerequisites:
 - Open Java Development Kit (OpenJDK) 17 or above
 - Apache Maven (at least version 3.6.3 is recommended)
+- Hugging Face CLI
  
 You also MUST install a Python virtual environment in your home directory, inside a folder named 'venv',
 with the packages listed in *requirements*.
@@ -51,14 +52,20 @@ Then, if you also want a runnable .jar archive, type:
  ```
 With these commands, a new folder named 'target' is created containing the compiled project as well as the executable file.
 
-## Unlock Stable Diffusion 3
-The newest generative model is currently gated, so first you need to sign up [here](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers).  
+## Usage with Stable Diffusion 3 and 3.5
+The newest generative models are currently gated, so first you need to sign up [here](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers) 
+and [here](https://huggingface.co/stabilityai/stable-diffusion-3.5-large-turbo).
 Proceed to generate a [token](https://huggingface.co/settings/tokens) under your account settings which you will use to login with:
  ```shell
  huggingface-cli login
  ```
 Enter your credentials first, then paste the token when prompted.
 
+It is recommended to pre-download the quantized T5 text encoder used by the Stable Diffusion 3.5 pipeline to avoid long waiting times while using the app:
+ ```shell
+ huggingface-cli download diffusers/t5-nf4
+ ```
+
 ## Screenshots
 ### Home
 ![home-view](https://github.com/user-attachments/assets/50052e5a-c8a4-4eaa-b39f-ae537c81fb9f)
@@ -84,6 +91,7 @@ The project utilizes Stable Diffusion's generative AI pipelines for image genera
 + [stable-diffusion-2-1](https://huggingface.co/stabilityai/stable-diffusion-2-1)
 + [stable-diffusion-2-1-base](https://huggingface.co/stabilityai/stable-diffusion-2-1-base)
 + [stable-diffusion-3-medium](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers)
++ [stable-diffusion-3.5-large-turbo](https://huggingface.co/stabilityai/stable-diffusion-3.5-large-turbo)
 + [sd-x2-latent-upscaler](https://huggingface.co/stabilityai/sd-x2-latent-upscaler)
 + [pixel-art-style](https://huggingface.co/kohbanye/pixel-art-style)
 + [pixel-art-xl](https://huggingface.co/nerijs/pixel-art-xl)
diff --git a/requirements.txt b/requirements.txt
@@ -1,7 +1,7 @@
 accelerate==0.26.1
 certifi==2024.7.4
 charset-normalizer==3.3.2
-diffusers==0.29.2
+diffusers==0.32.2
 filelock==3.13.1
 fsspec==2023.12.2
 huggingface-hub==0.24.3
@@ -16,7 +16,7 @@ nvidia-cublas-cu12==12.1.3.1
 nvidia-cuda-cupti-cu12==12.1.105
 nvidia-cuda-nvrtc-cu12==12.1.105
 nvidia-cuda-runtime-cu12==12.1.105
-nvidia-cudnn-cu12==8.9.2.26
+nvidia-cudnn-cu12==9.1.0.70
 nvidia-cufft-cu12==11.0.2.54
 nvidia-curand-cu12==10.3.2.106
 nvidia-cusolver-cu12==11.4.5.107
@@ -34,13 +34,17 @@ regex==2023.12.25
 requests==2.32.2
 safetensors==0.4.1
 sympy==1.12
-tokenizers==0.15.0
+tokenizers==0.21.0
 torch==2.4.0
-torchvision==0.16.2
+torchvision==0.19.0
 tqdm==4.66.3
 transformers==4.48.0
-triton==2.1.0
+triton==3.0.0
 typing_extensions==4.9.0
 urllib3==2.2.2
 zipp==3.19.1
 sentencepiece==0.2.0
+bitsandbytes==0.45.4
+GPUtil==1.4.0
+scipy==1.11.2
+protobuf==6.30.2
diff --git a/src/main/java/it/unimol/diffusiontool/application/DiffusionApplication.java b/src/main/java/it/unimol/diffusiontool/application/DiffusionApplication.java
@@ -55,7 +55,7 @@ public User getUser() {
     }
 
     private void setVersion() {
-        this.version = "1.1.0";
+        this.version = "1.2.0";
     }
 
     public void setRootNode(Parent rootNode) {
diff --git a/src/main/java/it/unimol/diffusiontool/controller/DiffusionController.java b/src/main/java/it/unimol/diffusiontool/controller/DiffusionController.java
@@ -254,6 +254,7 @@ private void initGenerateView() {
         styleComboBox.getItems().addAll(
                 "Stable Diffusion 2.1",
                 "Stable Diffusion 3",
+                "Stable Diffusion 3.5",
                 "Pixel Art"
         );
         styleComboBox.setPromptText(styleComboBox.getItems().get(0));
@@ -1149,6 +1150,7 @@ public String callPyScript(String prompt, String tags, String date, String path)
             String inputLine;
             while ((inputLine = inputbufferedReader.readLine()) != null) {
                 output.append(inputLine).append("\n");
+                System.out.println(inputLine.trim());
             }
         }
 
@@ -1225,6 +1227,8 @@ public File findPyScript() {
                     fileName = includeUpscaling ? "generate_upscale.py" : "generate_sd2-1.py";
                 else if (styleComboBox.getValue().equals("Stable Diffusion 3"))
                     fileName = "generate_sd3.py";
+                else if (styleComboBox.getValue().equals("Stable Diffusion 3.5"))
+                    fileName = "generate_sd3-5.py";
                 else
                     fileName = "generate_pixart.py";
                 break;
@@ -1343,7 +1347,7 @@ public void throwGenericAlert() {
             genAlert.setHeaderText("ERROR: Upscaling Failure");
             genAlert.setContentText("Something went wrong in the image upscaling. Please retry");
         }
-        genAlert.showAndWait();
+        Platform.runLater(genAlert::showAndWait);
     }
 
     private int checkAvailableSpace() {
diff --git a/src/main/python/it/unimol/diffusiontool/generate_sd2-1.py b/src/main/python/it/unimol/diffusiontool/generate_sd2-1.py
@@ -10,7 +10,7 @@
 def main():
     # Check if the correct number of command-line arguments is provided
     if len(sys.argv) != 4:
-        print("Usage: python generate_sd2-1.py <prompt> <tags> <date>")
+        print("Usage: python generate_sd2-1.py <prompt> <tags> <date>", file=sys.stderr)
         sys.exit(1)
 
     # Get the prompt and date from the command-line arguments passed from Java
@@ -29,7 +29,7 @@ def main():
     # Process the prompt and set the output path
     with torch.cuda.amp.autocast():
         image = pipe(prompt=prompt, negative_prompt=tags, num_inference_steps=25).images[0]
-    output_folder = os.path.abspath("result/generated/sd2-1")
+    output_folder = os.path.abspath("result/generated/general/sd2-1")
     output_filename = f"generated_image_{date}.png"
     output_filepath = os.path.join(output_folder, output_filename)
 
diff --git a/src/main/python/it/unimol/diffusiontool/generate_sd3-5.py b/src/main/python/it/unimol/diffusiontool/generate_sd3-5.py
@@ -0,0 +1,157 @@
+import sys
+from diffusers import BitsAndBytesConfig, SD3Transformer2DModel
+from diffusers import StableDiffusion3Pipeline
+from transformers import T5EncoderModel
+import torch
+import GPUtil
+import os
+import base64
+
+def use_full_model(prompt, tags, date):
+    pipe = StableDiffusion3Pipeline.from_pretrained(
+        "stabilityai/stable-diffusion-3.5-large-turbo",
+        torch_dtype=torch.bfloat16
+    )
+    pipe = pipe.to("cuda")
+
+    print("Model loaded successfully. Generating image...", file=sys.stderr)
+
+    image = pipe(
+        prompt=prompt,
+        negative_prompt=tags,
+        num_inference_steps=4,
+        guidance_scale=0.0,
+    ).images[0]
+
+    print("Image generation completed. Saving the image...", file=sys.stderr)
+
+    output_folder = os.path.abspath("result/generated/general/sd3-5")
+    output_filename = f"generated_image_{date}.png"
+    output_filepath = os.path.join(output_folder, output_filename)
+
+    # Check if the output folder exists, and create it if not, then save the image
+    if not os.path.exists(output_folder):
+        os.makedirs(output_folder)
+    image.save(output_filepath)
+
+    print(f"Image saved to {output_filepath}", file=sys.stderr)
+
+    # Encode the image as a base64 string
+    with open(output_filepath, "rb") as image_file:
+        encoded_image = base64.b64encode(image_file.read()).decode('utf-8')
+
+    return encoded_image
+
+def use_quantized_model(prompt, tags, date):
+    model_id = "stabilityai/stable-diffusion-3.5-large-turbo"
+
+    nf4_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_compute_dtype=torch.bfloat16
+    )
+    model_nf4 = SD3Transformer2DModel.from_pretrained(
+        model_id,
+        subfolder="transformer",
+        quantization_config=nf4_config,
+        torch_dtype=torch.bfloat16
+    )
+
+    print("Quantized model loaded successfully.", file=sys.stderr)
+
+    t5_nf4 = T5EncoderModel.from_pretrained("diffusers/t5-nf4", torch_dtype=torch.bfloat16)
+
+    print("T5 Encoder loaded successfully.", file=sys.stderr)
+
+    pipeline = StableDiffusion3Pipeline.from_pretrained(
+        model_id,
+        transformer=model_nf4,
+        text_encoder_3=t5_nf4,
+        torch_dtype=torch.bfloat16
+    )
+    pipeline.enable_model_cpu_offload()
+
+    print("Pipeline initialized. Generating image...", file=sys.stderr)
+
+    image = pipeline(
+        prompt=prompt,
+        negative_prompt=tags,
+        num_inference_steps=4,
+        guidance_scale=0.0,
+        max_sequence_length=512,
+    ).images[0]
+
+    print("Image generation complete. Saving...", file=sys.stderr)
+
+    output_folder = os.path.abspath("result/generated/general/sd3-5")
+    output_filename = f"generated_image_{date}.png"
+    output_filepath = os.path.join(output_folder, output_filename)
+
+    # Check if the output folder exists, and create it if not, then save the image
+    if not os.path.exists(output_folder):
+        os.makedirs(output_folder)
+    image.save(output_filepath)
+
+    print(f"Image saved to {output_filepath}", file=sys.stderr)
+
+    # Encode the image as a base64 string
+    with open(output_filepath, "rb") as image_file:
+        encoded_image = base64.b64encode(image_file.read()).decode('utf-8')
+
+    return encoded_image
+
+def get_gpu_memory():
+    gpus = GPUtil.getGPUs()
+
+    if not gpus:
+        print("No GPUs detected.", file=sys.stderr)
+        return None
+
+    all_memory = {gpu.id: gpu.memoryFree for gpu in gpus}
+
+    # Get the GPU with the most free memory
+    gpu_memory = max(all_memory, key=all_memory.get)
+
+    print(f"Detected GPU with {all_memory[gpu_memory]} MB free memory.", file=sys.stderr)
+    return gpu_memory
+
+def main():
+    # Check if the correct number of command-line arguments is provided
+    if len(sys.argv) != 4:
+        print("Usage: python generate_sd3-5.py <prompt> <tags> <date>", file=sys.stderr)
+        sys.exit(1)
+
+    # Get the prompt and date from the command-line arguments passed from Java
+    prompt = sys.argv[1]
+    tags = sys.argv[2]
+    date = sys.argv[3]
+
+    # Get available video memory and decide which model to use
+    vram = get_gpu_memory()
+    if vram is None:
+        print("Running the quantized model...", file=sys.stderr)
+        image = use_quantized_model(
+            prompt=prompt,
+            tags=tags,
+            date=date
+        )
+    elif vram >= 8192:
+        print("Running the full model...", file=sys.stderr)
+        image = use_full_model(
+            prompt=prompt,
+            tags=tags,
+            date=date
+        )
+    else:
+        print("Running the quantized model...", file=sys.stderr)
+        image = use_quantized_model(
+            prompt=prompt,
+            tags=tags,
+            date=date
+        )
+
+    # Print image as string
+    print(image)
+
+# Run main() only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()
diff --git a/src/main/python/it/unimol/diffusiontool/generate_sd3.py b/src/main/python/it/unimol/diffusiontool/generate_sd3.py
@@ -10,7 +10,7 @@
 def main():
     # Check if the correct number of command-line arguments is provided
     if len(sys.argv) != 4:
-        print("Usage: python generate_sd3.py <prompt> <tags> <date>")
+        print("Usage: python generate_sd3.py <prompt> <tags> <date>", file=sys.stderr)
         sys.exit(1)
 
     # Get the prompt and date from the command-line arguments passed from Java
@@ -41,7 +41,7 @@ def main():
             num_inference_steps=25,
             guidance_scale=6.5
         ).images[0]
-    output_folder = os.path.abspath("result/generated/sd3")
+    output_folder = os.path.abspath("result/generated/general/sd3")
     output_filename = f"generated_image_{date}.png"
     output_filepath = os.path.join(output_folder, output_filename)
 

Original file line number	Diff line number	Diff line change
`@@ -55,7 +55,7 @@ public User getUser() {`
`55`	`55`	`}`
`56`	`56`
`57`	`57`	`private void setVersion() {`
`58`		`- this.version = "1.1.0";`
	`58`	`+ this.version = "1.2.0";`
`59`	`59`	`}`
`60`	`60`
`61`	`61`	`public void setRootNode(Parent rootNode) {`