57 changes: 57 additions & 0 deletions .idea/inspectionProfiles/Project_Default.xml


12 changes: 10 additions & 2 deletions README.md
@@ -17,6 +17,7 @@ The last two are the essential part of the project and they act as GUI for the P
To compile and run the software, you need the following prerequisites:
- Open Java Development Kit (OpenJDK) 17 or above
- Apache Maven (version 3.6.3 or later is recommended)
- Hugging Face CLI

You also MUST create a Python virtual environment in your home directory, inside a folder named 'venv',
with the packages listed in *requirements.txt*.
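
A minimal sketch of that setup, assuming a Unix-like shell and that you run it from the repository root:
```shell
python3 -m venv ~/venv
source ~/venv/bin/activate
pip install -r requirements.txt
```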
@@ -51,14 +52,20 @@ Then, if you also want a runnable .jar archive, type:
```
With these commands, a new folder named 'target' is created containing the compiled project as well as the executable file.

## Unlock Stable Diffusion 3
The newest generative model is currently gated, so first you need to sign up [here](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers).
## Usage with Stable Diffusion 3 and 3.5
The newest generative models are gated, so you first need to request access [here](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers)
and [here](https://huggingface.co/stabilityai/stable-diffusion-3.5-large-turbo).
Then generate a [token](https://huggingface.co/settings/tokens) under your account settings and use it to log in:
```shell
huggingface-cli login
```
Enter your credentials first, then paste the token when prompted.
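
If you prefer a non-interactive login (e.g. on a headless machine), the CLI also accepts the token as a flag; this sketch assumes your token is stored in the `HF_TOKEN` environment variable:
```shell
huggingface-cli login --token "$HF_TOKEN"
```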

It is recommended to pre-download the quantized T5 text encoder used by the Stable Diffusion 3.5 pipeline, to avoid long waiting times while using the app:
```shell
huggingface-cli download diffusers/t5-nf4
```
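
Downloads are cached by the Hugging Face hub library (by default under `~/.cache/huggingface/hub`), so this step only needs to run once.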

## Screenshots
### Home
![home-view](https://github.com/user-attachments/assets/50052e5a-c8a4-4eaa-b39f-ae537c81fb9f)
@@ -84,6 +91,7 @@ The project utilizes Stable Diffusion's generative AI pipelines for image genera
+ [stable-diffusion-2-1](https://huggingface.co/stabilityai/stable-diffusion-2-1)
+ [stable-diffusion-2-1-base](https://huggingface.co/stabilityai/stable-diffusion-2-1-base)
+ [stable-diffusion-3-medium](https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers)
+ [stable-diffusion-3.5-large-turbo](https://huggingface.co/stabilityai/stable-diffusion-3.5-large-turbo)
+ [sd-x2-latent-upscaler](https://huggingface.co/stabilityai/sd-x2-latent-upscaler)
+ [pixel-art-style](https://huggingface.co/kohbanye/pixel-art-style)
+ [pixel-art-xl](https://huggingface.co/nerijs/pixel-art-xl)
14 changes: 9 additions & 5 deletions requirements.txt
@@ -1,7 +1,7 @@
accelerate==0.26.1
certifi==2024.7.4
charset-normalizer==3.3.2
diffusers==0.29.2
diffusers==0.32.2
filelock==3.13.1
fsspec==2023.12.2
huggingface-hub==0.24.3
@@ -16,7 +16,7 @@ nvidia-cublas-cu12==12.1.3.1
nvidia-cuda-cupti-cu12==12.1.105
nvidia-cuda-nvrtc-cu12==12.1.105
nvidia-cuda-runtime-cu12==12.1.105
nvidia-cudnn-cu12==8.9.2.26
nvidia-cudnn-cu12==9.1.0.70
nvidia-cufft-cu12==11.0.2.54
nvidia-curand-cu12==10.3.2.106
nvidia-cusolver-cu12==11.4.5.107
@@ -34,13 +34,17 @@ regex==2023.12.25
requests==2.32.2
safetensors==0.4.1
sympy==1.12
tokenizers==0.15.0
tokenizers==0.21.0
torch==2.4.0
torchvision==0.16.2
torchvision==0.19.0
tqdm==4.66.3
transformers==4.48.0
triton==2.1.0
triton==3.0.0
typing_extensions==4.9.0
urllib3==2.2.2
zipp==3.19.1
sentencepiece==0.2.0
bitsandbytes==0.45.4
GPUtil==1.4.0
scipy==1.11.2
protobuf==6.30.2
@@ -55,7 +55,7 @@ public User getUser() {
}

private void setVersion() {
this.version = "1.1.0";
this.version = "1.2.0";
}

public void setRootNode(Parent rootNode) {
@@ -254,6 +254,7 @@ private void initGenerateView() {
styleComboBox.getItems().addAll(
"Stable Diffusion 2.1",
"Stable Diffusion 3",
"Stable Diffusion 3.5",
"Pixel Art"
);
styleComboBox.setPromptText(styleComboBox.getItems().get(0));
@@ -1149,6 +1150,7 @@ public String callPyScript(String prompt, String tags, String date, String path)
String inputLine;
while ((inputLine = inputbufferedReader.readLine()) != null) {
output.append(inputLine).append("\n");
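                // Mirror each line of the script's output to the console for live progress feedback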
System.out.println(inputLine.trim());
}
}

@@ -1225,6 +1227,8 @@ public File findPyScript() {
fileName = includeUpscaling ? "generate_upscale.py" : "generate_sd2-1.py";
else if (styleComboBox.getValue().equals("Stable Diffusion 3"))
fileName = "generate_sd3.py";
else if (styleComboBox.getValue().equals("Stable Diffusion 3.5"))
fileName = "generate_sd3-5.py";
else
fileName = "generate_pixart.py";
break;
@@ -1343,7 +1347,7 @@ public void throwGenericAlert() {
genAlert.setHeaderText("ERROR: Upscaling Failure");
genAlert.setContentText("Something went wrong in the image upscaling. Please retry");
}
genAlert.showAndWait();
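        // Show the alert on the JavaFX Application Thread, since this method may be called from a background thread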
Platform.runLater(genAlert::showAndWait);
}

private int checkAvailableSpace() {
4 changes: 2 additions & 2 deletions src/main/python/it/unimol/diffusiontool/generate_sd2-1.py
@@ -10,7 +10,7 @@
def main():
# Check if the correct number of command-line arguments is provided
if len(sys.argv) != 4:
print("Usage: python generate_sd2-1.py <prompt> <tags> <date>")
print("Usage: python generate_sd2-1.py <prompt> <tags> <date>", file=sys.stderr)
sys.exit(1)

# Get the prompt and date from the command-line arguments passed from Java
@@ -29,7 +29,7 @@ def main():
# Process the prompt and set the output path
with torch.cuda.amp.autocast():
image = pipe(prompt=prompt, negative_prompt=tags, num_inference_steps=25).images[0]
output_folder = os.path.abspath("result/generated/sd2-1")
output_folder = os.path.abspath("result/generated/general/sd2-1")
output_filename = f"generated_image_{date}.png"
output_filepath = os.path.join(output_folder, output_filename)

157 changes: 157 additions & 0 deletions src/main/python/it/unimol/diffusiontool/generate_sd3-5.py
@@ -0,0 +1,157 @@
import sys
from diffusers import BitsAndBytesConfig, SD3Transformer2DModel, StableDiffusion3Pipeline
from transformers import T5EncoderModel
import torch
import GPUtil
import os
import base64

def use_full_model(prompt, tags, date):
pipe = StableDiffusion3Pipeline.from_pretrained(
"stabilityai/stable-diffusion-3.5-large-turbo",
torch_dtype=torch.bfloat16
)
pipe = pipe.to("cuda")

print("Model loaded successfully. Generating image...", file=sys.stderr)

image = pipe(
prompt=prompt,
negative_prompt=tags,
num_inference_steps=4,
guidance_scale=0.0,
).images[0]

print("Image generation completed. Saving the image...", file=sys.stderr)

output_folder = os.path.abspath("result/generated/general/sd3-5")
output_filename = f"generated_image_{date}.png"
output_filepath = os.path.join(output_folder, output_filename)

    # Create the output folder if it does not exist yet, then save the image
    os.makedirs(output_folder, exist_ok=True)
    image.save(output_filepath)

print(f"Image saved to {output_filepath}", file=sys.stderr)

# Encode the image as a base64 string
with open(output_filepath, "rb") as image_file:
encoded_image = base64.b64encode(image_file.read()).decode('utf-8')

return encoded_image

def use_quantized_model(prompt, tags, date):
model_id = "stabilityai/stable-diffusion-3.5-large-turbo"

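    # NF4 4-bit quantization shrinks the transformer so it fits in limited VRAM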
nf4_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch.bfloat16
)
model_nf4 = SD3Transformer2DModel.from_pretrained(
model_id,
subfolder="transformer",
quantization_config=nf4_config,
torch_dtype=torch.bfloat16
)

print("Quantized model loaded successfully.", file=sys.stderr)

t5_nf4 = T5EncoderModel.from_pretrained("diffusers/t5-nf4", torch_dtype=torch.bfloat16)

print("T5 Encoder loaded successfully.", file=sys.stderr)

pipeline = StableDiffusion3Pipeline.from_pretrained(
model_id,
transformer=model_nf4,
text_encoder_3=t5_nf4,
torch_dtype=torch.bfloat16
)
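    # Offload idle submodules to the CPU to reduce peak VRAM usage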
pipeline.enable_model_cpu_offload()

print("Pipeline initialized. Generating image...", file=sys.stderr)

image = pipeline(
prompt=prompt,
negative_prompt=tags,
num_inference_steps=4,
guidance_scale=0.0,
max_sequence_length=512,
).images[0]

print("Image generation complete. Saving...", file=sys.stderr)

output_folder = os.path.abspath("result/generated/general/sd3-5")
output_filename = f"generated_image_{date}.png"
output_filepath = os.path.join(output_folder, output_filename)

    # Create the output folder if it does not exist yet, then save the image
    os.makedirs(output_folder, exist_ok=True)
    image.save(output_filepath)

print(f"Image saved to {output_filepath}", file=sys.stderr)

# Encode the image as a base64 string
with open(output_filepath, "rb") as image_file:
encoded_image = base64.b64encode(image_file.read()).decode('utf-8')

return encoded_image

def get_gpu_memory():
gpus = GPUtil.getGPUs()

if not gpus:
print("No GPUs detected.", file=sys.stderr)
return None

all_memory = {gpu.id: gpu.memoryFree for gpu in gpus}

    # Pick the GPU with the most free memory and return its free VRAM in MB
    # (returning the GPU id here would make the caller compare an id against a memory threshold)
    best_gpu = max(all_memory, key=all_memory.get)

    print(f"Detected GPU {best_gpu} with {all_memory[best_gpu]} MB free memory.", file=sys.stderr)
    return all_memory[best_gpu]

def main():
# Check if the correct number of command-line arguments is provided
if len(sys.argv) != 4:
print("Usage: python generate_sd3-5.py <prompt> <tags> <date>", file=sys.stderr)
sys.exit(1)

# Get the prompt and date from the command-line arguments passed from Java
prompt = sys.argv[1]
tags = sys.argv[2]
date = sys.argv[3]

# Get available video memory and decide which model to use
vram = get_gpu_memory()
    # The full model needs roughly 8 GB of free VRAM; otherwise fall back to the quantized one
    if vram is not None and vram >= 8192:
        print("Running the full model...", file=sys.stderr)
        image = use_full_model(
            prompt=prompt,
            tags=tags,
            date=date
        )
    else:
        print("Running the quantized model...", file=sys.stderr)
        image = use_quantized_model(
            prompt=prompt,
            tags=tags,
            date=date
        )

    # Print the base64-encoded image to stdout so the Java caller can read it
print(image)

if __name__ == "__main__":
main()
4 changes: 2 additions & 2 deletions src/main/python/it/unimol/diffusiontool/generate_sd3.py
@@ -10,7 +10,7 @@
def main():
# Check if the correct number of command-line arguments is provided
if len(sys.argv) != 4:
print("Usage: python generate_sd3.py <prompt> <tags> <date>")
print("Usage: python generate_sd3.py <prompt> <tags> <date>", file=sys.stderr)
sys.exit(1)

# Get the prompt and date from the command-line arguments passed from Java
@@ -41,7 +41,7 @@ def main():
num_inference_steps=25,
guidance_scale=6.5
).images[0]
output_folder = os.path.abspath("result/generated/sd3")
output_folder = os.path.abspath("result/generated/general/sd3")
output_filename = f"generated_image_{date}.png"
output_filepath = os.path.join(output_folder, output_filename)
