python SD sample

gedoensmax · gedoensmax · commit 3b88b54b4b53 · 2025-09-18T18:37:52.000+02:00
diff --git a/python/models/stable_difusion/README.md b/python/models/stable_difusion/README.md
@@ -0,0 +1,77 @@
+# Stable Diffusion 3 Medium ONNX Export Guide
+
+This guide provides the steps to convert the `stabilityai/stable-diffusion-3-medium` model to the ONNX format for use with the CUDA execution provider. It also includes a step to address an issue with mixed-precision nodes that may occur during the conversion process.
+
+## 1. Prerequisites and Installation
+
+Install the required Python packages using the following `requirements.txt` content:
+
+```
+numpy
+torch
+--extra-index-url https://download.pytorch.org/whl/cu129
+optimum[onnxruntime]
+onnxruntime-gpu
+diffusers
+sentencepiece
+transformers
+```
+
+You can save this to a `requirements.txt` file and install it with:
+```bash
+pip install -r requirements.txt
+```
+This will install `onnxruntime-gpu` with the CUDA execution provider, which is necessary for model conversion.
+
+## 2. Model Conversion
+
+Run the following command to export the model to ONNX format. This command uses `optimum-cli` to convert the model to half-precision (`fp16`) on a CUDA device.
+
+```bash
+optimum-cli export onnx --model stabilityai/stable-diffusion-3-medium --dtype fp16 --device cuda fp16_optimum
+```
+
+This will download the model and convert it into multiple ONNX files in the `fp16_optimum` directory.
+
+## 3. Correcting FP64 Nodes
+
+The PyTorch model may contain some `fp64` nodes, which are exported as-is during the conversion. If you encounter issues with these nodes, you can use the provided `replace_fp64.py` script to replace them with `fp32` nodes. The script processes every `.onnx` file in the input directory, saves the corrected models to the output directory, and copies the remaining files (except `.onnx_data` files, which are regenerated on save) unchanged.
+
+```bash
+python replace_fp64.py fp16_optimum corrected_model
+```
+This will create a `corrected_model` directory with the FP64 nodes converted to FP32.
+
+## 4. Using a Custom ONNX Runtime
+
+If you have a locally built ONNX Runtime wheel with specific optimizations (e.g., for `NvTensorRTRTXExecutionProvider`), install it in your environment before running inference. Be sure to first uninstall the default `onnxruntime` / `onnxruntime-gpu` packages installed via `requirements.txt` to avoid conflicts.
+
+## 5. Running Inference
+
+To run inference with the converted ONNX model, use the provided `run_sd.py` script. This script loads the ONNX model and generates an image based on a prompt.
+
+Here is an example command to run the script:
+```bash
+python run_sd.py --model_path corrected_model --prompt "A beautiful landscape painting of a waterfall in a lush forest" --output_dir generated_images
+```
+
+### Command-line Arguments
+
+The `run_sd.py` script accepts several arguments to customize the image generation process:
+
+*   `--model_path`: Path to the directory containing the ONNX models (e.g., `corrected_model`). (Required)
+*   `--prompt`: The text prompt to generate the image from.
+*   `--negative_prompt`: A prompt describing what the generated image should *not* contain.
+*   `--height`: The height of the generated image (default: 512).
+*   `--width`: The width of the generated image (default: 512).
+*   `--steps`: The number of inference steps (default: 50).
+*   `--guidance_scale`: Guidance scale for the prompt (default: 7.5).
+*   `--seed`: A seed for reproducibility.
+*   `--output_dir`: The directory to save the generated images (default: `generated_images`).
+*   `--execution_provider`: The ONNX Runtime execution provider to use (default: `NvTensorRTRTXExecutionProvider`).
+
+For a full list of arguments, you can run:
+```bash
+python run_sd.py --help
+```
+
+The generated image will be saved in the specified output directory. 
diff --git a/python/models/stable_difusion/replace_fp64.py b/python/models/stable_difusion/replace_fp64.py
@@ -0,0 +1,117 @@
+import onnx
+from onnx import numpy_helper
+import numpy as np
+import argparse
+import os
+import shutil
+import logging
+
+# Setup logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+def convert_fp64_to_fp32(model_path: str, output_path: str):
+    """
+    Loads an ONNX model, converts all float64 tensors and casts to float32,
+    and saves the modified model.
+    """
+    logging.info(f"Loading model from: {model_path}")
+    model = onnx.load(model_path)
+    
+    # 1. Convert all initializers from float64 to float32
+    converted_initializers = 0
+    new_initializers = []
+    for initializer in model.graph.initializer:
+        if initializer.data_type == onnx.TensorProto.DOUBLE:
+            initializer_np = numpy_helper.to_array(initializer)
+            initializer_fp32 = initializer_np.astype(np.float32)
+            new_initializer = numpy_helper.from_array(initializer_fp32, name=initializer.name)
+            new_initializers.append(new_initializer)
+            converted_initializers += 1
+        else:
+            new_initializers.append(initializer)
+
+    model.graph.ClearField("initializer")
+    model.graph.initializer.extend(new_initializers)
+    
+    if converted_initializers > 0:
+        logging.info(f"Converted {converted_initializers} initializers from FP64 to FP32.")
+
+    # 2. Convert nodes
+    converted_casts = 0
+    converted_constants = 0
+    for node in model.graph.node:
+        if node.op_type == 'Constant':
+            for attr in node.attribute:
+                if attr.name == 'value' and attr.t.data_type == onnx.TensorProto.DOUBLE:
+                    attr.t.data_type = onnx.TensorProto.FLOAT
+                    fp64_array = np.frombuffer(attr.t.raw_data, dtype=np.float64)
+                    fp32_array = fp64_array.astype(np.float32)
+                    attr.t.raw_data = fp32_array.tobytes()
+                    converted_constants += 1
+        elif node.op_type == 'Cast':
+            for attr in node.attribute:
+                if attr.name == 'to' and attr.i == onnx.TensorProto.DOUBLE:
+                    attr.i = onnx.TensorProto.FLOAT
+                    converted_casts += 1
+    
+    if converted_casts > 0:
+        logging.info(f"Modified {converted_casts} Cast operators from FP64 to FP32.")
+    if converted_constants > 0:
+        logging.info(f"Modified {converted_constants} Constant operators from FP64 to FP32.")
+        
+    # 3. Convert all graph inputs, outputs, and value_info from float64 to float32
+    converted_tensors = 0
+    for tensor in list(model.graph.value_info) + list(model.graph.input) + list(model.graph.output):
+        if tensor.type.tensor_type.elem_type == onnx.TensorProto.DOUBLE:
+            tensor.type.tensor_type.elem_type = onnx.TensorProto.FLOAT
+            converted_tensors += 1
+    
+    if converted_tensors > 0:
+        logging.info(f"Converted {converted_tensors} tensor definitions from FP64 to FP32.")
+    
+    # 4. Save the modified model
+    logging.info(f"Saving modified model to: {output_path}")
+    onnx.save(model, output_path, save_as_external_data=True)
+    logging.info("Conversion complete.")
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description="Convert ONNX models in a directory from float64 to float32 precision."
+    )
+    parser.add_argument("input_dir", type=str, help="Directory containing the input ONNX models.")
+    parser.add_argument("output_dir", type=str, help="Directory where the converted models will be saved.")
+    args = parser.parse_args()
+    
+    input_dir = args.input_dir
+    output_dir = args.output_dir
+
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+        logging.info(f"Created output directory: {output_dir}")
+
+    for root, _, files in os.walk(input_dir):
+        # Replicate directory structure in the output directory
+        relative_path = os.path.relpath(root, input_dir)
+        output_subdir = os.path.join(output_dir, relative_path)
+        if not os.path.exists(output_subdir):
+            os.makedirs(output_subdir)
+
+        for filename in files:
+            input_path = os.path.join(root, filename)
+            output_path = os.path.join(output_subdir, filename)
+
+            if filename.endswith(".onnx"): 
+                logging.info("-" * 50)
+                logging.info(f"Processing ONNX file: {input_path}")
+                try:
+                    convert_fp64_to_fp32(input_path, output_path)
+                except Exception as e:
+                    logging.error(f"Failed to convert {input_path}: {e}")
+                logging.info("-" * 50)
+            elif filename.endswith(".onnx_data"):
+                # Skip copying .onnx_data files as new ones will be created on save
+                continue
+            else:
+                logging.info(f"Copying file: {input_path} to {output_path}")
+                shutil.copy2(input_path, output_path)
diff --git a/python/models/stable_difusion/requirements.txt b/python/models/stable_difusion/requirements.txt
@@ -0,0 +1,7 @@
+numpy
+torch
+--extra-index-url https://download.pytorch.org/whl/cu129
+optimum[onnxruntime]
+onnxruntime-gpu
+diffusers
+sentencepiece
diff --git a/python/models/stable_difusion/run_sd.py b/python/models/stable_difusion/run_sd.py