
Commit 412f4ff

contentis, w-e-w, MorkTheOrk, eltociear, and Milly authored
Hot fix (#43)

* trt profile as one markdown
* remove todo and comment code
* updated install.py to catch old packages
* Fix typo in utilities.py speciailized -> specialized
* move available profiles into a separate column
* Correct "Export Default Engines"
* remove yield in favour of print
* fix: installed check for onnx-graphsurgeon. Apply the package import name to `launch.is_installed()` instead of the pip package name.
* remove ui config mod
* extend readme
* fix bug when deleting engine manually
* adding resolution constraint and nvidia support guide
* fix type

---------

Co-authored-by: w-e-w <[email protected]>
Co-authored-by: Cem Moluluo <[email protected]>
Co-authored-by: Ikko Eltociear Ashimine <[email protected]>
Co-authored-by: Cem Moluluo <[email protected]>
Co-authored-by: Milly <[email protected]>
1 parent 82d52e1 commit 412f4ff

File tree

7 files changed: +81 additions, -98 deletions

README.md

Lines changed: 25 additions & 2 deletions
@@ -28,8 +28,31 @@ Happy prompting!
 
 TensorRT uses optimized engines for specific resolutions and batch sizes. You can generate as many optimized engines as desired. Types:
 
-- The “Generate Default Engines” selection adds support for resolutions between 512x512 and 768x768 for Stable Diffusion 1.5 and 768x768 to 1024x1024 for SDXL with batch sizes 1 to 4.
+- The “Export Default Engines” selection adds support for resolutions between 512x512 and 768x768 for Stable Diffusion 1.5 and 768x768 to 1024x1024 for SDXL with batch sizes 1 to 4.
 - Static engines support a single specific output resolution and batch size.
 - Dynamic engines support a range of resolutions and batch sizes, at a small cost in performance. Wider ranges will use more VRAM.
 
-Each preset can be adjusted with the “Advanced Settings” option.
+Each preset can be adjusted with the “Advanced Settings” option. More detailed instructions can be found [here](https://nvidia.custhelp.com/app/answers/detail/a_id/5487/~/tensorrt-extension-for-stable-diffusion-web-ui).
+
+### Common Issues/Limitations
+
+**HIRES FIX:** If using the hires.fix option in Automatic1111, you must build engines that match both the starting and ending resolutions. For instance, if the initial size is `512 x 512` and hires.fix upscales to `1024 x 1024`, you must either generate two engines, one at 512 and one at 1024, or generate a single dynamic engine that covers the whole range.
+Having two separate engines will heavily impact performance at the moment. Stay tuned for updates.
+
+**Resolution:** When generating images, the resolution needs to be a multiple of 64. This applies to hires.fix as well, requiring both the low-res and high-res passes to be divisible by 64.
+
+**Failing CMD arguments:**
+
+- `medvram` and `lowvram` have caused issues when compiling the engine and running it.
+- `api` has caused the `model.json` to not be updated, resulting in SD Unets not appearing after compilation.
+
+**Failing installation or TensorRT tab not appearing in UI:** This is most likely due to a failed install. To resolve it manually, use this [guide](https://github.com/NVIDIA/Stable-Diffusion-WebUI-TensorRT/issues/27#issuecomment-1767570566).
+
+## Requirements
+
+**Driver**:
+
+- Linux: >= 450.80.02
+- Windows: >= 452.39
+
+We always recommend keeping the driver up to date for system-wide performance improvements.
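The resolution rules added above are easy to get wrong in practice, so here is a minimal sketch of the two checks the README describes: dimensions must be multiples of 64, and an engine only serves requests inside its profile range. This is illustrative only, not part of the commit; the function names and ranges are made up.

```python
# Illustrative only -- not part of this commit. A minimal sketch of the
# resolution rules the README describes.

def is_valid_resolution(width: int, height: int) -> bool:
    # The extension requires both dimensions to be divisible by 64.
    return width % 64 == 0 and height % 64 == 0

def engine_covers(profile_min: int, profile_max: int, size: int) -> bool:
    # Hypothetical range check: a dynamic profile built for 512..768
    # cannot serve a 1024 hires.fix pass.
    return profile_min <= size <= profile_max

assert is_valid_resolution(512, 768)
assert not is_valid_resolution(500, 768)   # 500 is not a multiple of 64
assert not engine_covers(512, 768, 1024)   # needs a second engine or a wider range
```

This is exactly the hires.fix situation described above: a 512-to-1024 upscale needs either two engines or one dynamic engine whose range spans both sizes.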

info.md

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@ Happy prompting!
 
 TensorRT uses optimized engines for specific resolutions and batch sizes. You can generate as many optimized engines as desired. Types:
 
-- The "Generate Default Engines" selection adds support for resolutions between 512x512 and 768x768 for Stable Diffusion 1.5 and 768x768 to 1024x1024 for SDXL with batch sizes 1 to 4.
+- The "Export Default Engines" selection adds support for resolutions between 512x512 and 768x768 for Stable Diffusion 1.5 and 768x768 to 1024x1024 for SDXL with batch sizes 1 to 4.
 - Static engines support a single specific output resolution and batch size.
 - Dynamic engines support a range of resolutions and batch sizes, at a small cost in performance. Wider ranges will use more VRAM.

install.py

Lines changed: 12 additions & 11 deletions
@@ -1,30 +1,31 @@
 import launch
-from modules import shared
+from importlib_metadata import version
 
 
 def install():
+    if launch.is_installed("tensorrt"):
+        if not version("tensorrt") == "9.0.1.post11.dev4":
+            launch.run(["python","-m","pip","uninstall","-y","tensorrt"], "removing old version of tensorrt")
+
     if not launch.is_installed("tensorrt"):
         print("TensorRT is not installed! Installing...")
         launch.run_pip("install nvidia-cudnn-cu11==8.9.4.25", "nvidia-cudnn-cu11")
         launch.run_pip("install --pre --extra-index-url https://pypi.nvidia.com tensorrt==9.0.1.post11.dev4", "tensorrt", live=True)
-        launch.run(["python","-m","pip","uninstall","-y","nvidia-cudnn-cu11"],"removing nvidia-cudnn-cu11")
+        launch.run(["python","-m","pip","uninstall","-y","nvidia-cudnn-cu11"], "removing nvidia-cudnn-cu11")
+
+    if launch.is_installed("nvidia-cudnn-cu11"):
+        if version("nvidia-cudnn-cu11") == "8.9.4.25":
+            launch.run(["python","-m","pip","uninstall","-y","nvidia-cudnn-cu11"], "removing nvidia-cudnn-cu11")
 
     # Polygraphy
     if not launch.is_installed("polygraphy"):
         print("Polygraphy is not installed! Installing...")
         launch.run_pip("install polygraphy --extra-index-url https://pypi.ngc.nvidia.com", "polygraphy", live=True)
 
     # ONNX GS
-    if not launch.is_installed("onnx-graphsurgeon"):
+    if not launch.is_installed("onnx_graphsurgeon"):
         print("GS is not installed! Installing...")
         launch.run_pip("install protobuf==3.20.2", "protobuf", live=True)
         launch.run_pip('install onnx-graphsurgeon --extra-index-url https://pypi.ngc.nvidia.com', "onnx-graphsurgeon", live=True)
-
-    if shared.opts is None:
-        print("UI Config not initialized")
-        return
-
-    if "sd_unet" not in shared.opts["quicksettings_list"]:
-        shared.opts["quicksettings_list"].append("sd_unet")
-        shared.opts.save(shared.config_filename)
 
 
 install()
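Two details in this hunk are worth spelling out. First, the "catch old packages" logic pins an exact version and uninstalls stale installs before the install step runs. Second, the onnx-graphsurgeon check now passes the *import* name (`onnx_graphsurgeon`) because `launch.is_installed()` probes importability, while version metadata lookups use the pip distribution name. Below is a rough standalone sketch of the version-pinning pattern using only the standard library; the extension itself goes through A1111's `launch` helpers instead, and the `PINNED` table is illustrative.

```python
# A standalone sketch of the version-pinning pattern, assuming only the
# standard library. Not the extension's actual code.
import subprocess
import sys
from importlib.metadata import version, PackageNotFoundError

PINNED = {"tensorrt": "9.0.1.post11.dev4"}  # distribution name -> required version

def ensure_pinned(package: str, required: str) -> None:
    try:
        installed = version(package)  # metadata lookup uses the distribution name
    except PackageNotFoundError:
        installed = None
    if installed is not None and installed != required:
        # Remove the stale version so a clean pinned install can follow.
        subprocess.run(
            [sys.executable, "-m", "pip", "uninstall", "-y", package],
            check=True,
        )

for pkg, ver in PINNED.items():
    ensure_pinned(pkg, ver)
```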

model_manager.py

Lines changed: 3 additions & 1 deletion
@@ -63,7 +63,9 @@ def update(self):
             for trt_file in os.listdir(TRT_MODEL_DIR)
             if trt_file.endswith(".trt")
         ]
-        for cc, base_models in self.all_models.items():
+
+        tmp_all_models = self.all_models.copy()
+        for cc, base_models in tmp_all_models.items():
             for base_model, models in base_models.items():
                 tmp_config_list = {}
                 for model_config in models:
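The copy here is the substance of the "fix bug when deleting engine manually" item: when an engine file has been removed by hand, entries get deleted from `all_models` while it is being iterated, and mutating a dict during iteration raises a `RuntimeError` in Python. A minimal reproduction of the pattern, with hypothetical data:

```python
# Why iterate over a copy: deleting from a dict while iterating over it
# raises RuntimeError. Hypothetical data, illustrating the fix above.
models = {"cc89": ["engine_a.trt"], "cc86": []}

# for cc, engines in models.items():      # RuntimeError: dictionary changed
#     if not engines:                     # size during iteration
#         del models[cc]

for cc, engines in models.copy().items():  # snapshot the items first
    if not engines:
        del models[cc]                     # safe: mutates only the original

print(models)  # {'cc89': ['engine_a.trt']}
```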

scripts/trt.py

Lines changed: 1 addition & 1 deletion
@@ -106,7 +106,7 @@ def switch_engine(self, feed_dict):
         )
         if len(valid_models) == 0:
             raise ValueError(
-                "No valid profile found. Please go to the TensorRT tab and generate an engine with the necessary profile. Or use the default (torch) U-Net."
+                "No valid profile found. Please go to the TensorRT tab and generate an engine with the necessary profile. If using hires.fix, you need an engine for both the base and upscaled resolutions. Otherwise, use the default (torch) U-Net."
             )
 
         best = valid_models[np.argmin(distances)]
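For context on that last line, here is a toy sketch of how the closest valid profile might be picked with `np.argmin`. The ranges and distance metric are illustrative, not the extension's actual data structures.

```python
# Toy sketch of nearest-profile selection; names and metric are made up.
import numpy as np

requested = 768
profiles = [(256, 512), (512, 1024), (768, 2048)]  # hypothetical (min, max) ranges

valid = [(lo, hi) for lo, hi in profiles if lo <= requested <= hi]
if not valid:
    raise ValueError("No valid profile found. Build an engine covering this resolution.")

# e.g. distance = how far the request sits from the centre of each range
distances = [abs(requested - (lo + hi) / 2) for lo, hi in valid]
best = valid[np.argmin(distances)]
print(best)  # (512, 1024)
```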

ui_trt.py

Lines changed: 38 additions & 81 deletions
@@ -31,18 +31,6 @@ def get_version_from_model(sd_model):
     return "xl-1.0"
 
 
-class LogLevel:
-    Debug = 0
-    Info = 1
-    Warning = 2
-    Error = 3
-
-
-def log_md(logging_history, message, prefix="**[INFO]:**"):
-    logging_history += f"{prefix} {message} \n"
-    return logging_history
-
-
 def export_unet_to_trt(
     batch_min,
     batch_opt,
@@ -61,7 +49,6 @@ def export_unet_to_trt(
     preset,
     controlnet=None,
 ):
-    logging_history = ""
 
     if preset == "Default":
         (
@@ -82,10 +69,7 @@ def export_unet_to_trt(
     use_fp32 = False
     if cc_major < 7:
         use_fp32 = True
-        logging_history = log_md(
-            logging_history, "FP16 has been disabled because your GPU does not support it."
-        )
-        yield logging_history
+        print("FP16 has been disabled because your GPU does not support it.")
 
     unet_hidden_dim = shared.sd_model.model.diffusion_model.in_channels
     if unet_hidden_dim == 9:
@@ -95,10 +79,7 @@ def export_unet_to_trt(
     model_name = shared.sd_model.sd_checkpoint_info.model_name
     onnx_filename, onnx_path = modelmanager.get_onnx_path(model_name, model_hash)
 
-    logging_history = log_md(
-        logging_history, f"Exporting {model_name} to TensorRT", prefix="###"
-    )
-    yield logging_history
+    print(f"Exporting {model_name} to TensorRT")
 
     timing_cache = modelmanager.get_timing_cache()
 
@@ -149,27 +130,23 @@ def export_unet_to_trt(
     print(profile)
 
     if not os.path.exists(onnx_path):
-        logging_history = log_md(logging_history, "No ONNX file found. Exporting ONNX")
-        yield logging_history
+        print("No ONNX file found. Exporting ONNX...")
+        gr.Info("No ONNX file found. Exporting ONNX... Please check the progress in the terminal.")
        export_onnx(
             onnx_path,
             modelobj,
             profile=profile,
             diable_optimizations=diable_optimizations,
         )
-        logging_history = log_md(logging_history, "Exported to ONNX.")
-        yield logging_history
+        print("Exported to ONNX.")
 
     trt_engine_filename, trt_path = modelmanager.get_trt_path(
         model_name, model_hash, profile, static_shapes
     )
 
     if not os.path.exists(trt_path) or force_export:
-        logging_history = log_md(
-            logging_history,
-            "Building TensorRT engine... This can take a while, please check the progress in the terminal.",
-        )
-        yield logging_history
+        print("Building TensorRT engine... This can take a while, please check the progress in the terminal.")
+        gr.Info("Building TensorRT engine... This can take a while, please check the progress in the terminal.")
         gc.collect()
         torch.cuda.empty_cache()
         ret = export_trt(
@@ -180,12 +157,9 @@ def export_unet_to_trt(
             use_fp16=not use_fp32,
         )
         if ret:
-            yield logging_history + "\n --- \n ## Export Failed due to unknown reason. See shell for more information. \n"
-            return
-        logging_history = log_md(
-            logging_history, "TensorRT engines has been saved to disk."
-        )
-        yield logging_history
+            return "## Export Failed due to unknown reason. See shell for more information. \n"
+
+        print("TensorRT engines has been saved to disk.")
         modelmanager.add_entry(
             model_name,
             model_hash,
@@ -199,25 +173,17 @@ def export_unet_to_trt(
             lora=False,
         )
     else:
-        logging_history = log_md(
-            logging_history,
-            "TensorRT engine found. Skipping build. You can enable Force Export in the Advanced Settings to force a rebuild if needed.",
-        )
-        yield logging_history
+        print("TensorRT engine found. Skipping build. You can enable Force Export in the Advanced Settings to force a rebuild if needed.")
 
-    yield logging_history + "\n --- \n ## Exported Successfully \n"
+    return "## Exported Successfully \n"
 
 
 def export_lora_to_trt(lora_name, force_export):
-    logging_history = ""
     is_inpaint = False
     use_fp32 = False
     if cc_major < 7:
         use_fp32 = True
-        logging_history = log_md(
-            logging_history, "FP16 has been disabled because your GPU does not support it."
-        )
-        yield logging_history
+        print("FP16 has been disabled because your GPU does not support it.")
     unet_hidden_dim = shared.sd_model.model.diffusion_model.in_channels
     if unet_hidden_dim == 9:
         is_inpaint = True
@@ -261,8 +227,8 @@ def export_lora_to_trt(lora_name, force_export):
         diable_optimizations = False
 
     if not os.path.exists(onnx_lora_path):
-        logging_history = log_md(logging_history, "No ONNX file found. Exporting ONNX")
-        yield logging_history
+        print("No ONNX file found. Exporting ONNX...")
+        gr.Info("No ONNX file found. Exporting ONNX... Please check the progress in the terminal.")
         export_onnx(
             onnx_lora_path,
             modelobj,
@@ -272,33 +238,29 @@ def export_lora_to_trt(lora_name, force_export):
             diable_optimizations=diable_optimizations,
             lora_path=lora_model["filename"],
         )
-        logging_history = log_md(logging_history, "Exported to ONNX.")
-        yield logging_history
+        print("Exported to ONNX.")
 
     trt_lora_name = onnx_lora_filename.replace(".onnx", ".trt")
     trt_lora_path = os.path.join(TRT_MODEL_DIR, trt_lora_name)
 
     available_trt_unet = modelmanager.available_models()
     if len(available_trt_unet[base_name]) == 0:
-        logging_history = log_md(logging_history, "Please export the base model first.")
-        yield logging_history
+        return "## Please export the base model first."
     trt_base_path = os.path.join(
         TRT_MODEL_DIR, available_trt_unet[base_name][0]["filepath"]
     )
 
     if not os.path.exists(onnx_base_path):
-        raise ValueError("Please export the base model first.")
+        return "## Please export the base model first."
 
     if not os.path.exists(trt_lora_path) or force_export:
-        logging_history = log_md(
-            logging_history, "No TensorRT engine found. Building..."
-        )
-        yield logging_history
+        print("No TensorRT engine found. Building...")
+        gr.Info("No TensorRT engine found. Building...")
+
         engine = Engine(trt_base_path)
         engine.load()
         engine.refit(onnx_base_path, onnx_lora_path, dump_refit_path=trt_lora_path)
-        logging_history = log_md(logging_history, "Built TensorRT engine.")
-        yield logging_history
+        print("Built TensorRT engine.")
 
     modelmanager.add_lora_entry(
         base_name,
@@ -309,7 +271,7 @@ def export_lora_to_trt(lora_name, force_export):
         0,
         unet_hidden_dim,
     )
-    yield logging_history + "\n --- \n ## Exported Successfully \n"
+    return "## Exported Successfully \n"
 
 
 def export_default_unet_to_trt():
@@ -827,23 +789,27 @@ def on_ui_tabs():
             with gr.Accordion("Output", open=True):
                 trt_result = gr.Markdown(elem_id="trt_result", value="")
 
+            def get_trt_profiles_markdown():
+                profiles_md_string = ""
+                for model, profiles in engine_profile_card().items():
+                    profiles_md_string += f"<details><summary>{model} ({len(profiles)} Profiles)</summary>\n\n"
+                    for i, profile in enumerate(profiles):
+                        profiles_md_string += f"#### Profile {i} \n{profile}\n\n"
+                    profiles_md_string += "</details>\n"
+                profiles_md_string += "</details>\n"
+                return profiles_md_string
+
             with gr.Column(variant="panel"):
                 with gr.Row(equal_height=True, variant="compact"):
                     button_refresh_profiles = ToolButton(value=refresh_symbol, elem_id="trt_refresh_profiles", visible=True)
                     profile_header_md = gr.Markdown(
                         value=f"## Available TensorRT Engine Profiles"
                     )
-                engines_md = engine_profile_card()
-                for model, profiles in engines_md.items():
-                    with gr.Row(equal_height=False):
-                        row_name = model + " ({} Profiles)".format(len(profiles))
-                        with gr.Accordion(row_name, open=False):
-                            out_string = ""
-                            for i, profile in enumerate(profiles):
-                                out_string += f"#### Profile {i} \n"
-                                out_string += profile
-                                out_string += "\n\n"
-                            gr.Markdown(elem_id=f"trt_{model}_{i}", value=out_string)
+                with gr.Row(equal_height=True):
+                    trt_profiles_markdown = gr.Markdown(elem_id=f"trt_profiles_markdown", value=get_trt_profiles_markdown())
+
+            button_refresh_profiles.click(lambda: gr.Markdown.update(value=get_trt_profiles_markdown()), outputs=[trt_profiles_markdown])
 
             button_export_unet.click(
                 export_unet_to_trt,
@@ -895,13 +861,4 @@ def on_ui_tabs():
         outputs=[trt_result],
     )
 
-
-    # TODO Dynamically update available profiles. Not possible with gradio?!
-    button_refresh_profiles.click(
-        fn=shared.state.request_restart,
-        _js='restart_reload',
-        inputs=[],
-        outputs=[],
-    )
-
     return [(trt_interface, "TensorRT", "tensorrt")]
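The net effect of the last two hunks is that the refresh button now rebuilds the profile markdown in place instead of restarting the whole WebUI (the removed `shared.state.request_restart` handler). Below is a minimal, self-contained sketch of that pattern, assuming Gradio 3.x (where `gr.Markdown.update` exists, matching its use above) and a stand-in data source in place of `engine_profile_card()`.

```python
# Minimal sketch of the in-place refresh pattern this commit adopts.
# Assumes Gradio 3.x; build_profiles_markdown() is a stand-in for
# get_trt_profiles_markdown() in the extension.
import gradio as gr

def build_profiles_markdown() -> str:
    # Real data would come from engine_profile_card(); this is dummy output
    # in the same <details>/<summary> shape the new UI code produces.
    return (
        "<details><summary>model_a (1 Profiles)</summary>\n\n"
        "#### Profile 0 \n512x512 .. 768x768, batch 1-4\n\n"
        "</details>\n"
    )

with gr.Blocks() as demo:
    refresh = gr.Button("Refresh profiles")
    md = gr.Markdown(value=build_profiles_markdown())
    # The click handler returns fresh markdown for the existing component,
    # so no UI restart is needed.
    refresh.click(lambda: gr.Markdown.update(value=build_profiles_markdown()), outputs=[md])

if __name__ == "__main__":
    demo.launch()
```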

utilities.py

Lines changed: 1 addition & 1 deletion
@@ -228,7 +228,7 @@ def map_name(name):
         refitter = trt.Refitter(self.engine, TRT_LOGGER)
         all_weights = refitter.get_all()
         for layer_name, role in zip(all_weights[0], all_weights[1]):
-            # for speciailized roles, use a unique name in the map:
+            # for specialized roles, use a unique name in the map:
             if role == trt.WeightsRole.KERNEL:
                 name = layer_name + "_TRTKERNEL"
             elif role == trt.WeightsRole.BIAS:
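The comment being fixed describes a real pattern: the refitter can report the same layer under several weight roles, so each role gets a unique key in the weight map. A small sketch with a stand-in enum; the real code uses `tensorrt.WeightsRole` and TensorRT's refit API as shown in the hunk above.

```python
# Sketch of the role-suffix naming scheme; the enum is a stand-in for
# tensorrt.WeightsRole, and the suffixes match the code above.
from enum import Enum, auto

class WeightsRole(Enum):
    KERNEL = auto()
    BIAS = auto()
    CONSTANT = auto()

def map_name(layer_name: str, role: WeightsRole) -> str:
    # for specialized roles, use a unique name in the map:
    if role is WeightsRole.KERNEL:
        return layer_name + "_TRTKERNEL"
    if role is WeightsRole.BIAS:
        return layer_name + "_TRTBIAS"
    return layer_name  # other roles keep the raw layer name

assert map_name("conv1", WeightsRole.KERNEL) == "conv1_TRTKERNEL"
assert map_name("conv1", WeightsRole.BIAS) == "conv1_TRTBIAS"
```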
