leejet · leejet · Sep 8, 2025 · Sep 7, 2025 · Sep 7, 2025 · Sep 8, 2025
diff --git a/.gitignore b/.gitignore
@@ -4,10 +4,11 @@ test/
 .cache/
 *.swp
 .vscode/
+.idea/
 *.bat
 *.bin
 *.exe
 *.gguf
 output*.png
 models*
-*.log
+*.log
diff --git a/README.md b/README.md
@@ -137,7 +137,9 @@ This provides BLAS acceleration using the ROCm cores of your AMD GPU. Make sure
 Windows User Refer to [docs/hipBLAS_on_Windows.md](docs%2FhipBLAS_on_Windows.md) for a comprehensive guide.
 
 ```
-cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=gfx1100 -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON
+export GFX_NAME=$(rocminfo | grep -m 1 -E "gfx[^0]{1}" | sed -e 's/ *Name: *//' | awk '{$1=$1; print}' || echo "rocminfo missing")
+echo $GFX_NAME
+cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DGPU_TARGETS=$GFX_NAME -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON
 cmake --build . --config Release
 ```
 

diff --git a/docs/lora.md b/docs/lora.md
@@ -10,4 +10,30 @@ Here's a simple example:
 ./bin/sd -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat<lora:marblesh:1>" --lora-model-dir ../models
 ```
 
-`../models/marblesh.safetensors` or `../models/marblesh.ckpt` will be applied to the model
+`../models/marblesh.safetensors` or `../models/marblesh.ckpt` will be applied to the model
+
+# Support matrix
+
+> ℹ️ CUDA `get_rows` support is defined here:  
+> [ggml-org/ggml/src/ggml-cuda/getrows.cu#L156](https://github.com/ggml-org/ggml/blob/7dee1d6a1e7611f238d09be96738388da97c88ed/src/ggml-cuda/getrows.cu#L156)  
+> Currently only the basic types (F32, F16, BF16, I32) and the Q4_0/Q4_1/Q5_0/Q5_1/Q8_0 quantizations are implemented. K-quants are **not** supported.
+
+NOTE: Other backends may support a different set of types.
+
+| Quant / Type | CUDA |
+|--------------|------|
+| F32          | ✔️   |
+| F16          | ✔️   |
+| BF16         | ✔️   |
+| I32          | ✔️   |
+| Q4_0         | ✔️   |
+| Q4_1         | ✔️   |
+| Q5_0         | ✔️   |
+| Q5_1         | ✔️   |
+| Q8_0         | ✔️   |
+| Q2_K         | ❌   |
+| Q3_K         | ❌   |
+| Q4_K         | ❌   |
+| Q5_K         | ❌   |
+| Q6_K         | ❌   |
+| Q8_K         | ❌   |
diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp
@@ -1,6 +1,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <time.h>
+#include <filesystem>
 #include <functional>
 #include <iostream>
 #include <map>
@@ -1283,6 +1284,21 @@ int main(int argc, const char* argv[]) {
         }
     }
 
+    // create the output file's parent directory if it does not exist
+    {
+        namespace fs            = std::filesystem;
+        const fs::path out_path = params.output_path;
+        if (const fs::path out_dir = out_path.parent_path(); !out_dir.empty()) {
+            std::error_code ec;
+            fs::create_directories(out_dir, ec);  // OK if already exists
+            if (ec) {
+                fprintf(stderr, "failed to create directory '%s': %s\n",
+                        out_dir.string().c_str(), ec.message().c_str());
+                return 1;
+            }
+        }
+    }
+
     std::string base_path;
     std::string file_ext;
     std::string file_ext_lower;

diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
@@ -330,7 +330,7 @@ class StableDiffusionGGML {
             if (sd_version_is_dit(version)) {
                 use_t5xxl = true;
             }
-            if (!ggml_backend_is_cpu(backend) && use_t5xxl) {
+            if (!clip_on_cpu && !ggml_backend_is_cpu(backend) && use_t5xxl) {
                 LOG_WARN(
                     "!!!It appears that you are using the T5 model. Some backends may encounter issues with it."
                     "If you notice that the generated images are completely black,"
-Original file line number
+Diff line change
@@ Expand Up / @@ -4,10 +4,11 @@ test/ @@
     .cache/
     *.swp
     .vscode/
+    .idea/
     *.bat
     *.bin
     *.exe
     *.gguf
     output*.png
     models*
-    *.log
+    *.log