diff --git a/.gitignore b/.gitignore index 38fe570df..7d32dea06 100644 --- a/.gitignore +++ b/.gitignore @@ -4,10 +4,11 @@ test/ .cache/ *.swp .vscode/ +.idea/ *.bat *.bin *.exe *.gguf output*.png models* -*.log \ No newline at end of file +*.log diff --git a/README.md b/README.md index c69335b24..a4585be0c 100644 --- a/README.md +++ b/README.md @@ -137,7 +137,9 @@ This provides BLAS acceleration using the ROCm cores of your AMD GPU. Make sure Windows User Refer to [docs/hipBLAS_on_Windows.md](docs%2FhipBLAS_on_Windows.md) for a comprehensive guide. ``` -cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=gfx1100 -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON +export GFX_NAME=$(rocminfo | grep -m 1 -E "gfx[^0]{1}" | sed -e 's/ *Name: *//' | awk '{$1=$1; print}' || echo "rocminfo missing") +echo $GFX_NAME +cmake .. -G "Ninja" -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DSD_HIPBLAS=ON -DCMAKE_BUILD_TYPE=Release -DGPU_TARGETS=$GFX_NAME -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON cmake --build . --config Release ``` diff --git a/docs/lora.md b/docs/lora.md index fb76f287c..e2e1d82e9 100644 --- a/docs/lora.md +++ b/docs/lora.md @@ -10,4 +10,30 @@ Here's a simple example: ./bin/sd -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat" --lora-model-dir ../models ``` -`../models/marblesh.safetensors` or `../models/marblesh.ckpt` will be applied to the model \ No newline at end of file +`../models/marblesh.safetensors` or `../models/marblesh.ckpt` will be applied to the model + +# Support matrix + +> ℹ️ CUDA `get_rows` support is defined here: +> [ggml-org/ggml/src/ggml-cuda/getrows.cu#L156](https://github.com/ggml-org/ggml/blob/7dee1d6a1e7611f238d09be96738388da97c88ed/src/ggml-cuda/getrows.cu#L156) +> Currently only the basic types (F32, F16, BF16, I32) and the Q4_0/Q4_1/Q5_0/Q5_1/Q8_0 quantizations are implemented. K-quants are **not** supported. + +NOTE: Other backends may support a different set of types. 
+ +| Quant / Type | CUDA | +|--------------|------| +| F32 | ✔️ | +| F16 | ✔️ | +| BF16 | ✔️ | +| I32 | ✔️ | +| Q4_0 | ✔️ | +| Q4_1 | ✔️ | +| Q5_0 | ✔️ | +| Q5_1 | ✔️ | +| Q8_0 | ✔️ | +| Q2_K | ❌ | +| Q3_K | ❌ | +| Q4_K | ❌ | +| Q5_K | ❌ | +| Q6_K | ❌ | +| Q8_K | ❌ | diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index 8dd29051d..098c98cc5 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -1283,6 +1284,21 @@ int main(int argc, const char* argv[]) { } } + // create directory if not exists + { + namespace fs = std::filesystem; + const fs::path out_path = params.output_path; + if (const fs::path out_dir = out_path.parent_path(); !out_dir.empty()) { + std::error_code ec; + fs::create_directories(out_dir, ec); // OK if already exists + if (ec) { + fprintf(stderr, "failed to create directory '%s': %s\n", + out_dir.string().c_str(), ec.message().c_str()); + return 1; + } + } + } + std::string base_path; std::string file_ext; std::string file_ext_lower; diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 17804c11b..e6425fa17 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -330,7 +330,7 @@ class StableDiffusionGGML { if (sd_version_is_dit(version)) { use_t5xxl = true; } - if (!ggml_backend_is_cpu(backend) && use_t5xxl) { + if (!clip_on_cpu && !ggml_backend_is_cpu(backend) && use_t5xxl) { LOG_WARN( "!!!It appears that you are using the T5 model. Some backends may encounter issues with it." "If you notice that the generated images are completely black,"