
Commit 70f26c2

Merge branch 'main' into feat/dynamic_link
2 parents: d441204 + 38686de

File tree

10 files changed: +114 −44 lines

.github/workflows/llama-cpp-rs-check.yml
Cargo.lock
Cargo.toml
embeddings/Cargo.toml
embeddings/src/main.rs
llama-cpp-2/Cargo.toml
llama-cpp-sys-2/Cargo.toml
llama-cpp-sys-2/build.rs
simple/Cargo.toml
simple/src/main.rs

.github/workflows/llama-cpp-rs-check.yml

Lines changed: 2 additions & 2 deletions
@@ -45,11 +45,11 @@ jobs:
       - name: checkout
         uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
       - name: Setup QEMU
-        uses: docker/setup-qemu-action@68827325e0b33c7199eb31dd4e31fbe9023e06e3
+        uses: docker/setup-qemu-action@5927c834f5b4fdf503fca6f4c7eccda82949e1ee
         with:
           platforms: arm64,amd64
       - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@d70bba72b1f3fd22344832f00baa16ece964efeb
+        uses: docker/setup-buildx-action@4fd812986e6c8c2a69e18311145f9371337f27d4
       - name: Build
         uses: docker/build-push-action@v6
         with:
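
Both actions stay pinned to full commit SHAs rather than mutable tags, so each bump replaces one audited revision of the action with another instead of tracking a moving target.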

Cargo.lock

Lines changed: 14 additions & 14 deletions
Some generated files are not rendered by default.

Cargo.toml

Lines changed: 2 additions & 2 deletions
@@ -16,9 +16,9 @@ hf-hub = { version = "0.3.2" }
 criterion = "0.5.1"
 pprof = "0.13.0"
 bindgen = "0.69.4"
-cc = "1.0.100"
+cc = "1.0.105"
 anyhow = "1.0.86"
-clap = "4.5.4"
+clap = "4.5.8"
 encoding_rs = "0.8.34"

 [workspace.lints.rust]

embeddings/Cargo.toml

Lines changed: 8 additions & 4 deletions
@@ -1,15 +1,19 @@
 [package]
 name = "embeddings"
-version = "0.1.60"
+version = "0.1.61"
 edition = "2021"

-# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
-
 [dependencies]
-llama-cpp-2 = { path = "../llama-cpp-2", version = "0.1.60" }
+llama-cpp-2 = { path = "../llama-cpp-2", version = "0.1.61" }
 hf-hub = { workspace = true }
 clap = { workspace = true , features = ["derive"] }
 anyhow = { workspace = true }

+[features]
+cuda = ["llama-cpp-2/cuda"]
+metal = ["llama-cpp-2/metal"]
+native = ["llama-cpp-2/native"]
+vulkan = ["llama-cpp-2/vulkan"]
+
 [lints]
 workspace = true
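
The new flags are plain pass-throughs to the corresponding `llama-cpp-2` features, so the example binary can now be built against a GPU backend directly, e.g. `cargo build -p embeddings --features vulkan`; any of `cuda`, `metal`, `native`, or `vulkan` works the same way.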

embeddings/src/main.rs

Lines changed: 4 additions & 4 deletions
@@ -35,7 +35,7 @@ struct Args {
     #[clap(short)]
     normalise: bool,
     /// Disable offloading layers to the gpu
-    #[cfg(feature = "cuda")]
+    #[cfg(any(feature = "cuda", feature = "vulkan"))]
     #[clap(long)]
     disable_gpu: bool,
 }
@@ -78,7 +78,7 @@ fn main() -> Result<()> {
         model,
         prompt,
         normalise,
-        #[cfg(feature = "cuda")]
+        #[cfg(any(feature = "cuda", feature = "vulkan"))]
         disable_gpu,
     } = Args::parse();

@@ -87,13 +87,13 @@

     // offload all layers to the gpu
     let model_params = {
-        #[cfg(feature = "cuda")]
+        #[cfg(any(feature = "cuda", feature = "vulkan"))]
         if !disable_gpu {
             LlamaModelParams::default().with_n_gpu_layers(1000)
         } else {
             LlamaModelParams::default()
         }
-        #[cfg(not(feature = "cuda"))]
+        #[cfg(not(any(feature = "cuda", feature = "vulkan")))]
         LlamaModelParams::default()
     };
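
For readers unfamiliar with this gating style, here is a minimal self-contained sketch of the `cfg(any(...))` pattern the commit switches to (a hypothetical crate declaring `cuda` and `vulkan` features, not the example's real code): the field and the GPU branch exist only when at least one backend feature is enabled, and everything still compiles without them.

// Sketch only: assumes the crate declares `cuda` and `vulkan` features.
struct Args {
    // The field only exists when at least one GPU backend is compiled in.
    #[cfg(any(feature = "cuda", feature = "vulkan"))]
    disable_gpu: bool,
}

#[allow(unused_variables)] // `args` is unused in a CPU-only build
fn n_gpu_layers(args: &Args) -> u32 {
    // This whole statement is compiled out unless a GPU feature is enabled.
    #[cfg(any(feature = "cuda", feature = "vulkan"))]
    if !args.disable_gpu {
        return 1000; // offload every layer, as the examples do
    }
    0 // CPU-only (or --disable-gpu) fallback
}

fn main() {
    let args = Args {
        #[cfg(any(feature = "cuda", feature = "vulkan"))]
        disable_gpu: false,
    };
    println!("n_gpu_layers = {}", n_gpu_layers(&args));
}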

llama-cpp-2/Cargo.toml

Lines changed: 19 additions & 2 deletions
@@ -1,7 +1,7 @@
 [package]
 name = "llama-cpp-2"
 description = "llama.cpp bindings for Rust"
-version = "0.1.60"
+version = "0.1.61"
 edition = "2021"
 license = "MIT OR Apache-2.0"
 repository = "https://github.com/utilityai/llama-cpp-rs"
@@ -10,16 +10,33 @@ repository = "https://github.com/utilityai/llama-cpp-rs"

 [dependencies]
 enumflags2 = "0.7.10"
-llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", version = "0.1.60" }
+llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", version = "0.1.61" }
 thiserror = { workspace = true }
 tracing = { workspace = true }

 [features]
 cuda = ["llama-cpp-sys-2/cuda"]
 metal = ["llama-cpp-sys-2/metal"]
 dynamic_link = ["llama-cpp-sys-2/dynamic_link"]
+vulkan = ["llama-cpp-sys-2/vulkan"]
+native = ["llama-cpp-sys-2/native"]
 sampler = []

+[target.'cfg(target_feature = "avx")'.dependencies]
+llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features = ["avx"] }
+[target.'cfg(target_feature = "avx2")'.dependencies]
+llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features = ["avx2"] }
+[target.'cfg(target_feature = "avx512f")'.dependencies]
+llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features = ["avx512"] }
+[target.'cfg(target_feature = "avx512vbmi")'.dependencies]
+llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features = ["avx512_vmbi"] }
+[target.'cfg(target_feature = "avx512vnni")'.dependencies]
+llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features = ["avx512_vnni"] }
+[target.'cfg(target_feature = "f16c")'.dependencies]
+llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features = ["f16c"] }
+[target.'cfg(target_feature = "fma")'.dependencies]
+llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features = ["fma"] }
+
 [target.'cfg(all(target_os = "macos", any(target_arch = "aarch64", target_arch = "arm64")))'.dependencies]
 llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features=["metal"], version = "0.1.48" }
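
A note on the new `cfg(target_feature = ...)` tables: these only match when the corresponding CPU features are enabled at compile time (e.g. `RUSTFLAGS="-C target-cpu=native"` or `-C target-feature=+avx2`), and depending on the Cargo version, flags passed via RUSTFLAGS may not be reflected when Cargo evaluates target tables, so they are best treated as best-effort. A throwaway snippet (not part of the crate) to inspect what a given set of flags enables:

// Prints the compile-time CPU-feature cfgs these Cargo tables key off.
// Try: RUSTFLAGS="-C target-feature=+avx2,+fma" cargo run
fn main() {
    println!("avx     = {}", cfg!(target_feature = "avx"));
    println!("avx2    = {}", cfg!(target_feature = "avx2"));
    println!("avx512f = {}", cfg!(target_feature = "avx512f"));
    println!("f16c    = {}", cfg!(target_feature = "f16c"));
    println!("fma     = {}", cfg!(target_feature = "fma"));
}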

llama-cpp-sys-2/Cargo.toml

Lines changed: 10 additions & 1 deletion
@@ -1,7 +1,7 @@
 [package]
 name = "llama-cpp-sys-2"
 description = "Low Level Bindings to llama.cpp"
-version = "0.1.60"
+version = "0.1.61"
 edition = "2021"
 license = "MIT OR Apache-2.0"
 repository = "https://github.com/utilityai/llama-cpp-rs"
@@ -53,6 +53,15 @@ cc = { workspace = true, features = ["parallel"] }
 once_cell = "1.19.0"

 [features]
+avx = []
+avx2 = []
+avx512 = []
+avx512_vmbi = []
+avx512_vnni = []
 cuda = []
+f16c = []
+fma = []
 metal = []
 dynamic_link = []
+vulkan = []
+native = []
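
These sys-level entries are empty marker features: they pull in no dependencies and exist, presumably, so the build script (see `push_feature_flags` in build.rs below) can map them onto ggml compile flags. A minimal sketch of how a build.rs can react to such markers (illustrative only; Cargo exports every enabled feature to build scripts as an environment variable):

// build.rs sketch: Cargo sets CARGO_FEATURE_<NAME> (uppercased, `-` -> `_`)
// for each enabled feature of the package being built.
fn main() {
    for feat in ["AVX2", "F16C", "FMA", "VULKAN"] {
        if std::env::var_os(format!("CARGO_FEATURE_{feat}")).is_some() {
            println!("cargo:warning=feature {feat} is enabled");
        }
    }
}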

llama-cpp-sys-2/build.rs

Lines changed: 47 additions & 9 deletions
@@ -507,7 +507,7 @@ fn compile_metal(cx: &mut Build, cxx: &mut Build) {
     let common = LLAMA_PATH.join("ggml-common.h");

     let input_file = File::open(ggml_metal_shader_path).expect("Failed to open input file");
-    let mut output_file =
+    let output_file =
         File::create(&ggml_metal_shader_out_path).expect("Failed to create output file");

     let output = Command::new("sed")
@@ -583,11 +583,35 @@
         .file(LLAMA_PATH.join("ggml-metal.m"));
 }

+fn find_windows_vulkan_sdk() -> PathBuf {
+    // if the vulkan sdk is installed in the standard location then this should be pretty fast,
+    // but we still must search recursively because we don't know the exact version number.
+    // if it's installed somewhere else, this will take a while, but it's better than failing.
+    let vulkan_root = Command::new("powershell.exe")
+        .arg("-Command")
+        .arg(r#"
+            if (test-path -pathtype Container "/VulkanSDK") {
+                $root = "/VulkanSDK"
+            } else {
+                $root = "/"
+            }
+            get-childitem -path $root -recurse -filter "vulkan.h" 2>$null | foreach-object { $_.directory.parent.parent.fullname }
+        "#)
+        .output()
+        .expect("could not find vulkan.h")
+        .stdout;
+    let vulkan_root = String::from_utf8_lossy(
+        vulkan_root
+            .split(|c| *c == b'\n')
+            .next()
+            .expect("could not find vulkan.h"),
+    );
+    PathBuf::from(vulkan_root.trim())
+}
+
 fn compile_vulkan(cx: &mut Build, cxx: &mut Build) -> &'static str {
     println!("Compiling Vulkan GGML..");

-    // Vulkan gets linked through the ash crate.
-
     if cfg!(debug_assertions) {
         cx.define("GGML_VULKAN_DEBUG", None)
             .define("GGML_VULKAN_CHECK_RESULTS", None)
@@ -602,12 +626,25 @@ fn compile_vulkan(cx: &mut Build, cxx: &mut Build) -> &'static str {

     let lib_name = "ggml-vulkan";

-    cxx.clone()
-        .include("./thirdparty/Vulkan-Headers/include/")
-        .include(LLAMA_PATH.as_path())
-        .file(LLAMA_PATH.join("ggml-vulkan.cpp"))
-        .compile(lib_name);
-
+    if cfg!(target_os = "windows") {
+        let vulkan_root = find_windows_vulkan_sdk();
+        cxx.clone()
+            .include(vulkan_root.join("Include"))
+            .include(LLAMA_PATH.as_path())
+            .file(LLAMA_PATH.join("ggml-vulkan.cpp"))
+            .compile(lib_name);
+        println!(
+            "cargo:rustc-link-search=native={}",
+            vulkan_root.join("Lib").display()
+        );
+        println!("cargo:rustc-link-lib=vulkan-1");
+    } else {
+        cxx.clone()
+            .include(LLAMA_PATH.as_path())
+            .file(LLAMA_PATH.join("ggml-vulkan.cpp"))
+            .compile(lib_name);
+        println!("cargo:rustc-link-lib=vulkan");
+    }
     lib_name
 }

@@ -673,6 +710,7 @@ fn main() {
     push_warn_flags(&mut cx, &mut cxx);
     push_feature_flags(&mut cx, &mut cxx);

+    #[allow(unused_variables)]
     let feat_lib = if cfg!(feature = "vulkan") {
         Some(compile_vulkan(&mut cx, &mut cxx))
     } else if cfg!(feature = "cuda") {
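
The platform split in `compile_vulkan` comes down to two Cargo link directives: the Windows Vulkan SDK ships its import library as `vulkan-1.lib`, while Unix-like systems link the loader as `libvulkan`. A stripped-down sketch of just that logic (the SDK path is a hypothetical stand-in for whatever `find_windows_vulkan_sdk` discovers at build time):

// build.rs sketch of the link directives emitted above.
fn main() {
    if cfg!(target_os = "windows") {
        // Hypothetical SDK root; the real one is located at build time.
        println!("cargo:rustc-link-search=native=C:/VulkanSDK/1.3.283.0/Lib");
        println!("cargo:rustc-link-lib=vulkan-1"); // links vulkan-1.lib
    } else {
        println!("cargo:rustc-link-lib=vulkan"); // links libvulkan.so
    }
}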

simple/Cargo.toml

Lines changed: 4 additions & 2 deletions
@@ -1,12 +1,12 @@
 [package]
 name = "simple"
-version = "0.1.60"
+version = "0.1.61"
 edition = "2021"

 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
-llama-cpp-2 = { path = "../llama-cpp-2", version = "0.1.60" }
+llama-cpp-2 = { path = "../llama-cpp-2", version = "0.1.61" }
 hf-hub = { workspace = true }
 clap = { workspace = true , features = ["derive"] }
 anyhow = { workspace = true }
@@ -15,6 +15,8 @@ encoding_rs = { workspace = true }
 [features]
 cuda = ["llama-cpp-2/cuda"]
 metal = ["llama-cpp-2/metal"]
+native = ["llama-cpp-2/native"]
+vulkan = ["llama-cpp-2/vulkan"]

 [lints]
 workspace = true

simple/src/main.rs

Lines changed: 4 additions & 4 deletions
@@ -44,7 +44,7 @@ struct Args {
     #[arg(short = 'o', value_parser = parse_key_val)]
     key_value_overrides: Vec<(String, ParamOverrideValue)>,
     /// Disable offloading layers to the gpu
-    #[cfg(feature = "cuda")]
+    #[cfg(any(feature = "cuda", feature = "vulkan"))]
     #[clap(long)]
     disable_gpu: bool,
     #[arg(short = 's', long, help = "RNG seed (default: 1234)")]
@@ -124,7 +124,7 @@ fn main() -> Result<()> {
         model,
         prompt,
         file,
-        #[cfg(feature = "cuda")]
+        #[cfg(any(feature = "cuda", feature = "vulkan"))]
         disable_gpu,
         key_value_overrides,
         seed,
@@ -138,13 +138,13 @@

     // offload all layers to the gpu
     let model_params = {
-        #[cfg(feature = "cuda")]
+        #[cfg(any(feature = "cuda", feature = "vulkan"))]
         if !disable_gpu {
             LlamaModelParams::default().with_n_gpu_layers(1000)
         } else {
             LlamaModelParams::default()
        }
-        #[cfg(not(feature = "cuda"))]
+        #[cfg(not(any(feature = "cuda", feature = "vulkan")))]
         LlamaModelParams::default()
     };
