Change cublas to cuda

SilasMarvin · SilasMarvin · commit 6b0f923b2a0b · 2024-06-04T11:18:58.000-07:00
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/README.md b/README.md
@@ -35,7 +35,7 @@ git clone --recursive https://github.com/utilityai/llama-cpp-rs
 cd llama-cpp-rs
 ```
 
-Run the simple example (add `--featues cublas` if you have a cuda gpu)
+Run the simple example (add `--features cuda` if you have a cuda gpu)
 
 ```bash
 cargo run --release --bin simple "The way to kill a linux process is" hf-model TheBloke/Llama-2-7B-GGUF llama-2-7b.Q4_K_M.gguf
diff --git a/embeddings/src/main.rs b/embeddings/src/main.rs
@@ -20,8 +20,8 @@ use llama_cpp_2::ggml_time_us;
 use llama_cpp_2::llama_backend::LlamaBackend;
 use llama_cpp_2::llama_batch::LlamaBatch;
 use llama_cpp_2::model::params::LlamaModelParams;
-use llama_cpp_2::model::{AddBos, Special};
 use llama_cpp_2::model::LlamaModel;
+use llama_cpp_2::model::{AddBos, Special};
 
 #[derive(clap::Parser, Debug, Clone)]
 struct Args {
@@ -35,7 +35,7 @@ struct Args {
     #[clap(short)]
     normalise: bool,
     /// Disable offloading layers to the gpu
-    #[cfg(feature = "cublas")]
+    #[cfg(feature = "cuda")]
     #[clap(long)]
     disable_gpu: bool,
 }
@@ -78,7 +78,7 @@ fn main() -> Result<()> {
         model,
         prompt,
         normalise,
-        #[cfg(feature = "cublas")]
+        #[cfg(feature = "cuda")]
         disable_gpu,
     } = Args::parse();
 
@@ -87,13 +87,13 @@ fn main() -> Result<()> {
 
     // offload all layers to the gpu
     let model_params = {
-        #[cfg(feature = "cublas")]
+        #[cfg(feature = "cuda")]
         if !disable_gpu {
             LlamaModelParams::default().with_n_gpu_layers(1000)
         } else {
             LlamaModelParams::default()
         }
-        #[cfg(not(feature = "cublas"))]
+        #[cfg(not(feature = "cuda"))]
         LlamaModelParams::default()
     };
 
diff --git a/llama-cpp-2/Cargo.toml b/llama-cpp-2/Cargo.toml
@@ -14,7 +14,7 @@ thiserror = { workspace = true }
 tracing = { workspace = true }
 
 [features]
-cublas = ["llama-cpp-sys-2/cublas"]
+cuda = ["llama-cpp-sys-2/cuda"]
 metal = ["llama-cpp-sys-2/metal"]
 sampler = []
 
@@ -25,4 +25,4 @@ llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features=["metal"], version = "
 workspace = true
 
 [package.metadata.docs.rs]
-features = ["sampler"]
+features = ["sampler"]
diff --git a/llama-cpp-2/src/lib.rs b/llama-cpp-2/src/lib.rs
@@ -11,7 +11,7 @@
 //!
 //! # Feature Flags
 //!
-//! - `cublas` enables CUDA gpu support.
+//! - `cuda` enables CUDA gpu support.
 //! - `sampler` adds the [`context::sample::sampler`] struct for a more rusty way of sampling.
 use std::ffi::NulError;
 use std::fmt::Debug;
diff --git a/llama-cpp-sys-2/Cargo.toml b/llama-cpp-sys-2/Cargo.toml
@@ -51,6 +51,6 @@ cc = { workspace = true, features = ["parallel"] }
 once_cell = "1.19.0"
 
 [features]
-cublas = []
+cuda = []
 metal = []
 
diff --git a/llama-cpp-sys-2/README.md b/llama-cpp-sys-2/README.md
@@ -1,5 +1,5 @@
 # llama-cpp-sys
 
-Raw bindings to llama.cpp with cublas support.
+Raw bindings to llama.cpp with CUDA support.
 
-See [llama-cpp-2](https://crates.io/crates/llama-cpp-2) for a safe API.
+See [llama-cpp-2](https://crates.io/crates/llama-cpp-2) for a safe API.
diff --git a/llama-cpp-sys-2/llama.cpp b/llama-cpp-sys-2/llama.cpp
@@ -1 +1 @@
-Subproject commit 0541f06296753dbc59a57379eb54cec865a4c9f9
+Subproject commit 917dc8cfa67a72fb7c8bf7392270da3bf4833af4
diff --git a/simple/Cargo.toml b/simple/Cargo.toml
@@ -13,7 +13,7 @@ anyhow = { workspace = true }
 encoding_rs = { workspace = true }
 
 [features]
-cublas = ["llama-cpp-2/cublas"]
+cuda = ["llama-cpp-2/cuda"]
 metal =  ["llama-cpp-2/metal"]
 
 [lints]
diff --git a/simple/src/main.rs b/simple/src/main.rs
@@ -15,8 +15,8 @@ use llama_cpp_2::llama_backend::LlamaBackend;
 use llama_cpp_2::llama_batch::LlamaBatch;
 use llama_cpp_2::model::params::kv_overrides::ParamOverrideValue;
 use llama_cpp_2::model::params::LlamaModelParams;
-use llama_cpp_2::model::{AddBos, Special};
 use llama_cpp_2::model::LlamaModel;
+use llama_cpp_2::model::{AddBos, Special};
 use llama_cpp_2::token::data_array::LlamaTokenDataArray;
 use std::ffi::CString;
 use std::io::Write;
@@ -44,7 +44,7 @@ struct Args {
     #[arg(short = 'o', value_parser = parse_key_val)]
     key_value_overrides: Vec<(String, ParamOverrideValue)>,
     /// Disable offloading layers to the gpu
-    #[cfg(feature = "cublas")]
+    #[cfg(feature = "cuda")]
     #[clap(long)]
     disable_gpu: bool,
     #[arg(short = 's', long, help = "RNG seed (default: 1234)")]
@@ -123,7 +123,7 @@ fn main() -> Result<()> {
         model,
         prompt,
         file,
-        #[cfg(feature = "cublas")]
+        #[cfg(feature = "cuda")]
         disable_gpu,
         key_value_overrides,
         seed,
@@ -137,13 +137,13 @@ fn main() -> Result<()> {
 
     // offload all layers to the gpu
     let model_params = {
-        #[cfg(feature = "cublas")]
+        #[cfg(feature = "cuda")]
         if !disable_gpu {
             LlamaModelParams::default().with_n_gpu_layers(1000)
         } else {
             LlamaModelParams::default()
         }
-        #[cfg(not(feature = "cublas"))]
+        #[cfg(not(feature = "cuda"))]
         LlamaModelParams::default()
     };
 
diff --git a/test-build.Dockerfile b/test-build.Dockerfile
@@ -8,10 +8,10 @@ RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
 ENV PATH=/root/.cargo/bin:$PATH
 
 COPY . .
-RUN cargo build --bin simple --features cublas
+RUN cargo build --bin simple --features cuda
 
 FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} as base-cuda-runtime
 
 COPY --from=base-cuda /target/debug/simple /usr/local/bin/simple
 
-ENTRYPOINT ["/usr/local/bin/simple"]
+ENTRYPOINT ["/usr/local/bin/simple"]