
Commit 057da42

Merge branch 'main' into 8-metal-on-mac
2 parents 62ac6b5 + 2e05e66

File tree: 10 files changed, +155 −44 lines

.github/workflows/llama-cpp-rs-check.yml

Lines changed: 13 additions & 1 deletion
@@ -67,4 +67,16 @@ jobs:
       - name: Setup Rust
         uses: dtolnay/rust-toolchain@stable
       - name: Build
-        run: cargo build
+        run: cargo build
+  windows:
+    name: Check that it builds on windows
+    runs-on: windows-latest
+    steps:
+      - name: checkout
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
+        with:
+          submodules: recursive
+      - name: Setup Rust
+        uses: dtolnay/rust-toolchain@stable
+      - name: Build
+        run: cargo build

Cargo.lock

Lines changed: 8 additions & 8 deletions
(generated file; diff not rendered)

llama-cpp-2/Cargo.toml

Lines changed: 4 additions & 4 deletions
@@ -1,15 +1,15 @@
 [package]
 name = "llama-cpp-2"
 description = "llama.cpp bindings for Rust"
-version = "0.1.25"
+version = "0.1.28"
 edition = "2021"
 license = "MIT OR Apache-2.0"
 repository = "https://github.com/utilityai/llama-cpp-rs"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", version = "0.1.25" }
+llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", version = "0.1.28" }
 thiserror = { workspace = true }
 tracing = { workspace = true }
 
@@ -19,8 +19,8 @@ criterion = { workspace = true }
 pprof = { workspace = true, features = ["criterion", "flamegraph"] }
 
 # used in examples
-clap = { version = "4.5.0", features = ["derive"] }
-anyhow = "1.0.79"
+clap = { version = "4.5.1", features = ["derive"] }
+anyhow = "1.0.80"
 
 [[bench]]
 name = "grammar_bias"

llama-cpp-2/src/grammar.rs

Lines changed: 12 additions & 12 deletions
@@ -269,7 +269,7 @@ impl ParseState {
             rest = r;
             rule.push(llama_grammar_element {
                 type_: llama_cpp_sys_2::LLAMA_GRETYPE_CHAR,
-                value: c,
+                value: c as _,
             });
         }
         rest = Self::consume_whitespace_and_comments(&rest[1..], nested);
@@ -292,14 +292,14 @@ impl ParseState {
             };
             rule.push(llama_grammar_element {
                 type_: gre_type,
-                value: c,
+                value: c as _,
             });
             if rest.starts_with("-]") {
                 let (c, r) = Self::parse_char(rest)?;
                 rest = r;
                 rule.push(llama_grammar_element {
                     type_: llama_cpp_sys_2::LLAMA_GRETYPE_CHAR_RNG_UPPER,
-                    value: c,
+                    value: c as _,
                 });
             }
         }
@@ -386,7 +386,7 @@ impl ParseState {
             error,
         })?;
 
-        Ok((value, rest))
+        Ok((value as llama_gretype, rest))
     }
 
     fn parse_char(rest: &str) -> Result<(llama_gretype, &str), GrammarParseError> {
@@ -401,17 +401,17 @@ impl ParseState {
                 'x' => Self::parse_hex(rest, 2),
                 'u' => Self::parse_hex(rest, 4),
                 'U' => Self::parse_hex(rest, 8),
-                't' => Ok((u32::from('\t'), rest)),
-                'r' => Ok((u32::from('\r'), rest)),
-                'n' => Ok((u32::from('\n'), rest)),
-                '\\' => Ok((u32::from('\\'), rest)),
-                '"' => Ok((u32::from('"'), rest)),
-                '[' => Ok((u32::from('['), rest)),
-                ']' => Ok((u32::from(']'), rest)),
+                't' => Ok((u32::from('\t') as llama_gretype, rest)),
+                'r' => Ok((u32::from('\r') as llama_gretype, rest)),
+                'n' => Ok((u32::from('\n') as llama_gretype, rest)),
+                '\\' => Ok((u32::from('\\') as llama_gretype, rest)),
+                '"' => Ok((u32::from('"') as llama_gretype, rest)),
+                '[' => Ok((u32::from('[') as llama_gretype, rest)),
+                ']' => Ok((u32::from(']') as llama_gretype, rest)),
                 c => Err(GrammarParseError::UnknownEscape { escape: c }),
             }
         } else if let Some(c) = rest.chars().next() {
-            Ok((u32::from(c), &rest[c.len_utf8()..]))
+            Ok((u32::from(c) as llama_gretype, &rest[c.len_utf8()..]))
        } else {
             Err(GrammarParseError::UnexpectedEndOfInput {
                 parse_stage: "char",
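
Why the casts: the new Windows job builds these files with MSVC, where bindgen appears to give C enums such as `llama_gretype` a signed underlying type instead of the `u32` seen on gcc/clang targets. Writing `value: c as _` and `u32::from(c) as llama_gretype` lets the same source coerce to whichever integer type the generated bindings declare. A minimal, self-contained sketch of the pattern, with an illustrative stand-in alias rather than the real binding:

#[allow(non_camel_case_types)]
type llama_gretype = i32; // stand-in: signed, as MSVC targets appear to get

/// Turn a parsed character into a grammar-element value.
fn char_value(c: char) -> llama_gretype {
    // `as _` lets the compiler infer the cast target from the return type,
    // so this line compiles whether `llama_gretype` is `u32` or `i32`.
    u32::from(c) as _
}

fn main() {
    assert_eq!(char_value('\t'), 9);
}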

llama-cpp-2/src/llama_backend.rs

Lines changed: 91 additions & 4 deletions
@@ -43,29 +43,86 @@ impl LlamaBackend {
     #[tracing::instrument(skip_all)]
     pub fn init() -> crate::Result<LlamaBackend> {
         Self::mark_init()?;
-        unsafe { llama_cpp_sys_2::llama_backend_init(false) }
+        unsafe { llama_cpp_sys_2::llama_backend_init() }
         Ok(LlamaBackend {})
     }
 
     /// Initialize the llama backend (with numa).
     /// ```
     ///# use llama_cpp_2::llama_backend::LlamaBackend;
     ///# use std::error::Error;
+    ///# use llama_cpp_2::llama_backend::NumaStrategy;
     ///
     ///# fn main() -> Result<(), Box<dyn Error>> {
-    /// let llama_backend = LlamaBackend::init_numa()?;
+    ///
+    /// let llama_backend = LlamaBackend::init_numa(NumaStrategy::MIRROR)?;
     ///
     ///# Ok(())
     ///# }
     /// ```
     #[tracing::instrument(skip_all)]
-    pub fn init_numa() -> crate::Result<LlamaBackend> {
+    pub fn init_numa(strategy: NumaStrategy) -> crate::Result<LlamaBackend> {
         Self::mark_init()?;
-        unsafe { llama_cpp_sys_2::llama_backend_init(true) }
+        unsafe {
+            llama_cpp_sys_2::llama_numa_init(llama_cpp_sys_2::ggml_numa_strategy::from(strategy))
+        }
         Ok(LlamaBackend {})
     }
 }
 
+/// A rusty wrapper around `numa_strategy`.
+#[derive(Debug, Eq, PartialEq, Copy, Clone)]
+pub enum NumaStrategy {
+    /// The numa strategy is disabled.
+    DISABLED,
+    /// help wanted: what does this do?
+    DISTRIBUTE,
+    /// help wanted: what does this do?
+    ISOLATE,
+    /// help wanted: what does this do?
+    NUMACTL,
+    /// help wanted: what does this do?
+    MIRROR,
+    /// help wanted: what does this do?
+    COUNT,
+}
+
+/// An invalid numa strategy was provided.
+#[derive(Debug, Eq, PartialEq, Copy, Clone)]
+pub struct InvalidNumaStrategy(
+    /// The invalid numa strategy that was provided.
+    pub llama_cpp_sys_2::ggml_numa_strategy,
+);
+
+impl TryFrom<llama_cpp_sys_2::ggml_numa_strategy> for NumaStrategy {
+    type Error = InvalidNumaStrategy;
+
+    fn try_from(value: llama_cpp_sys_2::ggml_numa_strategy) -> Result<Self, Self::Error> {
+        match value {
+            llama_cpp_sys_2::GGML_NUMA_STRATEGY_DISABLED => Ok(Self::DISABLED),
+            llama_cpp_sys_2::GGML_NUMA_STRATEGY_DISTRIBUTE => Ok(Self::DISTRIBUTE),
+            llama_cpp_sys_2::GGML_NUMA_STRATEGY_ISOLATE => Ok(Self::ISOLATE),
+            llama_cpp_sys_2::GGML_NUMA_STRATEGY_NUMACTL => Ok(Self::NUMACTL),
+            llama_cpp_sys_2::GGML_NUMA_STRATEGY_MIRROR => Ok(Self::MIRROR),
+            llama_cpp_sys_2::GGML_NUMA_STRATEGY_COUNT => Ok(Self::COUNT),
+            value => Err(InvalidNumaStrategy(value)),
+        }
+    }
+}
+
+impl From<NumaStrategy> for llama_cpp_sys_2::ggml_numa_strategy {
+    fn from(value: NumaStrategy) -> Self {
+        match value {
+            NumaStrategy::DISABLED => llama_cpp_sys_2::GGML_NUMA_STRATEGY_DISABLED,
+            NumaStrategy::DISTRIBUTE => llama_cpp_sys_2::GGML_NUMA_STRATEGY_DISTRIBUTE,
+            NumaStrategy::ISOLATE => llama_cpp_sys_2::GGML_NUMA_STRATEGY_ISOLATE,
+            NumaStrategy::NUMACTL => llama_cpp_sys_2::GGML_NUMA_STRATEGY_NUMACTL,
+            NumaStrategy::MIRROR => llama_cpp_sys_2::GGML_NUMA_STRATEGY_MIRROR,
+            NumaStrategy::COUNT => llama_cpp_sys_2::GGML_NUMA_STRATEGY_COUNT,
+        }
+    }
+}
+
 /// Drops the llama backend.
 /// ```
 ///
@@ -92,3 +149,33 @@ impl Drop for LlamaBackend {
         unsafe { llama_cpp_sys_2::llama_backend_free() }
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn numa_from_and_to() {
+        let numas = [
+            NumaStrategy::DISABLED,
+            NumaStrategy::DISTRIBUTE,
+            NumaStrategy::ISOLATE,
+            NumaStrategy::NUMACTL,
+            NumaStrategy::MIRROR,
+            NumaStrategy::COUNT,
+        ];
+
+        for numa in &numas {
+            let from = llama_cpp_sys_2::ggml_numa_strategy::from(*numa);
+            let to = NumaStrategy::try_from(from).expect("Failed to convert from and to");
+            assert_eq!(*numa, to);
+        }
+    }
+
+    #[test]
+    fn check_invalid_numa() {
+        let invalid = 800;
+        let invalid = NumaStrategy::try_from(invalid);
+        assert_eq!(invalid, Err(InvalidNumaStrategy(invalid.unwrap_err().0)));
+    }
+}
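
Context: upstream llama.cpp replaced `llama_backend_init(bool numa)` with a zero-argument `llama_backend_init()` plus a separate `llama_numa_init(ggml_numa_strategy)`, and the wrapper mirrors that split by making `init_numa` take a `NumaStrategy`. A migration sketch for downstream callers, assuming nothing beyond the API in this diff (the `use_numa` flag and the choice of `DISTRIBUTE` are illustrative):

use llama_cpp_2::llama_backend::{LlamaBackend, NumaStrategy};

fn init_backend(use_numa: bool) -> Result<LlamaBackend, Box<dyn std::error::Error>> {
    // Pre-0.1.28 callers wrote `LlamaBackend::init_numa()?`; the strategy
    // is now explicit instead of an implied `numa = true`.
    let backend = if use_numa {
        LlamaBackend::init_numa(NumaStrategy::DISTRIBUTE)?
    } else {
        LlamaBackend::init()?
    };
    Ok(backend)
}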

llama-cpp-2/src/model.rs

Lines changed: 3 additions & 4 deletions
@@ -210,7 +210,7 @@ impl LlamaModel {
         }
 
         match self.token_type(token) {
-            LlamaTokenType::Normal => {}
+            LlamaTokenType::Normal | LlamaTokenType::UserDefined => {}
             LlamaTokenType::Control => {
                 if token == self.token_bos() || token == self.token_eos() {
                     return Ok(String::new());
@@ -219,7 +219,6 @@ impl LlamaModel {
             LlamaTokenType::Unknown
             | LlamaTokenType::Undefined
             | LlamaTokenType::Byte
-            | LlamaTokenType::UserDefined
             | LlamaTokenType::Unused => {
                 return Ok(String::new());
             }
@@ -332,9 +331,9 @@ impl Drop for LlamaModel {
 #[derive(Debug, Eq, Copy, Clone, PartialEq)]
 pub enum VocabType {
     /// Byte Pair Encoding
-    BPE = llama_cpp_sys_2::LLAMA_VOCAB_TYPE_BPE,
+    BPE = llama_cpp_sys_2::LLAMA_VOCAB_TYPE_BPE as _,
     /// Sentence Piece Tokenizer
-    SPM = llama_cpp_sys_2::LLAMA_VOCAB_TYPE_SPM,
+    SPM = llama_cpp_sys_2::LLAMA_VOCAB_TYPE_SPM as _,
 }
 
 /// There was an error converting a `llama_vocab_type` to a `VocabType`.
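
The user-visible change: tokens typed `UserDefined` (typically special tokens added to a GGUF vocabulary) now decode to their text instead of the empty string. A hedged caller-side sketch, assuming the match above lives in the crate's `token_to_str` (it is the method returning `Ok(String::new())`):

use llama_cpp_2::model::LlamaModel;
use llama_cpp_2::token::LlamaToken;

/// Render one token, treating conversion failures as empty output.
fn render(model: &LlamaModel, token: LlamaToken) -> String {
    // Before this commit a UserDefined token hit the `Ok(String::new())`
    // branch and vanished; now it decodes like a Normal token.
    model.token_to_str(token).unwrap_or_default()
}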

llama-cpp-2/src/token_type.rs

Lines changed: 8 additions & 8 deletions
@@ -6,19 +6,19 @@
 #[allow(clippy::module_name_repetitions)]
 pub enum LlamaTokenType {
     /// An undefined token type.
-    Undefined = llama_cpp_sys_2::LLAMA_TOKEN_TYPE_UNDEFINED,
+    Undefined = llama_cpp_sys_2::LLAMA_TOKEN_TYPE_UNDEFINED as _,
     /// A normal token type.
-    Normal = llama_cpp_sys_2::LLAMA_TOKEN_TYPE_NORMAL,
+    Normal = llama_cpp_sys_2::LLAMA_TOKEN_TYPE_NORMAL as _,
     /// An unknown token type.
-    Unknown = llama_cpp_sys_2::LLAMA_TOKEN_TYPE_UNKNOWN,
+    Unknown = llama_cpp_sys_2::LLAMA_TOKEN_TYPE_UNKNOWN as _,
     /// A control token type.
-    Control = llama_cpp_sys_2::LLAMA_TOKEN_TYPE_CONTROL,
+    Control = llama_cpp_sys_2::LLAMA_TOKEN_TYPE_CONTROL as _,
     /// A user defined token type.
-    UserDefined = llama_cpp_sys_2::LLAMA_TOKEN_TYPE_USER_DEFINED,
+    UserDefined = llama_cpp_sys_2::LLAMA_TOKEN_TYPE_USER_DEFINED as _,
     /// An unused token type.
-    Unused = llama_cpp_sys_2::LLAMA_TOKEN_TYPE_UNUSED,
+    Unused = llama_cpp_sys_2::LLAMA_TOKEN_TYPE_UNUSED as _,
     /// A byte token type.
-    Byte = llama_cpp_sys_2::LLAMA_TOKEN_TYPE_BYTE,
+    Byte = llama_cpp_sys_2::LLAMA_TOKEN_TYPE_BYTE as _,
 }
 
 /// A safe wrapper for converting potentially deceptive `llama_token_type` values into
@@ -52,7 +52,7 @@ impl TryFrom<llama_cpp_sys_2::llama_token_type> for LlamaTokenType {
             llama_cpp_sys_2::LLAMA_TOKEN_TYPE_USER_DEFINED => Ok(LlamaTokenType::UserDefined),
             llama_cpp_sys_2::LLAMA_TOKEN_TYPE_UNUSED => Ok(LlamaTokenType::Unused),
             llama_cpp_sys_2::LLAMA_TOKEN_TYPE_BYTE => Ok(LlamaTokenType::Byte),
-            _ => Err(LlamaTokenTypeFromIntError::UnknownValue(value)),
+            _ => Err(LlamaTokenTypeFromIntError::UnknownValue(value as _)),
         }
     }
 }
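
Call sites are unchanged: `try_from` still rejects values llama.cpp does not define rather than transmuting them into a bogus variant. A small usage sketch (the direct `llama_cpp_sys_2` dependency and the `Debug` formatting are assumptions):

use llama_cpp_2::token_type::LlamaTokenType;

fn classify(raw: llama_cpp_sys_2::llama_token_type) {
    match LlamaTokenType::try_from(raw) {
        Ok(LlamaTokenType::Control) => println!("control token"),
        Ok(other) => println!("token type: {other:?}"),
        Err(err) => eprintln!("unrecognised token type: {err:?}"),
    }
}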

llama-cpp-sys-2/Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 [package]
 name = "llama-cpp-sys-2"
 description = "Low Level Bindings to llama.cpp"
-version = "0.1.25"
+version = "0.1.28"
 edition = "2021"
 license = "MIT OR Apache-2.0"
 repository = "https://github.com/utilityai/llama-cpp-rs"

llama-cpp-sys-2/build.rs

Lines changed: 14 additions & 1 deletion
@@ -24,6 +24,11 @@ fn main() {
         ] {
             println!("cargo:rustc-link-lib={}", lib);
         }
+        if !ggml_cuda.get_compiler().is_like_msvc() {
+            for lib in ["culibos", "pthread", "dl", "rt"] {
+                println!("cargo:rustc-link-lib={}", lib);
+            }
+        }
 
         println!("cargo:rustc-link-search=native=/usr/local/cuda/lib64");
 
@@ -37,11 +42,19 @@ fn main() {
             .flag_if_supported("-mno-unaligned-access");
     }
 
-    ggml.cuda(true)
+
+    ggml
+        .cuda(true)
         .std("c++17")
         .flag("-arch=all")
         .file("llama.cpp/ggml-cuda.cu");
 
+    if ggml_cuda.get_compiler().is_like_msvc() {
+        ggml_cuda.std("c++14");
+    } else {
+        ggml_cuda.std("c++17");
+    }
+
     ggml.define("GGML_USE_CUBLAS", None);
     ggml.define("GGML_USE_CUBLAS", None);
     llama_cpp.define("GGML_USE_CUBLAS", None);
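
Both hunks branch on the detected host compiler: the CUDA support libraries (`culibos`, `pthread`, `dl`, `rt`) are only linkable on non-MSVC toolchains, and on Windows nvcc drives MSVC as the host compiler, which this commit caps at C++14 rather than C++17. A minimal build-script sketch of the detection pattern; `cc::Build::get_compiler` and `Tool::is_like_msvc` are the real cc-crate calls used above, everything else is illustrative:

// build.rs sketch: choose linker directives by compiler family.
fn main() {
    let ggml_cuda = cc::Build::new();
    if ggml_cuda.get_compiler().is_like_msvc() {
        // MSVC host: no POSIX support libraries to link, and the nvcc
        // host toolchain here tops out at C++14.
    } else {
        for lib in ["culibos", "pthread", "dl", "rt"] {
            println!("cargo:rustc-link-lib={lib}");
        }
    }
}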

llama-cpp-sys-2/llama.cpp (submodule)
