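Fix CUDA build: compile the ggml-cuda/*.cu sources, add the unicode-data files, and define GGML_USE_CUDA instead of GGML_USE_CUBLAS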

MarcusDunn · MarcusDunn · commit 0ef6c8c2b077 · 2024-04-21T12:28:36.000-07:00
diff --git a/llama-cpp-2/src/context/params.rs b/llama-cpp-2/src/context/params.rs
@@ -2,8 +2,6 @@
 use std::fmt::Debug;
 use std::num::NonZeroU32;
 
-use llama_cpp_sys_2;
-
 /// A rusty wrapper around `rope_scaling_type`.
 #[repr(i8)]
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
diff --git a/llama-cpp-sys-2/Cargo.toml b/llama-cpp-sys-2/Cargo.toml
@@ -33,6 +33,9 @@ include = [
     "/llama.cpp/llama.h",
     "/llama.cpp/unicode.h",
     "/llama.cpp/unicode.cpp",
+    "/llama.cpp/unicode-data.h",
+    "/llama.cpp/unicode-data.h",
+    "/llama.cpp/unicode-data.cpp",
     "/llama.cpp/ggml-common.h"
 ]
 
diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs
@@ -1,4 +1,5 @@
 use std::env;
+use std::ffi::OsStr;
 use std::path::Path;
 use std::path::PathBuf;
 
@@ -67,6 +68,12 @@ fn main() {
             .cuda(true)
             .flag("-arch=all")
             .file("llama.cpp/ggml-cuda.cu")
+            .files(std::fs::read_dir("llama.cpp/ggml-cuda")
+                .expect("failed to read 'llama.cpp/ggml-cuda'")
+                .map(|e| e.expect("failed to ready entry").path())
+                .filter(|p| p.extension().is_some_and(|it| it == OsStr::new("cu")))
+            )
+            .include("llama.cpp/ggml-cuda")
             .include("llama.cpp");
 
         if ggml_cuda.get_compiler().is_like_msvc() {
@@ -75,9 +82,9 @@ fn main() {
             ggml_cuda.flag("-std=c++11").std("c++11");
         }
 
-        ggml.define("GGML_USE_CUBLAS", None);
-        ggml_cuda.define("GGML_USE_CUBLAS", None);
-        llama_cpp.define("GGML_USE_CUBLAS", None);
+        ggml.define("GGML_USE_CUDA", None);
+        ggml_cuda.define("GGML_USE_CUDA", None);
+        llama_cpp.define("GGML_USE_CUDA", None);
     }
 
     for build in [&mut ggml, &mut llama_cpp] {
@@ -177,7 +184,8 @@ fn main() {
         .include("llama.cpp")
         .std("c++11")
         .file("llama.cpp/llama.cpp")
-        .file("llama.cpp/unicode.cpp");
+        .file("llama.cpp/unicode.cpp")
+        .file("llama.cpp/unicode-data.cpp");
 
     // Remove debug log output from `llama.cpp`
     let is_release = env::var("PROFILE").unwrap() == "release";
@@ -193,18 +201,18 @@ fn main() {
     }
 
     if let Some(ggml_cuda) = ggml_cuda {
-        println!("compiling ggml-cuda");
+        eprintln!("compiling ggml-cuda");
         ggml_cuda.compile("ggml-cuda");
-        println!("compiled ggml-cuda");
+        eprintln!("compiled ggml-cuda");
     }
 
-    println!("compiling ggml");
+    eprintln!("compiling ggml");
     ggml.compile("ggml");
-    println!("compiled ggml");
+    eprintln!("compiled ggml");
 
-    println!("compiling llama");
+    eprintln!("compiling llama");
     llama_cpp.compile("llama");
-    println!("compiled llama");
+    eprintln!("compiled llama");
 
     let header = "llama.cpp/llama.h";
 

Original file line number	Diff line number	Diff line change
`@@ -33,6 +33,9 @@ include = [`
`33`	`33`	`"/llama.cpp/llama.h",`
`34`	`34`	`"/llama.cpp/unicode.h",`
`35`	`35`	`"/llama.cpp/unicode.cpp",`
	`36`	`+ "/llama.cpp/unicode-data.h",`
	`37`	`+ "/llama.cpp/unicode-data.h",`
	`38`	`+ "/llama.cpp/unicode-data.cpp",`
`36`	`39`	`"/llama.cpp/ggml-common.h"`
`37`	`40`	`]`
`38`	`41`