Commit 8ea1482

Merge pull request #400 from utilityai/update-llama-cpp-2024-07-09
Update llama cpp [MAC TESTING WANTED]
2 parents 09088ec + 1788762 commit 8ea1482

3 files changed: 52 additions & 49 deletions
llama-cpp-2/src/model.rs

Lines changed: 6 additions & 3 deletions
@@ -1,5 +1,6 @@
 //! A safe wrapper around `llama_model`.
 use std::ffi::CString;
+use std::num::NonZeroU16;
 use std::os::raw::c_int;
 use std::path::Path;
 use std::ptr::NonNull;
@@ -131,7 +132,7 @@ impl LlamaModel {
         token: LlamaToken,
         special: Special,
     ) -> Result<Vec<u8>, TokenToStringError> {
-        self.token_to_bytes_with_size(token, 32, special)
+        self.token_to_bytes_with_size(token, 32, special, None)
     }

     /// Convert a vector of tokens to a single string.
@@ -264,7 +265,7 @@ impl LlamaModel {
         buffer_size: usize,
         special: Special,
     ) -> Result<String, TokenToStringError> {
-        let bytes = self.token_to_bytes_with_size(token, buffer_size, special)?;
+        let bytes = self.token_to_bytes_with_size(token, buffer_size, special, None)?;
         Ok(String::from_utf8(bytes)?)
     }

@@ -287,6 +288,7 @@ impl LlamaModel {
         token: LlamaToken,
         buffer_size: usize,
         special: Special,
+        lstrip: Option<NonZeroU16>
     ) -> Result<Vec<u8>, TokenToStringError> {
         if token == self.token_nl() {
             return Ok(String::from("\n").into_bytes());
@@ -314,8 +316,9 @@ impl LlamaModel {
         let len = string.as_bytes().len();
         let len = c_int::try_from(len).expect("length fits into c_int");
         let buf = string.into_raw();
+        let lstrip = lstrip.map(|it| i32::from(it.get())).unwrap_or(0);
         let size = unsafe {
-            llama_cpp_sys_2::llama_token_to_piece(self.model.as_ptr(), token.0, buf, len, special)
+            llama_cpp_sys_2::llama_token_to_piece(self.model.as_ptr(), token.0, buf, len, lstrip, special)
         };

         match size {
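
For API consumers, the visible change is the new `lstrip` argument on `token_to_bytes_with_size`; the existing `token_to_bytes` and `token_to_str_with_size` wrappers pass `None` and keep their previous behavior, while `Some(n)` is forwarded to `llama_token_to_piece`, which in upstream llama.cpp skips up to `n` leading spaces of the decoded piece. A minimal call-site sketch, assuming a loaded `model: LlamaModel` plus `token: LlamaToken` and `special: Special` values already in scope (the variable names are illustrative, not part of this diff):

    use std::num::NonZeroU16;

    // Previous behavior: no leading-space stripping.
    let bytes = model.token_to_bytes_with_size(token, 32, special, None)?;

    // Forward lstrip = 8, asking llama_token_to_piece to skip up to
    // 8 leading spaces of the decoded piece.
    let trimmed = model.token_to_bytes_with_size(token, 32, special, NonZeroU16::new(8))?;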

llama-cpp-sys-2/build.rs

Lines changed: 45 additions & 45 deletions
@@ -86,11 +86,24 @@ compile_error!("feature \"vulkan\" cannot be enabled alongside other GPU based f

 static LLAMA_PATH: Lazy<PathBuf> = Lazy::new(|| PathBuf::from("./llama.cpp"));

-fn compile_bindings(out_path: &Path, llama_header_path: &Path) {
+fn compile_bindings(
+    out_path: &Path,
+    llama_header_path: &Path,
+) -> Result<(), Box<dyn std::error::Error + 'static>> {
     println!("Generating bindings..");
+
+    let includes = [
+        llama_header_path.join("ggml").join("include"),
+    ];
+
     let bindings = bindgen::Builder::default()
-        // .header(llama_header_path.join("ggml.h").to_string_lossy())
-        .header(llama_header_path.join("llama.h").to_string_lossy())
+        .clang_args(includes.map(|path| format!("-I{}", path.to_string_lossy())))
+        .header(
+            llama_header_path
+                .join("include")
+                .join("llama.h")
+                .to_string_lossy(),
+        )
         .derive_partialeq(true)
         .allowlist_function("ggml_.*")
         .allowlist_type("ggml_.*")
@@ -106,11 +119,11 @@ fn compile_bindings(out_path: &Path, llama_header_path: &Path) {
         bindings = bindings.parse_callbacks(Box::new(GGMLLinkRename {}));
     }

-    let bindings = bindings.generate().expect("Unable to generate bindings");
+    let bindings = bindings.generate()?;

-    bindings
-        .write_to_file(out_path.join("bindings.rs"))
-        .expect("Couldn't write bindings!");
+    bindings.write_to_file(out_path.join("bindings.rs"))?;
+
+    Ok(())
 }

 #[cfg(all(
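
Because `compile_bindings` is now fallible, bindgen errors can be propagated rather than panicking inside the helper; in this PR the callers in `main()` simply attach `.expect("failed to generate bindings")`, as the hunks further down show. A hedged sketch of the propagating alternative, assuming a build-script `main` that itself returns a `Result`:

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        // OUT_DIR is provided by Cargo for build scripts.
        let out_path = std::path::PathBuf::from(std::env::var("OUT_DIR")?);
        compile_bindings(&out_path, &LLAMA_PATH)?;
        Ok(())
    }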
@@ -324,26 +337,6 @@ fn push_feature_flags(cx: &mut Build, cxx: &mut Build) {
     }
 }

-fn compile_opencl(cx: &mut Build, cxx: &mut Build) {
-    println!("Compiling OpenCL GGML..");
-
-    // TODO
-    println!("cargo:warning=OpenCL compilation and execution has not been properly tested yet");
-
-    cx.define("GGML_USE_CLBLAST", None);
-    cxx.define("GGML_USE_CLBLAST", None);
-
-    if cfg!(target_os = "linux") {
-        println!("cargo:rustc-link-lib=OpenCL");
-        println!("cargo:rustc-link-lib=clblast");
-    } else if cfg!(target_os = "macos") {
-        println!("cargo:rustc-link-lib=framework=OpenCL");
-        println!("cargo:rustc-link-lib=clblast");
-    }
-
-    cxx.file(LLAMA_PATH.join("ggml-opencl.cpp"));
-}
-
 fn compile_openblas(cx: &mut Build) {
     println!("Compiling OpenBLAS GGML..");

@@ -462,7 +455,9 @@ fn compile_cuda(cx: &mut Build, cxx: &mut Build, featless_cxx: Build) -> &'stati
     }

     let lib_name = "ggml-cuda";
-    let cuda_path = LLAMA_PATH.join("ggml-cuda");
+    let ggml_path = LLAMA_PATH.join("ggml");
+    let ggml_src = ggml_path.join("src");
+    let cuda_path = ggml_src.join("ggml-cuda");
     let cuda_sources = read_dir(cuda_path.as_path())
         .unwrap()
         .map(|f| f.unwrap())
@@ -476,10 +471,11 @@
         .map(|entry| entry.path());

     nvcc.include(cuda_path.as_path())
-        .include(LLAMA_PATH.as_path())
+        .include(ggml_src.as_path())
+        .include(ggml_path.join("include").as_path())
         .files(cuda_sources)
         .files(template_instances)
-        .file(LLAMA_PATH.join("ggml-cuda.cu"))
+        .file(ggml_src.join("ggml-cuda.cu"))
         .compile(lib_name);

     lib_name
@@ -655,22 +651,28 @@ fn compile_vulkan(cx: &mut Build, cxx: &mut Build) -> &'static str {

 fn compile_ggml(mut cx: Build) {
     println!("Compiling GGML..");
+    let ggml_src = LLAMA_PATH.join("ggml").join("src");
+    let ggml_include = LLAMA_PATH.join("ggml").join("include");
     cx.std("c11")
-        .include(LLAMA_PATH.as_path())
-        .file(LLAMA_PATH.join("ggml.c"))
-        .file(LLAMA_PATH.join("ggml-alloc.c"))
-        .file(LLAMA_PATH.join("ggml-backend.c"))
-        .file(LLAMA_PATH.join("ggml-quants.c"))
+        .include(ggml_include)
+        .file(ggml_src.join("ggml.c"))
+        .file(ggml_src.join("ggml-alloc.c"))
+        .file(ggml_src.join("ggml-backend.c"))
+        .file(ggml_src.join("ggml-quants.c"))
         .compile("ggml");
 }

 fn compile_llama(mut cxx: Build, _out_path: impl AsRef<Path>) {
     println!("Compiling Llama.cpp..");
+    let llama_cpp_src = LLAMA_PATH.join("src");
+    let llama_include = LLAMA_PATH.join("include");
+    let ggml_include = LLAMA_PATH.join("ggml").join("include");
     cxx.std("c++11")
-        .include(LLAMA_PATH.as_path())
-        .file(LLAMA_PATH.join("unicode.cpp"))
-        .file(LLAMA_PATH.join("unicode-data.cpp"))
-        .file(LLAMA_PATH.join("llama.cpp"))
+        .include(llama_include)
+        .include(ggml_include)
+        .file(llama_cpp_src.join("unicode.cpp"))
+        .file(llama_cpp_src.join("unicode-data.cpp"))
+        .file(llama_cpp_src.join("llama.cpp"))
         .compile("llama");
 }

@@ -683,9 +685,10 @@ fn main() {

     let llama_header_path = std::env::var("LLAMA_HEADE");
     if let Ok(llama_header_path) = llama_header_path {
-        compile_bindings(&out_path, Path::new(&llama_header_path));
+        compile_bindings(&out_path, Path::new(&llama_header_path))
+            .expect("failed to generate bindings");
     } else {
-        compile_bindings(&out_path, &LLAMA_PATH);
+        compile_bindings(&out_path, &LLAMA_PATH).expect("failed to generate bindings");
     }

     if let Ok(llama_lib_path) = std::env::var("LLAMA_LIB") {
@@ -703,7 +706,7 @@

     println!("cargo:rerun-if-changed={}", LLAMA_PATH.display());

-    compile_bindings(&out_path, &LLAMA_PATH);
+    compile_bindings(&out_path, &LLAMA_PATH).expect("failed to generate bindings");

     let mut cx = Build::new();
     let mut cxx = Build::new();
@@ -720,9 +723,6 @@
         Some(compile_vulkan(&mut cx, &mut cxx))
     } else if cfg!(feature = "cuda") {
         Some(compile_cuda(&mut cx, &mut cxx, featless_cxx))
-    } else if cfg!(feature = "opencl") {
-        compile_opencl(&mut cx, &mut cxx);
-        None
     } else if cfg!(feature = "openblas") {
         compile_openblas(&mut cx);
         None
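
The path changes above track the llama.cpp repository reorganization: C and C++ sources now live under `src/` and `ggml/src/`, and the public headers under `include/` and `ggml/include/`. A hedged sketch of an early sanity check a build script could run against that layout (the `assert_llama_layout` helper is illustrative and not part of this PR):

    use std::path::Path;

    /// Illustrative helper: fail early with a readable message if the
    /// vendored llama.cpp checkout lacks the reorganized directory layout.
    fn assert_llama_layout(llama_path: &Path) {
        let expected = [
            "include/llama.h",
            "ggml/include/ggml.h",
            "src/llama.cpp",
            "ggml/src/ggml.c",
        ];
        for rel in expected {
            let path = llama_path.join(rel);
            assert!(
                path.exists(),
                "missing {}; is the llama.cpp submodule checked out and up to date?",
                path.display()
            );
        }
    }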

llama-cpp-sys-2/llama.cpp

Lines changed: 1 addition & 1 deletion (submodule pointer update)
