Commit 8a73403 (parent: 242e5a7)

Commit message: failing to link dur to "mach-o"
4 files changed (+125 −10 lines)


llama-cpp-2/Cargo.toml

Lines changed: 4 additions & 0 deletions
@@ -26,6 +26,10 @@ anyhow = "1.0.79"
 name = "grammar_bias"
 harness = false
 
+[[bench]]
+name = "generate"
+harness = false
+
 [features]
 cublas = ["llama-cpp-sys-2/cublas"]
 
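
Note: the new [[bench]] table registers benches/generate.rs as a second Criterion benchmark target, and harness = false disables the default libtest bench harness so Criterion's own criterion_main! entry point drives the run, the same setup the existing grammar_bias bench uses. Assuming the standard Criterion workflow, the new target should be runnable on its own with "cargo bench --bench generate".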

llama-cpp-2/benches/generate.rs

Lines changed: 61 additions & 0 deletions
@@ -0,0 +1,61 @@
+use anyhow::Context;
+use criterion::{Criterion, criterion_group, criterion_main};
+use pprof::criterion::{Output, PProfProfiler};
+use llama_cpp_2::context::params::LlamaContextParams;
+use llama_cpp_2::llama_backend::LlamaBackend;
+use llama_cpp_2::llama_batch::LlamaBatch;
+use llama_cpp_2::model::{AddBos, LlamaModel};
+use llama_cpp_2::model::params::LlamaModelParams;
+use llama_cpp_2::token::data_array::LlamaTokenDataArray;
+
+fn generate(c: &mut Criterion) {
+    let api = hf_hub::api::sync::ApiBuilder::new()
+        .with_progress(true)
+        .build()
+        .unwrap();
+    let file = api
+        .model("TheBloke/Llama-2-7B-Chat-GGUF".to_string())
+        .get("llama-2-7b-chat.Q4_K_M.gguf")
+        .unwrap();
+    let backend = LlamaBackend::init().unwrap();
+    let model_params = LlamaModelParams::default();
+    let model = LlamaModel::load_from_file(&backend, &file, &model_params).unwrap();
+    let mut ctx = model
+        .new_context(&backend, LlamaContextParams::default())
+        .unwrap();
+
+    c.bench_function("generate 50 tokens", |b| {
+        b.iter(|| {
+            let tokens_list = model.str_to_token("Hello, my name is", AddBos::Always).unwrap();
+            let mut n_ctx = tokens_list.len() as i32;
+            let mut batch = LlamaBatch::new(512, 1);
+            let last_index: i32 = (tokens_list.len() - 1) as i32;
+            for (i, token) in (0_i32..).zip(tokens_list.into_iter()) {
+                let is_last = i == last_index;
+                batch.add(token, i, &[0], is_last).unwrap();
+            }
+            ctx.decode(&mut batch).unwrap();
+
+            for _ in 0..50 {
+                let candidates = ctx.candidates_ith(batch.n_tokens() - 1);
+                let candidates_p = LlamaTokenDataArray::from_iter(candidates, false);
+                let new_token_id = ctx.sample_token_greedy(candidates_p);
+                if new_token_id == model.token_eos() {
+                    break;
+                }
+                batch.clear();
+                batch.add(new_token_id, n_ctx, &[0], true).unwrap();
+                n_ctx += 1;
+                ctx.decode(&mut batch).unwrap();
+            }
+            ctx.clear_kv_cache_seq(0, None, None)
+        });
+    });
+}
+
+criterion_group!(
+    name = benches;
+    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
+    targets = generate
+);
+criterion_main!(benches);
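
This benchmark does the expensive setup once, outside bench_function: it fetches llama-2-7b-chat.Q4_K_M.gguf through hf_hub (cached after the first download), initializes the backend, and loads the model and a context. Each measured iteration then tokenizes the prompt, decodes it as one batch, and greedily samples up to 50 tokens, feeding each new token back through a single-token batch; clear_kv_cache_seq(0, None, None) wipes sequence 0 at the end of the iteration so every run starts from an empty KV cache. The PProfProfiler in the criterion_group config means flamegraphs can typically be produced by running the bench in Criterion's profiling mode (for example "cargo bench --bench generate -- --profile-time 10"), though the exact invocation and output location depend on the criterion and pprof versions in use.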

llama-cpp-2/benches/grammar_bias.rs

Lines changed: 1 addition & 1 deletion
@@ -32,7 +32,7 @@ fn criterion_benchmark(c: &mut Criterion) {
     let model_params = LlamaModelParams::default();
     let model = LlamaModel::load_from_file(&backend, &file, &model_params).unwrap();
     let mut ctx = model
-        .new_context(&backend, &LlamaContextParams::default())
+        .new_context(&backend, LlamaContextParams::default())
         .unwrap();
     let grammar = LlamaGrammar::from_str(include_str!("../src/grammar/json.gbnf")).unwrap();
 
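
The one-line change here tracks an apparent API change in llama-cpp-2: new_context now takes LlamaContextParams by value rather than by reference (the new generate.rs bench above calls it the same way). A minimal sketch of the adjusted call site, assuming that signature:

    // Sketch only: assumes model.new_context(&backend, LlamaContextParams) -> Result<_, _>
    let ctx_params = LlamaContextParams::default();
    let mut ctx = model
        .new_context(&backend, ctx_params) // params now moved by value, no borrow needed
        .unwrap();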

llama-cpp-sys-2/build.rs

Lines changed: 59 additions & 9 deletions
@@ -13,6 +13,7 @@ fn main() {
 
     let mut ggml = cc::Build::new();
     let mut ggml_cuda = if cublas_enabled { Some(cc::Build::new()) } else { None };
+    let mut ggml_metal = if cfg!(target_os = "macos") { Some(cc::Build::new()) } else { None };
     let mut llama_cpp = cc::Build::new();
 
     ggml.cpp(false);
@@ -60,20 +61,22 @@ fn main() {
         llama_cpp.define("GGML_USE_ACCELERATE", None);
         llama_cpp.define("ACCELERATE_NEW_LAPACK", None);
         llama_cpp.define("ACCELERATE_LAPACK_ILP64", None);
-        println!("cargo:rustc-link-lib=framework=Accelerate");
+        println!("cargo:rustc-link-arg=framework=Accelerate");
 
         // MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
         // https://github.com/ggerganov/llama.cpp/blob/3c0d25c4756742ebf15ad44700fabc0700c638bd/Makefile#L509-L511
-        println!("cargo:rustc-link-lib=framework Foundation");
-        println!("cargo:rustc-link-lib=framework Metal");
-        println!("cargo:rustc-link-lib=framework MetalKit");
-
+        println!("cargo:rustc-link-arg=framework=Foundation");
+        println!("cargo:rustc-link-arg=framework=Metal");
+        println!("cargo:rustc-link-arg=framework=MetalKit");
+    }
 
-        // https://github.com/ggerganov/llama.cpp/blob/3c0d25c4756742ebf15ad44700fabc0700c638bd/Makefile#L517-L520
-        ggml
-            .file("llama.cpp/ggml-metal.m")
-            .file("llama.cpp/ggml-metal.h");
+    if let Some(ggml_metal) = &mut ggml_metal {
+        metal_hack(ggml_metal);
+        ggml_metal
+            .file("llama.cpp/ggml-metal")
+            .include("llama.cpp");
     }
+
     if cfg!(target_os = "dragonfly") {
         llama_cpp.define("__BSD_VISIBLE", None);
     }
@@ -83,6 +86,12 @@ fn main() {
         ggml_cuda.compile("ggml-cuda");
     }
 
+
+    if let Some(ggml_metal) = ggml_metal {
+        println!("compiling ggml-metal");
+        ggml_metal.compile("ggml-metal")
+    }
+
     if cfg!(target_os = "linux") {
         ggml.define("_GNU_SOURCE", None);
     }
@@ -97,6 +106,7 @@ fn main() {
 
     llama_cpp
        .define("_XOPEN_SOURCE", Some("600"))
+        .include("llama.cpp")
        .std("c++17")
        .file("llama.cpp/llama.cpp");
 
@@ -124,3 +134,43 @@ fn main() {
         .write_to_file(out_path.join("bindings.rs"))
         .expect("failed to write bindings to file");
 }
+
+
+// courtesy of https://github.com/rustformers/llm
+fn metal_hack(build: &mut cc::Build) {
+    const GGML_METAL_METAL_PATH: &str = "llama.cpp/ggml-metal.metal";
+    const GGML_METAL_PATH: &str = "llama.cpp/ggml-metal.m";
+
+    let out_dir = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR is not defined"));
+
+    let ggml_metal_path = {
+        let ggml_metal_metal = std::fs::read_to_string(GGML_METAL_METAL_PATH)
+            .expect("Could not read ggml-metal.metal")
+            .replace('\\', "\\\\")
+            .replace('\n', "\\n")
+            .replace('\r', "\\r")
+            .replace('\"', "\\\"");
+
+        let ggml_metal =
+            std::fs::read_to_string(GGML_METAL_PATH).expect("Could not read ggml-metal.m");
+
+        let needle = r#"NSString * src = [NSString stringWithContentsOfFile:sourcePath encoding:NSUTF8StringEncoding error:&error];"#;
+        if !ggml_metal.contains(needle) {
+            panic!("ggml-metal.m does not contain the needle to be replaced; the patching logic needs to be reinvestigated. Contact a `llama-cpp-sys-2` developer!");
+        }
+
+        // Replace the runtime read of the file with a compile-time string
+        let ggml_metal = ggml_metal.replace(
+            needle,
+            &format!(r#"NSString * src = @"{ggml_metal_metal}";"#),
+        );
+
+        let patched_ggml_metal_path = out_dir.join("ggml-metal.m");
+        std::fs::write(&patched_ggml_metal_path, ggml_metal)
+            .expect("Could not write temporary patched ggml-metal.m");
+
+        patched_ggml_metal_path
+    };
+
+    build.file(ggml_metal_path);
+}
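
Taken together, the build.rs changes do three things. First, a separate cc::Build for the Metal backend is created only when targeting macOS and compiled into its own "ggml-metal" object at the end. Second, metal_hack (credited to rustformers/llm) embeds the contents of ggml-metal.metal directly into ggml-metal.m: it escapes the shader source, swaps the runtime stringWithContentsOfFile: read for a compile-time @"..." string literal, writes the patched file to OUT_DIR, and hands that patched copy to the build, so the shader no longer has to be located on disk at run time. Third, the Apple framework directives are switched from cargo:rustc-link-lib lines (the Foundation/Metal/MetalKit ones previously used a space, "framework Foundation", rather than the documented "framework=Foundation" form) to raw cargo:rustc-link-arg arguments; as the commit message says, linking against the mach-o frameworks is still failing at this point, so these flags are a work in progress.

A minimal sketch of the Option<cc::Build> gating pattern used for the Metal object (file and library names here are illustrative, not the crate's actual paths):

    // build.rs sketch: construct the extra cc::Build only on macOS,
    // add sources to it only if it exists, and compile it last.
    let mut metal_build = if cfg!(target_os = "macos") {
        Some(cc::Build::new())
    } else {
        None
    };

    if let Some(build) = &mut metal_build {
        build
            .file("vendor/backend.m")   // Objective-C source to compile
            .include("vendor/include"); // extra include path
    }

    if let Some(build) = metal_build {
        build.compile("metal-backend"); // emits libmetal-backend.a plus link directives
    }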
