Commit b170442

Merge pull request #246 from tinglou/main

New feature `metal` to turn the Metal framework on or off on macOS.

2 parents 12f895e + 6ae0e01
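With this change, Metal is opt-in instead of always-on for macOS builds: the new `metal` Cargo feature is forwarded from the example crate through `llama-cpp-2` down to `llama-cpp-sys-2` (for example, `cargo build --features metal` when building these crates directly), and it is force-enabled for Apple Silicon macOS through the target-specific dependency added below.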

File tree (4 files changed: +35 -6 lines)

llama-cpp-2/Cargo.toml
llama-cpp-sys-2/Cargo.toml
llama-cpp-sys-2/build.rs
simple/Cargo.toml

llama-cpp-2/Cargo.toml
Lines changed: 4 additions & 0 deletions

@@ -15,8 +15,12 @@ tracing = { workspace = true }
 
 [features]
 cublas = ["llama-cpp-sys-2/cublas"]
+metal = ["llama-cpp-sys-2/metal"]
 sampler = []
 
+[target.'cfg(all(target_os = "macos", any(target_arch = "aarch64", target_arch = "arm64")))'.dependencies]
+llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features=["metal"], version = "0.1.48" }
+
 [lints]
 workspace = true

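Because `metal` here simply forwards to `llama-cpp-sys-2/metal`, a downstream crate follows the same pattern as the `simple` example further down: declare its own `metal` feature that forwards to `llama-cpp-2/metal`, then gate any backend-specific code on it. A minimal sketch (the `backend_label` helper is hypothetical, not part of this change):

// Hypothetical downstream code, assuming the crate declares
// `metal = ["llama-cpp-2/metal"]` in its own [features] table.
#[cfg(feature = "metal")]
fn backend_label() -> &'static str {
    "metal" // GPU path: the sys crate was compiled with GGML_USE_METAL
}

#[cfg(not(feature = "metal"))]
fn backend_label() -> &'static str {
    "cpu" // default path: the Metal frameworks are neither compiled in nor linked
}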
llama-cpp-sys-2/Cargo.toml
Lines changed: 1 addition & 0 deletions

@@ -46,4 +46,5 @@ cc = { workspace = true, features = ["parallel"] }
 
 [features]
 cublas = []
+metal = []
llama-cpp-sys-2/build.rs
Lines changed: 29 additions & 6 deletions

@@ -23,6 +23,18 @@ fn main() {
     ggml.cpp(false);
     llama_cpp.cpp(true);
 
+    // CMakeFiles.txt: set(LLAMA_SCHED_MAX_COPIES "4" CACHE STRING "llama: max input copies for pipeline parallelism")
+    // get LLAMA_SCHED_MAX_COPIES from env, default to 4
+    let mut max_copies = "4".to_owned();
+    if let Ok(env_max_copies) = env::var("LLAMA_SCHED_MAX_COPIES") {
+        if let Ok(v) = env_max_copies.parse::<u32>() {
+            if v > 0 {
+                max_copies = env_max_copies;
+            }
+        }
+    }
+    ggml.define("GGML_SCHED_MAX_COPIES", Some(max_copies.as_str()));
+
     // https://github.com/ggerganov/llama.cpp/blob/a836c8f534ab789b02da149fbdaf7735500bff74/Makefile#L364-L368
     if let Some(ggml_cuda) = &mut ggml_cuda {
         for lib in [
@@ -118,22 +130,30 @@
     if cfg!(target_os = "macos") {
         assert!(!cublas_enabled, "CUBLAS is not supported on macOS");
 
-        println!("cargo:rustc-link-lib=framework=Metal");
+        let metal_enabled = env::var("CARGO_FEATURE_METAL").is_ok();
+
         println!("cargo:rustc-link-lib=framework=Foundation");
-        println!("cargo:rustc-link-lib=framework=MetalPerformanceShaders");
-        println!("cargo:rustc-link-lib=framework=MetalKit");
+        if metal_enabled {
+            println!("cargo:rustc-link-lib=framework=Metal");
+            println!("cargo:rustc-link-lib=framework=MetalPerformanceShaders");
+            println!("cargo:rustc-link-lib=framework=MetalKit");
+        }
 
         llama_cpp.define("_DARWIN_C_SOURCE", None);
 
         // https://github.com/ggerganov/llama.cpp/blob/3c0d25c4756742ebf15ad44700fabc0700c638bd/Makefile#L340-L343
-        llama_cpp.define("GGML_USE_METAL", None);
+        if metal_enabled {
+            llama_cpp.define("GGML_USE_METAL", None);
+        }
         llama_cpp.define("GGML_USE_ACCELERATE", None);
         llama_cpp.define("ACCELERATE_NEW_LAPACK", None);
         llama_cpp.define("ACCELERATE_LAPACK_ILP64", None);
         println!("cargo:rustc-link-lib=framework=Accelerate");
 
-        metal_hack(&mut ggml);
-        ggml.include("./llama.cpp/ggml-metal.h");
+        if metal_enabled {
+            metal_hack(&mut ggml);
+            ggml.include("./llama.cpp/ggml-metal.h");
+        }
     }
 
     if cfg!(target_os = "dragonfly") {
@@ -167,6 +187,9 @@
         if let Some(cuda) = ggml_cuda.as_mut() {
             cuda.define("NDEBUG", None);
         }
+
+        ggml.opt_level(3);
+        llama_cpp.opt_level(3);
     }
 
     if let Some(ggml_cuda) = ggml_cuda {
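The `LLAMA_SCHED_MAX_COPIES` handling in the first hunk boils down to: use the environment value only if it parses as a positive integer, otherwise keep llama.cpp's CMake default of 4. A compact equivalent of that validation, with `sched_max_copies` as a hypothetical helper name:

use std::env;

// Accept LLAMA_SCHED_MAX_COPIES only when it is a positive integer;
// fall back to "4", matching the CMake default quoted in the diff.
fn sched_max_copies() -> String {
    env::var("LLAMA_SCHED_MAX_COPIES")
        .ok()
        .filter(|v| v.parse::<u32>().map_or(false, |n| n > 0))
        .unwrap_or_else(|| "4".to_owned())
}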

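The macOS hunk relies on the standard Cargo convention that every enabled feature is surfaced to the build script as `CARGO_FEATURE_<NAME>`. A stripped-down sketch of just that gating pattern (not the real build.rs, which also drives the C/C++ compilation):

use std::env;

fn main() {
    // Set by Cargo if and only if the `metal` feature is enabled.
    let metal_enabled = env::var("CARGO_FEATURE_METAL").is_ok();

    if cfg!(target_os = "macos") {
        // Foundation is linked unconditionally on macOS...
        println!("cargo:rustc-link-lib=framework=Foundation");
        // ...while the Metal frameworks are linked only on request.
        if metal_enabled {
            println!("cargo:rustc-link-lib=framework=Metal");
            println!("cargo:rustc-link-lib=framework=MetalPerformanceShaders");
            println!("cargo:rustc-link-lib=framework=MetalKit");
        }
    }
}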
simple/Cargo.toml
Lines changed: 1 addition & 0 deletions

@@ -14,6 +14,7 @@ encoding_rs = { workspace = true }
 
 [features]
 cublas = ["llama-cpp-2/cublas"]
+metal = ["llama-cpp-2/metal"]
 
 [lints]
 workspace = true
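With the feature forwarded here, the `simple` example should build with Metal acceleration via `cargo build --features metal` (or `--features cublas` for CUDA); with neither flag, no GPU backend is compiled in.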
