@@ -23,6 +23,18 @@ fn main() {
     ggml.cpp(false);
     llama_cpp.cpp(true);
 
+    // CMakeLists.txt: set(LLAMA_SCHED_MAX_COPIES "4" CACHE STRING "llama: max input copies for pipeline parallelism")
+    // Get LLAMA_SCHED_MAX_COPIES from the environment, defaulting to 4.
+    let mut max_copies = "4".to_owned();
+    if let Ok(env_max_copies) = env::var("LLAMA_SCHED_MAX_COPIES") {
+        if let Ok(v) = env_max_copies.parse::<u32>() {
+            if v > 0 {
+                max_copies = env_max_copies;
+            }
+        }
+    }
+    ggml.define("GGML_SCHED_MAX_COPIES", Some(max_copies.as_str()));
+
     // https://github.com/ggerganov/llama.cpp/blob/a836c8f534ab789b02da149fbdaf7735500bff74/Makefile#L364-L368
     if let Some(ggml_cuda) = &mut ggml_cuda {
         for lib in [
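Note (not part of the diff): the hunk above only honors LLAMA_SCHED_MAX_COPIES when it parses as a positive integer; anything unset, non-numeric, or zero keeps the upstream CMake default of "4", and the chosen value is forwarded to the C sources as the GGML_SCHED_MAX_COPIES preprocessor define. A minimal standalone sketch of that fallback rule, using a hypothetical helper name `sched_max_copies`:

    use std::env;

    // Accept LLAMA_SCHED_MAX_COPIES only if it is a positive integer;
    // otherwise fall back to the CMake default "4".
    fn sched_max_copies() -> String {
        env::var("LLAMA_SCHED_MAX_COPIES")
            .ok()
            .filter(|s| s.parse::<u32>().map_or(false, |v| v > 0))
            .unwrap_or_else(|| "4".to_owned())
    }

    fn main() {
        // e.g. `LLAMA_SCHED_MAX_COPIES=8 cargo run` prints "8"; an invalid
        // value such as "0" or "lots" prints "4".
        println!("{}", sched_max_copies());
    }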
@@ -118,22 +130,30 @@ fn main() {
     if cfg!(target_os = "macos") {
         assert!(!cublas_enabled, "CUBLAS is not supported on macOS");
 
-        println!("cargo:rustc-link-lib=framework=Metal");
+        let metal_enabled = env::var("CARGO_FEATURE_METAL").is_ok();
+
         println!("cargo:rustc-link-lib=framework=Foundation");
-        println!("cargo:rustc-link-lib=framework=MetalPerformanceShaders");
-        println!("cargo:rustc-link-lib=framework=MetalKit");
+        if metal_enabled {
+            println!("cargo:rustc-link-lib=framework=Metal");
+            println!("cargo:rustc-link-lib=framework=MetalPerformanceShaders");
+            println!("cargo:rustc-link-lib=framework=MetalKit");
+        }
 
         llama_cpp.define("_DARWIN_C_SOURCE", None);
 
         // https://github.com/ggerganov/llama.cpp/blob/3c0d25c4756742ebf15ad44700fabc0700c638bd/Makefile#L340-L343
-        llama_cpp.define("GGML_USE_METAL", None);
+        if metal_enabled {
+            llama_cpp.define("GGML_USE_METAL", None);
+        }
         llama_cpp.define("GGML_USE_ACCELERATE", None);
         llama_cpp.define("ACCELERATE_NEW_LAPACK", None);
         llama_cpp.define("ACCELERATE_LAPACK_ILP64", None);
         println!("cargo:rustc-link-lib=framework=Accelerate");
 
-        metal_hack(&mut ggml);
-        ggml.include("./llama.cpp/ggml-metal.h");
+        if metal_enabled {
+            metal_hack(&mut ggml);
+            ggml.include("./llama.cpp/ggml-metal.h");
+        }
     }
 
     if cfg!(target_os = "dragonfly") {
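Background on the gating above: Cargo exports an environment variable named CARGO_FEATURE_<NAME> to a build script for every feature enabled on the package, so `env::var("CARGO_FEATURE_METAL").is_ok()` is the usual way for build.rs to tell whether the `metal` feature is on. A minimal sketch of that pattern, assuming a crate that declares a `metal` feature:

    use std::env;

    fn main() {
        // Set by Cargo only when the package is built with `--features metal`.
        let metal_enabled = env::var("CARGO_FEATURE_METAL").is_ok();
        if metal_enabled {
            // Link the Apple frameworks only when Metal support is requested.
            println!("cargo:rustc-link-lib=framework=Metal");
            println!("cargo:rustc-link-lib=framework=MetalKit");
        }
    }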
@@ -167,6 +187,9 @@ fn main() {
         if let Some(cuda) = ggml_cuda.as_mut() {
             cuda.define("NDEBUG", None);
         }
+
+        ggml.opt_level(3);
+        llama_cpp.opt_level(3);
     }
 
     if let Some(ggml_cuda) = ggml_cuda {
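On the added opt_level calls: assuming `ggml` and `llama_cpp` are `cc::Build` values (this appears to be the `cc` crate), `opt_level(3)` pins the compiled C/C++ objects to -O3 regardless of the Cargo profile's own opt-level, which fits their placement next to the NDEBUG define in what is presumably the non-debug branch. A small sketch with a hypothetical helper:

    // Hypothetical helper; assumes the `cc` crate's Build type.
    fn configure_release(build: &mut cc::Build) {
        // Strip asserts and force -O3 for the native objects, independent of
        // the Rust optimization level chosen by the Cargo profile.
        build.define("NDEBUG", None);
        build.opt_level(3);
    }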