@@ -23,6 +23,18 @@ fn main() {
23
23
ggml. cpp ( false ) ;
24
24
llama_cpp. cpp ( true ) ;
25
25
26
+ // CMakeLists.txt: set(LLAMA_SCHED_MAX_COPIES "4" CACHE STRING "llama: max input copies for pipeline parallelism")
27
+ // Read LLAMA_SCHED_MAX_COPIES from the environment; fall back to 4 when it is unset, not a valid u32, or zero.
28
+ let mut max_copies = "4" . to_owned ( ) ;
29
+ if let Ok ( env_max_copies) = env:: var ( "LLAMA_SCHED_MAX_COPIES" ) {
30
+ if let Ok ( v) = env_max_copies. parse :: < u32 > ( ) {
31
+ if v > 0 {
32
+ max_copies = env_max_copies;
33
+ }
34
+ }
35
+ }
36
+ ggml. define ( "GGML_SCHED_MAX_COPIES" , Some ( max_copies. as_str ( ) ) ) ;
37
+
26
38
// https://github.com/ggerganov/llama.cpp/blob/a836c8f534ab789b02da149fbdaf7735500bff74/Makefile#L364-L368
27
39
if let Some ( ggml_cuda) = & mut ggml_cuda {
28
40
for lib in [
@@ -118,22 +130,30 @@ fn main() {
118
130
if cfg ! ( target_os = "macos" ) {
119
131
assert ! ( !cublas_enabled, "CUBLAS is not supported on macOS" ) ;
120
132
121
- println ! ( "cargo:rustc-link-lib=framework=Metal" ) ;
133
+ let metal_enabled = env:: var ( "CARGO_FEATURE_METAL" ) . is_ok ( ) ;
134
+
122
135
println ! ( "cargo:rustc-link-lib=framework=Foundation" ) ;
123
- println ! ( "cargo:rustc-link-lib=framework=MetalPerformanceShaders" ) ;
124
- println ! ( "cargo:rustc-link-lib=framework=MetalKit" ) ;
136
+ if metal_enabled {
137
+ println ! ( "cargo:rustc-link-lib=framework=Metal" ) ;
138
+ println ! ( "cargo:rustc-link-lib=framework=MetalPerformanceShaders" ) ;
139
+ println ! ( "cargo:rustc-link-lib=framework=MetalKit" ) ;
140
+ }
125
141
126
142
llama_cpp. define ( "_DARWIN_C_SOURCE" , None ) ;
127
143
128
144
// https://github.com/ggerganov/llama.cpp/blob/3c0d25c4756742ebf15ad44700fabc0700c638bd/Makefile#L340-L343
129
- llama_cpp. define ( "GGML_USE_METAL" , None ) ;
145
+ if metal_enabled {
146
+ llama_cpp. define ( "GGML_USE_METAL" , None ) ;
147
+ }
130
148
llama_cpp. define ( "GGML_USE_ACCELERATE" , None ) ;
131
149
llama_cpp. define ( "ACCELERATE_NEW_LAPACK" , None ) ;
132
150
llama_cpp. define ( "ACCELERATE_LAPACK_ILP64" , None ) ;
133
151
println ! ( "cargo:rustc-link-lib=framework=Accelerate" ) ;
134
152
135
- metal_hack ( & mut ggml) ;
136
- ggml. include ( "./llama.cpp/ggml-metal.h" ) ;
153
+ if metal_enabled {
154
+ metal_hack ( & mut ggml) ;
155
+ ggml. include ( "./llama.cpp/ggml-metal.h" ) ;
156
+ }
137
157
}
138
158
139
159
if cfg ! ( target_os = "dragonfly" ) {
@@ -167,6 +187,9 @@ fn main() {
167
187
if let Some ( cuda) = ggml_cuda. as_mut ( ) {
168
188
cuda. define ( "NDEBUG" , None ) ;
169
189
}
190
+
191
+ ggml. opt_level ( 3 ) ;
192
+ llama_cpp. opt_level ( 3 ) ;
170
193
}
171
194
172
195
if let Some ( ggml_cuda) = ggml_cuda {
0 commit comments