@@ -4,13 +4,16 @@
   config,
   stdenv,
   mkShell,
+  runCommand,
   cmake,
   ninja,
   pkg-config,
   git,
   python3,
   mpi,
-  openblas, # TODO: Use the generic `blas` so users could switch between alternative implementations
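+  # `blas` is the generic nixpkgs meta-package, so consumers can swap in another
+  # implementation, e.g. (untested sketch) `blas.override { blasProvider = mkl; }`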
+  blas,
   cudaPackages,
   darwin,
   rocmPackages,
@@ -23,7 +24,7 @@
     useOpenCL
     useRocm
     useVulkan
-  ],
+  ] && blas.meta.available,
   useCuda ? config.cudaSupport,
   useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
   useMpi ? false, # Increases the runtime closure size by ~700M
@@ -35,7 +36,10 @@
   # It's necessary to consistently use backendStdenv when building with CUDA support,
   # otherwise we get libstdc++ errors downstream.
   effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
-  enableStatic ? effectiveStdenv.hostPlatform.isStatic
+  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
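+  # Precompiling needs Xcode's Metal compiler and a relaxed sandbox (see
+  # `__noChroot` below); enable via e.g. `.override { precompileMetalShaders = true; }`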
+  precompileMetalShaders ? false
 }@inputs:

 let
@@ -65,10 +67,16 @@
     strings.optionalString (suffices != [ ])
       ", accelerated with ${strings.concatStringsSep ", " suffices}";

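+  # ".exe" when the host platform is Windows, empty everywhere else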
+  executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;
+
   # TODO: package the Python in this repository in a Nix-like way.
   # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
   # is PEP 517-compatible, and ensure the correct .dist-info is generated.
   # https://peps.python.org/pep-0517/
+  #
+  # TODO: Package up each Python script or service appropriately, by making
+  # them into "entrypoints"
   llama-python = python3.withPackages (
     ps: [
       ps.numpy
@@ -87,6 +94,13 @@
     ]
   );

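+  # Impurely expose the system `xcrun`: the Metal compiler ships with Xcode,
+  # not nixpkgs, and its location is only discoverable through `xcrun`.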
+  xcrunHost = runCommand "xcrunHost" { } ''
+    mkdir -p $out/bin
+    ln -s /usr/bin/xcrun $out/bin
+  '';
+
   # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
   # separately
   darwinBuildInputs =
@@ -150,13 +162,19 @@ effectiveStdenv.mkDerivation (
     postPatch = ''
       substituteInPlace ./ggml-metal.m \
         --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
-
-      # TODO: Package up each Python script or service appropriately.
-      # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
-      # we could make those *.py into setuptools' entrypoints
-      substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
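+      # look for the precompiled default.metallib next to the installed binaries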
+      substituteInPlace ./ggml-metal.m \
+        --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
     '';

+    # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
+    # `default.metallib` may be compiled with the Metal compiler from Xcode,
+    # which requires escaping the sandbox on macOS (`sandbox = relaxed` in
+    # nix.conf). `xcrun` is used to find the path of the Metal compiler,
+    # which is variable and not on $PATH.
+    # See https://github.com/ggerganov/llama.cpp/pull/6118 for discussion.
+    __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
+
     nativeBuildInputs =
       [
         cmake
@@ -173,6 +190,8 @@ effectiveStdenv.mkDerivation (
       ]
       ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [
         glibc.static
+      ] ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [
+        xcrunHost
       ];

     buildInputs =
@@ -181,6 +200,7 @@ effectiveStdenv.mkDerivation (
       ++ optionals useMpi [ mpi ]
       ++ optionals useOpenCL [ clblast ]
       ++ optionals useRocm rocmBuildInputs
+      ++ optionals useBlas [ blas ]
       ++ optionals useVulkan vulkanBuildInputs;

     cmakeFlags =
@@ -191,7 +211,7 @@ effectiveStdenv.mkDerivation (
         (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
         (cmakeBool "LLAMA_BLAS" useBlas)
         (cmakeBool "LLAMA_CLBLAST" useOpenCL)
-        (cmakeBool "LLAMA_CUBLAS" useCuda)
+        (cmakeBool "LLAMA_CUDA" useCuda)
         (cmakeBool "LLAMA_HIPBLAS" useRocm)
         (cmakeBool "LLAMA_METAL" useMetalKit)
         (cmakeBool "LLAMA_MPI" useMpi)
@@ -216,14 +236,18 @@ effectiveStdenv.mkDerivation (
         # Should likely use `rocmPackages.clr.gpuTargets`.
         "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
       ]
-      ++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ]
-      ++ optionals useBlas [ (lib.cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ];
+      ++ optionals useMetalKit [
+        (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
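+        # embed the .metal source into the binary unless it is precompiled below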
+        (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
+      ];

     # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
     # if they haven't been added yet.
     postInstall = ''
-      mv $out/bin/main $out/bin/llama
-      mv $out/bin/server $out/bin/llama-server
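+      # rename the generically-named binaries so they do not clash on $PATH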
+      mv $out/bin/main${executableSuffix} $out/bin/llama${executableSuffix}
+      mv $out/bin/server${executableSuffix} $out/bin/llama-server${executableSuffix}
       mkdir -p $out/include
       cp $src/llama.h $out/include/
     '';