prefix-dev · wolfv · May 8, 2024 · May 9, 2024
diff --git a/llama/pixi.lock b/llama/pixi.lock
diff --git a/llama/pixi.toml b/llama/pixi.toml
@@ -4,7 +4,7 @@ version = "0.1.0"
 description = "Add a short description here"
 authors = ["Wolf Vollprecht <w.vollprecht@gmail.com>"]
 channels = ["../output", "conda-forge"]
-platforms = ["osx-arm64"]
+platforms = ["linux-64"] # was: ["osx-arm64"]
 
 [tasks]
 start = "main -m $CONDA_PREFIX/share/llama-cpp/models/llava-v1.5-7b-Q4_K.gguf -i"

diff --git a/recipes/llama-cpp/discrete-device.patch b/recipes/llama-cpp/discrete-device.patch
@@ -1,34 +1,47 @@
 diff --git a/ggml-metal.m b/ggml-metal.m
-index 419d8b9e..f00a703c 100644
+index 78cac504..fa45fa5d 100644
 --- a/ggml-metal.m
 +++ b/ggml-metal.m
-@@ -269,13 +269,28 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
-     for (id<MTLDevice> device in devices) {
-         GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]);
-     }
--    [devices release]; // since it was created by a *Copy* C method
+@@ -274,6 +274,9 @@ static void * ggml_metal_host_malloc(size_t n) {
+     return data;
+ }
+
++// Forward declaration of the Metal device selector; it is defined further down in this file.
++static id<MTLDevice> ggml_backend_metal_get_device(void);
++
+ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
+     GGML_METAL_LOG_INFO("%s: allocating\n", __func__);
+
+@@ -287,7 +290,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
  #endif
 
      // Pick and show default Metal device
-     id<MTLDevice> device = MTLCreateSystemDefaultDevice();
+-    id<MTLDevice> device = MTLCreateSystemDefaultDevice();
++    id<MTLDevice> device = ggml_backend_metal_get_device();
      GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);
 
-+    devices = MTLCopyAllDevices();
-+    id <MTLDevice> dev;
-+    NSString * s;
+     // Configure context
+@@ -2806,7 +2809,22 @@ static int g_backend_device_ref_count = 0;
+
+ static id<MTLDevice> ggml_backend_metal_get_device(void) {
+     if (g_backend_device == nil) {
+-        g_backend_device = MTLCreateSystemDefaultDevice();
++        NSArray * select_devices = MTLCopyAllDevices();
++        NSString * s;
++        id<MTLDevice> dev;
 +
-+    for (dev in devices) {
-+        s = [dev name];
-+        if ([dev isRemovable]) {
-+            GGML_METAL_LOG_INFO("%s: found external device: %s\n", __func__, [s UTF8String]);
-+        } else if ([dev isLowPower]) {
-+            GGML_METAL_LOG_INFO("%s: found integrated device: %s\n", __func__, [s UTF8String]);
-+        } else {
-+            GGML_METAL_LOG_INFO("%s: found discrete device: %s\n", __func__, [s UTF8String]);
-+            device = dev;
++        for (dev in select_devices) {
++            s = [dev name];
++            if ([dev isRemovable]) {
++                GGML_METAL_LOG_INFO("%s: found external device: %s\n", __func__, [s UTF8String]);
++            } else if ([dev isLowPower]) {
++                GGML_METAL_LOG_INFO("%s: found integrated device: %s\n", __func__, [s UTF8String]);
++            } else {
++                GGML_METAL_LOG_INFO("%s: found discrete device: %s\n", __func__, [s UTF8String]);
++                g_backend_device = dev;
++            }
 +        }
-+    }
-+
-     // Configure context
-     struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context));
-     ctx->device = device;
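++        [select_devices release]; // we own the array: it came from a *Copy* C function. NOTE(review): g_backend_device is not retained before this release and stays nil if no discrete device matched; confirm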
+     }
+
+     g_backend_device_ref_count++;
diff --git a/recipes/llama-cpp/recipe.yaml b/recipes/llama-cpp/recipe.yaml
@@ -1,32 +1,44 @@
+context:
+  version: "b2813"
+
 recipe:
   name: llama-cpp
-  version: b2636
+  version: ${{ version }}
 
 source:
-  url: https://github.com/ggerganov/llama.cpp/archive/refs/tags/b2636.tar.gz
-  sha256: 80afcede909fea958dc9524fe1d7936f9d9cc3e276a48db4f3bfc1ac0a1f8115
-  # patches:
-    # - discrete-device.patch
+  url: https://github.com/ggerganov/llama.cpp/archive/refs/tags/${{ version }}.tar.gz
+  sha256: 59bb58e9b5e424b8d45a8cce1f492035edbcca49bdc9c69e7478b0e2df5c0207
+  patches:
+    - if: osx
+      then: discrete-device.patch
 
 outputs:
   - package: 
       name: llama-cpp
     requirements:
       build:
-        # for unknown reasons the conda-forge compiler seems to not work with Metal
-        # - ${{ compiler('cxx') }}
+        - ${{ compiler('cxx') }}
+        - if: cuda
+          then:
+            - ${{ compiler('cuda') }}
         - ninja
         - cmake
       host:
         - openssl
+        - if: cuda
+          then:
+            - libcublas-dev
 
     build:
       script: |
-        export CPPFLAGS="-D_FORTIFY_SOURCE=2 -isystem $PREFIX/include -mmacosx-version-min=14.0"
-        cmake -GNinja -S . -B build -DCMAKE_BUILD_TYPE=Release \
-          -DCMAKE_C_FLAGS="-D__ARM_FEATURE_DOTPROD=1" \
-          -DCMAKE_INSTALL_PREFIX=$PREFIX -DLLAMA_BUILD_TESTS=OFF \
-          -DLLAMA_METAL_EMBED_LIBRARY=1
+        if [[ "${{ cuda }}" == "true" ]]; then
+          export CUDA_ARGS="-DLLAMA_CUDA=ON"
+        fi
+
+        cmake -GNinja -S . -B build ${CMAKE_ARGS} \
+          -DLLAMA_BUILD_TESTS=OFF \
+          -DLLAMA_METAL_EMBED_LIBRARY=ON \
+          $CUDA_ARGS
 
         cmake --build build
         cmake --install build