Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
213 changes: 174 additions & 39 deletions llama/pixi.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion llama/pixi.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ version = "0.1.0"
description = "Add a short description here"
authors = ["Wolf Vollprecht <w.vollprecht@gmail.com>"]
channels = ["../output", "conda-forge"]
platforms = ["osx-arm64"]
platforms = ["linux-64"] # "osx-arm64",

[tasks]
start = "main -m $CONDA_PREFIX/share/llama-cpp/models/llava-v1.5-7b-Q4_K.gguf -i"
Expand Down
61 changes: 37 additions & 24 deletions recipes/llama-cpp/discrete-device.patch
Original file line number Diff line number Diff line change
@@ -1,34 +1,47 @@
diff --git a/ggml-metal.m b/ggml-metal.m
index 419d8b9e..f00a703c 100644
index 78cac504..fa45fa5d 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -269,13 +269,28 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
for (id<MTLDevice> device in devices) {
GGML_METAL_LOG_INFO("%s: found device: %s\n", __func__, [[device name] UTF8String]);
}
- [devices release]; // since it was created by a *Copy* C method
@@ -274,6 +274,9 @@ static void * ggml_metal_host_malloc(size_t n) {
return data;
}

+// Metal device selection (implementation comes later)
+static id<MTLDevice> ggml_backend_metal_get_device(void);
+
static struct ggml_metal_context * ggml_metal_init(int n_cb) {
GGML_METAL_LOG_INFO("%s: allocating\n", __func__);

@@ -287,7 +290,7 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
#endif

// Pick and show default Metal device
id<MTLDevice> device = MTLCreateSystemDefaultDevice();
- id<MTLDevice> device = MTLCreateSystemDefaultDevice();
+ id<MTLDevice> device = ggml_backend_metal_get_device();
GGML_METAL_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]);

+ devices = MTLCopyAllDevices();
+ id <MTLDevice> dev;
+ NSString * s;
// Configure context
@@ -2806,7 +2809,22 @@ static int g_backend_device_ref_count = 0;

static id<MTLDevice> ggml_backend_metal_get_device(void) {
if (g_backend_device == nil) {
- g_backend_device = MTLCreateSystemDefaultDevice();
+ NSArray * select_devices = MTLCopyAllDevices();
+ NSString * s;
+ id<MTLDevice> dev;
+
+ for (dev in devices) {
+ s = [dev name];
+ if ([dev isRemovable]) {
+ GGML_METAL_LOG_INFO("%s: found external device: %s\n", __func__, [s UTF8String]);
+ } else if ([dev isLowPower]) {
+ GGML_METAL_LOG_INFO("%s: found integrated device: %s\n", __func__, [s UTF8String]);
+ } else {
+ GGML_METAL_LOG_INFO("%s: found discrete device: %s\n", __func__, [s UTF8String]);
+ device = dev;
+ for (dev in select_devices) {
+ s = [dev name];
+ if ([dev isRemovable]) {
+ GGML_METAL_LOG_INFO("%s: found external device: %s\n", __func__, [s UTF8String]);
+ } else if ([dev isLowPower]) {
+ GGML_METAL_LOG_INFO("%s: found integrated device: %s\n", __func__, [s UTF8String]);
+ } else {
+ GGML_METAL_LOG_INFO("%s: found discrete device: %s\n", __func__, [s UTF8String]);
+ g_backend_device = dev;
+ }
+ }
+ }
+
// Configure context
struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context));
ctx->device = device;
+ [select_devices release]; // since it was created by a *Copy* C method
}

g_backend_device_ref_count++;
36 changes: 24 additions & 12 deletions recipes/llama-cpp/recipe.yaml
Original file line number Diff line number Diff line change
@@ -1,32 +1,44 @@
context:
version: "b2813"

recipe:
name: llama-cpp
version: b2636
version: ${{ version }}

source:
url: https://github.com/ggerganov/llama.cpp/archive/refs/tags/b2636.tar.gz
sha256: 80afcede909fea958dc9524fe1d7936f9d9cc3e276a48db4f3bfc1ac0a1f8115
# patches:
# - discrete-device.patch
url: https://github.com/ggerganov/llama.cpp/archive/refs/tags/${{ version }}.tar.gz
sha256: 59bb58e9b5e424b8d45a8cce1f492035edbcca49bdc9c69e7478b0e2df5c0207
patches:
- if: osx
then: discrete-device.patch

outputs:
- package:
name: llama-cpp
requirements:
build:
# for unknown reasons the conda-forge compiler seems to not work with Metal
# - ${{ compiler('cxx') }}
- ${{ compiler('cxx') }}
- if: cuda
then:
- ${{ compiler('cuda') }}
- ninja
- cmake
host:
- openssl
- if: cuda
then:
- libcublas-dev

build:
script: |
export CPPFLAGS="-D_FORTIFY_SOURCE=2 -isystem $PREFIX/include -mmacosx-version-min=14.0"
cmake -GNinja -S . -B build -DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_FLAGS="-D__ARM_FEATURE_DOTPROD=1" \
-DCMAKE_INSTALL_PREFIX=$PREFIX -DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_METAL_EMBED_LIBRARY=1
if [[ "${{ cuda }}" == "true" ]]; then
export CUDA_ARGS="-DLLAMA_CUDA=ON"
fi

cmake -GNinja -S . -B build ${CMAKE_ARGS} \
-DLLAMA_BUILD_TESTS=OFF \
-DLLAMA_METAL_EMBED_LIBRARY=ON \
$CUDA_ARGS

cmake --build build
cmake --install build
Expand Down
Loading