26 | 26 | # include "ggml-blas.h" |
27 | 27 | #endif |
28 | 28 |
29 | | -#ifdef GGML_USE_METAL |
30 | | -# include "ggml-metal.h" |
31 | | -#endif |
32 | | - |
33 | 29 | // TODO: replace with ggml API call |
34 | 30 | #define QK_K 256 |
35 | 31 |
@@ -3292,9 +3288,6 @@ struct llama_context { |
3292 | 3288 | std::unordered_map<struct llama_lora_adapter *, float> lora_adapters; |
3293 | 3289 |
3294 | 3290 | std::vector<ggml_backend_t> backends; |
3295 | | -#ifdef GGML_USE_METAL |
3296 | | - ggml_backend_t backend_metal = nullptr; |
3297 | | -#endif |
3298 | 3291 | #ifdef GGML_USE_BLAS |
3299 | 3292 | ggml_backend_t backend_blas = nullptr; |
3300 | 3293 | #endif |
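With the dedicated backend_metal member removed, Metal is presumably treated like any other entry in the generic backends vector. A minimal sketch of the resulting pattern, using only the public ggml-backend API (the synchronize call is an illustrative example, not taken from this diff):

    // every initialized backend now lives only in ctx->backends;
    // per-backend work iterates the vector instead of special-casing Metal
    for (ggml_backend_t backend : ctx->backends) {
        ggml_backend_synchronize(backend);  // any per-backend operation fits here
    }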
@@ -3420,9 +3413,7 @@ static int llama_get_device_count(const llama_model & model) { |
3420 | 3413 | count += (int) model.rpc_servers.size(); |
3421 | 3414 | #endif |
3422 | 3415 |
3423 | | -#if defined(GGML_USE_METAL) |
3424 | | - count += 1; |
3425 | | -#elif defined(GGML_USE_SYCL) |
| 3416 | +#if defined(GGML_USE_SYCL) |
3426 | 3417 | count += ggml_backend_sycl_get_device_count(); |
3427 | 3418 | #elif defined(GGML_USE_VULKAN) |
3428 | 3419 | count += ggml_backend_vk_get_device_count(); |
@@ -3488,9 +3479,7 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_offload(const llama_ |
3488 | 3479 | } |
3489 | 3480 | device -= (int)model.devices.size(); |
3490 | 3481 |
3491 | | -#if defined(GGML_USE_METAL) |
3492 | | - buft = ggml_backend_metal_buffer_type(); |
3493 | | -#elif defined(GGML_USE_VULKAN) |
| 3482 | +#if defined(GGML_USE_VULKAN) |
3494 | 3483 | buft = ggml_backend_vk_buffer_type(device); |
3495 | 3484 | #elif defined(GGML_USE_SYCL) |
3496 | 3485 | buft = ggml_backend_sycl_buffer_type(device); |
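With the Metal branch gone, the compile-time dispatch above covers only the remaining ifdef-gated backends; devices registered in model.devices are handled before this chain (note the device -= model.devices.size() context line). A condensed sketch of the dispatch shape, where the CPU fallback via ggml_backend_cpu_buffer_type() is an assumption about what happens when no GPU backend applies:

    // sketch: pick an offload buffer type for a compile-time GPU backend
    ggml_backend_buffer_type_t buft = nullptr;
    #if defined(GGML_USE_VULKAN)
        buft = ggml_backend_vk_buffer_type(device);
    #elif defined(GGML_USE_SYCL)
        buft = ggml_backend_sycl_buffer_type(device);
    #endif
    if (buft == nullptr) {
        buft = ggml_backend_cpu_buffer_type();  // assumed fallback when no GPU backend is compiled in
    }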
@@ -8937,25 +8926,6 @@ static bool llm_load_tensors( |
8937 | 8926 | bufs.emplace(idx, buf); |
8938 | 8927 | } |
8939 | 8928 | } |
8940 | | -#ifdef GGML_USE_METAL |
8941 | | - else if (ml.use_mmap && use_mmap_buffer && buft == ggml_backend_metal_buffer_type()) { |
8942 | | - for (uint32_t idx = 0; idx < ml.files.size(); idx++) { |
8943 | | - const size_t max_size = ggml_get_max_tensor_size(ctx); |
8944 | | - void * addr = nullptr; |
8945 | | - size_t first, last; |
8946 | | - ml.get_mapping_range(&first, &last, &addr, idx, ctx); |
8947 | | - if (first >= last) { |
8948 | | - continue; |
8949 | | - } |
8950 | | - ggml_backend_buffer_t buf = ggml_backend_metal_buffer_from_ptr((char *) addr + first, last - first, max_size); |
8951 | | - if (buf == nullptr) { |
8952 | | - throw std::runtime_error("unable to allocate backend metal buffer"); |
8953 | | - } |
8954 | | - model.bufs.push_back(buf); |
8955 | | - bufs.emplace(idx, buf); |
8956 | | - } |
8957 | | - } |
8958 | | -#endif |
8959 | 8929 | else { |
8960 | 8930 | ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft); |
8961 | 8931 | if (buf == nullptr) { |
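The deleted branch mapped each mmap'd file range straight into a Metal buffer with ggml_backend_metal_buffer_from_ptr. Tensors that don't match a host-mapped branch take the generic allocation path in the surrounding context; a condensed sketch of that path (the continuation past this excerpt is an assumption):

    // generic path: allocate one backend buffer sized for every tensor in ctx;
    // tensor data is read into it later during loading
    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
    if (buf == nullptr) {
        throw std::runtime_error("unable to allocate backend buffer");
    }
    model.bufs.push_back(buf);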
@@ -19041,7 +19011,7 @@ bool llama_supports_mlock(void) { |
19041 | 19011 | } |
19042 | 19012 |
19043 | 19013 | bool llama_supports_gpu_offload(void) { |
19044 | | -#if defined(GGML_USE_METAL) || defined(GGML_USE_VULKAN) || \ |
| 19014 | +#if defined(GGML_USE_VULKAN) || \ |
19045 | 19015 | defined(GGML_USE_SYCL) || defined(GGML_USE_KOMPUTE) || defined(GGML_USE_RPC) |
19046 | 19016 | // Defined when llama.cpp is compiled with support for offloading model layers to GPU. |
19047 | 19017 | return true; |
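llama_supports_gpu_offload() remains part of the public llama.h API, so callers can still use it as a build-time capability check when picking n_gpu_layers; a minimal usage sketch (treating 99 as an "offload everything" convention is an assumption, not something this diff defines):

    #include "llama.h"

    // offload layers only if this build was compiled with a GPU backend
    llama_model_params mparams = llama_model_default_params();
    mparams.n_gpu_layers = llama_supports_gpu_offload() ? 99 : 0;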
@@ -19344,17 +19314,7 @@ struct llama_context * llama_new_context_with_model( |
19344 | 19314 | } |
19345 | 19315 | #endif |
19346 | 19316 |
19347 | | -#if defined(GGML_USE_METAL) |
19348 | | - if (model->n_gpu_layers > 0) { |
19349 | | - ctx->backend_metal = ggml_backend_metal_init(); |
19350 | | - if (ctx->backend_metal == nullptr) { |
19351 | | - LLAMA_LOG_ERROR("%s: failed to initialize Metal backend\n", __func__); |
19352 | | - llama_free(ctx); |
19353 | | - return nullptr; |
19354 | | - } |
19355 | | - ctx->backends.push_back(ctx->backend_metal); |
19356 | | - } |
19357 | | -#elif defined(GGML_USE_VULKAN) |
| 19317 | +#if defined(GGML_USE_VULKAN) |
19358 | 19318 | if (model->split_mode == LLAMA_SPLIT_MODE_ROW) { |
19359 | 19319 | LLAMA_LOG_ERROR("%s: Row split not supported. Failed to initialize Vulkan backend\n", __func__); |
19360 | 19320 | llama_free(ctx); |
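The removed Metal block followed the same init-and-register pattern that the remaining GPU branches (such as the Vulkan one above) still use: initialize the backend, abort context creation on failure, and append it to ctx->backends. A generic sketch of that shape, where backend_init is a placeholder rather than a real ggml API:

    // generic per-backend initialization inside llama_new_context_with_model
    ggml_backend_t backend = backend_init(/* device, params */);  // placeholder for e.g. ggml_backend_vk_init(device)
    if (backend == nullptr) {
        LLAMA_LOG_ERROR("%s: failed to initialize backend\n", __func__);
        llama_free(ctx);
        return nullptr;
    }
    ctx->backends.push_back(backend);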