Skip to content

Commit 4a48e25

Browse files
authored
Merge branch 'ggml-org:master' into master
2 parents 1ed1980 + 2df255d commit 4a48e25

File tree

15 files changed

+171
-91
lines changed

15 files changed

+171
-91
lines changed

docs/build.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ cmake --build build --config Release
6868
cmake --build build-x64-windows-llvm-release
6969
```
7070
- Curl usage is enabled by default and can be turned off with `-DLLAMA_CURL=OFF`. When it is enabled, you need to install the libcurl development libraries:
71+
- **Debian / Ubuntu:** `sudo apt-get install libcurl4-openssl-dev` (or `libcurl4-gnutls-dev` if you prefer GnuTLS)
72+
- **Fedora / RHEL / Rocky / Alma:** `sudo dnf install libcurl-devel`
73+
- **Arch / Manjaro:** `sudo pacman -S curl` (includes the libcurl headers)
7174
7275
## BLAS Build
7376

ggml/cmake/ggml-config.cmake.in

Lines changed: 85 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,94 +1,130 @@
1-
2-
@GGML_VARIABLES_EXPANDED@
3-
41
@PACKAGE_INIT@
52

6-
set_and_check(GGML_INCLUDE_DIR "@PACKAGE_GGML_INCLUDE_INSTALL_DIR@")
7-
set_and_check(GGML_LIB_DIR "@PACKAGE_GGML_LIB_INSTALL_DIR@")
8-
#set_and_check(GGML_BIN_DIR "@PACKAGE_GGML_BIN_INSTALL_DIR@")
9-
10-
find_package(Threads REQUIRED)
11-
12-
find_library(GGML_LIBRARY ggml
13-
REQUIRED
14-
HINTS ${GGML_LIB_DIR}
15-
NO_CMAKE_FIND_ROOT_PATH)
16-
17-
add_library(ggml::ggml UNKNOWN IMPORTED)
18-
set_target_properties(ggml::ggml
19-
PROPERTIES
20-
IMPORTED_LOCATION "${GGML_LIBRARY}")
21-
22-
find_library(GGML_BASE_LIBRARY ggml-base
23-
REQUIRED
24-
HINTS ${GGML_LIB_DIR}
25-
NO_CMAKE_FIND_ROOT_PATH)
26-
27-
add_library(ggml::ggml-base UNKNOWN IMPORTED)
28-
set_target_properties(ggml::ggml-base
29-
PROPERTIES
30-
IMPORTED_LOCATION "${GGML_BASE_LIBRARY}")
3+
@GGML_VARIABLES_EXPANDED@
314

5+
# Find all dependencies before creating any target.
6+
include(CMakeFindDependencyMacro)
7+
find_dependency(Threads)
328
if (NOT GGML_SHARED_LIB)
9+
set(GGML_CPU_INTERFACE_LINK_LIBRARIES "")
10+
set(GGML_CPU_INTERFACE_LINK_OPTIONS "")
11+
3312
if (APPLE AND GGML_ACCELERATE)
34-
find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED)
13+
find_library(ACCELERATE_FRAMEWORK Accelerate)
14+
if(NOT ACCELERATE_FRAMEWORK)
15+
set(${CMAKE_FIND_PACKAGE_NAME}_FOUND 0)
16+
return()
17+
endif()
3518
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES ${ACCELERATE_FRAMEWORK})
3619
endif()
3720

38-
if (GGML_OPENMP)
39-
find_package(OpenMP REQUIRED)
21+
if (GGML_OPENMP_ENABLED)
22+
find_dependency(OpenMP)
4023
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
4124
endif()
4225

4326
if (GGML_CPU_HBM)
44-
find_library(memkind memkind REQUIRED)
27+
find_library(memkind memkind)
28+
if(NOT memkind)
29+
set(${CMAKE_FIND_PACKAGE_NAME}_FOUND 0)
30+
return()
31+
endif()
4532
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES memkind)
4633
endif()
4734

4835
if (GGML_BLAS)
49-
find_package(BLAS REQUIRED)
36+
find_dependency(BLAS)
5037
list(APPEND GGML_CPU_INTERFACE_LINK_LIBRARIES ${BLAS_LIBRARIES})
5138
list(APPEND GGML_CPU_INTERFACE_LINK_OPTIONS ${BLAS_LINKER_FLAGS})
5239
endif()
5340

5441
if (GGML_CUDA)
55-
find_package(CUDAToolkit REQUIRED)
42+
set(GGML_CUDA_INTERFACE_LINK_LIBRARIES "")
43+
find_dependency(CUDAToolkit)
44+
if (GGML_STATIC)
45+
list(APPEND GGML_CUDA_INTERFACE_LINK_LIBRARIES $<LINK_ONLY:CUDA::cudart_static>)
46+
if (WIN32)
47+
list(APPEND GGML_CUDA_INTERFACE_LINK_LIBRARIES $<LINK_ONLY:CUDA::cublas> $<LINK_ONLY:CUDA::cublasLt>)
48+
else()
49+
list(APPEND GGML_CUDA_INTERFACE_LINK_LIBRARIES $<LINK_ONLY:CUDA::cublas_static> $<LINK_ONLY:CUDA::cublasLt_static>)
50+
endif()
51+
endif()
52+
if (NOT GGML_CUDA_NO_VMM)
53+
list(APPEND GGML_CUDA_INTERFACE_LINK_LIBRARIES $<LINK_ONLY:CUDA::cuda_driver>)
54+
endif()
5655
endif()
5756

5857
if (GGML_METAL)
59-
find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
60-
find_library(METAL_FRAMEWORK Metal REQUIRED)
61-
find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
58+
find_library(FOUNDATION_LIBRARY Foundation)
59+
find_library(METAL_FRAMEWORK Metal)
60+
find_library(METALKIT_FRAMEWORK MetalKit)
61+
if(NOT FOUNDATION_LIBRARY OR NOT METAL_FRAMEWORK OR NOT METALKIT_FRAMEWORK)
62+
set(${CMAKE_FIND_PACKAGE_NAME}_FOUND 0)
63+
return()
64+
endif()
65+
set(GGML_METAL_INTERFACE_LINK_LIBRARIES
66+
${FOUNDATION_LIBRARY} ${METAL_FRAMEWORK} ${METALKIT_FRAMEWORK})
67+
endif()
6268

63-
list(APPEND GGML_METAL_INTERFACE_LINK_LIBRARIES
64-
${FOUNDATION_LIBRARY} ${METAL_FRAMEWORK} ${METALKIT_FRAMEWORK})
69+
if (GGML_OPENCL)
70+
find_dependency(OpenCL)
71+
set(GGML_OPENCL_INTERFACE_LINK_LIBRARIES $<LINK_ONLY:OpenCL::OpenCL>)
6572
endif()
6673

6774
if (GGML_VULKAN)
68-
find_package(Vulkan REQUIRED)
69-
list(APPEND GGML_VULKAN_INTERFACE_LINK_LIBRARIES Vulkan::Vulkan)
75+
find_dependency(Vulkan)
76+
set(GGML_VULKAN_INTERFACE_LINK_LIBRARIES $<LINK_ONLY:Vulkan::Vulkan>)
7077
endif()
7178

7279
if (GGML_HIP)
73-
find_package(hip REQUIRED)
74-
find_package(hipblas REQUIRED)
75-
find_package(rocblas REQUIRED)
76-
list(APPEND GGML_HIP_INTERFACE_LINK_LIBRARIES hip::host roc::rocblas roc::hipblas)
80+
find_dependency(hip)
81+
find_dependency(hipblas)
82+
find_dependency(rocblas)
83+
set(GGML_HIP_INTERFACE_LINK_LIBRARIES hip::host roc::rocblas roc::hipblas)
7784
endif()
7885

7986
if (GGML_SYCL)
87+
set(GGML_SYCL_INTERFACE_LINK_LIBRARIES "")
8088
find_package(DNNL)
8189
if (${DNNL_FOUND} AND GGML_SYCL_TARGET STREQUAL "INTEL")
8290
list(APPEND GGML_SYCL_INTERFACE_LINK_LIBRARIES DNNL::dnnl)
8391
endif()
8492
if (WIN32)
85-
find_package(IntelSYCL REQUIRED)
86-
find_package(MKL REQUIRED)
93+
find_dependency(IntelSYCL)
94+
find_dependency(MKL)
8795
list(APPEND GGML_SYCL_INTERFACE_LINK_LIBRARIES IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
8896
endif()
8997
endif()
9098
endif()
9199

100+
set_and_check(GGML_INCLUDE_DIR "@PACKAGE_GGML_INCLUDE_INSTALL_DIR@")
101+
set_and_check(GGML_LIB_DIR "@PACKAGE_GGML_LIB_INSTALL_DIR@")
102+
#set_and_check(GGML_BIN_DIR "@PACKAGE_GGML_BIN_INSTALL_DIR@")
103+
104+
if(NOT TARGET ggml::ggml)
105+
106+
find_package(Threads REQUIRED)
107+
108+
find_library(GGML_LIBRARY ggml
109+
REQUIRED
110+
HINTS ${GGML_LIB_DIR}
111+
NO_CMAKE_FIND_ROOT_PATH)
112+
113+
add_library(ggml::ggml UNKNOWN IMPORTED)
114+
set_target_properties(ggml::ggml
115+
PROPERTIES
116+
IMPORTED_LOCATION "${GGML_LIBRARY}")
117+
118+
find_library(GGML_BASE_LIBRARY ggml-base
119+
REQUIRED
120+
HINTS ${GGML_LIB_DIR}
121+
NO_CMAKE_FIND_ROOT_PATH)
122+
123+
add_library(ggml::ggml-base UNKNOWN IMPORTED)
124+
set_target_properties(ggml::ggml-base
125+
PROPERTIES
126+
IMPORTED_LOCATION "${GGML_BASE_LIBRARY}")
127+
92128
set(_ggml_all_targets "")
93129
foreach(_ggml_backend ${GGML_AVAILABLE_BACKENDS})
94130
string(REPLACE "-" "_" _ggml_backend_pfx "${_ggml_backend}")
@@ -149,4 +185,6 @@ set_target_properties(ggml::all
149185
PROPERTIES
150186
INTERFACE_LINK_LIBRARIES "${_ggml_all_targets}")
151187

188+
endif() # TARGET ggml::ggml
189+
152190
check_required_components(ggml)

ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,12 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
7070
if (GGML_OPENMP)
7171
find_package(OpenMP)
7272
if (OpenMP_FOUND)
73+
set(GGML_OPENMP_ENABLED "ON" CACHE INTERNAL "")
7374
target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_OPENMP)
7475

7576
target_link_libraries(${GGML_CPU_NAME} PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
7677
else()
78+
set(GGML_OPENMP_ENABLED "OFF" CACHE INTERNAL "")
7779
message(WARNING "OpenMP not found")
7880
endif()
7981
endif()

ggml/src/ggml-cpu/repack.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
#include <cmath>
1515
#include <cstring>
1616
#include <cassert>
17-
#include <cstdlib> // for qsort
1817
#include <cstdio> // for GGML_ASSERT
1918

2019
#include "repack.h"

ggml/src/ggml-metal/ggml-metal.m

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1955,6 +1955,7 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex
19551955
static int ggml_metal_encode_node(
19561956
ggml_backend_t backend,
19571957
int idx,
1958+
int idx_end,
19581959
id<MTLComputeCommandEncoder> encoder,
19591960
struct ggml_metal_mem_pool * mem_pool) {
19601961
struct ggml_backend_metal_context * ctx = backend->context;
@@ -2181,7 +2182,9 @@ static int ggml_metal_encode_node(
21812182
size_t offs_fuse;
21822183
id<MTLBuffer> id_fuse;
21832184

2184-
for (n_fuse = 0; n_fuse <= 6; ++n_fuse) {
2185+
// note: in metal, we sometimes encode the graph in parallel so we have to avoid fusing nodes
2186+
// across splits. idx_end is the exclusive end of the current split (one past its last node)
2187+
for (n_fuse = 0; n_fuse <= 6 && idx + n_fuse + 1 < idx_end; ++n_fuse) {
21852188
if (!ggml_can_fuse(gf, idx + n_fuse, ops + n_fuse, 2)) {
21862189
break;
21872190
}
@@ -4288,7 +4291,7 @@ static int ggml_metal_encode_node(
42884291
ops[1] = GGML_OP_MUL;
42894292
ops[2] = GGML_OP_ADD;
42904293

4291-
for (n_fuse = 0; n_fuse <= 1; ++n_fuse) {
4294+
for (n_fuse = 0; n_fuse <= 1 && idx + n_fuse + 1 < idx_end; ++n_fuse) {
42924295
if (!ggml_can_fuse(gf, idx + n_fuse, ops + n_fuse, 2)) {
42934296
break;
42944297
}
@@ -6271,7 +6274,11 @@ static void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) {
62716274
[encoder pushDebugGroup:[NSString stringWithCString:ggml_op_desc(ggml_graph_node(ctx->gf, idx)) encoding:NSUTF8StringEncoding]];
62726275
}
62736276

6274-
const int res = ggml_metal_encode_node(backend, idx, encoder, mem_pool);
6277+
const int res = ggml_metal_encode_node(backend, idx, node_end, encoder, mem_pool);
6278+
if (idx + res > node_end) {
6279+
GGML_ABORT("fusion error: nodes spanning multiple encoders have been fused. this indicates a bug in the fusion logic %s",
6280+
"https://github.com/ggml-org/llama.cpp/pull/14849");
6281+
}
62756282

62766283
if (should_capture) {
62776284
[encoder popDebugGroup];

ggml/src/ggml-sycl/ggml-sycl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3531,7 +3531,7 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx,
35313531
stream->memset(dev_cur_src1_row.get(), 0, sizeof(int))));
35323532

35333533
const unsigned int max_work_group_size = ggml_sycl_info().max_work_group_sizes[ctx.device];
3534-
assert(work_group_size % (WARP_SIZE * WARP_SIZE) == 0);
3534+
assert(max_work_group_size % (WARP_SIZE * WARP_SIZE) == 0);
35353535

35363536
{
35373537
sycl::range<3> block_dims(1, 1, std::min((unsigned int)ne10, max_work_group_size));

ggml/src/ggml-sycl/quants.hpp

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,11 @@ template <> struct block_q_t<GGML_TYPE_Q4_0> {
4848
};
4949

5050
static constexpr std::pair<int, int> get_block_offset(const int block_index, const int /* nblocks */) {
51-
return { block_index * (traits::qk / traits::qr), 0 };
51+
return { block_index * (QK4_0 / QR4_0), 0 };
5252
}
5353

5454
static constexpr std::pair<int, int> get_d_offset(int nrows, int ncols, const int block_index) {
55-
return { (ncols / traits::qr * nrows) + block_index * sizeof(ggml_half), 0 };
55+
return { (ncols / QR4_0 * nrows) + block_index * sizeof(ggml_half), 0 };
5656
}
5757

5858
static constexpr int block_to_q8_1_ratio() { return traits::qk / QK8_1; }
@@ -71,14 +71,12 @@ template <> struct block_q_t<GGML_TYPE_Q4_K> {
7171
}
7272

7373
static constexpr std::pair<int, int> get_d_offset(int nrows, int ncols, const int block_index) {
74-
auto nblocks = (nrows * (ncols / traits::qk));
75-
return { nblocks * (QK_K / 2),
74+
auto nblocks = (nrows * (ncols / QK_K));
75+
return { nblocks * (QK_K / 2) + (block_index * K_SCALE_SIZE),
7676
(nblocks * QK_K / 2) + (nblocks * K_SCALE_SIZE) + (block_index * sizeof(ggml_half2)) };
7777
}
7878

7979
static constexpr int block_to_q8_1_ratio() { return traits::qk / QK8_1; }
80-
81-
constexpr size_t get_total_qs_bytes(int nblocks) { return nblocks * QK_K / 2; }
8280
};
8381

8482
template <> struct block_q_t<GGML_TYPE_Q6_K> {
@@ -90,22 +88,23 @@ template <> struct block_q_t<GGML_TYPE_Q6_K> {
9088
};
9189

9290
static constexpr std::pair<int, int> get_block_offset(const int block_index, const int n_blocks) {
93-
auto low_bits_index = block_index * (traits::qk / traits::qr);
91+
auto low_bits_index = block_index * (QK_K / QR6_K);
9492
// the index of the high bits comes after all of the low bits
9593
auto high_bits_index = n_blocks * (QK_K / 2) + (block_index * (QK_K / 4));
9694
return { low_bits_index, high_bits_index };
9795
}
9896

9997
static constexpr std::pair<int, int> get_d_offset(int nrows, int ncols, const int block_index) {
100-
auto nblocks = (nrows * (ncols / traits::qk));
98+
auto nblocks = (nrows * (ncols / QK_K));
10199
auto total_qs_bytes = nblocks * (QK_K / 2) + nblocks * (QK_K / 4);
102100
auto block_scales = total_qs_bytes + block_index * (QK_K / 16);
103-
auto sb_scale = total_qs_bytes + nblocks * (QK_K / 16);
101+
auto sb_scale = total_qs_bytes + nblocks * (QK_K / 16) + block_index * sizeof(ggml_half);
104102
return { block_scales, sb_scale };
105103
}
106104

107105
static constexpr int block_to_q8_1_ratio() { return traits::qk / QK8_1; }
108106
};
107+
109108
} // namespace ggml_sycl_reordered
110109

111110
#endif // GGML_SYCL_QUANTS_HPP

ggml/src/ggml-sycl/vecdotq.hpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -350,11 +350,9 @@ template <> struct reorder_vec_dot_q_sycl<GGML_TYPE_Q4_K> {
350350
__dpct_inline__ float operator()(const void * __restrict__ vbq, const std::pair<int, int> ibx_offset,
351351
const std::pair<int, int> d_offset, const int8_t * q8_1_quant_ptr,
352352
const sycl::half2 * q8_1_ds, const int & iqs) {
353-
const int ib = ibx_offset.first / (QK_K / 2);
354-
355353
const uint8_t * base = static_cast<const uint8_t *>(vbq);
356354
const uint8_t * qs = base + ibx_offset.first;
357-
const uint8_t * scs = base + d_offset.first + ib * K_SCALE_SIZE;
355+
const uint8_t * scs = base + d_offset.first;
358356
const ggml_half2 * dms = reinterpret_cast<const ggml_half2 *>(base + d_offset.second);
359357

360358
const int bq8_offset = QR4_K * ((iqs / 2) / (QI8_1 / 2));
@@ -427,13 +425,11 @@ template <> struct reorder_vec_dot_q_sycl<GGML_TYPE_Q6_K> {
427425
__dpct_inline__ float operator()(const void * __restrict__ vbq, const std::pair<int, int> ibx_offset,
428426
const std::pair<int, int> d_offset, const int8_t * q8_1_quant_ptr, const sycl::half2 * q8_1_ds,
429427
const int iqs) {
430-
const int ib = ibx_offset.first / (QK_K / 2);
431-
432428
const uint8_t * base = static_cast<const uint8_t *>(vbq);
433429
const uint8_t * ql = base + ibx_offset.first;
434430
const uint8_t * qh = base + ibx_offset.second;
435431
const int8_t * scales = reinterpret_cast<const int8_t *>(base + d_offset.first);
436-
const ggml_half * d = (const ggml_half *) (base + d_offset.second) + ib;
432+
const ggml_half * d = (const ggml_half *) (base + d_offset.second);
437433

438434
const int bq8_offset = 2 * QR6_K * (iqs / (QI6_K / 2)) + (iqs % (QI6_K / 2)) / (QI6_K / 4);
439435
const int scale_offset = (QI6_K / 4) * (iqs / (QI6_K / 2)) + (iqs % (QI6_K / 2)) / (QI6_K / 8);

include/llama.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -956,6 +956,7 @@ extern "C" {
956956
// in the order they have appeared in the batch.
957957
// Rows: number of tokens for which llama_batch.logits[i] != 0
958958
// Cols: n_vocab
959+
// TODO: deprecate in favor of llama_get_logits_ith() (ref: https://github.com/ggml-org/llama.cpp/pull/14853#issuecomment-3113143522)
959960
LLAMA_API float * llama_get_logits(struct llama_context * ctx);
960961

961962
// Logits for the ith token. For positive indices, equivalent to:
@@ -970,6 +971,7 @@ extern "C" {
970971
// in the order they have appeared in the batch.
971972
// shape: [n_outputs*n_embd]
972973
// Otherwise, returns NULL.
974+
// TODO: deprecate in favor of llama_get_embeddings_ith() (ref: https://github.com/ggml-org/llama.cpp/pull/14853#issuecomment-3113143522)
973975
LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);
974976

975977
// Get the embeddings for the ith token. For positive indices, Equivalent to:

scripts/sync-ggml.last

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
3323219cd3cc050e5c7133cd4fc1e50d1f590faf
1+
56938c4a3b2d923f42040f9ad32d229c76c466cd

0 commit comments

Comments
 (0)