Skip to content

Commit 04a0711

Browse files
authored
Merge branch 'ggerganov:master' into server-chat-templates
2 parents 3b837fe + dd3a6ce commit 04a0711

File tree

10 files changed

+166
-79
lines changed

10 files changed

+166
-79
lines changed

CMakePresets.json

Lines changed: 19 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -24,11 +24,12 @@
2424
"CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
2525
}
2626
},
27-
{ "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } },
28-
{ "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } },
29-
{ "name": "reldbg", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
30-
{ "name": "static", "hidden": true, "cacheVariables": { "GGML_STATIC": "ON" } },
31-
{ "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16": "ON" } },
27+
{ "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } },
28+
{ "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } },
29+
{ "name": "reldbg", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
30+
{ "name": "static", "hidden": true, "cacheVariables": { "GGML_STATIC": "ON" } },
31+
{ "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16": "ON" } },
32+
{ "name": "vulkan", "hidden": true, "cacheVariables": { "GGML_VULKAN": "ON" } },
3233

3334
{
3435
"name": "arm64-windows-msvc", "hidden": true,
@@ -57,25 +58,28 @@
5758
}
5859
},
5960

60-
{ "name": "arm64-windows-llvm-debug" , "inherits": [ "base", "arm64-windows-llvm", "debug" ] },
61-
{ "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] },
62-
{ "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] },
61+
{ "name": "arm64-windows-llvm-debug", "inherits": [ "base", "arm64-windows-llvm", "debug" ] },
62+
{ "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] },
63+
{ "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] },
6364

64-
{ "name": "arm64-apple-clang-debug" , "inherits": [ "base", "arm64-apple-clang", "debug" ] },
65-
{ "name": "arm64-apple-clang-release" , "inherits": [ "base", "arm64-apple-clang", "reldbg" ] },
66-
{ "name": "arm64-apple-clang+static-release" , "inherits": [ "base", "arm64-apple-clang", "reldbg", "static" ] },
65+
{ "name": "arm64-apple-clang-debug", "inherits": [ "base", "arm64-apple-clang", "debug" ] },
66+
{ "name": "arm64-apple-clang-release", "inherits": [ "base", "arm64-apple-clang", "reldbg" ] },
67+
{ "name": "arm64-apple-clang+static-release", "inherits": [ "base", "arm64-apple-clang", "reldbg", "static" ] },
6768

68-
{ "name": "arm64-windows-msvc-debug" , "inherits": [ "base", "arm64-windows-msvc", "debug" ] },
69+
{ "name": "arm64-windows-msvc-debug", "inherits": [ "base", "arm64-windows-msvc", "debug" ] },
6970
{ "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg" ] },
7071
{ "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg", "static" ] },
7172

72-
{ "name": "x64-windows-msvc-debug" , "inherits": [ "base", "debug" ] },
73+
{ "name": "x64-windows-msvc-debug", "inherits": [ "base", "debug" ] },
7374
{ "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] },
7475
{ "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] },
7576

76-
{ "name": "x64-windows-sycl-debug" , "inherits": [ "sycl-base", "debug" ] },
77+
{ "name": "x64-windows-sycl-debug", "inherits": [ "sycl-base", "debug" ] },
7778
{ "name": "x64-windows-sycl-debug-f16", "inherits": [ "sycl-base", "debug", "sycl_f16" ] },
7879
{ "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] },
79-
{ "name": "x64-windows-sycl-release-f16", "inherits": [ "sycl-base", "release", "sycl_f16" ] }
80+
{ "name": "x64-windows-sycl-release-f16", "inherits": [ "sycl-base", "release", "sycl_f16" ] },
81+
82+
{ "name": "x64-windows-vulkan-debug", "inherits": [ "base", "vulkan", "debug" ] },
83+
{ "name": "x64-windows-vulkan-release", "inherits": [ "base", "vulkan", "release" ] }
8084
]
8185
}

Makefile

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -359,6 +359,10 @@ ifdef LLAMA_SERVER_SSL
359359
MK_LDFLAGS += -lssl -lcrypto
360360
endif
361361

362+
ifndef GGML_NO_CPU_AARCH64
363+
MK_CPPFLAGS += -DGGML_USE_CPU_AARCH64
364+
endif
365+
362366
# warnings
363367
WARN_FLAGS = \
364368
-Wall \
@@ -940,10 +944,6 @@ ggml/src/ggml-cuda/%.o: \
940944
$(MCC) $(CXXFLAGS) $(MUSAFLAGS) -x musa -mtgpu -c -o $@ $<
941945
endif # GGML_MUSA
942946

943-
ifndef GGML_NO_CPU_AARCH64
944-
MK_CPPFLAGS += -DGGML_USE_CPU_AARCH64
945-
endif
946-
947947
ifdef GGML_METAL
948948
MK_CPPFLAGS += -DGGML_USE_METAL
949949
MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit

ggml/src/ggml-aarch64.c

Lines changed: 43 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -8,19 +8,42 @@
88

99
#define UNUSED GGML_UNUSED
1010

11-
static block_q4_0x4 make_block_q4_0x4(block_q4_0 * in, unsigned int blck_size_interleave, unsigned int xor_mask) {
11+
static block_q4_0x4 make_block_q4_0x4(block_q4_0 * in, unsigned int blck_size_interleave) {
1212
block_q4_0x4 out;
1313

1414
for (int i = 0; i < 4; i++) {
1515
out.d[i] = in[i].d;
1616
}
1717

18-
for (int i = 0; i < QK4_0 * 2; i++) {
19-
int src_offset = (i / (4 * blck_size_interleave)) * blck_size_interleave;
20-
int src_id = (i % (4 * blck_size_interleave)) / blck_size_interleave;
21-
src_offset += (i % blck_size_interleave);
18+
const int end = QK4_0 * 2 / blck_size_interleave;
2219

23-
out.qs[i] = in[src_id].qs[src_offset] ^ xor_mask;
20+
if (blck_size_interleave == 8) {
21+
const uint64_t xor_mask = 0x8888888888888888ULL;
22+
for (int i = 0; i < end; ++i) {
23+
int src_id = i % 4;
24+
int src_offset = (i / 4) * blck_size_interleave;
25+
int dst_offset = i * blck_size_interleave;
26+
27+
uint64_t elems;
28+
// Using memcpy to avoid unaligned memory accesses
29+
memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t));
30+
elems ^= xor_mask;
31+
memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t));
32+
}
33+
} else if (blck_size_interleave == 4) {
34+
const uint32_t xor_mask = 0x88888888;
35+
for (int i = 0; i < end; ++i) {
36+
int src_id = i % 4;
37+
int src_offset = (i / 4) * blck_size_interleave;
38+
int dst_offset = i * blck_size_interleave;
39+
40+
uint32_t elems;
41+
memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint32_t));
42+
elems ^= xor_mask;
43+
memcpy(&out.qs[dst_offset], &elems, sizeof(uint32_t));
44+
}
45+
} else {
46+
GGML_ASSERT(false);
2447
}
2548

2649
return out;
@@ -30,19 +53,25 @@ static block_q4_0x4 make_block_q4_0x4(block_q4_0 * in, unsigned int blck_size_in
3053
// returns an interleaved block_q4_0x8
3154
// in the interleaved block_q4_0x8, place deltas for 8 block_q4_0 blocks
3255
// first, then interleave quants from 8 block_q4_0s in blocks of blck_size_interleave
33-
static block_q4_0x8 make_block_q4_0x8(block_q4_0 * in, unsigned int blck_size_interleave, unsigned int xor_mask) {
56+
static block_q4_0x8 make_block_q4_0x8(block_q4_0 * in, unsigned int blck_size_interleave) {
3457
block_q4_0x8 out;
3558

3659
for (int i = 0; i < 8; i++) {
3760
out.d[i] = in[i].d;
3861
}
3962

40-
for (int i = 0; i < QK4_0 * 4; i++) {
41-
int src_offset = (i / (8 * blck_size_interleave)) * blck_size_interleave;
42-
int src_id = (i % (8 * blck_size_interleave)) / blck_size_interleave;
43-
src_offset += (i % blck_size_interleave);
63+
const int end = QK4_0 * 4 / blck_size_interleave;
64+
const uint64_t xor_mask = 0x8888888888888888ULL;
65+
66+
for (int i = 0; i < end; ++i) {
67+
int src_id = i % 8;
68+
int src_offset = (i / 8) * blck_size_interleave;
69+
int dst_offset = i * blck_size_interleave;
4470

45-
out.qs[i] = in[src_id].qs[src_offset] ^ xor_mask;
71+
uint64_t elems;
72+
memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t));
73+
elems ^= xor_mask;
74+
memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t));
4675
}
4776

4877
return out;
@@ -71,11 +100,11 @@ static size_t quantize_q4_0_nr_bl(const float * restrict src, void * restrict ds
71100
}
72101

73102
if (nrows_interleaved == 8) {
74-
*(block_q4_0x8 *) out_ptr = make_block_q4_0x8(dst_tmp, blck_size_interleave, 0x88);
103+
*(block_q4_0x8 *) out_ptr = make_block_q4_0x8(dst_tmp, blck_size_interleave);
75104
out_ptr = (block_q4_0x8 *) out_ptr + 1;
76105
}
77106
else if (nrows_interleaved == 4) {
78-
*(block_q4_0x4 *) out_ptr = make_block_q4_0x4(dst_tmp, blck_size_interleave, 0x88);
107+
*(block_q4_0x4 *) out_ptr = make_block_q4_0x4(dst_tmp, blck_size_interleave);
79108
out_ptr = (block_q4_0x4 *) out_ptr + 1;
80109
}
81110
}

ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -143,14 +143,23 @@ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LW
143143
if (GGML_AVX512_VBMI)
144144
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
145145
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
146+
if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
147+
list(APPEND ARCH_FLAGS -mavx512vbmi)
148+
endif()
146149
endif()
147150
if (GGML_AVX512_VNNI)
148151
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
149152
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
153+
if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
154+
list(APPEND ARCH_FLAGS -mavx512vnni)
155+
endif()
150156
endif()
151157
if (GGML_AVX512_BF16)
152158
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512BF16__>)
153159
add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512BF16__>)
160+
if (CMAKE_C_COMPILER_ID STREQUAL "Clang")
161+
list(APPEND ARCH_FLAGS -mavx512bf16)
162+
endif()
154163
endif()
155164
if (GGML_AMX_TILE)
156165
add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AMX_TILE__>)
@@ -211,10 +220,13 @@ elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LW
211220
endif()
212221
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64")
213222
message(STATUS "PowerPC detected")
214-
execute_process(COMMAND bash -c "grep POWER10 /proc/cpuinfo | head -n 1"
215-
OUTPUT_VARIABLE POWER10_M)
216-
string(FIND ${POWER10_M} "POWER10" substring_index)
217-
if(${substring_index} GREATER_EQUAL 0)
223+
execute_process(COMMAND bash -c "grep POWER10 /proc/cpuinfo | head -n 1" OUTPUT_VARIABLE POWER10_M)
224+
string(FIND "${POWER10_M}" "POWER10" substring_index)
225+
if (NOT DEFINED substring_index OR "${substring_index}" STREQUAL "")
226+
set(substring_index -1)
227+
endif()
228+
229+
if (${substring_index} GREATER_EQUAL 0)
218230
list(APPEND ARCH_FLAGS -mcpu=power10)
219231
elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le")
220232
list(APPEND ARCH_FLAGS -mcpu=powerpc64le)

ggml/src/ggml-cpu/ggml-cpu-aarch64.c

Lines changed: 43 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -3387,19 +3387,42 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
33873387
}
33883388

33893389
// FIXME: this code is duplicated from ggml-aarch64.c
3390-
static block_q4_0x4 make_block_q4_0x4(block_q4_0 * in, unsigned int blck_size_interleave, unsigned int xor_mask) {
3390+
static block_q4_0x4 make_block_q4_0x4(block_q4_0 * in, unsigned int blck_size_interleave) {
33913391
block_q4_0x4 out;
33923392

33933393
for (int i = 0; i < 4; i++) {
33943394
out.d[i] = in[i].d;
33953395
}
33963396

3397-
for (int i = 0; i < QK4_0 * 2; i++) {
3398-
int src_offset = (i / (4 * blck_size_interleave)) * blck_size_interleave;
3399-
int src_id = (i % (4 * blck_size_interleave)) / blck_size_interleave;
3400-
src_offset += (i % blck_size_interleave);
3397+
const int end = QK4_0 * 2 / blck_size_interleave;
34013398

3402-
out.qs[i] = in[src_id].qs[src_offset] ^ xor_mask;
3399+
if (blck_size_interleave == 8) {
3400+
const uint64_t xor_mask = 0x8888888888888888ULL;
3401+
for (int i = 0; i < end; ++i) {
3402+
int src_id = i % 4;
3403+
int src_offset = (i / 4) * blck_size_interleave;
3404+
int dst_offset = i * blck_size_interleave;
3405+
3406+
uint64_t elems;
3407+
// Using memcpy to avoid unaligned memory accesses
3408+
memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t));
3409+
elems ^= xor_mask;
3410+
memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t));
3411+
}
3412+
} else if (blck_size_interleave == 4) {
3413+
const uint32_t xor_mask = 0x88888888;
3414+
for (int i = 0; i < end; ++i) {
3415+
int src_id = i % 4;
3416+
int src_offset = (i / 4) * blck_size_interleave;
3417+
int dst_offset = i * blck_size_interleave;
3418+
3419+
uint32_t elems;
3420+
memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint32_t));
3421+
elems ^= xor_mask;
3422+
memcpy(&out.qs[dst_offset], &elems, sizeof(uint32_t));
3423+
}
3424+
} else {
3425+
GGML_ASSERT(false);
34033426
}
34043427

34053428
return out;
@@ -3409,19 +3432,25 @@ static block_q4_0x4 make_block_q4_0x4(block_q4_0 * in, unsigned int blck_size_in
34093432
// returns an interleaved block_q4_0x8
34103433
// in the interleaved block_q4_0x8, place deltas for 8 block_q4_0 blocks
34113434
// first, then interleave quants from 8 block_q4_0s in blocks of blck_size_interleave
3412-
static block_q4_0x8 make_block_q4_0x8(block_q4_0 * in, unsigned int blck_size_interleave, unsigned int xor_mask) {
3435+
static block_q4_0x8 make_block_q4_0x8(block_q4_0 * in, unsigned int blck_size_interleave) {
34133436
block_q4_0x8 out;
34143437

34153438
for (int i = 0; i < 8; i++) {
34163439
out.d[i] = in[i].d;
34173440
}
34183441

3419-
for (int i = 0; i < QK4_0 * 4; i++) {
3420-
int src_offset = (i / (8 * blck_size_interleave)) * blck_size_interleave;
3421-
int src_id = (i % (8 * blck_size_interleave)) / blck_size_interleave;
3422-
src_offset += (i % blck_size_interleave);
3442+
const int end = QK4_0 * 4 / blck_size_interleave;
3443+
const uint64_t xor_mask = 0x8888888888888888ULL;
3444+
3445+
for (int i = 0; i < end; ++i) {
3446+
int src_id = i % 8;
3447+
int src_offset = (i / 8) * blck_size_interleave;
3448+
int dst_offset = i * blck_size_interleave;
34233449

3424-
out.qs[i] = in[src_id].qs[src_offset] ^ xor_mask;
3450+
uint64_t elems;
3451+
memcpy(&elems, &in[src_id].qs[src_offset], sizeof(uint64_t));
3452+
elems ^= xor_mask;
3453+
memcpy(&out.qs[dst_offset], &elems, sizeof(uint64_t));
34253454
}
34263455

34273456
return out;
@@ -3449,7 +3478,7 @@ static int repack_q4_0_to_q4_0_4_bl(struct ggml_tensor * t, int interleave_block
34493478
for (int i = 0; i < nrows_interleaved; i++) {
34503479
dst_tmp[i] = src[x + i * nblocks];
34513480
}
3452-
*dst++ = make_block_q4_0x4(dst_tmp, interleave_block, 0x88);
3481+
*dst++ = make_block_q4_0x4(dst_tmp, interleave_block);
34533482
}
34543483
src += nrows_interleaved * nblocks;
34553484
}
@@ -3480,7 +3509,7 @@ static int repack_q4_0_to_q4_0_8_bl(struct ggml_tensor *t, int interleave_block,
34803509
for (int i = 0; i < nrows_interleaved; i++ ) {
34813510
dst_tmp[i] = src[x + i * nblocks];
34823511
}
3483-
*dst++ = make_block_q4_0x8(dst_tmp, interleave_block, 0x88);
3512+
*dst++ = make_block_q4_0x8(dst_tmp, interleave_block);
34843513
}
34853514
src += nrows_interleaved * nblocks;
34863515
}

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1763,7 +1763,8 @@ static void ggml_vk_print_gpu_info(size_t idx) {
17631763
fp16 = fp16 && vk12_features.shaderFloat16;
17641764

17651765
std::string device_name = props2.properties.deviceName.data();
1766-
std::cerr << GGML_VK_NAME << idx << ": " << device_name << " (" << driver_props.driverName << ") | uma: " << uma << " | fp16: " << fp16 << " | warp size: " << subgroup_size << std::endl;
1766+
GGML_LOG_DEBUG("ggml_vulkan: %d = %s (%s) | uma: %d | fp16: %d | warp size: %d\n",
1767+
idx, device_name.c_str(), driver_props.driverName, uma, fp16, subgroup_size);
17671768

17681769
if (props2.properties.deviceType == vk::PhysicalDeviceType::eCpu) {
17691770
std::cerr << "ggml_vulkan: Warning: Device type is CPU. This is probably not the device you want." << std::endl;
@@ -1821,8 +1822,7 @@ void ggml_vk_instance_init() {
18211822
};
18221823
validation_features.setPNext(nullptr);
18231824
instance_create_info.setPNext(&validation_features);
1824-
1825-
std::cerr << "ggml_vulkan: Validation layers enabled" << std::endl;
1825+
GGML_LOG_DEBUG("ggml_vulkan: Validation layers enabled\n");
18261826
}
18271827
vk_instance.instance = vk::createInstance(instance_create_info);
18281828

@@ -1936,8 +1936,8 @@ void ggml_vk_instance_init() {
19361936
vk_instance.device_indices.push_back(0);
19371937
}
19381938
}
1939+
GGML_LOG_DEBUG("ggml_vulkan: Found %d Vulkan devices:\n", vk_instance.device_indices.size());
19391940

1940-
std::cerr << "ggml_vulkan: Found " << vk_instance.device_indices.size() << " Vulkan devices:" << std::endl;
19411941

19421942
for (size_t i = 0; i < vk_instance.device_indices.size(); i++) {
19431943
ggml_vk_print_gpu_info(i);

ggml/src/ggml.c

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,14 @@
4949

5050
#define UNUSED GGML_UNUSED
5151

52+
#if defined(_MSC_VER)
53+
#define m512bh(p) p
54+
#define m512i(p) p
55+
#else
56+
#define m512bh(p) (__m512bh)(p)
57+
#define m512i(p) (__m512i)(p)
58+
#endif
59+
5260
// precomputed f32 table for f16 (256 KB) (ggml-impl.h)
5361
float ggml_table_f32_f16[1 << 16];
5462

scripts/compare-llama-bench.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -19,22 +19,22 @@
1919

2020
# Properties by which to differentiate results per commit:
2121
KEY_PROPERTIES = [
22-
"cpu_info", "gpu_info", "n_gpu_layers", "cuda", "vulkan", "kompute", "metal", "sycl", "rpc", "gpu_blas",
23-
"blas", "model_filename", "model_type", "n_batch", "n_ubatch", "embeddings", "n_threads",
24-
"type_k", "type_v", "use_mmap", "no_kv_offload", "split_mode", "main_gpu", "tensor_split", "flash_attn", "n_prompt", "n_gen"
22+
"cpu_info", "gpu_info", "backends", "n_gpu_layers", "model_filename", "model_type", "n_batch", "n_ubatch",
23+
"embeddings", "cpu_mask", "cpu_strict", "poll", "n_threads", "type_k", "type_v", "use_mmap", "no_kv_offload",
24+
"split_mode", "main_gpu", "tensor_split", "flash_attn", "n_prompt", "n_gen"
2525
]
2626

2727
# Properties that are boolean and are converted to Yes/No for the table:
28-
BOOL_PROPERTIES = ["cuda", "vulkan", "kompute", "metal", "sycl", "gpu_blas", "blas", "embeddings", "use_mmap", "no_kv_offload", "flash_attn"]
28+
BOOL_PROPERTIES = ["embeddings", "cpu_strict", "use_mmap", "no_kv_offload", "flash_attn"]
2929

3030
# Header names for the table:
3131
PRETTY_NAMES = {
32-
"cuda": "CUDA", "vulkan": "Vulkan", "kompute": "Kompute", "metal": "Metal", "sycl": "SYCL", "rpc": "RPC",
33-
"gpu_blas": "GPU BLAS", "blas": "BLAS", "cpu_info": "CPU", "gpu_info": "GPU", "model_filename": "File", "model_type": "Model",
34-
"model_size": "Model Size [GiB]", "model_n_params": "Num. of Par.", "n_batch": "Batch size", "n_ubatch": "Microbatch size",
35-
"n_threads": "Threads", "type_k": "K type", "type_v": "V type", "n_gpu_layers": "GPU layers", "split_mode": "Split mode",
36-
"main_gpu": "Main GPU", "no_kv_offload": "NKVO", "flash_attn": "FlashAttention", "tensor_split": "Tensor split",
37-
"use_mmap": "Use mmap", "embeddings": "Embeddings",
32+
"cpu_info": "CPU", "gpu_info": "GPU", "backends": "Backends", "n_gpu_layers": "GPU layers",
33+
"model_filename": "File", "model_type": "Model", "model_size": "Model size [GiB]",
34+
"model_n_params": "Num. of par.", "n_batch": "Batch size", "n_ubatch": "Microbatch size",
35+
"embeddings": "Embeddings", "cpu_mask": "CPU mask", "cpu_strict": "CPU strict", "poll": "Poll",
36+
"n_threads": "Threads", "type_k": "K type", "type_v": "V type", "split_mode": "Split mode", "main_gpu": "Main GPU",
37+
"no_kv_offload": "NKVO", "flash_attn": "FlashAttention", "tensor_split": "Tensor split", "use_mmap": "Use mmap",
3838
}
3939

4040
DEFAULT_SHOW = ["model_type"] # Always show these properties by default.

scripts/sync-ggml.last

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
8a3d799484d861748f86eb87c8314fa2dbccc254
1+
9d0708e863f3aa2fc1eb0b75d433303c30bd0dbc

0 commit comments

Comments
 (0)