Skip to content

Commit 9c621bf

Browse files
authored
Merge b3781
b3781
2 parents b5e9546 + 5e2727f commit 9c621bf

File tree

7 files changed

+54
-28
lines changed

7 files changed

+54
-28
lines changed

ggml/CMakeLists.txt

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -50,9 +50,15 @@ else()
5050
set(GGML_BLAS_VENDOR_DEFAULT "Generic")
5151
endif()
5252

53+
if (CMAKE_CROSSCOMPILING)
54+
set(GGML_NATIVE_DEFAULT OFF)
55+
else()
56+
set(GGML_NATIVE_DEFAULT ON)
57+
endif()
58+
5359
# general
5460
option(GGML_STATIC "ggml: static link libraries" OFF)
55-
option(GGML_NATIVE "ggml: enable -march=native flag" ON)
61+
option(GGML_NATIVE "ggml: enable -march=native flag" ${GGML_NATIVE_DEFAULT})
5662
option(GGML_LTO "ggml: enable link time optimization" OFF)
5763
option(GGML_CCACHE "ggml: use ccache if available" ON)
5864

@@ -70,7 +76,7 @@ option(GGML_SANITIZE_ADDRESS "ggml: enable address sanitizer" OFF)
7076
option(GGML_SANITIZE_UNDEFINED "ggml: enable undefined sanitizer" OFF)
7177

7278
# instruction set specific
73-
if (GGML_NATIVE)
79+
if (GGML_NATIVE OR NOT GGML_NATIVE_DEFAULT)
7480
set(INS_ENB OFF)
7581
else()
7682
set(INS_ENB ON)

ggml/src/ggml-quants.c

Lines changed: 22 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -4190,15 +4190,18 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
41904190
sumf = hsum_float_4x4(acc_0, acc_1, acc_2, acc_3);
41914191
#endif
41924192
for (; ib < nb; ++ib) {
4193-
int sumi = 0;
4193+
int sumi0 = 0;
4194+
int sumi1 = 0;
41944195

41954196
for (int j = 0; j < qk/2; ++j) {
41964197
const int v0 = (x[ib].qs[j] & 0x0F) - 8;
41974198
const int v1 = (x[ib].qs[j] >> 4) - 8;
41984199

4199-
sumi += (v0 * y[ib].qs[j]) + (v1 * y[ib].qs[j + qk/2]);
4200+
sumi0 += (v0 * y[ib].qs[j]);
4201+
sumi1 += (v1 * y[ib].qs[j + qk/2]);
42004202
}
42014203

4204+
int sumi = sumi0 + sumi1;
42024205
sumf += sumi*GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d);
42034206
}
42044207

@@ -4474,15 +4477,18 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
44744477
sumf = hsum_float_8(acc) + summs;
44754478
#endif
44764479
for (; ib < nb; ++ib) {
4477-
int sumi = 0;
4480+
int sumi0 = 0;
4481+
int sumi1 = 0;
44784482

44794483
for (int j = 0; j < qk/2; ++j) {
44804484
const int v0 = (x[ib].qs[j] & 0x0F);
44814485
const int v1 = (x[ib].qs[j] >> 4);
44824486

4483-
sumi += (v0 * y[ib].qs[j]) + (v1 * y[ib].qs[j + qk/2]);
4487+
sumi0 += (v0 * y[ib].qs[j]);
4488+
sumi1 += (v1 * y[ib].qs[j + qk/2]);
44844489
}
44854490

4491+
int sumi = sumi0 + sumi1;
44864492
sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s);
44874493
}
44884494

@@ -4823,18 +4829,21 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * r
48234829
uint32_t qh;
48244830
memcpy(&qh, x[ib].qh, sizeof(qh));
48254831

4826-
int sumi = 0;
4832+
int sumi0 = 0;
4833+
int sumi1 = 0;
48274834

48284835
for (int j = 0; j < qk/2; ++j) {
48294836
const uint8_t xh_0 = ((qh & (1u << (j + 0 ))) >> (j + 0 )) << 4;
48304837
const uint8_t xh_1 = ((qh & (1u << (j + 16))) >> (j + 12));
48314838

4832-
const int32_t x0 = ((x[ib].qs[j] & 0x0F) | xh_0) - 16;
4833-
const int32_t x1 = ((x[ib].qs[j] >> 4) | xh_1) - 16;
4839+
const int32_t x0 = (int8_t)(((x[ib].qs[j] & 0x0F) | xh_0) - 16);
4840+
const int32_t x1 = (int8_t)(((x[ib].qs[j] >> 4) | xh_1) - 16);
48344841

4835-
sumi += (x0 * y[ib].qs[j]) + (x1 * y[ib].qs[j + qk/2]);
4842+
sumi0 += (x0 * y[ib].qs[j]);
4843+
sumi1 += (x1 * y[ib].qs[j + qk/2]);
48364844
}
48374845

4846+
int sumi = sumi0 + sumi1;
48384847
sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d)) * sumi;
48394848
}
48404849

@@ -5194,7 +5203,8 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
51945203
uint32_t qh;
51955204
memcpy(&qh, x[ib].qh, sizeof(qh));
51965205

5197-
int sumi = 0;
5206+
int sumi0 = 0;
5207+
int sumi1 = 0;
51985208

51995209
for (int j = 0; j < qk/2; ++j) {
52005210
const uint8_t xh_0 = ((qh >> (j + 0)) << 4) & 0x10;
@@ -5203,9 +5213,11 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
52035213
const int32_t x0 = (x[ib].qs[j] & 0xF) | xh_0;
52045214
const int32_t x1 = (x[ib].qs[j] >> 4) | xh_1;
52055215

5206-
sumi += (x0 * y[ib].qs[j]) + (x1 * y[ib].qs[j + qk/2]);
5216+
sumi0 += (x0 * y[ib].qs[j]);
5217+
sumi1 += (x1 * y[ib].qs[j + qk/2]);
52075218
}
52085219

5220+
int sumi = sumi0 + sumi1;
52095221
sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s);
52105222
}
52115223

ggml/src/ggml-vulkan.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -236,8 +236,8 @@ struct vk_device_struct {
236236
};
237237

238238
struct vk_buffer_struct {
239-
vk::Buffer buffer;
240-
vk::DeviceMemory device_memory;
239+
vk::Buffer buffer = VK_NULL_HANDLE;
240+
vk::DeviceMemory device_memory = VK_NULL_HANDLE;
241241
vk::MemoryPropertyFlags memory_property_flags;
242242
void * ptr;
243243
size_t size = 0;

ggml/src/ggml.c

Lines changed: 12 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -14746,7 +14746,7 @@ static void ggml_compute_forward_pool_1d_sk_p0(
1474614746

1474714747
const struct ggml_tensor * src = dst->src[0];
1474814748

14749-
assert(src->type == GGML_TYPE_F32);
14749+
assert(src->type == GGML_TYPE_F32 || src->type == GGML_TYPE_F16);
1475014750

1475114751
if (params->ith != 0) {
1475214752
return;
@@ -14759,21 +14759,20 @@ static void ggml_compute_forward_pool_1d_sk_p0(
1475914759
const int64_t rs = dst->ne[0];
1476014760

1476114761
while (cdata < data_end) {
14762-
const float * const srow = (const float *)cdata;
14763-
14762+
const void * srow = (const void *)cdata;
1476414763
int j = 0;
14765-
1476614764
for (int64_t i = 0; i < rs; ++i) {
1476714765
switch (op) {
1476814766
case GGML_OP_POOL_AVG: drow[i] = 0; break;
1476914767
case GGML_OP_POOL_MAX: drow[i] = -FLT_MAX; break;
1477014768
case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error");
1477114769
}
1477214770
for (int ki = 0; ki < k; ++ki) {
14771+
const float srow_j = (src->type == GGML_TYPE_F32) ? ((const float*)srow)[j] : GGML_FP16_TO_FP32(((const ggml_fp16_t*)srow)[j]);
1477314772
switch (op) {
14774-
case GGML_OP_POOL_AVG: drow[i] += srow[j]; break;
14775-
case GGML_OP_POOL_MAX: if (srow[j] > drow[i]) drow[i] = srow[j]; break;
14776-
case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error");
14773+
case GGML_OP_POOL_AVG: drow[i] += srow_j; break;
14774+
case GGML_OP_POOL_MAX: if (srow_j > drow[i]) drow[i] = srow_j; break;
14775+
case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error");
1477714776
}
1477814777
++j;
1477914778
}
@@ -14814,7 +14813,7 @@ static void ggml_compute_forward_pool_2d(
1481414813

1481514814
const struct ggml_tensor * src = dst->src[0];
1481614815

14817-
GGML_ASSERT(src->type == GGML_TYPE_F32);
14816+
assert(src->type == GGML_TYPE_F32 || src->type == GGML_TYPE_F16);
1481814817

1481914818
if (params->ith != 0) {
1482014819
return;
@@ -14857,14 +14856,15 @@ static void ggml_compute_forward_pool_2d(
1485714856

1485814857
for (int ky = 0; ky < k1; ++ky) {
1485914858
if (iy + ky < 0 || iy + ky >= src->ne[1]) continue;
14860-
const float * const srow = (const float *)(cdata + src->nb[1] * (iy + ky));
14859+
const void * srow = (const void *)(cdata + src->nb[1] * (iy + ky));
1486114860
for (int kx = 0; kx < k0; ++kx) {
1486214861
int j = ix + kx;
1486314862
if (j < 0 || j >= src->ne[0]) continue;
14863+
const float srow_j = (src->type == GGML_TYPE_F32) ? ((const float*)srow)[j] : GGML_FP16_TO_FP32(((const ggml_fp16_t*)srow)[j]);
1486414864
switch (op) {
14865-
case GGML_OP_POOL_AVG: *out += srow[j]; break;
14866-
case GGML_OP_POOL_MAX: if (srow[j] > *out) *out = srow[j]; break;
14867-
case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error");
14865+
case GGML_OP_POOL_AVG: *out += srow_j; break;
14866+
case GGML_OP_POOL_MAX: if (srow_j > *out) *out = srow_j; break;
14867+
case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error");
1486814868
}
1486914869
}
1487014870
}
@@ -18078,7 +18078,6 @@ static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_ten
1807818078
}
1807918079

1808018080
const int n0 = cgraph->n_nodes;
18081-
UNUSED(n0);
1808218081

1808318082
ggml_visit_parents(cgraph, tensor);
1808418083

scripts/sync-ggml-am.sh

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -102,6 +102,8 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
102102
# cmake/FindSIMD.cmake -> ggml/cmake/FindSIMD.cmake
103103
#
104104
# src/ggml.c -> ggml/src/ggml.c
105+
# src/ggml-aarch64.c -> ggml/src/ggml-aarch64.c
106+
# src/ggml-aarch64.h -> ggml/src/ggml-aarch64.h
105107
# src/ggml-alloc.c -> ggml/src/ggml-alloc.c
106108
# src/ggml-backend-impl.h -> ggml/src/ggml-backend-impl.h
107109
# src/ggml-backend.c -> ggml/src/ggml-backend.c
@@ -117,6 +119,7 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
117119
# src/ggml-sycl/* -> ggml/src/ggml-sycl/
118120
# src/ggml-sycl.cpp -> ggml/src/ggml-sycl.cpp
119121
# src/ggml-vulkan.cpp -> ggml/src/ggml-vulkan.cpp
122+
# src/vulkan-shaders/* -> ggml/src/vulkan-shaders/
120123
#
121124
# include/ggml.h -> ggml/include/ggml.h
122125
# include/ggml-alloc.h -> ggml/include/ggml-alloc.h
@@ -143,6 +146,8 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
143146
-e 's/([[:space:]]|[ab]\/)src\/CMakeLists.txt/\1ggml\/src\/CMakeLists.txt/g' \
144147
-e 's/([[:space:]]|[ab]\/)cmake\/FindSIMD.cmake/\1ggml\/cmake\/FindSIMD.cmake/g' \
145148
-e 's/([[:space:]]|[ab]\/)src\/ggml\.c/\1ggml\/src\/ggml.c/g' \
149+
-e 's/([[:space:]]|[ab]\/)src\/ggml-aarch64\.c/\1ggml\/src\/ggml-aarch64.c/g' \
150+
-e 's/([[:space:]]|[ab]\/)src\/ggml-aarch64\.h/\1ggml\/src\/ggml-aarch64.h/g' \
146151
-e 's/([[:space:]]|[ab]\/)src\/ggml-alloc\.c/\1ggml\/src\/ggml-alloc.c/g' \
147152
-e 's/([[:space:]]|[ab]\/)src\/ggml-backend-impl\.h/\1ggml\/src\/ggml-backend-impl.h/g' \
148153
-e 's/([[:space:]]|[ab]\/)src\/ggml-backend\.c/\1ggml\/src\/ggml-backend.c/g' \
@@ -158,6 +163,7 @@ if [ -f $SRC_LLAMA/ggml-src.patch ]; then
158163
-e 's/([[:space:]]|[ab]\/)src\/ggml-sycl\//\1ggml\/src\/ggml-sycl\//g' \
159164
-e 's/([[:space:]]|[ab]\/)src\/ggml-sycl\.cpp/\1ggml\/src\/ggml-sycl.cpp/g' \
160165
-e 's/([[:space:]]|[ab]\/)src\/ggml-vulkan\.cpp/\1ggml\/src\/ggml-vulkan.cpp/g' \
166+
-e 's/([[:space:]]|[ab]\/)src\/vulkan-shaders\//\1ggml\/src\/vulkan-shaders\//g' \
161167
-e 's/([[:space:]]|[ab]\/)include\/ggml\.h/\1ggml\/include\/ggml.h/g' \
162168
-e 's/([[:space:]]|[ab]\/)include\/ggml-alloc\.h/\1ggml\/include\/ggml-alloc.h/g' \
163169
-e 's/([[:space:]]|[ab]\/)include\/ggml-backend\.h/\1ggml\/include\/ggml-backend.h/g' \

scripts/sync-ggml.last

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
e3b3846976c94163f2b3dd128cc959782653edbb
1+
31d544f87835a55602883fe09156bb85a4c163d8

scripts/sync-ggml.sh

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@ cp -rpv ../ggml/src/CMakeLists.txt ./ggml/src/CMakeLists.txt
55
cp -rpv ../ggml/cmake/FindSIMD.cmake ./ggml/cmake/FindSIMD.cmake
66

77
cp -rpv ../ggml/src/ggml.c ./ggml/src/ggml.c
8+
cp -rpv ../ggml/src/ggml-aarch64.c ./ggml/src/ggml-aarch64.c
9+
cp -rpv ../ggml/src/ggml-aarch64.h ./ggml/src/ggml-aarch64.h
810
cp -rpv ../ggml/src/ggml-alloc.c ./ggml/src/ggml-alloc.c
911
cp -rpv ../ggml/src/ggml-backend-impl.h ./ggml/src/ggml-backend-impl.h
1012
cp -rpv ../ggml/src/ggml-backend.c ./ggml/src/ggml-backend.c
@@ -21,6 +23,7 @@ cp -rpv ../ggml/src/ggml-rpc.cpp ./ggml/src/ggml-rpc.cpp
2123
cp -rpv ../ggml/src/ggml-sycl/* ./ggml/src/ggml-sycl/
2224
cp -rpv ../ggml/src/ggml-sycl.cpp ./ggml/src/ggml-sycl.cpp
2325
cp -rpv ../ggml/src/ggml-vulkan.cpp ./ggml/src/ggml-vulkan.cpp
26+
cp -rpv ../ggml/src/vulkan-shaders/* ./ggml/src/vulkan-shaders/
2427

2528
cp -rpv ../ggml/include/ggml.h ./ggml/include/ggml.h
2629
cp -rpv ../ggml/include/ggml-alloc.h ./ggml/include/ggml-alloc.h

0 commit comments

Comments
 (0)