Commit 871036d: add check for tensor dimensions

1 parent 5947d72
File tree: 8 files changed, +30 -27 lines

Makefile
Lines changed: 3 additions & 3 deletions

@@ -874,9 +874,9 @@ ggml/src/ggml-cuda/%.o: \
 	$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
 endif # GGML_HIPBLAS
 
-ifdef GGML_CPU_AARCH64
-	MK_CPPFLAGS += -DGGML_USE_CPU_AARCH64
-	MK_CFLAGS += -DGGML_USE_CPU_AARCH64
+ifdef GGML_RUNTIME_REPACK
+	MK_CPPFLAGS += -DGGML_USE_RUNTIME_REPACK
+	MK_CFLAGS += -DGGML_USE_RUNTIME_REPACK
 endif
 
 ifdef GGML_METAL

ggml/CMakeLists.txt
Lines changed: 1 addition & 1 deletion

@@ -92,7 +92,7 @@ else()
 endif()
 
 option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)
-option(GGML_CPU_AARCH64 "ggml: use runtime weight quantization to enable optimized GEMM/GEMV kernels for AARCH64 cpu" OFF)
+option(GGML_RUNTIME_REPACK "ggml: use runtime weight quantization to enable optimized GEMM/GEMV kernels for AARCH64 cpu" OFF)
 
 option(GGML_AVX "ggml: enable AVX" ${INS_ENB})
 option(GGML_AVX2 "ggml: enable AVX2" ${INS_ENB})

ggml/include/ggml-cpu.h
Lines changed: 1 addition & 1 deletion

@@ -145,7 +145,7 @@ extern "C" {
     GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void);
 #endif
 
-#ifdef GGML_USE_CPU_AARCH64
+#ifdef GGML_USE_RUNTIME_REPACK
     GGML_API ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);
 #endif

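For reference, a minimal sketch (not part of this diff) of how downstream code could select a CPU buffer type under the renamed guard. Only ggml_backend_cpu_aarch64_buffer_type() comes from the header above; the pick_cpu_buft() helper and the fallback to ggml_backend_cpu_buffer_type() are assumptions for illustration.

#include "ggml-backend.h"
#include "ggml-cpu.h"    // declares ggml_backend_cpu_aarch64_buffer_type() under GGML_USE_RUNTIME_REPACK

// Hypothetical helper: prefer the repacking buffer type when it was compiled in,
// otherwise fall back to the regular CPU buffer type.
static ggml_backend_buffer_type_t pick_cpu_buft(void) {
#ifdef GGML_USE_RUNTIME_REPACK
    return ggml_backend_cpu_aarch64_buffer_type();
#else
    return ggml_backend_cpu_buffer_type();
#endif
}
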
ggml/src/CMakeLists.txt
Lines changed: 2 additions & 2 deletions

@@ -880,10 +880,10 @@ if (GGML_CPU_HBM)
     target_link_libraries(ggml PUBLIC memkind)
 endif()
 
-if (GGML_CPU_AARCH64)
+if (GGML_RUNTIME_REPACK)
     message(STATUS "Using runtime weight quantization to enable optimized GEMM/GEMV kernels for AARCH64 cpu")
 
-    add_compile_definitions(GGML_USE_CPU_AARCH64)
+    add_compile_definitions(GGML_USE_RUNTIME_REPACK)
 endif()
 
 if (GGML_CANN)

ggml/src/ggml-aarch64.c
Lines changed: 18 additions & 15 deletions

@@ -3477,10 +3477,9 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * restrict s, size_t bs, const void *
         }
     }
 
-#ifdef GGML_USE_CPU_AARCH64
-static void repack_q4_0_to_q4_0_4_bl(struct ggml_tensor * t, int interleave_block, const void * data, size_t data_size) {
+#ifdef GGML_USE_RUNTIME_REPACK
+static int repack_q4_0_to_q4_0_4_bl(struct ggml_tensor * t, int interleave_block, const void * data, size_t data_size) {
     GGML_ASSERT(t->type == GGML_TYPE_Q4_0);
-    GGML_ASSERT(t->ne[0] % 8 == 0);
     GGML_ASSERT(interleave_block == 4 || interleave_block == 8);
 
     block_q4_0x4 *dst = (block_q4_0x4 *)t->data;
@@ -3492,23 +3491,26 @@ static void repack_q4_0_to_q4_0_4_bl(struct ggml_tensor * t, int interleave_bloc
 
     GGML_ASSERT(data_size == nrow * nblocks * sizeof(block_q4_0));
 
+    if (nrow % nrows_interleaved != 0 || t->ne[0] % 8 != 0) {
+        return -1;
+    }
+
     for (int b = 0; b < nrow; b += nrows_interleaved) {
-        for (int64_t x = 0; x < nblocks; x++)
-        {
+        for (int64_t x = 0; x < nblocks; x++) {
             for (int i = 0; i < nrows_interleaved; i++) {
                 dst_tmp[i] = src[x + i * nblocks];
             }
             *dst++ = make_block_q4_0x4(dst_tmp, interleave_block, 0x88);
         }
         src += nrows_interleaved * nblocks;
     }
+    return 0;
 
     GGML_UNUSED(data_size);
 }
 
-static void repack_q4_0_to_q4_0_8_bl(struct ggml_tensor *t, int interleave_block, const void * data, size_t data_size) {
+static int repack_q4_0_to_q4_0_8_bl(struct ggml_tensor *t, int interleave_block, const void * data, size_t data_size) {
     GGML_ASSERT(t->type == GGML_TYPE_Q4_0);
-    GGML_ASSERT(t->ne[0] % 8 == 0);
     GGML_ASSERT(interleave_block == 8);
 
     block_q4_0x8 *dst = (block_q4_0x8*)t->data;
@@ -3520,6 +3522,10 @@ static void repack_q4_0_to_q4_0_8_bl(struct ggml_tensor *t, int interleave_block
 
     GGML_ASSERT(data_size == nrow * nblocks * sizeof(block_q4_0));
 
+    if (nrow % nrows_interleaved != 0 || t->ne[0] % 8 != 0) {
+        return -1;
+    }
+
     for (int b = 0; b < nrow; b += nrows_interleaved) {
         for (int64_t x = 0; x < nblocks; x++) {
             for (int i = 0; i < nrows_interleaved; i++ ) {
@@ -3529,29 +3535,26 @@ static void repack_q4_0_to_q4_0_8_bl(struct ggml_tensor *t, int interleave_block
         }
         src += nrows_interleaved * nblocks;
     }
+    return 0;
 
     GGML_UNUSED(data_size);
 }
 
 // Prepare for optimized kernels if applicable
 int ggml_prepare_optimal_kernel(struct ggml_tensor * cur, const void * data, size_t data_size) {
     GGML_ASSERT(cur->type == GGML_TYPE_Q4_0);
-    int ret = -1;
 #if defined(__ARM_ARCH)
     if (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0) {
-        repack_q4_0_to_q4_0_8_bl(cur, 8, data, data_size);
-        ret = 0;
+        return repack_q4_0_to_q4_0_8_bl(cur, 8, data, data_size);
     }
     else if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) {
-        repack_q4_0_to_q4_0_4_bl(cur, 8, data, data_size);
-        ret = 0;
+        return repack_q4_0_to_q4_0_4_bl(cur, 8, data, data_size);
     }
     else if (ggml_cpu_has_neon()) {
-        repack_q4_0_to_q4_0_4_bl(cur, 4, data, data_size);
-        ret = 0;
+        return repack_q4_0_to_q4_0_4_bl(cur, 4, data, data_size);
     }
 #endif
-    return ret;
+    return -1;
 
     GGML_UNUSED(cur);
     GGML_UNUSED(data);

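The repack helpers above now return an int instead of asserting on the tensor shape, and ggml_prepare_optimal_kernel() forwards that result (-1 when the row count or ne[0] does not fit the interleave width). Below is a minimal sketch of how a call site could consume that value; the set_q4_0_tensor_example() wrapper and its memcpy fallback are illustrative assumptions, not code from this commit.

#include <string.h>

#include "ggml.h"
#include "ggml-aarch64.h"   // int ggml_prepare_optimal_kernel(struct ggml_tensor *, const void *, size_t)

#ifdef GGML_USE_RUNTIME_REPACK
// Hypothetical call site: try to repack Q4_0 weights into the interleaved
// layout used by the optimized GEMM/GEMV kernels; on failure (-1), keep the
// original Q4_0 layout by copying the data unchanged.
static void set_q4_0_tensor_example(struct ggml_tensor * t, const void * data, size_t size) {
    GGML_ASSERT(t->type == GGML_TYPE_Q4_0);  // the repack path only handles Q4_0
    if (ggml_prepare_optimal_kernel(t, data, size) != 0) {
        memcpy(t->data, data, size);
    }
}
#endif
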
ggml/src/ggml-aarch64.h
Lines changed: 1 addition & 1 deletion

@@ -33,7 +33,7 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo
 void ggml_gemm_q4_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
 void ggml_gemm_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc);
 
-#ifdef GGML_USE_CPU_AARCH64
+#ifdef GGML_USE_RUNTIME_REPACK
 int ggml_prepare_optimal_kernel(struct ggml_tensor * cur, const void * data, size_t data_size);
 enum ggml_type ggml_get_optimal_type(const struct ggml_tensor * cur);
 #endif

ggml/src/ggml-backend.cpp
Lines changed: 3 additions & 3 deletions

@@ -2239,7 +2239,7 @@ ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void) {
 }
 #endif
 
-#ifdef GGML_USE_CPU_AARCH64
+#ifdef GGML_USE_RUNTIME_REPACK
 
 // buffer type AARCH64
 
@@ -2316,7 +2316,7 @@ static ggml_backend_buffer_type_t * ggml_backend_cpu_get_extra_bufts(ggml_backen
     bufts[index++] = ggml_backend_cpu_hbm_buffer_type();
 #endif
 
-#ifdef GGML_USE_CPU_AARCH64
+#ifdef GGML_USE_RUNTIME_REPACK
     if (ggml_cpu_has_neon() || ggml_cpu_has_matmul_int8() || ggml_cpu_has_sve()) {
         bufts[index++] = ggml_backend_cpu_aarch64_buffer_type();
     }
@@ -2635,7 +2635,7 @@ static ggml_backend_buffer_t ggml_backend_cpu_device_buffer_from_host_ptr(ggml_b
 }
 
 static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) {
-#ifdef GGML_USE_CPU_AARCH64
+#ifdef GGML_USE_RUNTIME_REPACK
     const struct ggml_tensor *tensor = op->src[0];
     if (tensor && tensor->buffer && (strcmp(tensor->buffer->buft->iface.get_name(tensor->buffer->buft),"CPU_AARCH64") == 0)) {
         if (op->op == GGML_OP_MUL_MAT && tensor->type == GGML_TYPE_Q4_0) {

ggml/src/ggml-cpu.c
Lines changed: 1 addition & 1 deletion

@@ -7427,7 +7427,7 @@ static void ggml_compute_forward_mul_mat(
 
     enum ggml_type type = src0->type;
 
-#ifdef GGML_USE_CPU_AARCH64
+#ifdef GGML_USE_RUNTIME_REPACK
     if (strcmp(src0->buffer->buft->iface.get_name(src0->buffer->buft),"CPU_AARCH64") == 0) {
         type = ggml_get_optimal_type(src0);
     }
