Skip to content

Commit d46f363

Browse files
committed
Updated quantize_row_q8_0() function
1 parent b1487ec commit d46f363

File tree

1 file changed

+11
-3
lines changed

1 file changed

+11
-3
lines changed

ggml/src/ggml-cpu/arch/arm/quants.c

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -48,8 +48,10 @@ void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i
4848
const int sve_register_length = ggml_cpu_get_sve_cnt() * 8;
4949
const int ggml_f32_epr = sve_register_length / 32;
5050
const svfloat32_t inactive1 = svdup_n_f32(0.0f);
51-
const svint32_t inactive2 = svdup_n_s32(0);
5251
const svbool_t pg = svptrue_b32();
52+
svfloat32_t zero = svdup_f32(0.0f);
53+
svfloat32_t half = svdup_f32(0.5f);
54+
5355
for (int i = 0; i < nb; i+=1) {
5456
svfloat32_t srcv1, asrcv1;
5557
svfloat32_t sv_max = svdup_n_f32(0.0f);
@@ -67,8 +69,14 @@ void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i
6769
for (int j = 0; j < QK8_0; j+=ggml_f32_epr) {
6870
srcv1 = svld1_f32(pg, x + i*32 + j);
6971
const svfloat32_t v1 = svmul_n_f32_m(pg, srcv1, id);
70-
const svint32_t vi1 = svcvt_s32_f32_m(inactive2, pg, v1);
71-
svst1b_s32(pg, &y[i].qs[j], vi1);
72+
73+
svbool_t ge_zero = svcmpge_f32(pg, v1, zero);
74+
svfloat32_t v_pos = svadd_f32_m(pg, v1, half);
75+
svfloat32_t v_neg = svsub_f32_m(pg, v1, half);
76+
77+
svfloat32_t v_rounded = svsel_f32(ge_zero, v_pos, v_neg);
78+
svint32_t result = svcvt_s32_f32_x(pg, v_rounded);
79+
svst1b_s32(pg, &y[i].qs[j], result);
7280
}
7381
}
7482
#elif defined(__ARM_NEON)

0 commit comments

Comments
 (0)