Skip to content

Commit 4200bea

Browse files
committed
ggml-cpu: disable more faulty code for rework
Signed-off-by: Aaron Teo <[email protected]>
1 parent ed91ef6 commit 4200bea

File tree

1 file changed

+18
-18
lines changed

1 file changed

+18
-18
lines changed

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 18 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -3206,24 +3206,24 @@ void ggml_cpu_fp32_to_fp16(const float * x, ggml_fp16_t * y, int64_t n) {
3206 3206
__m128i y_vec = _mm_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
3207 3207
_mm_storel_epi64((__m128i *)(y + i), y_vec);
3208 3208
}
3209-
#elif defined(__NNPA__)
3210-
for (; i + 7 < n; i += 8) {
3211-
float32x4_t v_xh = vec_xl(0, (const float *)(x + i + 0));
3212-
float32x4_t v_xl = vec_xl(0, (const float *)(x + i + 4));
3213-
uint16x8_t v_yd = vec_round_from_fp32(v_xh, v_xl, 0);
3214-
uint16x8_t v_y = vec_convert_to_fp16(v_yd, 0);
3215-
vec_xst(v_y, 0, (ggml_fp16_t *)(y + i));
3216-
}
3217-
for (; i + 3 < n; i += 4) {
3218-
float32x4_t v_x = vec_xl(0, (const float *)(x + i));
3219-
float32x4_t v_zero = vec_splats(0.0f);
3220-
uint16x8_t v_yd = vec_round_from_fp32(v_x, v_zero, 0);
3221-
uint16x8_t v_y = vec_convert_to_fp16(v_yd, 0);
3222-
y[i + 0] = vec_extract(v_y, 0);
3223-
y[i + 1] = vec_extract(v_y, 1);
3224-
y[i + 2] = vec_extract(v_y, 2);
3225-
y[i + 3] = vec_extract(v_y, 3);
3226-
}
3209+
// #elif defined(__NNPA__)
3210+
// for (; i + 7 < n; i += 8) {
3211+
// float32x4_t v_xh = vec_xl(0, (const float *)(x + i + 0));
3212+
// float32x4_t v_xl = vec_xl(0, (const float *)(x + i + 4));
3213+
// uint16x8_t v_yd = vec_round_from_fp32(v_xh, v_xl, 0);
3214+
// uint16x8_t v_y = vec_convert_to_fp16(v_yd, 0);
3215+
// vec_xst(v_y, 0, (ggml_fp16_t *)(y + i));
3216+
// }
3217+
// for (; i + 3 < n; i += 4) {
3218+
// float32x4_t v_x = vec_xl(0, (const float *)(x + i));
3219+
// float32x4_t v_zero = vec_splats(0.0f);
3220+
// uint16x8_t v_yd = vec_round_from_fp32(v_x, v_zero, 0);
3221+
// uint16x8_t v_y = vec_convert_to_fp16(v_yd, 0);
3222+
// y[i + 0] = vec_extract(v_y, 0);
3223+
// y[i + 1] = vec_extract(v_y, 1);
3224+
// y[i + 2] = vec_extract(v_y, 2);
3225+
// y[i + 3] = vec_extract(v_y, 3);
3226+
// }
3227 3227
#endif
3228 3228
for (; i < n; ++i) {
3229 3229
y[i] = GGML_CPU_FP32_TO_FP16(x[i]);

0 commit comments

Comments (0)