@@ -3206,24 +3206,24 @@ void ggml_cpu_fp32_to_fp16(const float * x, ggml_fp16_t * y, int64_t n) {
32063206 __m128i y_vec = _mm_cvtps_ph (x_vec , _MM_FROUND_TO_NEAREST_INT );
32073207 _mm_storel_epi64 ((__m128i * )(y + i ), y_vec );
32083208 }
3209- #elif defined(__NNPA__ )
3210- for (; i + 7 < n ; i += 8 ) {
3211- float32x4_t v_xh = vec_xl (0 , (const float * )(x + i + 0 ));
3212- float32x4_t v_xl = vec_xl (0 , (const float * )(x + i + 4 ));
3213- uint16x8_t v_yd = vec_round_from_fp32 (v_xh , v_xl , 0 );
3214- uint16x8_t v_y = vec_convert_to_fp16 (v_yd , 0 );
3215- vec_xst (v_y , 0 , (ggml_fp16_t * )(y + i ));
3216- }
3217- for (; i + 3 < n ; i += 4 ) {
3218- float32x4_t v_x = vec_xl (0 , (const float * )(x + i ));
3219- float32x4_t v_zero = vec_splats (0.0f );
3220- uint16x8_t v_yd = vec_round_from_fp32 (v_x , v_zero , 0 );
3221- uint16x8_t v_y = vec_convert_to_fp16 (v_yd , 0 );
3222- y [i + 0 ] = vec_extract (v_y , 0 );
3223- y [i + 1 ] = vec_extract (v_y , 1 );
3224- y [i + 2 ] = vec_extract (v_y , 2 );
3225- y [i + 3 ] = vec_extract (v_y , 3 );
3226- }
3209+ // #elif defined(__NNPA__)
3210+ // for (; i + 7 < n; i += 8) {
3211+ // float32x4_t v_xh = vec_xl(0, (const float *)(x + i + 0));
3212+ // float32x4_t v_xl = vec_xl(0, (const float *)(x + i + 4));
3213+ // uint16x8_t v_yd = vec_round_from_fp32(v_xh, v_xl, 0);
3214+ // uint16x8_t v_y = vec_convert_to_fp16(v_yd, 0);
3215+ // vec_xst(v_y, 0, (ggml_fp16_t *)(y + i));
3216+ // }
3217+ // for (; i + 3 < n; i += 4) {
3218+ // float32x4_t v_x = vec_xl(0, (const float *)(x + i));
3219+ // float32x4_t v_zero = vec_splats(0.0f);
3220+ // uint16x8_t v_yd = vec_round_from_fp32(v_x, v_zero, 0);
3221+ // uint16x8_t v_y = vec_convert_to_fp16(v_yd, 0);
3222+ // y[i + 0] = vec_extract(v_y, 0);
3223+ // y[i + 1] = vec_extract(v_y, 1);
3224+ // y[i + 2] = vec_extract(v_y, 2);
3225+ // y[i + 3] = vec_extract(v_y, 3);
3226+ // }
32273227#endif
32283228 for (; i < n ; ++ i ) {
32293229 y [i ] = GGML_CPU_FP32_TO_FP16 (x [i ]);
0 commit comments