@@ -3145,15 +3145,15 @@ void ggml_cpu_fp32_to_fp16(const float * x, ggml_fp16_t * y, int64_t n) {
31453145 for (; i + 7 < n ; i += 8 ) {
31463146 float32x4_t v_xh = vec_xl (0 , (const float * )(x + i + 0 ));
31473147 float32x4_t v_xl = vec_xl (0 , (const float * )(x + i + 4 ));
3148- uint16x8_t v_xd = vec_round_from_fp32 (v_xh , v_xl , 0 );
3149- uint16x8_t v_y = vec_convert_to_fp16 (v_xd , 0 );
3148+ uint16x8_t v_yd = vec_round_from_fp32 (v_xh , v_xl , 0 );
3149+ uint16x8_t v_y = vec_convert_to_fp16 (v_yd , 0 );
31503150 vec_xst (v_y , 0 , (ggml_fp16_t * )(y + i ));
31513151 }
31523152 for (; i + 3 < n ; i += 4 ) {
31533153 float32x4_t v_x = vec_xl (0 , (const float * )(x + i ));
31543154 float32x4_t v_zero = vec_splats (0.0f );
3155- uint16x8_t v_xd = vec_round_from_fp32 (v_x , v_zero , 0 );
3156- uint16x8_t v_y = vec_convert_to_fp16 (v_xd , 0 );
3155+ uint16x8_t v_yd = vec_round_from_fp32 (v_x , v_zero , 0 );
3156+ uint16x8_t v_y = vec_convert_to_fp16 (v_yd , 0 );
31573157 vec_xst (v_y , 0 , (ggml_fp16_t * )(y + i ));
31583158 }
31593159#endif
@@ -3185,18 +3185,17 @@ void ggml_cpu_fp16_to_fp32(const ggml_fp16_t * x, float * y, int64_t n) {
31853185#elif defined(__NNPA__ )
31863186 for (; i + 7 < n ; i += 8 ) {
31873187 uint16x8_t v_x = vec_xl (0 , (const ggml_fp16_t * )(x + i ));
3188- uint16x8_t v_xd = vec_convert_from_fp16 (v_x , 0 );
3189- float32x4_t v_xdh = vec_extend_to_fp32_hi (v_xd , 0 );
3190- float32x4_t v_xdl = vec_extend_to_fp32_lo (v_xd , 0 );
3191- vec_xst (v_xdh , 0 , (float * )(y + i + 0 ));
3192- vec_xst (v_xdl , 0 , (float * )(y + i + 4 ));
3188+ uint16x8_t v_yd = vec_convert_from_fp16 (v_x , 0 );
3189+ float32x4_t v_yh = vec_extend_to_fp32_hi (v_yd , 0 );
3190+ float32x4_t v_yl = vec_extend_to_fp32_lo (v_yd , 0 );
3191+ vec_xst (v_yh , 0 , (float * )(y + i + 0 ));
3192+ vec_xst (v_yl , 0 , (float * )(y + i + 4 ));
31933193 }
3194-
31953194 for (; i + 3 < n ; i += 4 ) {
31963195 uint16x8_t v_x = vec_xl (0 , (const ggml_fp16_t * )(x + i ));
3197- uint16x8_t v_xd = vec_convert_from_fp16 (v_x , 0 );
3198- float32x4_t v_xdh = vec_extend_to_fp32_hi (v_xd , 0 );
3199- vec_xst (v_xdh , 0 , (float * )(y + i ));
3196+ uint16x8_t v_yd = vec_convert_from_fp16 (v_x , 0 );
3197+ float32x4_t v_yh = vec_extend_to_fp32_hi (v_yd , 0 );
3198+ vec_xst (v_yh , 0 , (float * )(y + i ));
32003199 }
32013200#endif
32023201
0 commit comments