@@ -340,37 +340,20 @@ void dequantize_row_q5_1(const block_q5_1 * GGML_RESTRICT x, float * GGML_RESTRI
340340 }
341341}
342342
343- // SVE Support added for Scaler Implementation
344343void dequantize_row_q8_0 (const block_q8_0 * GGML_RESTRICT x , float * GGML_RESTRICT y , int64_t k ) {
345344 static const int qk = QK8_0 ;
346345
347346 assert (k % qk == 0 );
348347
349348 const int nb = k / qk ;
350349
351- #if defined(__ARM_FEATURE_SVE )
352- svbool_t pg = svptrue_b32 ();
353- const svfloat32_t inactive1 = svdup_n_f32 (0.0f );
354- const int ggml_f32_epr = svcntw ();
355- for (int i = 0 ; i < nb ; i += 1 ) {
356- const float d1 = GGML_FP16_TO_FP32 (x [i ].d ); // d:0
357- const int8_t * x_data1 = x [i ].qs ;
358- float * y_base = y + i * qk ;
359- for (int j = 0 ; j < qk ; j += ggml_f32_epr ) {
360- svint32_t vec0 = svld1sb_s32 (pg , x_data1 + j );
361- svfloat32_t fvec0 = svmul_n_f32_m (pg , svcvt_f32_s32_m (inactive1 , pg , vec0 ), d1 ); // Convert to float and scale
362- svst1_f32 (pg , y_base + j , fvec0 );
363- }
364- }
365- #else
366- for (int i = 0 ; i < nb ; i ++ ) {
367- const float d = GGML_FP16_TO_FP32 (x [i ].d );
350+ for (int i = 0 ; i < nb ; i ++ ) {
351+ const float d = GGML_FP16_TO_FP32 (x [i ].d );
368352
369- for (int j = 0 ; j < qk ; ++ j ) {
370- y [i * qk + j ] = x [i ].qs [j ]* d ;
371- }
353+ for (int j = 0 ; j < qk ; ++ j ) {
354+ y [i * qk + j ] = x [i ].qs [j ]* d ;
372355 }
373- #endif
356+ }
374357}
375358
376359//
0 commit comments