@@ -77,85 +77,16 @@ inline static void ggml_vec_add_f16(const int n, ggml_fp16_t * z, const ggml_fp
         z[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(x[i]) + GGML_CPU_FP16_TO_FP32(y[i]));
     }
 }
-inline static void ggml_vec_add1_f32(const int n, float * z, const float * x, const float v) {
-    int i = 0;
-#if defined(GGML_SIMD)
-    const int np = (n & ~(GGML_F32_STEP - 1));
-
-    GGML_F32_VEC vv = GGML_F32_VEC_SET1(v);
-
-    for (; i < np; i += GGML_F32_STEP) {
-        for (int j = 0; j < GGML_F32_ARR; ++j) {
-            GGML_F32_VEC ax = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
-            GGML_F32_VEC az = GGML_F32_VEC_ADD(ax, vv);
-            GGML_F32_VEC_STORE(z + i + j*GGML_F32_EPR, az);
-        }
-    }
-#endif
-    for (; i < n; ++i) {
-        z[i] = x[i] + v;
-    }
-}
-inline static void ggml_vec_acc_f32(const int n, float * y, const float * x) {
-    int i = 0;
-#if defined(GGML_SIMD)
-    const int np = (n & ~(GGML_F32_STEP - 1));
-
-    for (; i < np; i += GGML_F32_STEP) {
-        for (int j = 0; j < GGML_F32_ARR; ++j) {
-            GGML_F32_VEC ay = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
-            GGML_F32_VEC ax = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
-            ay = GGML_F32_VEC_ADD(ay, ax);
-            GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay);
-        }
-    }
-#endif
-    for (; i < n; ++i) {
-        y[i] += x[i];
-    }
-}
-inline static void ggml_vec_acc1_f32(const int n, float * y, const float v) {
-    int i = 0;
-#if defined(GGML_SIMD)
-    const int np = (n & ~(GGML_F32_STEP - 1));
-
-    GGML_F32_VEC vv = GGML_F32_VEC_SET1(v);
-
-    for (; i < np; i += GGML_F32_STEP) {
-        for (int j = 0; j < GGML_F32_ARR; ++j) {
-            GGML_F32_VEC ay = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
-            ay = GGML_F32_VEC_ADD(ay, vv);
-            GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay);
-        }
-    }
-#endif
-    for (; i < n; ++i) {
-        y[i] += v;
-    }
-}
+inline static void ggml_vec_add1_f32(const int n, float * z, const float * x, const float v) { for (int i = 0; i < n; ++i) z[i] = x[i] + v; }
+inline static void ggml_vec_acc_f32(const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] += x[i]; }
+inline static void ggml_vec_acc1_f32(const int n, float * y, const float v) { for (int i = 0; i < n; ++i) y[i] += v; }
 inline static void ggml_vec_sub_f32(const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i] - y[i]; }
 inline static void ggml_vec_sub_f16(const int n, ggml_fp16_t * z, const ggml_fp16_t * x, const ggml_fp16_t * y) {
     for (int i = 0; i < n; ++i) {
         z[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(x[i]) - GGML_CPU_FP16_TO_FP32(y[i]));
     }
 }
-inline static void ggml_vec_set_f32(const int n, float * x, const float v) {
-    int i = 0;
-#if defined(GGML_SIMD)
-    const int np = (n & ~(GGML_F32_STEP - 1));
-
-    GGML_F32_VEC vx = GGML_F32_VEC_SET1(v);
-
-    for (; i < np; i += GGML_F32_STEP) {
-        for (int j = 0; j < GGML_F32_ARR; ++j) {
-            GGML_F32_VEC_STORE(x + i + j*GGML_F32_EPR, vx);
-        }
-    }
-#endif
-    for (; i < n; ++i) {
-        x[i] = v;
-    }
-}
+inline static void ggml_vec_set_f32(const int n, float * x, const float v) { for (int i = 0; i < n; ++i) x[i] = v; }
 inline static void ggml_vec_cpy_f32(const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i]; }
 inline static void ggml_vec_neg_f32(const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = -x[i]; }
 inline static void ggml_vec_neg_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
@@ -164,24 +95,7 @@ inline static void ggml_vec_neg_f16(const int n, ggml_fp16_t * y, const ggml_fp
     }
 }

-inline static void ggml_vec_mul_f32(const int n, float * z, const float * x, const float * y) {
-    int i = 0;
-#if defined(GGML_SIMD)
-    const int np = (n & ~(GGML_F32_STEP - 1));
-
-    for (; i < np; i += GGML_F32_STEP) {
-        for (int j = 0; j < GGML_F32_ARR; ++j) {
-            GGML_F32_VEC ax = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
-            GGML_F32_VEC ay = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
-            GGML_F32_VEC az = GGML_F32_VEC_MUL(ax, ay);
-            GGML_F32_VEC_STORE(z + i + j*GGML_F32_EPR, az);
-        }
-    }
-#endif
-    for (; i < n; ++i) {
-        z[i] = x[i]*y[i];
-    }
-}
+inline static void ggml_vec_mul_f32(const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]*y[i]; }
 inline static void ggml_vec_mul_f16(const int n, ggml_fp16_t * z, const ggml_fp16_t * x, const ggml_fp16_t * y) {
     for (int i = 0; i < n; ++i) {
         z[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(x[i]) * GGML_CPU_FP16_TO_FP32(y[i]));
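The removed GGML_SIMD branches all follow the same shape: a blocked main loop that walks the array in chunks of GGML_F32_STEP (GGML_F32_ARR vector registers of GGML_F32_EPR floats each), followed by a scalar tail for the leftover elements; the replacements keep only the scalar loop. The standalone sketch below is not part of the commit: EPR/ARR/STEP are illustrative stand-ins for the GGML macros, and the inner k loop plays the role of one vector operation. It shows the blocked pattern next to the simplified scalar form and checks that the two agree for a length that is not a multiple of the step.

#include <assert.h>
#include <stdio.h>

/* Illustrative stand-ins for the GGML_F32_* macros (assumed values:
 * 8 floats per "register", unrolled 4x, so 32 floats per outer step). */
#define EPR  8
#define ARR  4
#define STEP (ARR*EPR)

/* Shape of the removed SIMD path: blocked main loop + scalar tail. */
static void vec_add1_blocked(const int n, float * z, const float * x, const float v) {
    int i = 0;
    const int np = (n & ~(STEP - 1));        /* round n down to a multiple of STEP */
    for (; i < np; i += STEP) {
        for (int j = 0; j < ARR; ++j) {
            for (int k = 0; k < EPR; ++k) {  /* one "vector" add per j in the real code */
                z[i + j*EPR + k] = x[i + j*EPR + k] + v;
            }
        }
    }
    for (; i < n; ++i) {                     /* scalar tail for the remaining elements */
        z[i] = x[i] + v;
    }
}

/* The form used after this commit: a plain scalar loop. */
static void vec_add1_scalar(const int n, float * z, const float * x, const float v) {
    for (int i = 0; i < n; ++i) z[i] = x[i] + v;
}

int main(void) {
    enum { N = 100 };                        /* deliberately not a multiple of STEP */
    float x[N], z0[N], z1[N];
    for (int i = 0; i < N; ++i) x[i] = (float) i;
    vec_add1_blocked(N, z0, x, 1.5f);
    vec_add1_scalar (N, z1, x, 1.5f);
    for (int i = 0; i < N; ++i) assert(z0[i] == z1[i]);
    printf("blocked and scalar paths agree\n");
    return 0;
}

Both paths perform the same element-wise additions, so the one-line replacements are behavior-preserving; whether the scalar loops are vectorized again is left to the compiler and build flags.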