@@ -78,72 +78,60 @@ inline static void ggml_vec_add_f16 (const int n, ggml_fp16_t * z, const ggml_fp
7878 }
7979}
// Adds the scalar v to each element of x and stores the result in z:
// z[i] = x[i] + v for every i in [0, n).
//
// When GGML_SIMD is defined, the leading np elements are handled by the
// vectorized loop; the scalar loop that follows picks up at whatever index
// the vector loop stopped on. Without GGML_SIMD, i is still 0 when the scalar
// loop is reached, so it covers the whole range.
inline static void ggml_vec_add1_f32(const int n, float * z, const float * x, const float v) {
    int i = 0;
#if defined(GGML_SIMD)
    // n rounded down via a bit mask (assumes GGML_F32_STEP is a power of two - TODO confirm)
    const int np = (n & ~(GGML_F32_STEP - 1));

    GGML_F32_VEC vv = GGML_F32_VEC_SET1(v);

    for (; i < np; i += GGML_F32_STEP) {
        // each outer step touches GGML_F32_ARR vectors spaced GGML_F32_EPR floats apart
        for (int j = 0; j < GGML_F32_ARR; ++j) {
            GGML_F32_VEC ax = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
            GGML_F32_VEC az = GGML_F32_VEC_ADD(ax, vv);
            GGML_F32_VEC_STORE(z + i + j*GGML_F32_EPR, az);
        }
    }
#endif
    // scalar leftovers (or the entire range when the SIMD path is compiled out)
    for (; i < n; ++i) {
        z[i] = x[i] + v;
    }
}
// Accumulates x into y element-wise: y[i] += x[i] for every i in [0, n).
//
// When GGML_SIMD is defined, the leading np elements are handled by the
// vectorized loop; the scalar loop that follows picks up at whatever index
// the vector loop stopped on. Without GGML_SIMD, i is still 0 when the scalar
// loop is reached, so it covers the whole range.
inline static void ggml_vec_acc_f32(const int n, float * y, const float * x) {
    int i = 0;
#if defined(GGML_SIMD)
    // n rounded down via a bit mask (assumes GGML_F32_STEP is a power of two - TODO confirm)
    const int np = (n & ~(GGML_F32_STEP - 1));

    for (; i < np; i += GGML_F32_STEP) {
        // each outer step touches GGML_F32_ARR vectors spaced GGML_F32_EPR floats apart
        for (int j = 0; j < GGML_F32_ARR; ++j) {
            GGML_F32_VEC ay = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
            GGML_F32_VEC ax = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
            ay = GGML_F32_VEC_ADD(ay, ax);
            GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay);
        }
    }
#endif
    // scalar leftovers (or the entire range when the SIMD path is compiled out)
    for (; i < n; ++i) {
        y[i] += x[i];
    }
}
// Adds the scalar v to every element of y in place: y[i] += v for every i in [0, n).
//
// When GGML_SIMD is defined, the leading np elements are handled by the
// vectorized loop; the scalar loop that follows picks up at whatever index
// the vector loop stopped on. Without GGML_SIMD, i is still 0 when the scalar
// loop is reached, so it covers the whole range.
inline static void ggml_vec_acc1_f32(const int n, float * y, const float v) {
    int i = 0;
#if defined(GGML_SIMD)
    // n rounded down via a bit mask (assumes GGML_F32_STEP is a power of two - TODO confirm)
    const int np = (n & ~(GGML_F32_STEP - 1));

    GGML_F32_VEC vv = GGML_F32_VEC_SET1(v);

    for (; i < np; i += GGML_F32_STEP) {
        // each outer step touches GGML_F32_ARR vectors spaced GGML_F32_EPR floats apart
        for (int j = 0; j < GGML_F32_ARR; ++j) {
            GGML_F32_VEC ay = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
            ay = GGML_F32_VEC_ADD(ay, vv);
            GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay);
        }
    }
#endif
    // scalar leftovers (or the entire range when the SIMD path is compiled out)
    for (; i < n; ++i) {
        y[i] += v;
    }
}
// Element-wise subtraction: z[i] = x[i] - y[i] for every i in [0, n).
inline static void ggml_vec_sub_f32(const int n, float * z, const float * x, const float * y) {
    for (int i = 0; i < n; ++i) {
        z[i] = x[i] - y[i];
    }
}
149137inline static void ggml_vec_sub_f16 (const int n , ggml_fp16_t * z , const ggml_fp16_t * x , const ggml_fp16_t * y ) {
@@ -152,25 +140,21 @@ inline static void ggml_vec_sub_f16 (const int n, ggml_fp16_t * z, const ggml_fp
152140 }
153141}
// Fills x with the scalar v: x[i] = v for every i in [0, n).
//
// When GGML_SIMD is defined, the leading np elements are handled by the
// vectorized loop; the scalar loop that follows picks up at whatever index
// the vector loop stopped on. Without GGML_SIMD, i is still 0 when the scalar
// loop is reached, so it covers the whole range.
inline static void ggml_vec_set_f32(const int n, float * x, const float v) {
    int i = 0;
#if defined(GGML_SIMD)
    // n rounded down via a bit mask (assumes GGML_F32_STEP is a power of two - TODO confirm)
    const int np = (n & ~(GGML_F32_STEP - 1));

    GGML_F32_VEC vx = GGML_F32_VEC_SET1(v);

    for (; i < np; i += GGML_F32_STEP) {
        // each outer step stores GGML_F32_ARR vectors spaced GGML_F32_EPR floats apart
        for (int j = 0; j < GGML_F32_ARR; ++j) {
            GGML_F32_VEC_STORE(x + i + j*GGML_F32_EPR, vx);
        }
    }
#endif
    // scalar leftovers (or the entire range when the SIMD path is compiled out)
    for (; i < n; ++i) {
        x[i] = v;
    }
}
// Element-wise copy: y[i] = x[i] for every i in [0, n).
inline static void ggml_vec_cpy_f32(const int n, float * y, const float * x) {
    for (int i = 0; i < n; ++i) {
        y[i] = x[i];
    }
}
// Element-wise negation: y[i] = -x[i] for every i in [0, n).
inline static void ggml_vec_neg_f32(const int n, float * y, const float * x) {
    for (int i = 0; i < n; ++i) {
        y[i] = -x[i];
    }
}
@@ -181,26 +165,22 @@ inline static void ggml_vec_neg_f16 (const int n, ggml_fp16_t * y, const ggml_fp
181165}
182166
// Element-wise product: z[i] = x[i]*y[i] for every i in [0, n).
//
// When GGML_SIMD is defined, the leading np elements are handled by the
// vectorized loop; the scalar loop that follows picks up at whatever index
// the vector loop stopped on. Without GGML_SIMD, i is still 0 when the scalar
// loop is reached, so it covers the whole range.
inline static void ggml_vec_mul_f32(const int n, float * z, const float * x, const float * y) {
    int i = 0;
#if defined(GGML_SIMD)
    // n rounded down via a bit mask (assumes GGML_F32_STEP is a power of two - TODO confirm)
    const int np = (n & ~(GGML_F32_STEP - 1));

    for (; i < np; i += GGML_F32_STEP) {
        // each outer step touches GGML_F32_ARR vectors spaced GGML_F32_EPR floats apart
        for (int j = 0; j < GGML_F32_ARR; ++j) {
            GGML_F32_VEC ax = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
            GGML_F32_VEC ay = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
            GGML_F32_VEC az = GGML_F32_VEC_MUL(ax, ay);
            GGML_F32_VEC_STORE(z + i + j*GGML_F32_EPR, az);
        }
    }
#endif
    // scalar leftovers (or the entire range when the SIMD path is compiled out)
    for (; i < n; ++i) {
        z[i] = x[i]*y[i];
    }
}
205185inline static void ggml_vec_mul_f16 (const int n , ggml_fp16_t * z , const ggml_fp16_t * x , const ggml_fp16_t * y ) {
206186 for (int i = 0 ; i < n ; ++ i ) {
0 commit comments