Skip to content

Commit e4189f7

Browse files
author
Aaron
committed
Normalize f32 helper tails for ggml vec ops
1 parent dff1173 commit e4189f7

File tree

1 file changed

+20
-40
lines changed

1 file changed

+20
-40
lines changed

ggml/src/ggml-cpu/vec.h

Lines changed: 20 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -78,72 +78,60 @@ inline static void ggml_vec_add_f16 (const int n, ggml_fp16_t * z, const ggml_fp
7878
}
7979
}
8080
// z[i] = x[i] + v for every i in [0, n).
//
//   n - number of elements to process
//   z - destination array
//   x - source array
//   v - scalar added to each element
//
// `i` is declared outside the preprocessor conditional so that one scalar loop
// serves both as the tail of the SIMD path and as the whole body when
// GGML_SIMD is not defined.
inline static void ggml_vec_add1_f32(const int n, float * z, const float * x, const float v) {
    int i = 0;
#if defined(GGML_SIMD)
    // n with the low bits masked off; presumably GGML_F32_STEP is a power of
    // two, making np the largest multiple of it that is <= n (TODO confirm).
    const int np = (n & ~(GGML_F32_STEP - 1));

    GGML_F32_VEC vv = GGML_F32_VEC_SET1(v);

    // Each outer iteration covers GGML_F32_STEP elements, handled as
    // GGML_F32_ARR vector operations at offsets j*GGML_F32_EPR.
    for (; i < np; i += GGML_F32_STEP) {
        for (int j = 0; j < GGML_F32_ARR; ++j) {
            GGML_F32_VEC ax = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
            GGML_F32_VEC az = GGML_F32_VEC_ADD(ax, vv);
            GGML_F32_VEC_STORE(z + i + j*GGML_F32_EPR, az);
        }
    }
#endif
    // Scalar loop: picks up wherever the vector loop (if any) stopped.
    for (; i < n; ++i) {
        z[i] = x[i] + v;
    }
}
10399
// y[i] += x[i] for every i in [0, n).
//
//   n - number of elements to process
//   y - accumulator array, updated in place
//   x - array added into y
//
// `i` is declared outside the preprocessor conditional so that one scalar loop
// serves both as the tail of the SIMD path and as the whole body when
// GGML_SIMD is not defined.
inline static void ggml_vec_acc_f32 (const int n, float * y, const float * x) {
    int i = 0;
#if defined(GGML_SIMD)
    // n with the low bits masked off; presumably GGML_F32_STEP is a power of
    // two, making np the largest multiple of it that is <= n (TODO confirm).
    const int np = (n & ~(GGML_F32_STEP - 1));

    // Each outer iteration covers GGML_F32_STEP elements, handled as
    // GGML_F32_ARR vector operations at offsets j*GGML_F32_EPR.
    for (; i < np; i += GGML_F32_STEP) {
        for (int j = 0; j < GGML_F32_ARR; ++j) {
            GGML_F32_VEC ay = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
            GGML_F32_VEC ax = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
            ay = GGML_F32_VEC_ADD(ay, ax);
            GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay);
        }
    }
#endif
    // Scalar loop: picks up wherever the vector loop (if any) stopped.
    for (; i < n; ++i) {
        y[i] += x[i];
    }
}
125117
// y[i] += v for every i in [0, n).
//
//   n - number of elements to process
//   y - array updated in place
//   v - scalar added to each element
//
// `i` is declared outside the preprocessor conditional so that one scalar loop
// serves both as the tail of the SIMD path and as the whole body when
// GGML_SIMD is not defined.
inline static void ggml_vec_acc1_f32(const int n, float * y, const float v) {
    int i = 0;
#if defined(GGML_SIMD)
    // n with the low bits masked off; presumably GGML_F32_STEP is a power of
    // two, making np the largest multiple of it that is <= n (TODO confirm).
    const int np = (n & ~(GGML_F32_STEP - 1));

    GGML_F32_VEC vv = GGML_F32_VEC_SET1(v);

    // Each outer iteration covers GGML_F32_STEP elements, handled as
    // GGML_F32_ARR vector operations at offsets j*GGML_F32_EPR.
    for (; i < np; i += GGML_F32_STEP) {
        for (int j = 0; j < GGML_F32_ARR; ++j) {
            GGML_F32_VEC ay = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
            ay = GGML_F32_VEC_ADD(ay, vv);
            GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay);
        }
    }
#endif
    // Scalar loop: picks up wherever the vector loop (if any) stopped.
    for (; i < n; ++i) {
        y[i] += v;
    }
}
148136
// Element-wise difference: z[k] = x[k] - y[k] for k in [0, n).
inline static void ggml_vec_sub_f32 (const int n, float * z, const float * x, const float * y) {
    int k = 0;
    while (k < n) {
        z[k] = x[k] - y[k];
        ++k;
    }
}
149137
inline static void ggml_vec_sub_f16 (const int n, ggml_fp16_t * z, const ggml_fp16_t * x, const ggml_fp16_t * y) {
@@ -152,25 +140,21 @@ inline static void ggml_vec_sub_f16 (const int n, ggml_fp16_t * z, const ggml_fp
152140
}
153141
}
154142
// x[i] = v for every i in [0, n).
//
//   n - number of elements to write
//   x - destination array
//   v - value stored into each element
//
// `i` is declared outside the preprocessor conditional so that one scalar loop
// serves both as the tail of the SIMD path and as the whole body when
// GGML_SIMD is not defined.
inline static void ggml_vec_set_f32 (const int n, float * x, const float v) {
    int i = 0;
#if defined(GGML_SIMD)
    // n with the low bits masked off; presumably GGML_F32_STEP is a power of
    // two, making np the largest multiple of it that is <= n (TODO confirm).
    const int np = (n & ~(GGML_F32_STEP - 1));

    GGML_F32_VEC vx = GGML_F32_VEC_SET1(v);

    // Each outer iteration covers GGML_F32_STEP elements, written as
    // GGML_F32_ARR vector stores at offsets j*GGML_F32_EPR.
    for (; i < np; i += GGML_F32_STEP) {
        for (int j = 0; j < GGML_F32_ARR; ++j) {
            GGML_F32_VEC_STORE(x + i + j*GGML_F32_EPR, vx);
        }
    }
#endif
    // Scalar loop: picks up wherever the vector loop (if any) stopped.
    for (; i < n; ++i) {
        x[i] = v;
    }
}
175159
// Element-wise copy: y[k] = x[k] for k in [0, n), in ascending index order.
inline static void ggml_vec_cpy_f32 (const int n, float * y, const float * x) {
    int k = 0;
    while (k < n) {
        y[k] = x[k];
        ++k;
    }
}
176160
// Element-wise negation: y[k] = -x[k] for k in [0, n).
inline static void ggml_vec_neg_f32 (const int n, float * y, const float * x) {
    int k = 0;
    while (k < n) {
        y[k] = -x[k];
        ++k;
    }
}
@@ -181,26 +165,22 @@ inline static void ggml_vec_neg_f16 (const int n, ggml_fp16_t * y, const ggml_fp
181165
}
182166

183167
// z[i] = x[i] * y[i] for every i in [0, n).
//
//   n - number of elements to process
//   z - destination array
//   x - first factor array
//   y - second factor array
//
// `i` is declared outside the preprocessor conditional so that one scalar loop
// serves both as the tail of the SIMD path and as the whole body when
// GGML_SIMD is not defined.
inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) {
    int i = 0;
#if defined(GGML_SIMD)
    // n with the low bits masked off; presumably GGML_F32_STEP is a power of
    // two, making np the largest multiple of it that is <= n (TODO confirm).
    const int np = (n & ~(GGML_F32_STEP - 1));

    // Each outer iteration covers GGML_F32_STEP elements, handled as
    // GGML_F32_ARR vector operations at offsets j*GGML_F32_EPR.
    for (; i < np; i += GGML_F32_STEP) {
        for (int j = 0; j < GGML_F32_ARR; ++j) {
            GGML_F32_VEC ax = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
            GGML_F32_VEC ay = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
            GGML_F32_VEC az = GGML_F32_VEC_MUL(ax, ay);
            GGML_F32_VEC_STORE(z + i + j*GGML_F32_EPR, az);
        }
    }
#endif
    // Scalar loop: picks up wherever the vector loop (if any) stopped.
    for (; i < n; ++i) {
        z[i] = x[i]*y[i];
    }
}
205185
inline static void ggml_vec_mul_f16 (const int n, ggml_fp16_t * z, const ggml_fp16_t * x, const ggml_fp16_t * y) {
206186
for (int i = 0; i < n; ++i) {

0 commit comments

Comments
 (0)