Skip to content

Commit a2e0088

Browse files
authored
Revert "ggml : Leverage the existing GGML_F32_VEC helpers to vectorize ggml_v…" (#16723)
This reverts commit 19a5a3e.
1 parent 9b9201f commit a2e0088

File tree

1 file changed

+5
-91
lines changed

1 file changed

+5
-91
lines changed

ggml/src/ggml-cpu/vec.h

Lines changed: 5 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -77,85 +77,16 @@ inline static void ggml_vec_add_f16 (const int n, ggml_fp16_t * z, const ggml_fp
7777
z[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(x[i]) + GGML_CPU_FP16_TO_FP32(y[i]));
7878
}
7979
}
80-
inline static void ggml_vec_add1_f32(const int n, float * z, const float * x, const float v) {
81-
int i = 0;
82-
#if defined(GGML_SIMD)
83-
const int np = (n & ~(GGML_F32_STEP - 1));
84-
85-
GGML_F32_VEC vv = GGML_F32_VEC_SET1(v);
86-
87-
for (; i < np; i += GGML_F32_STEP) {
88-
for (int j = 0; j < GGML_F32_ARR; ++j) {
89-
GGML_F32_VEC ax = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
90-
GGML_F32_VEC az = GGML_F32_VEC_ADD(ax, vv);
91-
GGML_F32_VEC_STORE(z + i + j*GGML_F32_EPR, az);
92-
}
93-
}
94-
#endif
95-
for (; i < n; ++i) {
96-
z[i] = x[i] + v;
97-
}
98-
}
99-
inline static void ggml_vec_acc_f32 (const int n, float * y, const float * x) {
100-
int i = 0;
101-
#if defined(GGML_SIMD)
102-
const int np = (n & ~(GGML_F32_STEP - 1));
103-
104-
for (; i < np; i += GGML_F32_STEP) {
105-
for (int j = 0; j < GGML_F32_ARR; ++j) {
106-
GGML_F32_VEC ay = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
107-
GGML_F32_VEC ax = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
108-
ay = GGML_F32_VEC_ADD(ay, ax);
109-
GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay);
110-
}
111-
}
112-
#endif
113-
for (; i < n; ++i) {
114-
y[i] += x[i];
115-
}
116-
}
117-
inline static void ggml_vec_acc1_f32(const int n, float * y, const float v) {
118-
int i = 0;
119-
#if defined(GGML_SIMD)
120-
const int np = (n & ~(GGML_F32_STEP - 1));
121-
122-
GGML_F32_VEC vv = GGML_F32_VEC_SET1(v);
123-
124-
for (; i < np; i += GGML_F32_STEP) {
125-
for (int j = 0; j < GGML_F32_ARR; ++j) {
126-
GGML_F32_VEC ay = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
127-
ay = GGML_F32_VEC_ADD(ay, vv);
128-
GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay);
129-
}
130-
}
131-
#endif
132-
for (; i < n; ++i) {
133-
y[i] += v;
134-
}
135-
}
80+
// Writes x[k] + v into z[k] for every k in [0, n), walking forward.
// Does nothing when n <= 0.
inline static void ggml_vec_add1_f32(const int n, float * z, const float * x, const float v) {
    int k = 0;
    while (k < n) {
        const float xk = x[k];
        z[k] = xk + v;
        ++k;
    }
}
81+
// Accumulates x into y element by element: y[k] becomes y[k] + x[k]
// for every k in [0, n). Does nothing when n <= 0.
inline static void ggml_vec_acc_f32 (const int n, float * y, const float * x) {
    for (int k = 0; k < n; k++) {
        y[k] = y[k] + x[k];
    }
}
82+
// Adds the scalar v in place to each of y[0] .. y[n-1].
// Does nothing when n <= 0.
inline static void ggml_vec_acc1_f32(const int n, float * y, const float v) {
    int k = 0;
    while (k < n) {
        y[k] = y[k] + v;
        k += 1;
    }
}
13683
// Element-wise difference: z[k] = x[k] - y[k] for every k in [0, n).
// Does nothing when n <= 0.
inline static void ggml_vec_sub_f32 (const int n, float * z, const float * x, const float * y) {
    for (int k = 0; k < n; k++) {
        const float lhs = x[k];
        const float rhs = y[k];
        z[k] = lhs - rhs;
    }
}
13784
inline static void ggml_vec_sub_f16 (const int n, ggml_fp16_t * z, const ggml_fp16_t * x, const ggml_fp16_t * y) {
13885
for (int i = 0; i < n; ++i) {
13986
z[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(x[i]) - GGML_CPU_FP16_TO_FP32(y[i]));
14087
}
14188
}
142-
inline static void ggml_vec_set_f32 (const int n, float * x, const float v) {
143-
int i = 0;
144-
#if defined(GGML_SIMD)
145-
const int np = (n & ~(GGML_F32_STEP - 1));
146-
147-
GGML_F32_VEC vx = GGML_F32_VEC_SET1(v);
148-
149-
for (; i < np; i += GGML_F32_STEP) {
150-
for (int j = 0; j < GGML_F32_ARR; ++j) {
151-
GGML_F32_VEC_STORE(x + i + j*GGML_F32_EPR, vx);
152-
}
153-
}
154-
#endif
155-
for (; i < n; ++i) {
156-
x[i] = v;
157-
}
158-
}
89+
// Fills x[0] .. x[n-1] with the value v. Does nothing when n <= 0.
inline static void ggml_vec_set_f32 (const int n, float * x, const float v) {
    int k = 0;
    while (k < n) {
        x[k] = v;
        ++k;
    }
}
15990
// Copies x[k] into y[k] for every k in [0, n), one element at a time in
// ascending index order. Does nothing when n <= 0.
inline static void ggml_vec_cpy_f32 (const int n, float * y, const float * x) {
    for (int k = 0; k < n; k++) {
        const float xk = x[k];
        y[k] = xk;
    }
}
16091
// Stores the negation of x[k] into y[k] for every k in [0, n).
// Does nothing when n <= 0.
inline static void ggml_vec_neg_f32 (const int n, float * y, const float * x) {
    int k = 0;
    while (k < n) {
        const float xk = x[k];
        y[k] = -xk;
        ++k;
    }
}
16192
inline static void ggml_vec_neg_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
@@ -164,24 +95,7 @@ inline static void ggml_vec_neg_f16 (const int n, ggml_fp16_t * y, const ggml_fp
16495
}
16596
}
16697

167-
inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) {
168-
int i = 0;
169-
#if defined(GGML_SIMD)
170-
const int np = (n & ~(GGML_F32_STEP - 1));
171-
172-
for (; i < np; i += GGML_F32_STEP) {
173-
for (int j = 0; j < GGML_F32_ARR; ++j) {
174-
GGML_F32_VEC ax = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
175-
GGML_F32_VEC ay = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
176-
GGML_F32_VEC az = GGML_F32_VEC_MUL(ax, ay);
177-
GGML_F32_VEC_STORE(z + i + j*GGML_F32_EPR, az);
178-
}
179-
}
180-
#endif
181-
for (; i < n; ++i) {
182-
z[i] = x[i]*y[i];
183-
}
184-
}
98+
// Element-wise product: z[k] = x[k] * y[k] for every k in [0, n).
// Does nothing when n <= 0.
inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) {
    for (int k = 0; k < n; k++) {
        const float lhs = x[k];
        const float rhs = y[k];
        z[k] = lhs * rhs;
    }
}
18599
inline static void ggml_vec_mul_f16 (const int n, ggml_fp16_t * z, const ggml_fp16_t * x, const ggml_fp16_t * y) {
186100
for (int i = 0; i < n; ++i) {
187101
z[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(x[i]) * GGML_CPU_FP16_TO_FP32(y[i]));

0 commit comments

Comments
 (0)