Skip to content

Commit 2f142ee

Browse files
author
Chip Kerchner
committed
More common code.
1 parent 39fd29f commit 2f142ee

File tree

3 files changed

+22
-20
lines changed

3 files changed

+22
-20
lines changed

kernel/power/sbgemv_common.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,12 @@ FORCEINLINE vec_f32 vec_loadN_f32(void *src, BLASLONG n)
138138
return (vec_f32)vec_loadN(src, n * (sizeof(FLOAT) / sizeof(IFLOAT)));
139139
}
140140

141+
/*
 * Load a pair of vectors from src into data[0] and data[1].
 *
 * data - destination; data[0] and data[1] are both written.
 * src  - source; src[0] is read as a whole vector, src[1] is read through
 *        vec_loadN_f32 with the count n.
 * n    - count forwarded unchanged to vec_loadN_f32 for the second vector.
 *
 * NOTE(review): presumably n is the number of valid FLOAT elements in the
 * second vector - confirm against vec_loadN_f32 / vec_loadN. The call sites
 * in this commit pass (n & 3) from inside an "if (n > 4)" branch taken after
 * "n &= 7".
 */
FORCEINLINE void vec_loadN2_f32(vec_f32 *data, vec_f32 *src, BLASLONG n)
142+
{
143+
data[0] = src[0];  /* first vector: plain full-vector copy */
144+
data[1] = vec_loadN_f32(&src[1], n);  /* second vector: length-limited load */
145+
}
146+
141147
FORCEINLINE void vec_storeN_f32(vec_f32 data, void *dst, BLASLONG n)
142148
{
143149
FLOAT *dst2 = (FLOAT *)(dst);
@@ -160,6 +166,12 @@ FORCEINLINE void vec_storeN_f32(vec_f32 data, void *dst, BLASLONG n)
160166
#endif
161167
}
162168

169+
/*
 * Store a pair of vectors from data[0] and data[1] to dst.
 *
 * data - source; data[0] and data[1] are both read.
 * dst  - destination; dst[0] is written as a whole vector, dst[1] is written
 *        through vec_storeN_f32 with the count n.
 * n    - count forwarded unchanged to vec_storeN_f32 for the second vector.
 *
 * NOTE(review): presumably n is the number of FLOAT elements of data[1] that
 * are written at dst[1] - confirm against vec_storeN_f32. The call sites in
 * this commit pass (n & 3) from inside an "if (n > 4)" branch taken after
 * "n &= 7".
 */
FORCEINLINE void vec_storeN2_f32(vec_f32 *data, vec_f32 *dst, BLASLONG n)
170+
{
171+
dst[0] = data[0];  /* first vector: plain full-vector store */
172+
vec_storeN_f32(data[1], &dst[1], n);  /* second vector: length-limited store */
173+
}
174+
163175
FORCEINLINE vec_f32 vec_mult(vec_f32 *inp, vec_bf16 in0, vec_bf16 zero)
164176
{
165177
vec_f32 v_in00 = BF16_HI(in0, zero);

kernel/power/sbgemv_n.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,12 +75,10 @@ static void BF16GEMV_N_beta(BLASLONG n, FLOAT *output_vector, FLOAT *input_vecto
7575
n &= 7;
7676
if (n > 4) {
7777
BLASLONG n3 = n & 3;
78-
v_inp0[0] = in[(i * 2) + 0];
79-
v_inp0[1] = vec_loadN_f32(&in[(i * 2) + 1], n3);
78+
vec_loadN2_f32(v_inp0, &in[(i * 2) + 0], n3);
8079
v_inp0[0] *= b;
8180
v_inp0[1] *= b;
82-
out[(i * 2) + 0] = v_inp0[0];
83-
vec_storeN_f32(v_inp0[1], &out[(i * 2) + 1], n3);
81+
vec_storeN2_f32(v_inp0, &out[(i * 2) + 0], n3);
8482
} else if (n) {
8583
v_inp0[0] = vec_loadN_f32(&in[(i * 2) + 0], n);
8684
v_inp0[0] *= b;

kernel/power/sbgemv_n_vsx.c

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,11 @@ static void BF16GEMV_N_VSX_1(BLASLONG n, IFLOAT **ap, IFLOAT *xo, FLOAT *y, FLOA
6464
n &= 7;
6565
if (n > 4) {
6666
BLASLONG n3 = n & 3;
67-
vy0[0] = v_y[(i * 2) + 0];
68-
vy0[1] = vec_loadN_f32(&v_y[(i * 2) + 1], n3);
67+
vec_loadN2_f32(vy0, &v_y[(i * 2) + 0], n3);
6968

7069
vec_loadN_mult2(v_x0, &va0[i], n, zero, vy0);
7170

72-
v_y[(i * 2) + 0] = vy0[0];
73-
vec_storeN_f32(vy0[1], &v_y[(i * 2) + 1], n3);
71+
vec_storeN2_f32(vy0, &v_y[(i * 2) + 0], n3);
7472
} else if (n) {
7573
vec_f32 vy0 = vec_loadN_f32(&v_y[(i * 2) + 0], n);
7674

@@ -116,14 +114,12 @@ static void BF16GEMV_N_VSX_2(BLASLONG n, IFLOAT **ap, IFLOAT *xo, FLOAT *y, FLOA
116114
n &= 7;
117115
if (n > 4) {
118116
BLASLONG n3 = n & 3;
119-
vy0[0] = v_y[(i * 2) + 0];
120-
vy0[1] = vec_loadN_f32(&v_y[(i * 2) + 1], n3);
117+
vec_loadN2_f32(vy0, &v_y[(i * 2) + 0], n3);
121118

122119
vec_loadN_mult2(v_x0, &va0[i], n, zero, vy0);
123120
vec_loadN_mult2(v_x1, &va1[i], n, zero, vy0);
124121

125-
v_y[(i * 2) + 0] = vy0[0];
126-
vec_storeN_f32(vy0[1], &v_y[(i * 2) + 1], n3);
122+
vec_storeN2_f32(vy0, &v_y[(i * 2) + 0], n3);
127123
} else if (n) {
128124
vec_f32 vy0 = vec_loadN_f32(&v_y[(i * 2) + 0], n);
129125

@@ -178,16 +174,14 @@ static void BF16GEMV_N_VSX_4(BLASLONG n, IFLOAT **ap, IFLOAT *xo, FLOAT *y, FLOA
178174
n &= 7;
179175
if (n > 4) {
180176
BLASLONG n3 = n & 3;
181-
vy0[0] = v_y[(i * 2) + 0];
182-
vy0[1] = vec_loadN_f32(&v_y[(i * 2) + 1], n3);
177+
vec_loadN2_f32(vy0, &v_y[(i * 2) + 0], n3);
183178

184179
vec_loadN_mult2(v_x0, &va0[i], n, zero, vy0);
185180
vec_loadN_mult2(v_x1, &va1[i], n, zero, vy0);
186181
vec_loadN_mult2(v_x2, &va2[i], n, zero, vy0);
187182
vec_loadN_mult2(v_x3, &va3[i], n, zero, vy0);
188183

189-
v_y[(i * 2) + 0] = vy0[0];
190-
vec_storeN_f32(vy0[1], &v_y[(i * 2) + 1], n3);
184+
vec_storeN2_f32(vy0, &v_y[(i * 2) + 0], n3);
191185
} else if (n) {
192186
vec_f32 vy0 = vec_loadN_f32(&v_y[(i * 2) + 0], n);
193187

@@ -263,8 +257,7 @@ static void BF16GEMV_N_VSX_8(BLASLONG n, IFLOAT **ap, IFLOAT *xo, FLOAT *y, BLAS
263257
n &= 7;
264258
if (n > 4) {
265259
BLASLONG n3 = n & 3;
266-
vy0[0] = v_y[(i * 2) + 0];
267-
vy0[1] = vec_loadN_f32(&v_y[(i * 2) + 1], n3);
260+
vec_loadN2_f32(vy0, &v_y[(i * 2) + 0], n3);
268261

269262
vec_loadN_mult2(v_x0, &va0[i], n, zero, vy0);
270263
vec_loadN_mult2(v_x1, &va1[i], n, zero, vy0);
@@ -275,8 +268,7 @@ static void BF16GEMV_N_VSX_8(BLASLONG n, IFLOAT **ap, IFLOAT *xo, FLOAT *y, BLAS
275268
vec_loadN_mult2(v_x6, &vb2[i], n, zero, vy0);
276269
vec_loadN_mult2(v_x7, &vb3[i], n, zero, vy0);
277270

278-
v_y[(i * 2) + 0] = vy0[0];
279-
vec_storeN_f32(vy0[1], &v_y[(i * 2) + 1], n3);
271+
vec_storeN2_f32(vy0, &v_y[(i * 2) + 0], n3);
280272
} else
281273
if (n) {
282274
vec_f32 vy0 = vec_loadN_f32(&v_y[(i * 2) + 0], n);

0 commit comments

Comments
 (0)