Skip to content

Commit 0571c31

Browse files
author
Rajalakshmi Srinivasaraghavan
committed
POWER10: Rename mma builtins
The LLVM and GCC teams agreed to rename the __builtin_mma_assemble_pair and __builtin_mma_disassemble_pair built-ins to __builtin_vsx_assemble_pair and __builtin_vsx_disassemble_pair, respectively. This patch makes the corresponding changes in the dgemm kernel. It also changes the inputs passed to those built-ins to avoid some potential typecasting issues. Reference GCC commit ID: 77ef995c1fbcab76a2a69b9f4700bcfd005d8e62
1 parent d12a2d0 commit 0571c31

File tree

1 file changed

+37
-40
lines changed

1 file changed

+37
-40
lines changed

kernel/power/dgemm_kernel_power10.c

Lines changed: 37 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2929

3030
typedef __vector unsigned char vec_t;
3131
typedef FLOAT v4sf_t __attribute__ ((vector_size (16)));
32-
typedef FLOAT v2sf_t __attribute__ ((vector_size (8)));
32+
#if !__has_builtin(__builtin_vsx_assemble_pair)
33+
#define __builtin_vsx_assemble_pair __builtin_mma_assemble_pair
34+
#endif
35+
36+
#if !__has_builtin(__builtin_vsx_disassemble_pair)
37+
#define __builtin_vsx_disassemble_pair __builtin_mma_disassemble_pair
38+
#endif
3339

3440
#ifdef TRMMKERNEL
3541
#define SAVE_ACC(ACC, J) \
@@ -186,8 +192,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
186192
vec_t *rowA = (vec_t *) & AO[0];
187193
vec_t *rb = (vec_t *) & BO[0];
188194
__vector_pair rowB, rowB1;
189-
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
190-
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]);
195+
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
196+
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
191197
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
192198
__builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
193199
__builtin_mma_xvf64ger (&acc2, rowB, rowA[1]);
@@ -200,8 +206,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
200206
{
201207
rowA = (vec_t *) & AO[l << 3];
202208
rb = (vec_t *) & BO[l << 3];
203-
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
204-
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]);
209+
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
210+
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
205211
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
206212
__builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
207213
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[1]);
@@ -242,8 +248,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
242248
vec_t *rowA = (vec_t *) & AO[0];
243249
__vector_pair rowB, rowB1;
244250
vec_t *rb = (vec_t *) & BO[0];
245-
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
246-
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]);
251+
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
252+
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
247253
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
248254
__builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
249255
__builtin_mma_xvf64ger (&acc2, rowB, rowA[1]);
@@ -252,8 +258,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
252258
{
253259
rowA = (vec_t *) & AO[l << 2];
254260
rb = (vec_t *) & BO[l << 3];
255-
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
256-
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]);
261+
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
262+
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
257263
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
258264
__builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
259265
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[1]);
@@ -286,16 +292,16 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
286292
vec_t *rowA = (vec_t *) & AO[0];
287293
__vector_pair rowB, rowB1;
288294
vec_t *rb = (vec_t *) & BO[0];
289-
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
290-
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]);
295+
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
296+
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
291297
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
292298
__builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
293299
for (l = 1; l < temp; l++)
294300
{
295301
rowA = (vec_t *) & AO[l << 1];
296302
rb = (vec_t *) & BO[l << 3];
297-
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
298-
__builtin_mma_assemble_pair (&rowB1, rb[3], rb[2]);
303+
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
304+
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
299305
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
300306
__builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
301307
}
@@ -398,7 +404,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
398404
vec_t *rowA = (vec_t *) & AO[0];
399405
__vector_pair rowB;
400406
vec_t *rb = (vec_t *) & BO[0];
401-
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
407+
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
402408
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
403409
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
404410
__builtin_mma_xvf64ger (&acc2, rowB, rowA[2]);
@@ -407,7 +413,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
407413
{
408414
rowA = (vec_t *) & AO[l << 3];
409415
rb = (vec_t *) & BO[l << 2];
410-
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
416+
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
411417
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
412418
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
413419
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
@@ -440,14 +446,14 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
440446
vec_t *rowA = (vec_t *) & AO[0];
441447
__vector_pair rowB;
442448
vec_t *rb = (vec_t *) & BO[0];
443-
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
449+
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
444450
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
445451
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
446452
for (l = 1; l < temp; l++)
447453
{
448454
rowA = (vec_t *) & AO[l << 2];
449455
rb = (vec_t *) & BO[l << 2];
450-
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
456+
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
451457
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
452458
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
453459
}
@@ -476,13 +482,13 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
476482
vec_t *rowA = (vec_t *) & AO[0];
477483
__vector_pair rowB;
478484
vec_t *rb = (vec_t *) & BO[0];
479-
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
485+
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
480486
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
481487
for (l = 1; l < temp; l++)
482488
{
483489
rowA = (vec_t *) & AO[l << 1];
484490
rb = (vec_t *) & BO[l << 2];
485-
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
491+
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
486492
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
487493
}
488494
SAVE_ACC (&acc0, 0);
@@ -562,21 +568,18 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
562568
v4sf_t result[4];
563569
__vector_quad acc0, acc1, acc2, acc3;
564570
BLASLONG l = 0;
565-
FLOAT t[4] = { 0, 0, 0, 0 };
566-
t[0] = BO[0], t[1] = BO[1];
567571
__vector_pair rowB;
568-
vec_t *rb = (vec_t *) & t[0];
569-
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
572+
vec_t *rb = (vec_t *) & BO[0];
573+
__builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
570574
vec_t *rowA = (vec_t *) & AO[0];
571575
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
572576
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
573577
__builtin_mma_xvf64ger (&acc2, rowB, rowA[2]);
574578
__builtin_mma_xvf64ger (&acc3, rowB, rowA[3]);
575579
for (l = 1; l < temp; l++)
576580
{
577-
t[0] = BO[l << 1], t[1] = BO[(l << 1) + 1];
578-
rb = (vec_t *) & t[0];
579-
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
581+
rb = (vec_t *) & BO[l << 1];
582+
__builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
580583
rowA = (vec_t *) & AO[l << 3];
581584
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
582585
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
@@ -607,19 +610,16 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
607610
v4sf_t result[4];
608611
__vector_quad acc0, acc1;
609612
BLASLONG l = 0;
610-
FLOAT t[4] = { 0, 0, 0, 0 };
611-
t[0] = BO[0], t[1] = BO[1];
612613
__vector_pair rowB;
613-
vec_t *rb = (vec_t *) & t[0];
614-
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
614+
vec_t *rb = (vec_t *) & BO[0];
615+
__builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
615616
vec_t *rowA = (vec_t *) & AO[0];
616617
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
617618
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
618619
for (l = 1; l < temp; l++)
619620
{
620-
t[0] = BO[l << 1], t[1] = BO[(l << 1) + 1];
621-
rb = (vec_t *) & t[0];
622-
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
621+
rb = (vec_t *) & BO[l << 1];
622+
__builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
623623
rowA = (vec_t *) & AO[l << 2];
624624
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
625625
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
@@ -646,18 +646,15 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
646646
v4sf_t result[4];
647647
__vector_quad acc0;
648648
BLASLONG l = 0;
649-
FLOAT t[4] = { 0, 0, 0, 0 };
650-
t[0] = BO[0], t[1] = BO[1];
651649
__vector_pair rowB;
652-
vec_t *rb = (vec_t *) & t[0];
653-
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
650+
vec_t *rb = (vec_t *) & BO[0];
651+
__builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
654652
vec_t *rowA = (vec_t *) & AO[0];
655653
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
656654
for (l = 1; l < temp; l++)
657655
{
658-
t[0] = BO[l << 1], t[1] = BO[(l << 1) + 1];
659-
rb = (vec_t *) & t[0];
660-
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
656+
rb = (vec_t *) & BO[l << 1];
657+
__builtin_vsx_assemble_pair (&rowB, rb[0], rb[0]);
661658
rowA = (vec_t *) & AO[l << 1];
662659
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
663660
}

0 commit comments

Comments
 (0)