@@ -190,10 +190,9 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
190
190
__vector_quad acc0 , acc1 , acc2 , acc3 , acc4 ,acc5 ,acc6 ,acc7 ;
191
191
BLASLONG l = 0 ;
192
192
vec_t * rowA = (vec_t * ) & AO [0 ];
193
- vec_t * rb = (vec_t * ) & BO [0 ];
194
193
__vector_pair rowB , rowB1 ;
195
- __builtin_vsx_assemble_pair ( & rowB , rb [ 1 ], rb [0 ]);
196
- __builtin_vsx_assemble_pair ( & rowB1 , rb [ 3 ], rb [ 2 ] );
194
+ rowB = * (( __vector_pair * )(( void * ) & BO [0 ]) );
195
+ rowB1 = * (( __vector_pair * )(( void * ) & BO [ 4 ]) );
197
196
__builtin_mma_xvf64ger (& acc0 , rowB , rowA [0 ]);
198
197
__builtin_mma_xvf64ger (& acc1 , rowB1 , rowA [0 ]);
199
198
__builtin_mma_xvf64ger (& acc2 , rowB , rowA [1 ]);
@@ -205,9 +204,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
205
204
for (l = 1 ; l < temp ; l ++ )
206
205
{
207
206
rowA = (vec_t * ) & AO [l << 3 ];
208
- rb = (vec_t * ) & BO [l << 3 ];
209
- __builtin_vsx_assemble_pair (& rowB , rb [1 ], rb [0 ]);
210
- __builtin_vsx_assemble_pair (& rowB1 , rb [3 ], rb [2 ]);
207
+ rowB = * ((__vector_pair * )((void * )& BO [l << 3 ]));
208
+ rowB1 = * ((__vector_pair * )((void * )& BO [(l << 3 ) + 4 ]));
211
209
__builtin_mma_xvf64gerpp (& acc0 , rowB , rowA [0 ]);
212
210
__builtin_mma_xvf64gerpp (& acc1 , rowB1 , rowA [0 ]);
213
211
__builtin_mma_xvf64gerpp (& acc2 , rowB , rowA [1 ]);
@@ -247,19 +245,17 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
247
245
BLASLONG l = 0 ;
248
246
vec_t * rowA = (vec_t * ) & AO [0 ];
249
247
__vector_pair rowB , rowB1 ;
250
- vec_t * rb = (vec_t * ) & BO [0 ];
251
- __builtin_vsx_assemble_pair (& rowB , rb [1 ], rb [0 ]);
252
- __builtin_vsx_assemble_pair (& rowB1 , rb [3 ], rb [2 ]);
248
+ rowB = * ((__vector_pair * )((void * )& BO [0 ]));
249
+ rowB1 = * ((__vector_pair * )((void * )& BO [4 ]));
253
250
__builtin_mma_xvf64ger (& acc0 , rowB , rowA [0 ]);
254
251
__builtin_mma_xvf64ger (& acc1 , rowB1 , rowA [0 ]);
255
252
__builtin_mma_xvf64ger (& acc2 , rowB , rowA [1 ]);
256
253
__builtin_mma_xvf64ger (& acc3 , rowB1 , rowA [1 ]);
257
254
for (l = 1 ; l < temp ; l ++ )
258
255
{
259
256
rowA = (vec_t * ) & AO [l << 2 ];
260
- rb = (vec_t * ) & BO [l << 3 ];
261
- __builtin_vsx_assemble_pair (& rowB , rb [1 ], rb [0 ]);
262
- __builtin_vsx_assemble_pair (& rowB1 , rb [3 ], rb [2 ]);
257
+ rowB = * ((__vector_pair * )((void * )& BO [l << 3 ]));
258
+ rowB1 = * ((__vector_pair * )((void * )& BO [(l << 3 ) + 4 ]));
263
259
__builtin_mma_xvf64gerpp (& acc0 , rowB , rowA [0 ]);
264
260
__builtin_mma_xvf64gerpp (& acc1 , rowB1 , rowA [0 ]);
265
261
__builtin_mma_xvf64gerpp (& acc2 , rowB , rowA [1 ]);
@@ -291,17 +287,15 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
291
287
BLASLONG l = 0 ;
292
288
vec_t * rowA = (vec_t * ) & AO [0 ];
293
289
__vector_pair rowB , rowB1 ;
294
- vec_t * rb = (vec_t * ) & BO [0 ];
295
- __builtin_vsx_assemble_pair (& rowB , rb [1 ], rb [0 ]);
296
- __builtin_vsx_assemble_pair (& rowB1 , rb [3 ], rb [2 ]);
290
+ rowB = * ((__vector_pair * )((void * )& BO [0 ]));
291
+ rowB1 = * ((__vector_pair * )((void * )& BO [4 ]));
297
292
__builtin_mma_xvf64ger (& acc0 , rowB , rowA [0 ]);
298
293
__builtin_mma_xvf64ger (& acc1 , rowB1 , rowA [0 ]);
299
294
for (l = 1 ; l < temp ; l ++ )
300
295
{
301
296
rowA = (vec_t * ) & AO [l << 1 ];
302
- rb = (vec_t * ) & BO [l << 3 ];
303
- __builtin_vsx_assemble_pair (& rowB , rb [1 ], rb [0 ]);
304
- __builtin_vsx_assemble_pair (& rowB1 , rb [3 ], rb [2 ]);
297
+ rowB = * ((__vector_pair * )((void * )& BO [l << 3 ]));
298
+ rowB1 = * ((__vector_pair * )((void * )& BO [(l << 3 ) + 4 ]));
305
299
__builtin_mma_xvf64gerpp (& acc0 , rowB , rowA [0 ]);
306
300
__builtin_mma_xvf64gerpp (& acc1 , rowB1 , rowA [0 ]);
307
301
}
@@ -403,17 +397,15 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
403
397
BLASLONG l = 0 ;
404
398
vec_t * rowA = (vec_t * ) & AO [0 ];
405
399
__vector_pair rowB ;
406
- vec_t * rb = (vec_t * ) & BO [0 ];
407
- __builtin_vsx_assemble_pair (& rowB , rb [1 ], rb [0 ]);
400
+ rowB = * ((__vector_pair * )((void * )& BO [0 ]));
408
401
__builtin_mma_xvf64ger (& acc0 , rowB , rowA [0 ]);
409
402
__builtin_mma_xvf64ger (& acc1 , rowB , rowA [1 ]);
410
403
__builtin_mma_xvf64ger (& acc2 , rowB , rowA [2 ]);
411
404
__builtin_mma_xvf64ger (& acc3 , rowB , rowA [3 ]);
412
405
for (l = 1 ; l < temp ; l ++ )
413
406
{
414
407
rowA = (vec_t * ) & AO [l << 3 ];
415
- rb = (vec_t * ) & BO [l << 2 ];
416
- __builtin_vsx_assemble_pair (& rowB , rb [1 ], rb [0 ]);
408
+ rowB = * ((__vector_pair * )((void * )& BO [l << 2 ]));
417
409
__builtin_mma_xvf64gerpp (& acc0 , rowB , rowA [0 ]);
418
410
__builtin_mma_xvf64gerpp (& acc1 , rowB , rowA [1 ]);
419
411
__builtin_mma_xvf64gerpp (& acc2 , rowB , rowA [2 ]);
@@ -445,15 +437,13 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
445
437
BLASLONG l = 0 ;
446
438
vec_t * rowA = (vec_t * ) & AO [0 ];
447
439
__vector_pair rowB ;
448
- vec_t * rb = (vec_t * ) & BO [0 ];
449
- __builtin_vsx_assemble_pair (& rowB , rb [1 ], rb [0 ]);
440
+ rowB = * ((__vector_pair * )((void * )& BO [0 ]));
450
441
__builtin_mma_xvf64ger (& acc0 , rowB , rowA [0 ]);
451
442
__builtin_mma_xvf64ger (& acc1 , rowB , rowA [1 ]);
452
443
for (l = 1 ; l < temp ; l ++ )
453
444
{
454
445
rowA = (vec_t * ) & AO [l << 2 ];
455
- rb = (vec_t * ) & BO [l << 2 ];
456
- __builtin_vsx_assemble_pair (& rowB , rb [1 ], rb [0 ]);
446
+ rowB = * ((__vector_pair * )((void * )& BO [l << 2 ]));
457
447
__builtin_mma_xvf64gerpp (& acc0 , rowB , rowA [0 ]);
458
448
__builtin_mma_xvf64gerpp (& acc1 , rowB , rowA [1 ]);
459
449
}
@@ -481,14 +471,12 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
481
471
BLASLONG l = 0 ;
482
472
vec_t * rowA = (vec_t * ) & AO [0 ];
483
473
__vector_pair rowB ;
484
- vec_t * rb = (vec_t * ) & BO [0 ];
485
- __builtin_vsx_assemble_pair (& rowB , rb [1 ], rb [0 ]);
474
+ rowB = * ((__vector_pair * )((void * )& BO [0 ]));
486
475
__builtin_mma_xvf64ger (& acc0 , rowB , rowA [0 ]);
487
476
for (l = 1 ; l < temp ; l ++ )
488
477
{
489
478
rowA = (vec_t * ) & AO [l << 1 ];
490
- rb = (vec_t * ) & BO [l << 2 ];
491
- __builtin_vsx_assemble_pair (& rowB , rb [1 ], rb [0 ]);
479
+ rowB = * ((__vector_pair * )((void * )& BO [l << 2 ]));
492
480
__builtin_mma_xvf64gerpp (& acc0 , rowB , rowA [0 ]);
493
481
}
494
482
SAVE_ACC (& acc0 , 0 );
0 commit comments