Skip to content

Commit 5c66d77

Browse files
committed
adapt test after rebase
1 parent f631324 commit 5c66d77

File tree

1 file changed

+61
-78
lines changed

1 file changed

+61
-78
lines changed

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.exp.large.mir

Lines changed: 61 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -464,10 +464,16 @@
464464
; GCN-NEXT: buffer_load_dwordx2 v[130:131], v64, s[0:3], 0 offen sc0 sc1
465465
; GCN-NEXT: s_waitcnt vmcnt(0)
466466
; GCN-NEXT: buffer_inv sc0 sc1
467+
; GCN-NEXT: v_fma_f32 v57, s4, v57, -v134
467468
; GCN-NEXT: v_fma_f32 v48, s4, v48, -v134
469+
; GCN-NEXT: v_fma_f32 v96, s4, v58, -v134
470+
; GCN-NEXT: v_mul_f32_e32 v57, 0x3fb8aa3b, v57
468471
; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v48
469472
; GCN-NEXT: v_fma_f32 v64, s4, v49, -v134
473+
; GCN-NEXT: v_exp_f32_e32 v163, v57
474+
; GCN-NEXT: v_mul_f32_e32 v57, 0x3fb8aa3b, v96
470475
; GCN-NEXT: v_fma_f32 v66, s4, v50, -v134
476+
; GCN-NEXT: v_exp_f32_e32 v164, v57
471477
; GCN-NEXT: v_exp_f32_e32 v49, v48
472478
; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v64
473479
; GCN-NEXT: v_fma_f32 v67, s4, v51, -v134
@@ -489,35 +495,35 @@
489495
; GCN-NEXT: ds_read_b128 v[140:143], v139
490496
; GCN-NEXT: s_waitcnt lgkmcnt(0)
491497
; GCN-NEXT: buffer_inv sc0 sc1
492-
; GCN-NEXT: ds_read_b128 v[144:147], v139 offset:576
493-
; GCN-NEXT: s_waitcnt lgkmcnt(0)
494-
; GCN-NEXT: buffer_inv sc0 sc1
495498
; GCN-NEXT: v_exp_f32_e32 v54, v48
496499
; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v70
497500
; GCN-NEXT: v_exp_f32_e32 v55, v48
498501
; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v71
502+
; GCN-NEXT: ds_read_b128 v[144:147], v139 offset:576
503+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
504+
; GCN-NEXT: buffer_inv sc0 sc1
499505
; GCN-NEXT: v_fma_f32 v66, s4, v56, -v134
500506
; GCN-NEXT: v_exp_f32_e32 v56, v48
501507
; GCN-NEXT: v_sub_f32_e32 v48, v65, v134
502-
; GCN-NEXT: ds_read_b128 v[148:151], v139 offset:1152
503-
; GCN-NEXT: s_waitcnt lgkmcnt(0)
504-
; GCN-NEXT: buffer_inv sc0 sc1
505508
; GCN-NEXT: v_cvt_f16_f32_e32 v64, v49
506509
; GCN-NEXT: v_cvt_f16_f32_e32 v67, v50
507510
; GCN-NEXT: v_cvt_f16_f32_e32 v68, v51
508-
; GCN-NEXT: v_fma_f32 v96, s4, v58, -v134
509511
; GCN-NEXT: v_cvt_f16_f32_e32 v58, v52
510512
; GCN-NEXT: v_mul_f32_e32 v48, 0x3fb8aa3b, v48
513+
; GCN-NEXT: ds_read_b128 v[148:151], v139 offset:1152
514+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
515+
; GCN-NEXT: buffer_inv sc0 sc1
511516
; GCN-NEXT: v_exp_f32_e32 v48, v48
512-
; GCN-NEXT: v_fma_f32 v57, s4, v57, -v134
513517
; GCN-NEXT: v_pack_b32_f16 v161, v68, v58
514518
; GCN-NEXT: v_pack_b32_f16 v160, v64, v67
515519
; GCN-NEXT: v_mul_f32_e32 v58, 0x3fb8aa3b, v66
516520
; GCN-NEXT: ; implicit-def: $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79
517521
; GCN-NEXT: ds_read_b128 v[152:155], v139 offset:1728
518522
; GCN-NEXT: s_waitcnt lgkmcnt(0)
519523
; GCN-NEXT: buffer_inv sc0 sc1
520-
; GCN-NEXT: ; implicit-def: $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95
524+
; GCN-NEXT: v_fma_f32 v162, s4, v61, -v134
525+
; GCN-NEXT: v_cvt_f16_f32_e32 v61, v55
526+
; GCN-NEXT: v_cvt_f16_f32_e32 v57, v56
521527
; GCN-NEXT: v_pk_mul_f32 v[64:65], v[64:65], v[48:49] op_sel_hi:[1,0]
522528
; GCN-NEXT: v_pk_mul_f32 v[66:67], v[66:67], v[48:49] op_sel_hi:[1,0]
523529
; GCN-NEXT: v_pk_mul_f32 v[68:69], v[68:69], v[48:49] op_sel_hi:[1,0]
@@ -526,7 +532,8 @@
526532
; GCN-NEXT: v_pk_mul_f32 v[74:75], v[74:75], v[48:49] op_sel_hi:[1,0]
527533
; GCN-NEXT: v_pk_mul_f32 v[76:77], v[76:77], v[48:49] op_sel_hi:[1,0]
528534
; GCN-NEXT: v_pk_mul_f32 v[78:79], v[78:79], v[48:49] op_sel_hi:[1,0]
529-
; GCN-NEXT: v_mul_f32_e32 v57, 0x3fb8aa3b, v57
535+
; GCN-NEXT: ; implicit-def: $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95
536+
; GCN-NEXT: v_fma_f32 v59, s4, v59, -v134
530537
; GCN-NEXT: v_pk_mul_f32 v[80:81], v[80:81], v[48:49] op_sel_hi:[1,0]
531538
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[140:141], v[160:161], v[64:79]
532539
; GCN-NEXT: v_mul_f32_e64 v82, v82, v48
@@ -535,36 +542,20 @@
535542
; GCN-NEXT: v_mul_f32_e64 v85, v85, v48
536543
; GCN-NEXT: v_mul_f32_e64 v86, v86, v48
537544
; GCN-NEXT: v_mul_f32_e64 v87, v87, v48
538-
; GCN-NEXT: v_pk_mul_f32 v[82:83], v[82:83], v[48:49] op_sel_hi:[1,0]
539-
; GCN-NEXT: v_pk_mul_f32 v[84:85], v[84:85], v[48:49] op_sel_hi:[1,0]
540-
; GCN-NEXT: v_pk_mul_f32 v[86:87], v[86:87], v[48:49] op_sel_hi:[1,0]
541545
; GCN-NEXT: v_pk_mul_f32 v[88:89], v[88:89], v[48:49] op_sel_hi:[1,0]
542546
; GCN-NEXT: v_pk_mul_f32 v[90:91], v[90:91], v[48:49] op_sel_hi:[1,0]
543547
; GCN-NEXT: v_pk_mul_f32 v[92:93], v[92:93], v[48:49] op_sel_hi:[1,0]
544548
; GCN-NEXT: v_pk_mul_f32 v[94:95], v[94:95], v[48:49] op_sel_hi:[1,0]
545-
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[140:141], v[160:161], v[64:79]
549+
; GCN-NEXT: ; implicit-def: $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111
546550
; GCN-NEXT: v_exp_f32_e32 v58, v58
547-
; GCN-NEXT: v_fma_f32 v162, s4, v61, -v134
548-
; GCN-NEXT: v_cvt_f16_f32_e32 v61, v55
549-
; GCN-NEXT: v_cvt_f16_f32_e32 v140, v53
550-
; GCN-NEXT: v_cvt_f16_f32_e32 v141, v54
551-
; GCN-NEXT: v_fma_f32 v59, s4, v59, -v134
552-
; GCN-NEXT: v_fma_f32 v60, s4, v60, -v134
551+
; GCN-NEXT: v_pk_mul_f32 v[96:97], v[96:97], v[48:49] op_sel_hi:[1,0]
553552
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[144:145], v[160:161], v[80:95]
554553
; GCN-NEXT: v_mul_f32_e64 v98, v98, v48
555554
; GCN-NEXT: v_mul_f32_e64 v99, v99, v48
556555
; GCN-NEXT: v_mul_f32_e64 v100, v100, v48
557556
; GCN-NEXT: v_mul_f32_e64 v101, v101, v48
558557
; GCN-NEXT: v_mul_f32_e64 v102, v102, v48
559558
; GCN-NEXT: v_mul_f32_e64 v103, v103, v48
560-
; GCN-NEXT: v_exp_f32_e32 v163, v57
561-
; GCN-NEXT: v_mul_f32_e32 v57, 0x3fb8aa3b, v96
562-
; GCN-NEXT: ; implicit-def: $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111
563-
; GCN-NEXT: v_pk_mul_f32 v[112:113], v[112:113], v[48:49] op_sel_hi:[1,0]
564-
; GCN-NEXT: v_pk_mul_f32 v[96:97], v[96:97], v[48:49] op_sel_hi:[1,0]
565-
; GCN-NEXT: v_pk_mul_f32 v[98:99], v[98:99], v[48:49] op_sel_hi:[1,0]
566-
; GCN-NEXT: v_pk_mul_f32 v[100:101], v[100:101], v[48:49] op_sel_hi:[1,0]
567-
; GCN-NEXT: v_pk_mul_f32 v[102:103], v[102:103], v[48:49] op_sel_hi:[1,0]
568559
; GCN-NEXT: v_pk_mul_f32 v[104:105], v[104:105], v[48:49] op_sel_hi:[1,0]
569560
; GCN-NEXT: v_pk_mul_f32 v[106:107], v[106:107], v[48:49] op_sel_hi:[1,0]
570561
; GCN-NEXT: v_pk_mul_f32 v[108:109], v[108:109], v[48:49] op_sel_hi:[1,0]
@@ -580,54 +571,49 @@
580571
; GCN-NEXT: v_mul_f32_e64 v113, v113, v48
581572
; GCN-NEXT: v_mul_f32_e64 v114, v114, v48
582573
; GCN-NEXT: v_mul_f32_e64 v115, v115, v48
583-
; GCN-NEXT: v_pk_mul_f32 v[114:115], v[114:115], v[48:49] op_sel_hi:[1,0]
584574
; GCN-NEXT: v_pk_mul_f32 v[116:117], v[116:117], v[48:49] op_sel_hi:[1,0]
585-
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[148:149], v[160:161], v[96:111]
586-
; GCN-NEXT: v_exp_f32_e32 v164, v57
587-
; GCN-NEXT: v_cvt_f16_f32_e32 v57, v56
588575
; GCN-NEXT: v_pk_mul_f32 v[118:119], v[118:119], v[48:49] op_sel_hi:[1,0]
589576
; GCN-NEXT: v_pk_mul_f32 v[120:121], v[120:121], v[48:49] op_sel_hi:[1,0]
590577
; GCN-NEXT: v_pk_mul_f32 v[122:123], v[122:123], v[48:49] op_sel_hi:[1,0]
591578
; GCN-NEXT: v_pk_mul_f32 v[124:125], v[124:125], v[48:49] op_sel_hi:[1,0]
592579
; GCN-NEXT: v_pk_mul_f32 v[126:127], v[126:127], v[48:49] op_sel_hi:[1,0]
593-
; GCN-NEXT: v_pack_b32_f16 v145, v61, v57
594-
; GCN-NEXT: v_mul_f32_e32 v57, 0x3fb8aa3b, v59
595580
; GCN-NEXT: v_fma_f32 v148, s4, v62, -v134
581+
; GCN-NEXT: v_pack_b32_f16 v144, v140, v141
596582
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[152:153], v[160:161], v[112:127]
597583
; GCN-NEXT: v_fma_f32 v152, s4, v63, -v134
598-
; GCN-NEXT: v_pack_b32_f16 v144, v140, v141
599-
; GCN-NEXT: v_exp_f32_e32 v59, v57
600584
; GCN-NEXT: v_mul_f32_e32 v149, 0x3fb8aa3b, v60
601585
; GCN-NEXT: ; implicit-def: $vgpr57
602586
; GCN-NEXT: ds_read_b128 v[60:63], v57
603587
; GCN-NEXT: s_waitcnt lgkmcnt(0)
604588
; GCN-NEXT: buffer_inv sc0 sc1
589+
; GCN-NEXT: v_exp_f32_e32 v160, v149
605590
; GCN-NEXT: v_fma_f32 v161, s4, v33, -v134
606591
; GCN-NEXT: v_mul_f32_e32 v33, 0x3fb8aa3b, v148
607-
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[142:143], v[144:145], v[64:79]
608-
; GCN-NEXT: v_exp_f32_e32 v160, v149
609592
; GCN-NEXT: v_cvt_f16_f32_e32 v153, v58
593+
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[142:143], v[144:145], v[64:79]
610594
; GCN-NEXT: v_fma_f32 v32, s4, v32, -v134
611595
; GCN-NEXT: ds_read_b128 v[140:143], v57 offset:576
612596
; GCN-NEXT: s_waitcnt lgkmcnt(0)
613597
; GCN-NEXT: buffer_inv sc0 sc1
614598
; GCN-NEXT: v_fma_f32 v40, s4, v40, -v134
615599
; GCN-NEXT: v_fma_f32 v44, s4, v44, -v134
616600
; GCN-NEXT: v_fma_f32 v16, s4, v16, -v134
601+
; GCN-NEXT: v_fma_f32 v166, s4, v20, -v134
602+
; GCN-NEXT: v_fma_f32 v24, s4, v24, -v134
617603
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[146:147], v[144:145], v[80:95]
618604
; GCN-NEXT: v_mul_f32_e32 v146, 0x3fb8aa3b, v162
619605
; GCN-NEXT: v_cvt_f16_f32_e32 v147, v163
620606
; GCN-NEXT: v_exp_f32_e32 v162, v146
621607
; GCN-NEXT: v_cvt_f16_f32_e32 v146, v164
622-
; GCN-NEXT: v_fma_f32 v166, s4, v20, -v134
608+
; GCN-NEXT: v_fma_f32 v28, s4, v28, -v134
623609
; GCN-NEXT: v_pack_b32_f16 v148, v153, v147
624-
; GCN-NEXT: v_fma_f32 v24, s4, v24, -v134
610+
; GCN-NEXT: v_fma_f32 v0, s4, v0, -v134
625611
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[150:151], v[144:145], v[96:111]
626612
; GCN-NEXT: v_exp_f32_e32 v151, v33
627613
; GCN-NEXT: v_cvt_f16_f32_e32 v33, v59
628614
; GCN-NEXT: v_fma_f32 v150, s4, v34, -v134
629-
; GCN-NEXT: v_fma_f32 v28, s4, v28, -v134
630-
; GCN-NEXT: v_fma_f32 v0, s4, v0, -v134
615+
; GCN-NEXT: v_fma_f32 v8, s4, v8, -v134
616+
; GCN-NEXT: v_fma_f32 v12, s4, v12, -v134
631617
; GCN-NEXT: v_pack_b32_f16 v149, v146, v33
632618
; GCN-NEXT: v_mul_f32_e32 v33, 0x3fb8aa3b, v152
633619
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[154:155], v[144:145], v[112:127]
@@ -636,8 +622,6 @@
636622
; GCN-NEXT: v_fma_f32 v155, s4, v36, -v134
637623
; GCN-NEXT: v_perm_b32 v36, v158, v156, s5
638624
; GCN-NEXT: v_cvt_f16_f32_e32 v154, v160
639-
; GCN-NEXT: v_fma_f32 v8, s4, v8, -v134
640-
; GCN-NEXT: v_fma_f32 v12, s4, v12, -v134
641625
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[60:61], v[148:149], v[64:79]
642626
; GCN-NEXT: v_mul_f32_e32 v60, 0x3fb8aa3b, v32
643627
; GCN-NEXT: ds_read_b128 v[32:35], v57 offset:1152
@@ -811,14 +795,12 @@
811795
; GCN-NEXT: v_cvt_f16_f32_e32 v45, v158
812796
; GCN-NEXT: v_perm_b32 v21, v148, v144, s5
813797
; GCN-NEXT: v_perm_b32 v37, v148, v144, s8
798+
; GCN-NEXT: v_cvt_f16_f32_e32 v44, v63
814799
; GCN-NEXT: ;;#ASMSTART
815800
; GCN-NEXT: s_waitcnt vmcnt(8)
816801
; GCN-NEXT: ;;#ASMEND
817802
; GCN-NEXT: buffer_wbl2 sc0 sc1
818803
; GCN-NEXT: ds_write_b64 v135, v[20:21]
819-
; GCN-NEXT: buffer_wbl2 sc0 sc1
820-
; GCN-NEXT: s_waitcnt lgkmcnt(0)
821-
; GCN-NEXT: ds_write_b64 v136, v[36:37]
822804
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[16:17], v[40:41], v[96:111]
823805
; GCN-NEXT: v_perm_b32 v16, v141, v131, s5
824806
; GCN-NEXT: v_fma_f32 v131, s4, v22, -v134
@@ -828,31 +810,33 @@
828810
; GCN-NEXT: v_perm_b32 v17, v149, v145, s5
829811
; GCN-NEXT: buffer_wbl2 sc0 sc1
830812
; GCN-NEXT: s_waitcnt lgkmcnt(0)
831-
; GCN-NEXT: ds_write_b64 v137, v[16:17]
813+
; GCN-NEXT: ds_write_b64 v136, v[36:37]
832814
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[32:33], v[40:41], v[112:127]
833815
; GCN-NEXT: v_pack_b32_f16 v33, v45, v22
834816
; GCN-NEXT: v_mul_f32_e32 v22, 0x3fb8aa3b, v60
835817
; GCN-NEXT: v_exp_f32_e32 v144, v22
818+
; GCN-NEXT: buffer_wbl2 sc0 sc1
819+
; GCN-NEXT: s_waitcnt lgkmcnt(0)
820+
; GCN-NEXT: ds_write_b64 v137, v[16:17]
836821
; GCN-NEXT: ; implicit-def: $vgpr17
837822
; GCN-NEXT: ; implicit-def: $vgpr22
838-
; GCN-NEXT: v_cvt_f16_f32_e32 v44, v63
839823
; GCN-NEXT: buffer_wbl2 sc0 sc1
840824
; GCN-NEXT: s_waitcnt lgkmcnt(0)
841825
; GCN-NEXT: ds_write_b64 v138, v[42:43]
842826
; GCN-NEXT: v_add_u32_e32 v22, v132, v22
843827
; GCN-NEXT: v_add_u32_e32 v17, v132, v17
828+
; GCN-NEXT: ; implicit-def: $vgpr20
829+
; GCN-NEXT: ; implicit-def: $vgpr21
844830
; GCN-NEXT: s_waitcnt lgkmcnt(0)
845831
; GCN-NEXT: buffer_load_dwordx2 v[40:41], v22, s[0:3], 0 offen sc0 sc1
846832
; GCN-NEXT: s_waitcnt vmcnt(0)
847833
; GCN-NEXT: buffer_inv sc0 sc1
848834
; GCN-NEXT: buffer_load_dwordx2 v[42:43], v17, s[0:3], 0 offen sc0 sc1
849835
; GCN-NEXT: s_waitcnt vmcnt(0)
850836
; GCN-NEXT: buffer_inv sc0 sc1
851-
; GCN-NEXT: ; implicit-def: $vgpr20
852-
; GCN-NEXT: ; implicit-def: $vgpr21
853-
; GCN-NEXT: v_pack_b32_f16 v32, v61, v44
854837
; GCN-NEXT: v_add_u32_e32 v20, v132, v20
855838
; GCN-NEXT: v_add_u32_e32 v21, v132, v21
839+
; GCN-NEXT: v_pack_b32_f16 v32, v61, v44
856840
; GCN-NEXT: buffer_load_dwordx2 v[44:45], v20, s[0:3], 0 offen sc0 sc1
857841
; GCN-NEXT: s_waitcnt vmcnt(0)
858842
; GCN-NEXT: buffer_inv sc0 sc1
@@ -983,27 +967,27 @@
983967
; GCN-NEXT: buffer_wbl2 sc0 sc1
984968
; GCN-NEXT: s_waitcnt lgkmcnt(0)
985969
; GCN-NEXT: ds_write_b64 v136, v[20:21]
986-
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[16:17], v[24:25], v[112:127]
987-
; GCN-NEXT: v_pack_b32_f16 v17, v40, v6
988-
; GCN-NEXT: v_mul_f32_e32 v6, 0x3fb8aa3b, v32
989970
; GCN-NEXT: buffer_wbl2 sc0 sc1
990971
; GCN-NEXT: s_waitcnt lgkmcnt(0)
991972
; GCN-NEXT: ds_write_b64 v137, v[0:1]
992973
; GCN-NEXT: buffer_wbl2 sc0 sc1
993974
; GCN-NEXT: s_waitcnt lgkmcnt(0)
994975
; GCN-NEXT: ds_write_b64 v138, v[26:27]
995-
; GCN-NEXT: v_exp_f32_e32 v25, v6
976+
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[16:17], v[24:25], v[112:127]
977+
; GCN-NEXT: v_pack_b32_f16 v17, v40, v6
978+
; GCN-NEXT: v_mul_f32_e32 v6, 0x3fb8aa3b, v32
996979
; GCN-NEXT: ;;#ASMSTART
997980
; GCN-NEXT: s_waitcnt vmcnt(8)
998981
; GCN-NEXT: ;;#ASMEND
999982
; GCN-NEXT: v_pack_b32_f16 v16, v37, v28
1000983
; GCN-NEXT: v_fma_f32 v24, s4, v7, -v134
1001-
; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v149
984+
; GCN-NEXT: v_exp_f32_e32 v25, v6
1002985
; GCN-NEXT: s_waitcnt lgkmcnt(0)
1003986
; GCN-NEXT: ds_read_b128 v[4:7], v139
1004987
; GCN-NEXT: s_waitcnt lgkmcnt(0)
1005988
; GCN-NEXT: buffer_inv sc0 sc1
1006989
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[22:23], v[16:17], v[64:79]
990+
; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v149
1007991
; GCN-NEXT: v_exp_f32_e32 v26, v0
1008992
; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v29
1009993
; GCN-NEXT: v_cvt_f16_f32_e32 v1, v150
@@ -1022,13 +1006,13 @@
10221006
; GCN-NEXT: v_cvt_f16_f32_e32 v0, v25
10231007
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[18:19], v[16:17], v[112:127]
10241008
; GCN-NEXT: v_pack_b32_f16 v17, v2, v0
1025-
; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v24
10261009
; GCN-NEXT: v_pack_b32_f16 v16, v1, v27
1010+
; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v24
1011+
; GCN-NEXT: v_fma_f32 v18, s4, v11, -v134
10271012
; GCN-NEXT: v_exp_f32_e32 v19, v0
10281013
; GCN-NEXT: ds_read_b128 v[0:3], v139 offset:1152
10291014
; GCN-NEXT: s_waitcnt lgkmcnt(0)
10301015
; GCN-NEXT: buffer_inv sc0 sc1
1031-
; GCN-NEXT: v_fma_f32 v18, s4, v11, -v134
10321016
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[4:5], v[16:17], v[64:79]
10331017
; GCN-NEXT: v_mul_f32_e32 v4, 0x3fb8aa3b, v8
10341018
; GCN-NEXT: ds_read_b128 v[8:11], v139 offset:1728
@@ -1037,41 +1021,41 @@
10371021
; GCN-NEXT: v_exp_f32_e32 v24, v4
10381022
; GCN-NEXT: v_mul_f32_e32 v4, 0x3fb8aa3b, v28
10391023
; GCN-NEXT: v_cvt_f16_f32_e32 v5, v26
1040-
; GCN-NEXT: v_fma_f32 v28, s4, v14, -v134
1041-
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[20:21], v[16:17], v[80:95]
10421024
; GCN-NEXT: v_exp_f32_e32 v27, v4
1043-
; GCN-NEXT: v_cvt_f16_f32_e32 v20, v29
10441025
; GCN-NEXT: v_mul_f32_e32 v4, 0x3fb8aa3b, v18
1026+
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[20:21], v[16:17], v[80:95]
1027+
; GCN-NEXT: v_cvt_f16_f32_e32 v20, v29
10451028
; GCN-NEXT: v_fma_f32 v21, s4, v13, -v134
1029+
; GCN-NEXT: v_fma_f32 v28, s4, v14, -v134
10461030
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[0:1], v[16:17], v[96:111]
10471031
; GCN-NEXT: v_mul_f32_e32 v0, 0x3fb8aa3b, v30
1048-
; GCN-NEXT: v_exp_f32_e32 v30, v0
10491032
; GCN-NEXT: v_cvt_f16_f32_e32 v1, v31
1033+
; GCN-NEXT: v_exp_f32_e32 v30, v0
10501034
; GCN-NEXT: v_cvt_f16_f32_e32 v0, v19
10511035
; GCN-NEXT: v_pack_b32_f16 v1, v1, v0
10521036
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[8:9], v[16:17], v[112:127]
10531037
; GCN-NEXT: v_exp_f32_e32 v16, v4
10541038
; GCN-NEXT: v_pack_b32_f16 v0, v5, v20
10551039
; GCN-NEXT: v_mul_f32_e32 v9, 0x3fb8aa3b, v12
1056-
; GCN-NEXT: v_fma_f32 v8, s4, v15, -v134
1057-
; GCN-NEXT: v_cvt_f16_f32_e32 v17, v24
1058-
; GCN-NEXT: v_cvt_f16_f32_e32 v20, v27
1059-
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[6:7], v[0:1], v[64:79]
10601040
; GCN-NEXT: v_exp_f32_e32 v18, v9
10611041
; GCN-NEXT: v_mul_f32_e32 v9, 0x3fb8aa3b, v21
1042+
; GCN-NEXT: v_exp_f32_e32 v21, v9
1043+
; GCN-NEXT: v_fma_f32 v8, s4, v15, -v134
1044+
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[6:7], v[0:1], v[64:79]
10621045
; GCN-NEXT: ds_read_b128 v[4:7], v57
10631046
; GCN-NEXT: s_waitcnt lgkmcnt(0)
10641047
; GCN-NEXT: buffer_inv sc0 sc1
10651048
; GCN-NEXT: ds_read_b128 v[12:15], v57 offset:576
10661049
; GCN-NEXT: s_waitcnt lgkmcnt(0)
10671050
; GCN-NEXT: buffer_inv sc0 sc1
1051+
; GCN-NEXT: v_cvt_f16_f32_e32 v17, v24
1052+
; GCN-NEXT: v_cvt_f16_f32_e32 v20, v27
10681053
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[22:23], v[0:1], v[80:95]
1069-
; GCN-NEXT: v_exp_f32_e32 v21, v9
1070-
; GCN-NEXT: v_cvt_f16_f32_e32 v23, v18
10711054
; GCN-NEXT: v_cvt_f16_f32_e32 v22, v21
1055+
; GCN-NEXT: v_cvt_f16_f32_e32 v23, v18
10721056
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[96:111], v[2:3], v[0:1], v[96:111]
1073-
; GCN-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v28
10741057
; GCN-NEXT: v_cvt_f16_f32_e32 v3, v30
1058+
; GCN-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v28
10751059
; GCN-NEXT: v_exp_f32_e32 v2, v2
10761060
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[112:127], v[10:11], v[0:1], v[112:127]
10771061
; GCN-NEXT: v_cvt_f16_f32_e32 v0, v16
@@ -1126,30 +1110,29 @@
11261110
; GCN-NEXT: v_add_f32_e32 v3, v36, v3
11271111
; GCN-NEXT: v_add_f32_e32 v3, v39, v3
11281112
; GCN-NEXT: v_add_f32_e32 v3, v148, v3
1113+
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[12:13], v[8:9], v[80:95]
11291114
; GCN-NEXT: v_add_f32_e32 v3, v34, v3
11301115
; GCN-NEXT: v_add_f32_e32 v3, v150, v3
1116+
; GCN-NEXT: v_cvt_f16_f32_e32 v1, v10
1117+
; GCN-NEXT: v_cvt_f16_f32_e32 v11, v2
11311118
; GCN-NEXT: v_add_f32_e32 v3, v38, v3
11321119
; GCN-NEXT: v_add_f32_e32 v3, v42, v3
11331120
; GCN-NEXT: v_add_f32_e32 v3, v25, v3
11341121
; GCN-NEXT: v_add_f32_e32 v3, v26, v3
1122+
; GCN-NEXT: v_pack_b32_f16 v1, v11, v1
1123+
; GCN-NEXT: v_pack_b32_f16 v0, v23, v22
11351124
; GCN-NEXT: v_add_f32_e32 v3, v29, v3
1136-
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[4:5], v[8:9], v[64:79]
11371125
; GCN-NEXT: v_add_f32_e32 v3, v31, v3
1126+
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[14:15], v[0:1], v[80:95]
11381127
; GCN-NEXT: v_add_f32_e32 v3, v19, v3
11391128
; GCN-NEXT: v_add_f32_e32 v3, v24, v3
1140-
; GCN-NEXT: v_cvt_f16_f32_e32 v1, v10
1141-
; GCN-NEXT: v_cvt_f16_f32_e32 v11, v2
11421129
; GCN-NEXT: v_add_f32_e32 v3, v27, v3
11431130
; GCN-NEXT: v_add_f32_e32 v3, v30, v3
1144-
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[12:13], v[8:9], v[80:95]
11451131
; GCN-NEXT: v_add_f32_e32 v3, v16, v3
11461132
; GCN-NEXT: v_add_f32_e32 v3, v18, v3
1147-
; GCN-NEXT: v_pack_b32_f16 v1, v11, v1
1148-
; GCN-NEXT: v_pack_b32_f16 v0, v23, v22
11491133
; GCN-NEXT: v_add_f32_e32 v3, v21, v3
1150-
; GCN-NEXT: s_nop 0
1134+
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[4:5], v[8:9], v[64:79]
11511135
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[64:79], v[6:7], v[0:1], v[64:79]
1152-
; GCN-NEXT: v_mfma_f32_32x32x8_f16 v[80:95], v[14:15], v[0:1], v[80:95]
11531136
; GCN-NEXT: v_add_f32_e32 v0, v2, v3
11541137
; GCN-NEXT: v_add_f32_e32 v4, v10, v0
11551138
; GCN-NEXT: ds_bpermute_b32 v5, v133, v4

0 commit comments

Comments
 (0)