Skip to content

Commit 7741c0d

Browse files
committed
[AIE2P] revert separate subregister index for accumulator
1 parent d1a8b6d commit 7741c0d

File tree

5 files changed: 114 additions and 129 deletions.

llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -618,22 +618,22 @@ void AIE2PInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
618618
} else if (AIE2P::VEC1024RegClass.contains(SrcReg) &&
619619
AIE2P::ACC1024RegClass.contains(DstReg)) {
620620
BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x),
621-
TRI.getSubReg(DstReg, AIE2P::sub_512_acc_lo))
621+
TRI.getSubReg(DstReg, AIE2P::sub_512_lo))
622622
.addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_lo),
623623
getKillRegState(KillSrc));
624624
BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x),
625-
TRI.getSubReg(DstReg, AIE2P::sub_512_acc_hi))
625+
TRI.getSubReg(DstReg, AIE2P::sub_512_hi))
626626
.addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_hi),
627627
getKillRegState(KillSrc));
628628
} else if (AIE2P::ACC1024RegClass.contains(SrcReg) &&
629629
AIE2P::VEC1024RegClass.contains(DstReg)) {
630630
BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x),
631631
TRI.getSubReg(DstReg, AIE2P::sub_512_lo))
632-
.addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_acc_lo),
632+
.addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_lo),
633633
getKillRegState(KillSrc));
634634
BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x),
635635
TRI.getSubReg(DstReg, AIE2P::sub_512_hi))
636-
.addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_acc_hi),
636+
.addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_hi),
637637
getKillRegState(KillSrc));
638638
} else if ((AIE2P::ACC2048RegClass.contains(SrcReg)) &&
639639
(AIE2P::ACC2048RegClass.contains(DstReg))) {
@@ -715,20 +715,20 @@ void AIE2PInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
715715
(AIE2P::FIFO1024RegClass.contains(DstReg))) {
716716
BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x),
717717
TRI.getSubReg(DstReg, AIE2P::sub_lo_fifo))
718-
.addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_acc_lo),
718+
.addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_lo),
719719
getKillRegState(KillSrc));
720720
BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x),
721721
TRI.getSubReg(DstReg, AIE2P::sub_hi_fifo))
722-
.addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_acc_hi),
722+
.addReg(TRI.getSubReg(SrcReg, AIE2P::sub_512_hi),
723723
getKillRegState(KillSrc));
724724
} else if ((AIE2P::FIFO1024RegClass.contains(SrcReg)) &&
725725
(AIE2P::ACC1024RegClass.contains(DstReg))) {
726726
BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x),
727-
TRI.getSubReg(DstReg, AIE2P::sub_512_acc_lo))
727+
TRI.getSubReg(DstReg, AIE2P::sub_512_lo))
728728
.addReg(TRI.getSubReg(SrcReg, AIE2P::sub_lo_fifo),
729729
getKillRegState(KillSrc));
730730
BuildMI(MBB, MBBI, DL, get(AIE2P::VMOV_alu_mv_mv_x),
731-
TRI.getSubReg(DstReg, AIE2P::sub_512_acc_hi))
731+
TRI.getSubReg(DstReg, AIE2P::sub_512_hi))
732732
.addReg(TRI.getSubReg(SrcReg, AIE2P::sub_hi_fifo),
733733
getKillRegState(KillSrc));
734734
} else if ((AIE2P::eLRegClass.contains(SrcReg)) &&
@@ -1025,8 +1025,8 @@ AIE2PInstrInfo::getSpillPseudoExpandInfo(const MachineInstr &MI) const {
10251025
return {{AIE2P::ST_dms_sts_spill, AIE2P::sub_l_even},
10261026
{AIE2P::ST_dms_sts_spill, AIE2P::sub_l_odd}};
10271027
case AIE2P::VST_CM_SPILL:
1028-
return {{AIE2P::VST_dmx_sts_bm_spill, AIE2P::sub_512_acc_lo},
1029-
{AIE2P::VST_dmx_sts_bm_spill, AIE2P::sub_512_acc_hi}};
1028+
return {{AIE2P::VST_dmx_sts_bm_spill, AIE2P::sub_512_lo},
1029+
{AIE2P::VST_dmx_sts_bm_spill, AIE2P::sub_512_hi}};
10301030
case AIE2P::VST_FIFO_SPILL:
10311031
return {{AIE2P::VST_dmx_sts_fifohl_spill, AIE2P::sub_lo_fifo},
10321032
{AIE2P::VST_dmx_sts_fifohl_spill, AIE2P::sub_hi_fifo}};
@@ -1036,8 +1036,8 @@ AIE2PInstrInfo::getSpillPseudoExpandInfo(const MachineInstr &MI) const {
10361036
{AIE2P::ST_dms_sts_spill, AIE2P::sub_ptr}};
10371037

10381038
case AIE2P::VST_DM_SPILL:
1039-
return {{AIE2P::VST_CM_SPILL, AIE2P::sub_1024_acc_lo},
1040-
{AIE2P::VST_CM_SPILL, AIE2P::sub_1024_acc_hi}};
1039+
return {{AIE2P::VST_CM_SPILL, AIE2P::sub_1024_lo},
1040+
{AIE2P::VST_CM_SPILL, AIE2P::sub_1024_hi}};
10411041
case AIE2P::VST_Y_SPILL:
10421042
return {{AIE2P::VST_dmx_sts_x_spill, AIE2P::sub_512_lo},
10431043
{AIE2P::VST_dmx_sts_x_spill, AIE2P::sub_512_hi}};
@@ -1062,8 +1062,8 @@ AIE2PInstrInfo::getSpillPseudoExpandInfo(const MachineInstr &MI) const {
10621062
return {{AIE2P::LDA_dms_lda_spill, AIE2P::sub_l_even},
10631063
{AIE2P::LDA_dms_lda_spill, AIE2P::sub_l_odd}};
10641064
case AIE2P::VLDA_CM_SPILL:
1065-
return {{AIE2P::VLDA_dmx_lda_bm_spill, AIE2P::sub_512_acc_lo},
1066-
{AIE2P::VLDA_dmx_lda_bm_spill, AIE2P::sub_512_acc_hi}};
1065+
return {{AIE2P::VLDA_dmx_lda_bm_spill, AIE2P::sub_512_lo},
1066+
{AIE2P::VLDA_dmx_lda_bm_spill, AIE2P::sub_512_hi}};
10671067
case AIE2P::VLDA_FIFO_SPILL:
10681068
return {{AIE2P::VLDA_dmx_lda_fifohl_spill, AIE2P::sub_lo_fifo},
10691069
{AIE2P::VLDA_dmx_lda_fifohl_spill, AIE2P::sub_hi_fifo}};
@@ -1074,8 +1074,8 @@ AIE2PInstrInfo::getSpillPseudoExpandInfo(const MachineInstr &MI) const {
10741074
{AIE2P::LDA_dms_lda_spill, AIE2P::sub_ptr},
10751075
};
10761076
case AIE2P::VLDA_DM_SPILL:
1077-
return {{AIE2P::VLDA_CM_SPILL, AIE2P::sub_1024_acc_lo},
1078-
{AIE2P::VLDA_CM_SPILL, AIE2P::sub_1024_acc_hi}};
1077+
return {{AIE2P::VLDA_CM_SPILL, AIE2P::sub_1024_lo},
1078+
{AIE2P::VLDA_CM_SPILL, AIE2P::sub_1024_hi}};
10791079
case AIE2P::VLDA_Y_SPILL:
10801080
return {{AIE2P::VLDA_dmx_lda_x_spill, AIE2P::sub_512_lo},
10811081
{AIE2P::VLDA_dmx_lda_x_spill, AIE2P::sub_512_hi}};

llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td

Lines changed: 43 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -243,19 +243,19 @@ def : Pat<(v64bf16 (fmul v64bf16:$vec1, v64bf16:$vec2)),
243243
(VCONV_bf16_fp32_mv_x_srs_bf
244244
(EXTRACT_SUBREG
245245
(VMUL_f_vmul_bf_vmul_bf_core_Y_Y VEC1024:$vec1, VEC1024:$vec2, (i32 mulbf16_vecconf.ConfBits)),
246-
sub_1024_acc_lo)),
246+
sub_1024_lo)),
247247
sub_512_lo,
248248
(VCONV_bf16_fp32_mv_x_srs_bf
249249
(EXTRACT_SUBREG
250250
(VMUL_f_vmul_bf_vmul_bf_core_Y_Y VEC1024:$vec1, VEC1024:$vec2, (i32 mulbf16_vecconf.ConfBits)),
251-
sub_1024_acc_hi)),
251+
sub_1024_hi)),
252252
sub_512_hi))>;
253253

254254
def : Pat<(v32bf16 (fmul v32bf16:$vec1, v32bf16:$vec2)),
255255
(VCONV_bf16_fp32_mv_x_srs_bf
256256
(EXTRACT_SUBREG
257257
(VMUL_f_vmul_bf_vmul_bf_core_X_X VEC512:$vec1, VEC512:$vec2, (i32 mulbf16_vecconf.ConfBits)),
258-
sub_1024_acc_lo))>;
258+
sub_1024_lo))>;
259259

260260
// VMUL/VMAC Intrinsics
261261

@@ -303,40 +303,40 @@ def : Pat<(int_aie2p_I512_I1024_ACC2048_mul_conf VEC512:$s1, VEC1024:$s2, eR:$ac
303303
def : Pat<(int_aie2p_I512_I512_ACC1024_addmac_conf VEC512:$s1, VEC512:$s2, ACC1024:$acc1, ACC1024:$acc2, eR:$acc),
304304
(EXTRACT_SUBREG
305305
(VADDMAC_vmac_cm2_add_reg_vmul_cm_core_X_X
306-
(REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_acc_lo),
307-
(REG_SEQUENCE ACC2048, ACC1024:$acc2, sub_1024_acc_lo),
306+
(REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_lo),
307+
(REG_SEQUENCE ACC2048, ACC1024:$acc2, sub_1024_lo),
308308
VEC512:$s1, VEC512:$s2, eR:$acc),
309-
sub_1024_acc_lo)>;
309+
sub_1024_lo)>;
310310

311311
def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC1024_bf_addmac_conf VEC512:$s1, VEC512:$s2, ACC1024:$acc1, ACC1024:$acc2, eR:$acc),
312312
(EXTRACT_SUBREG
313313
(VADDMAC_f_vaddmac_bf_vmac_cm2_add_reg_vmul_bf_core_X_X
314-
(REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_acc_lo),
315-
(REG_SEQUENCE ACC2048, ACC1024:$acc2, sub_1024_acc_lo),
314+
(REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_lo),
315+
(REG_SEQUENCE ACC2048, ACC1024:$acc2, sub_1024_lo),
316316
VEC512:$s1, VEC512:$s2, eR:$acc),
317-
sub_1024_acc_lo)>;
317+
sub_1024_lo)>;
318318

319319
def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC1024_bf_mac_conf VEC512:$s1, VEC512:$s2, ACC1024:$acc1, eR:$acc),
320320
(EXTRACT_SUBREG
321321
(VMAC_f_vmac_bf_vmul_bf_core_X_X
322-
(REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_acc_lo),
322+
(REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_lo),
323323
VEC512:$s1, VEC512:$s2, eR:$acc),
324-
sub_1024_acc_lo)>;
324+
sub_1024_lo)>;
325325

326326
def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC1024_bf_mul_conf VEC512:$s1, VEC512:$s2, eR:$acc),
327327
(EXTRACT_SUBREG
328328
(VMUL_f_vmul_bf_vmul_bf_core_X_X VEC512:$s1, VEC512:$s2, eR:$acc),
329-
sub_1024_acc_lo)>;
329+
sub_1024_lo)>;
330330

331331
def : Pat<(int_aie2p_I512_I512_ACC1024_mac_conf VEC512:$s1, VEC512:$s2, ACC1024:$acc1, eR:$acc),
332332
(EXTRACT_SUBREG
333333
(VMAC_vmul_cm_core_X_X
334-
(REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_acc_lo),
334+
(REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_lo),
335335
VEC512:$s1, VEC512:$s2, eR:$acc),
336-
sub_1024_acc_lo)>;
336+
sub_1024_lo)>;
337337

338338
def : Pat<(int_aie2p_I512_I512_ACC1024_mul_conf VEC512:$s1, VEC512:$s2, eR:$acc),
339-
(EXTRACT_SUBREG (VMUL_vmul_cm_core_X_X VEC512:$s1, VEC512:$s2, eR:$acc), sub_1024_acc_lo)>;
339+
(EXTRACT_SUBREG (VMUL_vmul_cm_core_X_X VEC512:$s1, VEC512:$s2, eR:$acc), sub_1024_lo)>;
340340

341341

342342
def : Pat<(int_aie2p_I512_I512_ACC2048_addmac_conf VEC512:$s1, VEC512:$s2, ACC2048:$acc1, ACC2048:$acc2, eR:$acc),
@@ -361,18 +361,18 @@ def : Pat<(int_aie2p_I512_I512_ACC2048_mul_conf VEC512:$s1, VEC512:$s2, eR:$acc)
361361

362362
def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC512_bf_addmac_conf VEC512:$s1, VEC512:$s2, ACC512:$acc1, ACC512:$acc2, eR:$acc),
363363
(EXTRACT_SUBREG (VADDMAC_f_vaddmac_bf_vmac_cm2_add_reg_vmul_bf_core_X_X
364-
(REG_SEQUENCE ACC2048, ACC512:$acc1, sub_512_acc_lo),
365-
(REG_SEQUENCE ACC2048, ACC512:$acc2, sub_512_acc_lo),
366-
VEC512:$s1, VEC512:$s2, eR:$acc), sub_512_acc_lo)>;
364+
(REG_SEQUENCE ACC2048, ACC512:$acc1, sub_512_lo),
365+
(REG_SEQUENCE ACC2048, ACC512:$acc2, sub_512_lo),
366+
VEC512:$s1, VEC512:$s2, eR:$acc), sub_512_lo)>;
367367

368368
def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC512_bf_mac_conf VEC512:$s1, VEC512:$s2, ACC512:$acc1, eR:$acc),
369369
(EXTRACT_SUBREG
370370
(VMAC_f_vmac_bf_vmul_bf_core_X_X
371-
(REG_SEQUENCE ACC2048, ACC512:$acc1, sub_512_acc_lo),
372-
VEC512:$s1, VEC512:$s2, eR:$acc), sub_512_acc_lo)>;
371+
(REG_SEQUENCE ACC2048, ACC512:$acc1, sub_512_lo),
372+
VEC512:$s1, VEC512:$s2, eR:$acc), sub_512_lo)>;
373373

374374
def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC512_bf_mul_conf VEC512:$s1, VEC512:$s2, eR:$acc),
375-
(EXTRACT_SUBREG (VMUL_f_vmul_bf_vmul_bf_core_X_X VEC512:$s1, VEC512:$s2, eR:$acc), sub_512_acc_lo)>;
375+
(EXTRACT_SUBREG (VMUL_f_vmul_bf_vmul_bf_core_X_X VEC512:$s1, VEC512:$s2, eR:$acc), sub_512_lo)>;
376376

377377
// BFP16 VMUL/VMAC Intrinsics
378378

@@ -591,59 +591,59 @@ def : Pat<(int_aie2p_I512_I512_ACC1024_addmsc_conf
591591
ACC1024:$acc2,
592592
eR:$acc),
593593
(EXTRACT_SUBREG (VADDMSC_vmac_cm2_add_reg_vmul_cm_core_X_X
594-
(REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_acc_lo),
595-
(REG_SEQUENCE ACC2048, ACC1024:$acc2, sub_1024_acc_lo),
594+
(REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_lo),
595+
(REG_SEQUENCE ACC2048, ACC1024:$acc2, sub_1024_lo),
596596
VEC512:$s1,
597597
VEC512:$s2,
598-
eR:$acc), sub_1024_acc_lo)>;
598+
eR:$acc), sub_1024_lo)>;
599599
def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC1024_bf_addmsc_conf
600600
VEC512:$s1,
601601
VEC512:$s2,
602602
ACC1024:$acc1,
603603
ACC1024:$acc2,
604604
eR:$acc),
605605
(EXTRACT_SUBREG (VADDMSC_f_vaddmac_bf_vmac_cm2_add_reg_vmul_bf_core_X_X
606-
(REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_acc_lo),
607-
(REG_SEQUENCE ACC2048, ACC1024:$acc2, sub_1024_acc_lo),
606+
(REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_lo),
607+
(REG_SEQUENCE ACC2048, ACC1024:$acc2, sub_1024_lo),
608608
VEC512:$s1,
609609
VEC512:$s2,
610-
eR:$acc), sub_1024_acc_lo)>;
610+
eR:$acc), sub_1024_lo)>;
611611
def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC1024_bf_msc_conf
612612
VEC512:$s1,
613613
VEC512:$s2,
614614
ACC1024:$acc1,
615615
eR:$acc),
616616
(EXTRACT_SUBREG (VMSC_f_vmac_bf_vmul_bf_core_X_X
617-
(REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_acc_lo),
617+
(REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_lo),
618618
VEC512:$s1,
619619
VEC512:$s2,
620-
eR:$acc), sub_1024_acc_lo)>;
620+
eR:$acc), sub_1024_lo)>;
621621
def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC1024_bf_negmul_conf
622622
VEC512:$s1,
623623
VEC512:$s2,
624624
eR:$acc),
625625
(EXTRACT_SUBREG (VNEGMUL_f_vmul_bf_vmul_bf_core_X_X
626626
VEC512:$s1,
627627
VEC512:$s2,
628-
eR:$acc), sub_1024_acc_lo)>;
628+
eR:$acc), sub_1024_lo)>;
629629
def : Pat<(int_aie2p_I512_I512_ACC1024_msc_conf
630630
VEC512:$s1,
631631
VEC512:$s2,
632632
ACC1024:$acc1,
633633
eR:$acc),
634634
(EXTRACT_SUBREG (VMSC_vmul_cm_core_X_X
635-
(REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_acc_lo),
635+
(REG_SEQUENCE ACC2048, ACC1024:$acc1, sub_1024_lo),
636636
VEC512:$s1,
637637
VEC512:$s2,
638-
eR:$acc), sub_1024_acc_lo)>;
638+
eR:$acc), sub_1024_lo)>;
639639
def : Pat<(int_aie2p_I512_I512_ACC1024_negmul_conf
640640
VEC512:$s1,
641641
VEC512:$s2,
642642
eR:$acc),
643643
(EXTRACT_SUBREG (VNEGMUL_vmul_cm_core_X_X
644644
VEC512:$s1,
645645
VEC512:$s2,
646-
eR:$acc), sub_1024_acc_lo)>;
646+
eR:$acc), sub_1024_lo)>;
647647
def : Pat<(int_aie2p_I512_I512_ACC2048_addmsc_conf
648648
VEC512:$s1,
649649
VEC512:$s2,
@@ -711,29 +711,29 @@ def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC512_bf_addmsc_conf
711711
ACC512:$acc2,
712712
eR:$acc),
713713
(EXTRACT_SUBREG (VADDMSC_f_vaddmac_bf_vmac_cm2_add_reg_vmul_bf_core_X_X
714-
(REG_SEQUENCE ACC2048, ACC512:$acc1, sub_512_acc_lo),
715-
(REG_SEQUENCE ACC2048, ACC512:$acc2, sub_512_acc_lo),
714+
(REG_SEQUENCE ACC2048, ACC512:$acc1, sub_512_lo),
715+
(REG_SEQUENCE ACC2048, ACC512:$acc2, sub_512_lo),
716716
VEC512:$s1,
717717
VEC512:$s2,
718-
eR:$acc), sub_512_acc_lo)>;
718+
eR:$acc), sub_512_lo)>;
719719
def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC512_bf_msc_conf
720720
VEC512:$s1,
721721
VEC512:$s2,
722722
ACC512:$acc1,
723723
eR:$acc),
724724
(EXTRACT_SUBREG (VMSC_f_vmac_bf_vmul_bf_core_X_X
725-
(REG_SEQUENCE ACC2048, ACC512:$acc1, sub_512_acc_lo),
725+
(REG_SEQUENCE ACC2048, ACC512:$acc1, sub_512_lo),
726726
VEC512:$s1,
727727
VEC512:$s2,
728-
eR:$acc), sub_512_acc_lo)>;
728+
eR:$acc), sub_512_lo)>;
729729
def : PatInaccessibleMem<(int_aie2p_I512_I512_ACC512_bf_negmul_conf
730730
VEC512:$s1,
731731
VEC512:$s2,
732732
eR:$acc),
733733
(EXTRACT_SUBREG (VNEGMUL_f_vmul_bf_vmul_bf_core_X_X
734734
VEC512:$s1,
735735
VEC512:$s2,
736-
eR:$acc), sub_512_acc_lo)>;
736+
eR:$acc), sub_512_lo)>;
737737

738738
// SRS Intrinsic
739739
// Note : Non-constant sign is handled in .cpp
@@ -867,7 +867,6 @@ def : Pat<(int_aie2p_vshift_I512_I512 VEC512:$s1, VEC512:$s2, mSs:$pre, eR:$shif
867867
(VSHIFT_ALIGN VEC512:$s1, mSs:$pre, VEC512:$s2, eR:$shift)>;
868868
def : Pat<(int_aie2p_vshift_bf512_bf512 VEC512:$s1, VEC512:$s2, mSs:$pre, eR:$shift),
869869
(VSHIFT_ALIGN VEC512:$s1, mSs:$pre, VEC512:$s2, eR:$shift)>;
870-
871870
// Combine broadcast + shift into VPUSH_hi_64
872871
def : Pat<(int_aie2p_vshift_I512_I512 (v16i32 VEC512:$s0), (v16i32 (bcst_vector_node(v2i32 eL : $s1))), 0x0, 0x8),
873872
(VPUSH_hi_64 VEC512:$s0, eL:$s1)>;
@@ -998,16 +997,16 @@ def : Pat<(v128i8 (concat_vectors (v64i8 VEC512:$src0), (v64i8 VEC512:$src1))),
998997

999998
// concat_vector - accumulator bank
1000999
def : Pat<(v16i64 (concat_vectors (v8i64 ACC512:$src0), (v8i64 ACC512:$src1))),
1001-
(v16i64 (REG_SEQUENCE ACC1024, ACC512:$src0, sub_512_acc_lo, ACC512:$src1, sub_512_acc_hi))>;
1000+
(v16i64 (REG_SEQUENCE ACC1024, ACC512:$src0, sub_512_lo, ACC512:$src1, sub_512_hi))>;
10021001

10031002
def : Pat<(v32i32 (concat_vectors (v16i32 ACC512:$src0), (v16i32 ACC512:$src1))),
1004-
(v32i32 (REG_SEQUENCE ACC1024, ACC512:$src0, sub_512_acc_lo, ACC512:$src1, sub_512_acc_hi))>;
1003+
(v32i32 (REG_SEQUENCE ACC1024, ACC512:$src0, sub_512_lo, ACC512:$src1, sub_512_hi))>;
10051004

10061005
def : Pat<(v32i64 (concat_vectors (v16i64 ACC1024:$src0), (v16i64 ACC1024:$src1))),
1007-
(v32i64 (REG_SEQUENCE ACC2048, ACC1024:$src0, sub_1024_acc_lo, ACC1024:$src1, sub_1024_acc_hi))>;
1006+
(v32i64 (REG_SEQUENCE ACC2048, ACC1024:$src0, sub_1024_lo, ACC1024:$src1, sub_1024_hi))>;
10081007

10091008
def : Pat<(v64i32 (concat_vectors (v32i32 ACC1024:$src0), (v32i32 ACC1024:$src1))),
1010-
(v64i32 (REG_SEQUENCE ACC2048, ACC1024:$src0, sub_1024_acc_lo, ACC1024:$src1, sub_1024_acc_hi))>;
1009+
(v64i32 (REG_SEQUENCE ACC2048, ACC1024:$src0, sub_1024_lo, ACC1024:$src1, sub_1024_hi))>;
10111010

10121011
// concat_vector - gpr bank
10131012
foreach Ty = [v4i16, v8i8] in {

0 commit comments

Comments
 (0)