Skip to content

Commit e5d23b8

Browse files
- Ensure both performFirstTrueTestVectorCombine & optimizePTestInstr only
consider extracts/copies from the first result of whilecc_x2
- Add negative tests
1 parent cbf9dd2 commit e5d23b8

11 files changed

+435
-17
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20235,7 +20235,7 @@ performFirstTrueTestVectorCombine(SDNode *N,
2023520235

2023620236
// Restricted the DAG combine to only cases where we're extracting from a
2023720237
// flag-setting operation.
20238-
if (!isPredicateCCSettingOp(N0))
20238+
if (!isPredicateCCSettingOp(N0) || N0.getResNo() != 0)
2023920239
return SDValue();
2024020240

2024120241
// Extracts of lane 0 for SVE can be expressed as PTEST(Op, FIRST) ? 1 : 0

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1488,21 +1488,6 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
14881488
bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
14891489
bool PredIsWhileLike = isWhileOpcode(PredOpcode);
14901490

1491-
uint64_t PredEltSize = 0;
1492-
if (PredIsWhileLike)
1493-
PredEltSize = getElementSizeForOpcode(PredOpcode);
1494-
1495-
if (Pred->isCopy()) {
1496-
// Instructions which return a multi-vector (e.g. WHILECC_x2) require copies
1497-
// before the branch to extract each subregister.
1498-
auto Op = Pred->getOperand(1);
1499-
if (Op.isReg() && Op.getReg().isVirtual() && Op.getSubReg() != 0) {
1500-
MachineInstr *DefMI = MRI->getVRegDef(Op.getReg());
1501-
PredIsWhileLike = isWhileOpcode(DefMI->getOpcode());
1502-
PredEltSize = getElementSizeForOpcode(DefMI->getOpcode());
1503-
}
1504-
}
1505-
15061491
if (PredIsWhileLike) {
15071492
// For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
15081493
// instruction and the condition is "any" since WHILEcc does an implicit
@@ -1514,7 +1499,8 @@ AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
15141499
// redundant since WHILE performs an implicit PTEST with an all active
15151500
// mask.
15161501
if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1517-
getElementSizeForOpcode(MaskOpcode) == PredEltSize)
1502+
getElementSizeForOpcode(MaskOpcode) ==
1503+
getElementSizeForOpcode(PredOpcode))
15181504
return PredOpcode;
15191505

15201506
return {};
@@ -1627,6 +1613,15 @@ bool AArch64InstrInfo::optimizePTestInstr(
16271613
const MachineRegisterInfo *MRI) const {
16281614
auto *Mask = MRI->getUniqueVRegDef(MaskReg);
16291615
auto *Pred = MRI->getUniqueVRegDef(PredReg);
1616+
1617+
if (Pred->isCopy()) {
1618+
// Instructions which return a multi-vector (e.g. WHILECC_x2) require copies
1619+
// before the branch to extract each subregister.
1620+
auto Op = Pred->getOperand(1);
1621+
if (Op.isReg() && Op.getSubReg() == AArch64::psub0)
1622+
Pred = MRI->getUniqueVRegDef(Op.getReg());
1623+
}
1624+
16301625
unsigned PredOpcode = Pred->getOpcode();
16311626
auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
16321627
if (!NewOp)

llvm/test/CodeGen/AArch64/sve-cmp-folds.ll

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,21 @@ define i1 @whilelt_x2_first(i64 %next, i64 %end) {
316316
ret i1 %bit
317317
}
318318

319+
; Do not combine to ptest when the extract is not from the first vector result
320+
define i1 @whilege_x2_second_result(i64 %next, i64 %end) {
321+
; CHECK-LABEL: whilege_x2_second_result:
322+
; CHECK: // %bb.0:
323+
; CHECK-NEXT: whilege { p0.s, p1.s }, x0, x1
324+
; CHECK-NEXT: mov z0.s, p1/z, #1 // =0x1
325+
; CHECK-NEXT: fmov w8, s0
326+
; CHECK-NEXT: and w0, w8, #0x1
327+
; CHECK-NEXT: ret
328+
%predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilege.x2.nxv4i1.i64(i64 %next, i64 %end)
329+
%predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 1
330+
%bit = extractelement <vscale x 4 x i1> %predicate, i64 0
331+
ret i1 %bit
332+
}
333+
319334
declare i64 @llvm.vscale.i64()
320335
declare <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i64(i64, i64)
321336
declare <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i64(i64, i64)

llvm/test/CodeGen/AArch64/sve-ptest-removal-whilege.mir

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,3 +587,54 @@ body: |
587587
$w0 = COPY %7
588588
RET_ReallyLR implicit $w0
589589
...
590+
591+
# PTEST is not redundant when its Pg operand is a subregister copy, but not
592+
# from the first subregister of ppr2mul2
593+
---
594+
name: whilege_x2_b64_s64_psub1
595+
alignment: 2
596+
tracksRegLiveness: true
597+
registers:
598+
- { id: 0, class: gpr64 }
599+
- { id: 1, class: gpr64 }
600+
- { id: 2, class: ppr }
601+
- { id: 3, class: ppr2mul2 }
602+
- { id: 4, class: ppr }
603+
- { id: 5, class: ppr }
604+
- { id: 6, class: gpr32 }
605+
- { id: 7, class: gpr32 }
606+
liveins:
607+
- { reg: '$x0', virtual-reg: '%0' }
608+
- { reg: '$x1', virtual-reg: '%1' }
609+
frameInfo:
610+
maxCallFrameSize: 0
611+
body: |
612+
bb.0.entry:
613+
liveins: $x0, $x1
614+
615+
; CHECK-LABEL: name: whilege_x2_b64_s64_psub1
616+
; CHECK: liveins: $x0, $x1
617+
; CHECK-NEXT: {{ $}}
618+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
619+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
620+
; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
621+
; CHECK-NEXT: [[WHILEGE_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEGE_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
622+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEGE_2PXX_D]].psub0
623+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEGE_2PXX_D]].psub1
624+
; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
625+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
626+
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
627+
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
628+
; CHECK-NEXT: RET_ReallyLR implicit $w0
629+
%0:gpr64 = COPY $x0
630+
%1:gpr64 = COPY $x1
631+
%2:ppr = PTRUE_D 31, implicit $vg
632+
%3:ppr2mul2 = WHILEGE_2PXX_D %0, %1, implicit-def $nzcv
633+
%4:ppr = COPY %3.psub0
634+
%5:ppr = COPY %3.psub1
635+
PTEST_PP killed %2, killed %5, implicit-def $nzcv
636+
%6:gpr32 = COPY $wzr
637+
%7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
638+
$w0 = COPY %7
639+
RET_ReallyLR implicit $w0
640+
...

llvm/test/CodeGen/AArch64/sve-ptest-removal-whilegt.mir

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,3 +627,54 @@ body: |
627627
$w0 = COPY %7
628628
RET_ReallyLR implicit $w0
629629
...
630+
631+
# PTEST is not redundant when its Pg operand is a subregister copy, but not
632+
# from the first subregister of ppr2mul2
633+
---
634+
name: whilegt_x2_b64_s64_psub1
635+
alignment: 2
636+
tracksRegLiveness: true
637+
registers:
638+
- { id: 0, class: gpr64 }
639+
- { id: 1, class: gpr64 }
640+
- { id: 2, class: ppr }
641+
- { id: 3, class: ppr2mul2 }
642+
- { id: 4, class: ppr }
643+
- { id: 5, class: ppr }
644+
- { id: 6, class: gpr32 }
645+
- { id: 7, class: gpr32 }
646+
liveins:
647+
- { reg: '$x0', virtual-reg: '%0' }
648+
- { reg: '$x1', virtual-reg: '%1' }
649+
frameInfo:
650+
maxCallFrameSize: 0
651+
body: |
652+
bb.0.entry:
653+
liveins: $x0, $x1
654+
655+
; CHECK-LABEL: name: whilegt_x2_b64_s64_psub1
656+
; CHECK: liveins: $x0, $x1
657+
; CHECK-NEXT: {{ $}}
658+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
659+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
660+
; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
661+
; CHECK-NEXT: [[WHILEGT_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEGT_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
662+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEGT_2PXX_D]].psub0
663+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEGT_2PXX_D]].psub1
664+
; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
665+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
666+
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
667+
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
668+
; CHECK-NEXT: RET_ReallyLR implicit $w0
669+
%0:gpr64 = COPY $x0
670+
%1:gpr64 = COPY $x1
671+
%2:ppr = PTRUE_D 31, implicit $vg
672+
%3:ppr2mul2 = WHILEGT_2PXX_D %0, %1, implicit-def $nzcv
673+
%4:ppr = COPY %3.psub0
674+
%5:ppr = COPY %3.psub1
675+
PTEST_PP killed %2, killed %5, implicit-def $nzcv
676+
%6:gpr32 = COPY $wzr
677+
%7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
678+
$w0 = COPY %7
679+
RET_ReallyLR implicit $w0
680+
...

llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehi.mir

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,3 +587,54 @@ body: |
587587
$w0 = COPY %7
588588
RET_ReallyLR implicit $w0
589589
...
590+
591+
# PTEST is not redundant when its Pg operand is a subregister copy, but not
592+
# from the first subregister of ppr2mul2
593+
---
594+
name: whilehi_x2_b64_s64_psub1
595+
alignment: 2
596+
tracksRegLiveness: true
597+
registers:
598+
- { id: 0, class: gpr64 }
599+
- { id: 1, class: gpr64 }
600+
- { id: 2, class: ppr }
601+
- { id: 3, class: ppr2mul2 }
602+
- { id: 4, class: ppr }
603+
- { id: 5, class: ppr }
604+
- { id: 6, class: gpr32 }
605+
- { id: 7, class: gpr32 }
606+
liveins:
607+
- { reg: '$x0', virtual-reg: '%0' }
608+
- { reg: '$x1', virtual-reg: '%1' }
609+
frameInfo:
610+
maxCallFrameSize: 0
611+
body: |
612+
bb.0.entry:
613+
liveins: $x0, $x1
614+
615+
; CHECK-LABEL: name: whilehi_x2_b64_s64_psub1
616+
; CHECK: liveins: $x0, $x1
617+
; CHECK-NEXT: {{ $}}
618+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
619+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
620+
; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
621+
; CHECK-NEXT: [[WHILEHI_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEHI_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
622+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEHI_2PXX_D]].psub0
623+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEHI_2PXX_D]].psub1
624+
; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
625+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
626+
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
627+
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
628+
; CHECK-NEXT: RET_ReallyLR implicit $w0
629+
%0:gpr64 = COPY $x0
630+
%1:gpr64 = COPY $x1
631+
%2:ppr = PTRUE_D 31, implicit $vg
632+
%3:ppr2mul2 = WHILEHI_2PXX_D %0, %1, implicit-def $nzcv
633+
%4:ppr = COPY %3.psub0
634+
%5:ppr = COPY %3.psub1
635+
PTEST_PP killed %2, killed %5, implicit-def $nzcv
636+
%6:gpr32 = COPY $wzr
637+
%7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
638+
$w0 = COPY %7
639+
RET_ReallyLR implicit $w0
640+
...

llvm/test/CodeGen/AArch64/sve-ptest-removal-whilehs.mir

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,3 +587,54 @@ body: |
587587
$w0 = COPY %7
588588
RET_ReallyLR implicit $w0
589589
...
590+
591+
# PTEST is not redundant when its Pg operand is a subregister copy, but not
592+
# from the first subregister of ppr2mul2
593+
---
594+
name: whilehs_x2_b64_s64_psub1
595+
alignment: 2
596+
tracksRegLiveness: true
597+
registers:
598+
- { id: 0, class: gpr64 }
599+
- { id: 1, class: gpr64 }
600+
- { id: 2, class: ppr }
601+
- { id: 3, class: ppr2mul2 }
602+
- { id: 4, class: ppr }
603+
- { id: 5, class: ppr }
604+
- { id: 6, class: gpr32 }
605+
- { id: 7, class: gpr32 }
606+
liveins:
607+
- { reg: '$x0', virtual-reg: '%0' }
608+
- { reg: '$x1', virtual-reg: '%1' }
609+
frameInfo:
610+
maxCallFrameSize: 0
611+
body: |
612+
bb.0.entry:
613+
liveins: $x0, $x1
614+
615+
; CHECK-LABEL: name: whilehs_x2_b64_s64_psub1
616+
; CHECK: liveins: $x0, $x1
617+
; CHECK-NEXT: {{ $}}
618+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
619+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
620+
; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
621+
; CHECK-NEXT: [[WHILEHS_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILEHS_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
622+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILEHS_2PXX_D]].psub0
623+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILEHS_2PXX_D]].psub1
624+
; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
625+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
626+
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
627+
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
628+
; CHECK-NEXT: RET_ReallyLR implicit $w0
629+
%0:gpr64 = COPY $x0
630+
%1:gpr64 = COPY $x1
631+
%2:ppr = PTRUE_D 31, implicit $vg
632+
%3:ppr2mul2 = WHILEHS_2PXX_D %0, %1, implicit-def $nzcv
633+
%4:ppr = COPY %3.psub0
634+
%5:ppr = COPY %3.psub1
635+
PTEST_PP killed %2, killed %5, implicit-def $nzcv
636+
%6:gpr32 = COPY $wzr
637+
%7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
638+
$w0 = COPY %7
639+
RET_ReallyLR implicit $w0
640+
...

llvm/test/CodeGen/AArch64/sve-ptest-removal-whilele.mir

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,3 +587,54 @@ body: |
587587
$w0 = COPY %7
588588
RET_ReallyLR implicit $w0
589589
...
590+
591+
# PTEST is not redundant when its Pg operand is a subregister copy, but not
592+
# from the first subregister of ppr2mul2
593+
---
594+
name: whilele_x2_b64_s64_psub1
595+
alignment: 2
596+
tracksRegLiveness: true
597+
registers:
598+
- { id: 0, class: gpr64 }
599+
- { id: 1, class: gpr64 }
600+
- { id: 2, class: ppr }
601+
- { id: 3, class: ppr2mul2 }
602+
- { id: 4, class: ppr }
603+
- { id: 5, class: ppr }
604+
- { id: 6, class: gpr32 }
605+
- { id: 7, class: gpr32 }
606+
liveins:
607+
- { reg: '$x0', virtual-reg: '%0' }
608+
- { reg: '$x1', virtual-reg: '%1' }
609+
frameInfo:
610+
maxCallFrameSize: 0
611+
body: |
612+
bb.0.entry:
613+
liveins: $x0, $x1
614+
615+
; CHECK-LABEL: name: whilele_x2_b64_s64_psub1
616+
; CHECK: liveins: $x0, $x1
617+
; CHECK-NEXT: {{ $}}
618+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
619+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
620+
; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
621+
; CHECK-NEXT: [[WHILELE_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELE_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
622+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELE_2PXX_D]].psub0
623+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELE_2PXX_D]].psub1
624+
; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
625+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
626+
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
627+
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
628+
; CHECK-NEXT: RET_ReallyLR implicit $w0
629+
%0:gpr64 = COPY $x0
630+
%1:gpr64 = COPY $x1
631+
%2:ppr = PTRUE_D 31, implicit $vg
632+
%3:ppr2mul2 = WHILELE_2PXX_D %0, %1, implicit-def $nzcv
633+
%4:ppr = COPY %3.psub0
634+
%5:ppr = COPY %3.psub1
635+
PTEST_PP killed %2, killed %5, implicit-def $nzcv
636+
%6:gpr32 = COPY $wzr
637+
%7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
638+
$w0 = COPY %7
639+
RET_ReallyLR implicit $w0
640+
...

0 commit comments

Comments
 (0)