Skip to content

Commit b9fd1e6

Browse files
[AArch64][SVE2p1] Remove redundant PTESTs when predicate is a WHILEcc_x2 (#156478)
The optimisation in canRemovePTestInstr tries to remove PTEST instructions when the predicate is the result of a WHILEcc. This patch extends the support to WHILEcc (predicate pair) by:
- Including the WHILEcc_x2 intrinsics in isPredicateCCSettingOp, allowing performFirstTrueTestVectorCombine to create the PTEST.
- Setting the isWhile flag for the predicate pair instructions in tablegen.
- Looking through copies in optimizePTestInstr, before calling canRemovePTestInstr, so that isWhileOpcode is tested on the WHILEcc_x2 instruction itself.
1 parent c09cc2c commit b9fd1e6

File tree

6 files changed

+307
-19
lines changed

6 files changed

+307
-19
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20195,13 +20195,21 @@ static bool isPredicateCCSettingOp(SDValue N) {
2019520195
(N.getOpcode() == ISD::GET_ACTIVE_LANE_MASK) ||
2019620196
(N.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2019720197
(N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege ||
20198+
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege_x2 ||
2019820199
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt ||
20200+
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt_x2 ||
2019920201
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi ||
20202+
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi_x2 ||
2020020203
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs ||
20204+
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs_x2 ||
2020120205
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele ||
20206+
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele_x2 ||
2020220207
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo ||
20208+
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo_x2 ||
2020320209
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels ||
20204-
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt)))
20210+
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels_x2 ||
20211+
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt ||
20212+
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt_x2)))
2020520213
return true;
2020620214

2020720215
return false;
@@ -20227,7 +20235,7 @@ performFirstTrueTestVectorCombine(SDNode *N,
2022720235

2022820236
// Restrict the DAG combine to only cases where we're extracting from a
2022920237
// flag-setting operation.
20230-
if (!isPredicateCCSettingOp(N0))
20238+
if (!isPredicateCCSettingOp(N0) || N0.getResNo() != 0)
2023120239
return SDValue();
2023220240

2023320241
// Extracts of lane 0 for SVE can be expressed as PTEST(Op, FIRST) ? 1 : 0

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1613,6 +1613,16 @@ bool AArch64InstrInfo::optimizePTestInstr(
16131613
const MachineRegisterInfo *MRI) const {
16141614
auto *Mask = MRI->getUniqueVRegDef(MaskReg);
16151615
auto *Pred = MRI->getUniqueVRegDef(PredReg);
1616+
1617+
if (Pred->isCopy() && PTest->getOpcode() == AArch64::PTEST_PP_FIRST) {
1618+
// Instructions which return a multi-vector (e.g. WHILECC_x2) require copies
1619+
// before the branch to extract each subregister.
1620+
auto Op = Pred->getOperand(1);
1621+
if (Op.isReg() && Op.getReg().isVirtual() &&
1622+
Op.getSubReg() == AArch64::psub0)
1623+
Pred = MRI->getUniqueVRegDef(Op.getReg());
1624+
}
1625+
16161626
unsigned PredOpcode = Pred->getOpcode();
16171627
auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
16181628
if (!NewOp)

llvm/lib/Target/AArch64/AArch64RegisterInfo.td

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,25 +1164,22 @@ class PPRVectorListMul<int ElementWidth, int NumRegs> : PPRVectorList<ElementWid
11641164
", AArch64::PPRMul2RegClassID>";
11651165
}
11661166

1167+
class PPR2MulRegOp<string Suffix, int Size, ElementSizeEnum ES>
1168+
: RegisterOperand<PPR2Mul2, "printTypedVectorList<0,'"#Suffix#"'>"> {
1169+
ElementSizeEnum ElementSize;
1170+
let ElementSize = ES;
1171+
let ParserMatchClass = PPRVectorListMul<Size, 2>;
1172+
}
1173+
11671174
let EncoderMethod = "EncodeRegMul_MinMax<2, 0, 14>",
11681175
DecoderMethod = "DecodePPR2Mul2RegisterClass" in {
1169-
def PP_b_mul_r : RegisterOperand<PPR2Mul2, "printTypedVectorList<0,'b'>"> {
1170-
let ParserMatchClass = PPRVectorListMul<8, 2>;
1171-
}
1172-
1173-
def PP_h_mul_r : RegisterOperand<PPR2Mul2, "printTypedVectorList<0,'h'>"> {
1174-
let ParserMatchClass = PPRVectorListMul<16, 2>;
1175-
}
11761176

1177-
def PP_s_mul_r : RegisterOperand<PPR2Mul2, "printTypedVectorList<0,'s'>"> {
1178-
let ParserMatchClass = PPRVectorListMul<32, 2>;
1179-
}
1180-
1181-
def PP_d_mul_r : RegisterOperand<PPR2Mul2, "printTypedVectorList<0,'d'>"> {
1182-
let ParserMatchClass = PPRVectorListMul<64, 2>;
1183-
}
1184-
} // end let EncoderMethod/DecoderMethod
1177+
def PP_b_mul_r : PPR2MulRegOp<"b", 8, ElementSizeB>;
1178+
def PP_h_mul_r : PPR2MulRegOp<"h", 16, ElementSizeH>;
1179+
def PP_s_mul_r : PPR2MulRegOp<"s", 32, ElementSizeS>;
1180+
def PP_d_mul_r : PPR2MulRegOp<"d", 64, ElementSizeD>;
11851181

1182+
} // end let EncoderMethod/DecoderMethod
11861183

11871184
//===----------------------------------------------------------------------===//
11881185
// SVE vector register classes

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10405,7 +10405,7 @@ multiclass sve2p1_int_while_rr_pn<string mnemonic, bits<3> opc> {
1040510405

1040610406
// SVE integer compare scalar count and limit (predicate pair)
1040710407
class sve2p1_int_while_rr_pair<string mnemonic, bits<2> sz, bits<3> opc,
10408-
RegisterOperand ppr_ty>
10408+
PPR2MulRegOp ppr_ty>
1040910409
: I<(outs ppr_ty:$Pd), (ins GPR64:$Rn, GPR64:$Rm),
1041010410
mnemonic, "\t$Pd, $Rn, $Rm",
1041110411
"", []>, Sched<[]> {
@@ -10425,6 +10425,8 @@ class sve2p1_int_while_rr_pair<string mnemonic, bits<2> sz, bits<3> opc,
1042510425

1042610426
let Defs = [NZCV];
1042710427
let hasSideEffects = 0;
10428+
let ElementSize = ppr_ty.ElementSize;
10429+
let isWhile = 1;
1042810430
}
1042910431

1043010432

llvm/test/CodeGen/AArch64/sve-cmp-folds.ll

Lines changed: 121 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=aarch64-linux-unknown -mattr=+sve2 -o - < %s | FileCheck %s
2+
; RUN: llc -mtriple=aarch64-linux-unknown -mattr=+sve2p1 -o - < %s | FileCheck %s
33

44
define <vscale x 8 x i1> @not_icmp_sle_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
55
; CHECK-LABEL: not_icmp_sle_nxv8i16:
@@ -220,6 +220,117 @@ define i1 @lane_mask_first(i64 %next, i64 %end) {
220220
ret i1 %bit
221221
}
222222

223+
define i1 @whilege_x2_first(i64 %next, i64 %end) {
224+
; CHECK-LABEL: whilege_x2_first:
225+
; CHECK: // %bb.0:
226+
; CHECK-NEXT: whilege { p0.s, p1.s }, x0, x1
227+
; CHECK-NEXT: cset w0, mi
228+
; CHECK-NEXT: ret
229+
%predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilege.x2.nxv4i1.i64(i64 %next, i64 %end)
230+
%predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
231+
%bit = extractelement <vscale x 4 x i1> %predicate, i64 0
232+
ret i1 %bit
233+
}
234+
235+
define i1 @whilegt_x2_first(i64 %next, i64 %end) {
236+
; CHECK-LABEL: whilegt_x2_first:
237+
; CHECK: // %bb.0:
238+
; CHECK-NEXT: whilegt { p0.s, p1.s }, x0, x1
239+
; CHECK-NEXT: cset w0, mi
240+
; CHECK-NEXT: ret
241+
%predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv4i1.i64(i64 %next, i64 %end)
242+
%predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
243+
%bit = extractelement <vscale x 4 x i1> %predicate, i64 0
244+
ret i1 %bit
245+
}
246+
247+
define i1 @whilehi_x2_first(i64 %next, i64 %end) {
248+
; CHECK-LABEL: whilehi_x2_first:
249+
; CHECK: // %bb.0:
250+
; CHECK-NEXT: whilehi { p0.s, p1.s }, x0, x1
251+
; CHECK-NEXT: cset w0, mi
252+
; CHECK-NEXT: ret
253+
%predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv4i1.i64(i64 %next, i64 %end)
254+
%predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
255+
%bit = extractelement <vscale x 4 x i1> %predicate, i64 0
256+
ret i1 %bit
257+
}
258+
259+
define i1 @whilehs_x2_first(i64 %next, i64 %end) {
260+
; CHECK-LABEL: whilehs_x2_first:
261+
; CHECK: // %bb.0:
262+
; CHECK-NEXT: whilehs { p0.s, p1.s }, x0, x1
263+
; CHECK-NEXT: cset w0, mi
264+
; CHECK-NEXT: ret
265+
%predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv4i1.i64(i64 %next, i64 %end)
266+
%predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
267+
%bit = extractelement <vscale x 4 x i1> %predicate, i64 0
268+
ret i1 %bit
269+
}
270+
271+
define i1 @whilele_x2_first(i64 %next, i64 %end) {
272+
; CHECK-LABEL: whilele_x2_first:
273+
; CHECK: // %bb.0:
274+
; CHECK-NEXT: whilele { p0.s, p1.s }, x0, x1
275+
; CHECK-NEXT: cset w0, mi
276+
; CHECK-NEXT: ret
277+
%predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilele.x2.nxv4i1.i64(i64 %next, i64 %end)
278+
%predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
279+
%bit = extractelement <vscale x 4 x i1> %predicate, i64 0
280+
ret i1 %bit
281+
}
282+
283+
define i1 @whilelo_x2_first(i64 %next, i64 %end) {
284+
; CHECK-LABEL: whilelo_x2_first:
285+
; CHECK: // %bb.0:
286+
; CHECK-NEXT: whilelo { p0.s, p1.s }, x0, x1
287+
; CHECK-NEXT: cset w0, mi
288+
; CHECK-NEXT: ret
289+
%predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv4i1.i64(i64 %next, i64 %end)
290+
%predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
291+
%bit = extractelement <vscale x 4 x i1> %predicate, i64 0
292+
ret i1 %bit
293+
}
294+
295+
define i1 @whilels_x2_first(i64 %next, i64 %end) {
296+
; CHECK-LABEL: whilels_x2_first:
297+
; CHECK: // %bb.0:
298+
; CHECK-NEXT: whilels { p0.s, p1.s }, x0, x1
299+
; CHECK-NEXT: cset w0, mi
300+
; CHECK-NEXT: ret
301+
%predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilels.x2.nxv4i1.i64(i64 %next, i64 %end)
302+
%predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
303+
%bit = extractelement <vscale x 4 x i1> %predicate, i64 0
304+
ret i1 %bit
305+
}
306+
307+
define i1 @whilelt_x2_first(i64 %next, i64 %end) {
308+
; CHECK-LABEL: whilelt_x2_first:
309+
; CHECK: // %bb.0:
310+
; CHECK-NEXT: whilelt { p0.s, p1.s }, x0, x1
311+
; CHECK-NEXT: cset w0, mi
312+
; CHECK-NEXT: ret
313+
%predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv4i1.i64(i64 %next, i64 %end)
314+
%predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 0
315+
%bit = extractelement <vscale x 4 x i1> %predicate, i64 0
316+
ret i1 %bit
317+
}
318+
319+
; Do not combine to ptest when the extract is not from the first vector result
320+
define i1 @whilege_x2_second_result(i64 %next, i64 %end) {
321+
; CHECK-LABEL: whilege_x2_second_result:
322+
; CHECK: // %bb.0:
323+
; CHECK-NEXT: whilege { p0.s, p1.s }, x0, x1
324+
; CHECK-NEXT: mov z0.s, p1/z, #1 // =0x1
325+
; CHECK-NEXT: fmov w8, s0
326+
; CHECK-NEXT: and w0, w8, #0x1
327+
; CHECK-NEXT: ret
328+
%predpair = call { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilege.x2.nxv4i1.i64(i64 %next, i64 %end)
329+
%predicate = extractvalue { <vscale x 4 x i1>, <vscale x 4 x i1> } %predpair, 1
330+
%bit = extractelement <vscale x 4 x i1> %predicate, i64 0
331+
ret i1 %bit
332+
}
333+
223334
declare i64 @llvm.vscale.i64()
224335
declare <vscale x 4 x i1> @llvm.aarch64.sve.whilege.nxv4i1.i64(i64, i64)
225336
declare <vscale x 4 x i1> @llvm.aarch64.sve.whilegt.nxv4i1.i64(i64, i64)
@@ -230,3 +341,12 @@ declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelo.nxv4i1.i64(i64, i64)
230341
declare <vscale x 4 x i1> @llvm.aarch64.sve.whilels.nxv4i1.i64(i64, i64)
231342
declare <vscale x 4 x i1> @llvm.aarch64.sve.whilelt.nxv4i1.i64(i64, i64)
232343
declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64, i64)
344+
345+
declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64, i64)
346+
declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64, i64)
347+
declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64, i64)
348+
declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64, i64)
349+
declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64, i64)
350+
declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64, i64)
351+
declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64, i64)
352+
declare { <vscale x 4 x i1>, <vscale x 4 x i1> } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64, i64)

llvm/test/CodeGen/AArch64/sve-ptest-removal-whilelo.mir

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -538,3 +538,154 @@ body: |
538538
RET_ReallyLR implicit $w0
539539
540540
...
541+
542+
# WHILELO (predicate pair)
543+
---
544+
name: whilelo_x2_b64_s64
545+
alignment: 2
546+
tracksRegLiveness: true
547+
registers:
548+
- { id: 0, class: gpr64 }
549+
- { id: 1, class: gpr64 }
550+
- { id: 2, class: ppr }
551+
- { id: 3, class: ppr2mul2 }
552+
- { id: 4, class: ppr }
553+
- { id: 5, class: ppr }
554+
- { id: 6, class: gpr32 }
555+
- { id: 7, class: gpr32 }
556+
liveins:
557+
- { reg: '$x0', virtual-reg: '%0' }
558+
- { reg: '$x1', virtual-reg: '%1' }
559+
frameInfo:
560+
maxCallFrameSize: 0
561+
body: |
562+
bb.0.entry:
563+
liveins: $x0, $x1
564+
565+
; CHECK-LABEL: name: whilelo_x2_b64_s64
566+
; CHECK: liveins: $x0, $x1
567+
; CHECK-NEXT: {{ $}}
568+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
569+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
570+
; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
571+
; CHECK-NEXT: [[WHILELO_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELO_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
572+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELO_2PXX_D]].psub0
573+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELO_2PXX_D]].psub1
574+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
575+
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
576+
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
577+
; CHECK-NEXT: RET_ReallyLR implicit $w0
578+
%0:gpr64 = COPY $x0
579+
%1:gpr64 = COPY $x1
580+
%2:ppr = PTRUE_D 31, implicit $vg
581+
%3:ppr2mul2 = WHILELO_2PXX_D %0, %1, implicit-def $nzcv
582+
%4:ppr = COPY %3.psub0
583+
%5:ppr = COPY %3.psub1
584+
PTEST_PP_FIRST killed %2, killed %4, implicit-def $nzcv
585+
%6:gpr32 = COPY $wzr
586+
%7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
587+
$w0 = COPY %7
588+
RET_ReallyLR implicit $w0
589+
...
590+
591+
# PTEST is not redundant when its Pg operand is a subregister copy, but not
592+
# from the first subregister of ppr2mul2
593+
---
594+
name: whilelo_x2_b64_s64_psub1
595+
alignment: 2
596+
tracksRegLiveness: true
597+
registers:
598+
- { id: 0, class: gpr64 }
599+
- { id: 1, class: gpr64 }
600+
- { id: 2, class: ppr }
601+
- { id: 3, class: ppr2mul2 }
602+
- { id: 4, class: ppr }
603+
- { id: 5, class: ppr }
604+
- { id: 6, class: gpr32 }
605+
- { id: 7, class: gpr32 }
606+
liveins:
607+
- { reg: '$x0', virtual-reg: '%0' }
608+
- { reg: '$x1', virtual-reg: '%1' }
609+
frameInfo:
610+
maxCallFrameSize: 0
611+
body: |
612+
bb.0.entry:
613+
liveins: $x0, $x1
614+
615+
; CHECK-LABEL: name: whilelo_x2_b64_s64_psub1
616+
; CHECK: liveins: $x0, $x1
617+
; CHECK-NEXT: {{ $}}
618+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
619+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
620+
; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
621+
; CHECK-NEXT: [[WHILELO_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELO_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
622+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELO_2PXX_D]].psub0
623+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELO_2PXX_D]].psub1
624+
; CHECK-NEXT: PTEST_PP_FIRST killed [[PTRUE_D]], killed [[COPY3]], implicit-def $nzcv
625+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
626+
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
627+
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
628+
; CHECK-NEXT: RET_ReallyLR implicit $w0
629+
%0:gpr64 = COPY $x0
630+
%1:gpr64 = COPY $x1
631+
%2:ppr = PTRUE_D 31, implicit $vg
632+
%3:ppr2mul2 = WHILELO_2PXX_D %0, %1, implicit-def $nzcv
633+
%4:ppr = COPY %3.psub0
634+
%5:ppr = COPY %3.psub1
635+
PTEST_PP_FIRST killed %2, killed %5, implicit-def $nzcv
636+
%6:gpr32 = COPY $wzr
637+
%7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
638+
$w0 = COPY %7
639+
RET_ReallyLR implicit $w0
640+
...
641+
642+
# PTEST is not redundant when its Pg operand is a copy from subregister 0
643+
# if the condition is not FIRST_ACTIVE
644+
---
645+
name: whilelo_x2_b64_s64_not_first
646+
alignment: 2
647+
tracksRegLiveness: true
648+
registers:
649+
- { id: 0, class: gpr64 }
650+
- { id: 1, class: gpr64 }
651+
- { id: 2, class: ppr }
652+
- { id: 3, class: ppr2mul2 }
653+
- { id: 4, class: ppr }
654+
- { id: 5, class: ppr }
655+
- { id: 6, class: gpr32 }
656+
- { id: 7, class: gpr32 }
657+
liveins:
658+
- { reg: '$x0', virtual-reg: '%0' }
659+
- { reg: '$x1', virtual-reg: '%1' }
660+
frameInfo:
661+
maxCallFrameSize: 0
662+
body: |
663+
bb.0.entry:
664+
liveins: $x0, $x1
665+
666+
; CHECK-LABEL: name: whilelo_x2_b64_s64_not_first
667+
; CHECK: liveins: $x0, $x1
668+
; CHECK-NEXT: {{ $}}
669+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
670+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
671+
; CHECK-NEXT: [[PTRUE_D:%[0-9]+]]:ppr = PTRUE_D 31, implicit $vg
672+
; CHECK-NEXT: [[WHILELO_2PXX_D:%[0-9]+]]:ppr2mul2 = WHILELO_2PXX_D [[COPY]], [[COPY1]], implicit-def $nzcv
673+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ppr = COPY [[WHILELO_2PXX_D]].psub0
674+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ppr = COPY [[WHILELO_2PXX_D]].psub1
675+
; CHECK-NEXT: PTEST_PP killed [[PTRUE_D]], killed [[COPY2]], implicit-def $nzcv
676+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
677+
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr [[COPY4]], $wzr, 0, implicit $nzcv
678+
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
679+
; CHECK-NEXT: RET_ReallyLR implicit $w0
680+
%0:gpr64 = COPY $x0
681+
%1:gpr64 = COPY $x1
682+
%2:ppr = PTRUE_D 31, implicit $vg
683+
%3:ppr2mul2 = WHILELO_2PXX_D %0, %1, implicit-def $nzcv
684+
%4:ppr = COPY %3.psub0
685+
%5:ppr = COPY %3.psub1
686+
PTEST_PP killed %2, killed %4, implicit-def $nzcv
687+
%6:gpr32 = COPY $wzr
688+
%7:gpr32 = CSINCWr %6, $wzr, 0, implicit $nzcv
689+
$w0 = COPY %7
690+
RET_ReallyLR implicit $w0
691+
...

0 commit comments

Comments
 (0)