Skip to content

Commit 47a3be9

Browse files
committed
[SelectionDAG] Fix AArch64 machine verifier bug when expanding LOOP_DEPENDENCE_MASK
We did not ensure that new condition codes like mi/pl were filtered out when swapping, and TargetConstant nodes don't match TableGen ImmLeaf patterns during instruction selection. When the zero TargetConstant created by the LOOP_DEPENDENCE_MASK expansion flowed into the AArch64 CCMP formation code, the machine verifier hit an assertion in expensive checks.
1 parent 0a6887f commit 47a3be9

File tree

5 files changed

+34
-31
lines changed

5 files changed

+34
-31
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1829,7 +1829,7 @@ SDValue VectorLegalizer::ExpandLOOP_DEPENDENCE_MASK(SDNode *N) {
18291829
// If the difference is positive then some elements may alias
18301830
EVT CmpVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
18311831
Diff.getValueType());
1832-
SDValue Zero = DAG.getTargetConstant(0, DL, PtrVT);
1832+
SDValue Zero = DAG.getConstant(0, DL, PtrVT);
18331833
SDValue Cmp = DAG.getSetCC(DL, CmpVT, Diff, Zero,
18341834
IsReadAfterWrite ? ISD::SETEQ : ISD::SETLE);
18351835

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOOP_DEPENDENCE_MASK(SDNode *N) {
413413
SDValue Diff = DAG.getNode(ISD::SUB, DL, PtrVT, SinkValue, SourceValue);
414414
EVT CmpVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
415415
Diff.getValueType());
416-
SDValue Zero = DAG.getTargetConstant(0, DL, PtrVT);
416+
SDValue Zero = DAG.getConstant(0, DL, PtrVT);
417417
return DAG.getNode(ISD::OR, DL, CmpVT,
418418
DAG.getSetCC(DL, CmpVT, Diff, EltSize, ISD::SETGE),
419419
DAG.getSetCC(DL, CmpVT, Diff, Zero, ISD::SETEQ));

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26013,9 +26013,12 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
2601326013
// Try again with the operands of the SUBS instruction and the condition
2601426014
// swapped. Due to canonicalization, this only helps for non-constant
2601526015
// operands of the SUBS instruction.
26016-
std::swap(CmpOpToMatch, CmpOpOther);
26017-
if (SDValue R = Fold(getSwappedCondition(CC), CmpOpToMatch, CmpOpToMatch))
26018-
return R;
26016+
auto NewCC = getSwappedCondition(CC);
26017+
if (NewCC != AArch64CC::AL) {
26018+
std::swap(CmpOpToMatch, CmpOpOther);
26019+
if (SDValue R = Fold(NewCC, CmpOpToMatch, CmpOpToMatch))
26020+
return R;
26021+
}
2601926022
return SDValue();
2602026023
}
2602126024

llvm/test/CodeGen/AArch64/alias_mask.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -793,9 +793,8 @@ define <1 x i1> @whilewr_8_scalarize(ptr %a, ptr %b) {
793793
; CHECK-LABEL: whilewr_8_scalarize:
794794
; CHECK: // %bb.0: // %entry
795795
; CHECK-NEXT: sub x8, x1, x0
796-
; CHECK-NEXT: cmp x8, #0
797-
; CHECK-NEXT: ccmp x8, #0, #4, le
798-
; CHECK-NEXT: cset w0, eq
796+
; CHECK-NEXT: cmn x8, #1
797+
; CHECK-NEXT: cset w0, gt
799798
; CHECK-NEXT: ret
800799
entry:
801800
%0 = call <1 x i1> @llvm.loop.dependence.war.mask.v1i1(ptr %a, ptr %b, i64 1)
@@ -845,9 +844,8 @@ define <1 x i1> @whilerw_8_scalarize(ptr %a, ptr %b) {
845844
; CHECK-LABEL: whilerw_8_scalarize:
846845
; CHECK: // %bb.0: // %entry
847846
; CHECK-NEXT: sub x8, x1, x0
848-
; CHECK-NEXT: cmp x8, #0
849-
; CHECK-NEXT: ccmp x8, #0, #4, le
850-
; CHECK-NEXT: cset w0, eq
847+
; CHECK-NEXT: cmn x8, #1
848+
; CHECK-NEXT: cset w0, gt
851849
; CHECK-NEXT: ret
852850
entry:
853851
%0 = call <1 x i1> @llvm.loop.dependence.raw.mask.v1i1(ptr %a, ptr %b, i64 1)

llvm/test/CodeGen/AArch64/loop-dependence-mask-ccmp.ll

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -19,32 +19,34 @@
1919
; Assembly output would look identical for both, so we must check machine IR.
2020

2121
define <1 x i1> @test_war_mask_ccmp(ptr %a, ptr %b) {
22-
; CHECK-LABEL: name: test_war_mask_ccmp
23-
; CHECK: bb.0.entry:
24-
; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
25-
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
26-
; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64common = SUBSXrr [[COPY1]], [[COPY]], implicit-def dead $nzcv
27-
; CHECK: [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri [[SUBSXrr]], 0, 0, implicit-def $nzcv
28-
; CHECK: CCMPXi [[SUBSXrr]], 0, 4, 13, implicit-def $nzcv, implicit $nzcv
29-
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
30-
; CHECK: $w0 = COPY [[CSINCWr]]
31-
; CHECK: RET_ReallyLR implicit $w0
22+
; CHECK-LABEL: name: test_war_mask_ccmp
23+
; CHECK: bb.0.entry:
24+
; CHECK-NEXT: liveins: $x0, $x1
25+
; CHECK-NEXT: {{ $}}
26+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1
27+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x0
28+
; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64common = SUBSXrr [[COPY]], [[COPY1]], implicit-def dead $nzcv
29+
; CHECK-NEXT: [[ADDSXri:%[0-9]+]]:gpr64 = ADDSXri killed [[SUBSXrr]], 1, 0, implicit-def $nzcv
30+
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
31+
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
32+
; CHECK-NEXT: RET_ReallyLR implicit $w0
3233
entry:
3334
%0 = call <1 x i1> @llvm.loop.dependence.war.mask.v1i1(ptr %a, ptr %b, i64 1)
3435
ret <1 x i1> %0
3536
}
3637

3738
define <1 x i1> @test_raw_mask_ccmp(ptr %a, ptr %b) {
38-
; CHECK-LABEL: name: test_raw_mask_ccmp
39-
; CHECK: bb.0.entry:
40-
; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
41-
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
42-
; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64common = SUBSXrr [[COPY1]], [[COPY]], implicit-def dead $nzcv
43-
; CHECK: [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri [[SUBSXrr]], 0, 0, implicit-def $nzcv
44-
; CHECK: CCMPXi [[SUBSXrr]], 0, 4, 13, implicit-def $nzcv, implicit $nzcv
45-
; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
46-
; CHECK: $w0 = COPY [[CSINCWr]]
47-
; CHECK: RET_ReallyLR implicit $w0
39+
; CHECK-LABEL: name: test_raw_mask_ccmp
40+
; CHECK: bb.0.entry:
41+
; CHECK-NEXT: liveins: $x0, $x1
42+
; CHECK-NEXT: {{ $}}
43+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1
44+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x0
45+
; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64common = SUBSXrr [[COPY]], [[COPY1]], implicit-def dead $nzcv
46+
; CHECK-NEXT: [[ADDSXri:%[0-9]+]]:gpr64 = ADDSXri killed [[SUBSXrr]], 1, 0, implicit-def $nzcv
47+
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
48+
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
49+
; CHECK-NEXT: RET_ReallyLR implicit $w0
4850
entry:
4951
%0 = call <1 x i1> @llvm.loop.dependence.raw.mask.v1i1(ptr %a, ptr %b, i64 1)
5052
ret <1 x i1> %0

0 commit comments

Comments
 (0)