Skip to content

Commit d831f8d

Browse files
authored
[SelectionDAG] Fix AArch64 machine verifier bug when expanding LOOP_DEPENDENCE_MASK (#168221)
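The LOOP_DEPENDENCE_MASK expansion and scalarization code built the zero operand of its SETCC with getTargetConstant. TargetConstant nodes don't match TableGen ImmLeaf patterns during instruction selection, so when this zero constant flows into the AArch64 CCMP formation code, the machine verifier hits an assertion in expensive checks. Use getConstant for the zero operand instead. Fixes: #168227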
1 parent f5b7376 commit d831f8d

File tree

4 files changed

+51
-8
lines changed

4 files changed

+51
-8
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1829,7 +1829,7 @@ SDValue VectorLegalizer::ExpandLOOP_DEPENDENCE_MASK(SDNode *N) {
18291829
// If the difference is positive then some elements may alias
18301830
EVT CmpVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
18311831
Diff.getValueType());
1832-
SDValue Zero = DAG.getTargetConstant(0, DL, PtrVT);
1832+
SDValue Zero = DAG.getConstant(0, DL, PtrVT);
18331833
SDValue Cmp = DAG.getSetCC(DL, CmpVT, Diff, Zero,
18341834
IsReadAfterWrite ? ISD::SETEQ : ISD::SETLE);
18351835

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOOP_DEPENDENCE_MASK(SDNode *N) {
413413
SDValue Diff = DAG.getNode(ISD::SUB, DL, PtrVT, SinkValue, SourceValue);
414414
EVT CmpVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
415415
Diff.getValueType());
416-
SDValue Zero = DAG.getTargetConstant(0, DL, PtrVT);
416+
SDValue Zero = DAG.getConstant(0, DL, PtrVT);
417417
return DAG.getNode(ISD::OR, DL, CmpVT,
418418
DAG.getSetCC(DL, CmpVT, Diff, EltSize, ISD::SETGE),
419419
DAG.getSetCC(DL, CmpVT, Diff, Zero, ISD::SETEQ));

llvm/test/CodeGen/AArch64/alias_mask.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -793,9 +793,8 @@ define <1 x i1> @whilewr_8_scalarize(ptr %a, ptr %b) {
793793
; CHECK-LABEL: whilewr_8_scalarize:
794794
; CHECK: // %bb.0: // %entry
795795
; CHECK-NEXT: sub x8, x1, x0
796-
; CHECK-NEXT: cmp x8, #0
797-
; CHECK-NEXT: ccmp x8, #0, #4, le
798-
; CHECK-NEXT: cset w0, eq
796+
; CHECK-NEXT: cmn x8, #1
797+
; CHECK-NEXT: cset w0, gt
799798
; CHECK-NEXT: ret
800799
entry:
801800
%0 = call <1 x i1> @llvm.loop.dependence.war.mask.v1i1(ptr %a, ptr %b, i64 1)
@@ -845,9 +844,8 @@ define <1 x i1> @whilerw_8_scalarize(ptr %a, ptr %b) {
845844
; CHECK-LABEL: whilerw_8_scalarize:
846845
; CHECK: // %bb.0: // %entry
847846
; CHECK-NEXT: sub x8, x1, x0
848-
; CHECK-NEXT: cmp x8, #0
849-
; CHECK-NEXT: ccmp x8, #0, #4, le
850-
; CHECK-NEXT: cset w0, eq
847+
; CHECK-NEXT: cmn x8, #1
848+
; CHECK-NEXT: cset w0, gt
851849
; CHECK-NEXT: ret
852850
entry:
853851
%0 = call <1 x i1> @llvm.loop.dependence.raw.mask.v1i1(ptr %a, ptr %b, i64 1)
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=aarch64 -mattr=+sve2 -verify-machineinstrs -stop-after=finalize-isel %s -o - | FileCheck %s
3+
4+
; Regression test for a bug where getTargetConstant(0) was used instead of
5+
; getConstant(0) in ScalarizeVecRes_LOOP_DEPENDENCE_MASK, causing instruction
6+
; selection to incorrectly generate CCMPXr (register form) with an immediate
7+
; operand instead of CCMPXi (immediate form).
8+
;
9+
10+
define <1 x i1> @test_war_mask_ccmp(ptr %a, ptr %b) {
11+
; CHECK-LABEL: name: test_war_mask_ccmp
12+
; CHECK: bb.0.entry:
13+
; CHECK-NEXT: liveins: $x0, $x1
14+
; CHECK-NEXT: {{ $}}
15+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1
16+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x0
17+
; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64common = SUBSXrr [[COPY]], [[COPY1]], implicit-def dead $nzcv
18+
; CHECK-NEXT: [[ADDSXri:%[0-9]+]]:gpr64 = ADDSXri killed [[SUBSXrr]], 1, 0, implicit-def $nzcv
19+
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
20+
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
21+
; CHECK-NEXT: RET_ReallyLR implicit $w0
22+
entry:
23+
%0 = call <1 x i1> @llvm.loop.dependence.war.mask.v1i1(ptr %a, ptr %b, i64 1)
24+
ret <1 x i1> %0
25+
}
26+
27+
define <1 x i1> @test_raw_mask_ccmp(ptr %a, ptr %b) {
28+
; CHECK-LABEL: name: test_raw_mask_ccmp
29+
; CHECK: bb.0.entry:
30+
; CHECK-NEXT: liveins: $x0, $x1
31+
; CHECK-NEXT: {{ $}}
32+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1
33+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x0
34+
; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64common = SUBSXrr [[COPY]], [[COPY1]], implicit-def dead $nzcv
35+
; CHECK-NEXT: [[ADDSXri:%[0-9]+]]:gpr64 = ADDSXri killed [[SUBSXrr]], 1, 0, implicit-def $nzcv
36+
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
37+
; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
38+
; CHECK-NEXT: RET_ReallyLR implicit $w0
39+
entry:
40+
%0 = call <1 x i1> @llvm.loop.dependence.raw.mask.v1i1(ptr %a, ptr %b, i64 1)
41+
ret <1 x i1> %0
42+
}
43+
44+
declare <1 x i1> @llvm.loop.dependence.war.mask.v1i1(ptr, ptr, i64)
45+
declare <1 x i1> @llvm.loop.dependence.raw.mask.v1i1(ptr, ptr, i64)

0 commit comments

Comments
 (0)