Skip to content

Commit 94c48a2

Browse files
authored
[AArch64][SVE] Fix hang in VECTOR_HISTOGRAM DAG combine (#152539)
The histogram DAG combine went into an infinite loop, repeatedly creating the same histogram node, due to incorrect use of the `refineUniformBase` and `refineIndexType` APIs. These APIs take SDValues by reference (SDValue&) and return `true` if the values were "refined" (i.e., set to new values). Previously, this DAG combine created the `Ops` array (used to create the new histogram node) before calling the `refine*` APIs. Because building the array copies the SDValues, the refined values were never used when creating the new histogram node, so the combine kept producing an identical node. The fix builds `Ops` only after the `refine*` calls have run. Reproducer: https://godbolt.org/z/hsGWhTaqY (it will time out)
1 parent e9d71ef commit 94c48a2

File tree

2 files changed

+76
-7
lines changed

2 files changed

+76
-7
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12843,22 +12843,21 @@ SDValue DAGCombiner::visitMHISTOGRAM(SDNode *N) {
1284312843
SDLoc DL(HG);
1284412844

1284512845
EVT MemVT = HG->getMemoryVT();
12846+
EVT DataVT = Index.getValueType();
1284612847
MachineMemOperand *MMO = HG->getMemOperand();
1284712848
ISD::MemIndexType IndexType = HG->getIndexType();
1284812849

1284912850
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
1285012851
return Chain;
1285112852

12852-
SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index,
12853-
HG->getScale(), HG->getIntID()};
12854-
if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL))
12853+
if (refineUniformBase(BasePtr, Index, HG->isIndexScaled(), DAG, DL) ||
12854+
refineIndexType(Index, IndexType, DataVT, DAG)) {
12855+
SDValue Ops[] = {Chain, Inc, Mask, BasePtr, Index,
12856+
HG->getScale(), HG->getIntID()};
1285512857
return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
1285612858
MMO, IndexType);
12859+
}
1285712860

12858-
EVT DataVT = Index.getValueType();
12859-
if (refineIndexType(Index, IndexType, DataVT, DAG))
12860-
return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, Ops,
12861-
MMO, IndexType);
1286212861
return SDValue();
1286312862
}
1286412863

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mattr=+sve2 -verify-machineinstrs < %s -o - | FileCheck %s
3+
4+
target triple = "aarch64-unknown-linux-gnu"
5+
6+
; This test is reduced from a real-world example that would cause the DAGCombiner to hang.
7+
8+
define void @histcnt_loop(ptr %0, i64 %1, ptr %2, i64 %3, i64 %4) {
9+
; CHECK-LABEL: histcnt_loop:
10+
; CHECK: // %bb.0: // %entry
11+
; CHECK-NEXT: mov z0.d, #1 // =0x1
12+
; CHECK-NEXT: ptrue p0.d
13+
; CHECK-NEXT: mov x8, xzr
14+
; CHECK-NEXT: add x9, x0, x1
15+
; CHECK-NEXT: .LBB0_1: // %loop
16+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
17+
; CHECK-NEXT: ld1h { z1.d }, p0/z, [x0, x8, lsl #1]
18+
; CHECK-NEXT: lsl x10, x8, #1
19+
; CHECK-NEXT: add x11, x0, x10
20+
; CHECK-NEXT: add x10, x9, x10
21+
; CHECK-NEXT: lsl z1.d, z1.d, #1
22+
; CHECK-NEXT: ld1h { z4.d }, p0/z, [x11, #1, mul vl]
23+
; CHECK-NEXT: ld1h { z5.d }, p0/z, [x10, #1, mul vl]
24+
; CHECK-NEXT: histcnt z2.d, p0/z, z1.d, z1.d
25+
; CHECK-NEXT: ld1h { z3.d }, p0/z, [x2, z1.d]
26+
; CHECK-NEXT: mad z2.d, p0/m, z0.d, z3.d
27+
; CHECK-NEXT: ld1h { z3.d }, p0/z, [x9, x8, lsl #1]
28+
; CHECK-NEXT: add x8, x8, x3
29+
; CHECK-NEXT: cmp x4, x8
30+
; CHECK-NEXT: st1h { z2.d }, p0, [x2, z1.d]
31+
; CHECK-NEXT: lsl z1.d, z4.d, #1
32+
; CHECK-NEXT: histcnt z2.d, p0/z, z1.d, z1.d
33+
; CHECK-NEXT: ld1h { z4.d }, p0/z, [x2, z1.d]
34+
; CHECK-NEXT: mad z2.d, p0/m, z0.d, z4.d
35+
; CHECK-NEXT: st1h { z2.d }, p0, [x2, z1.d]
36+
; CHECK-NEXT: lsl z1.d, z3.d, #1
37+
; CHECK-NEXT: histcnt z2.d, p0/z, z1.d, z1.d
38+
; CHECK-NEXT: ld1h { z3.d }, p0/z, [x2, z1.d]
39+
; CHECK-NEXT: mad z2.d, p0/m, z0.d, z3.d
40+
; CHECK-NEXT: st1h { z2.d }, p0, [x2, z1.d]
41+
; CHECK-NEXT: lsl z1.d, z5.d, #1
42+
; CHECK-NEXT: histcnt z2.d, p0/z, z1.d, z1.d
43+
; CHECK-NEXT: ld1h { z3.d }, p0/z, [x2, z1.d]
44+
; CHECK-NEXT: mad z2.d, p0/m, z0.d, z3.d
45+
; CHECK-NEXT: st1h { z2.d }, p0, [x2, z1.d]
46+
; CHECK-NEXT: b.ne .LBB0_1
47+
; CHECK-NEXT: // %bb.2: // %exit
48+
; CHECK-NEXT: ret
49+
entry:
50+
br label %loop
51+
52+
loop:
53+
%6 = phi i64 [ 0, %entry ], [ %15, %loop ]
54+
%7 = getelementptr inbounds nuw i16, ptr %0, i64 %6
55+
%8 = getelementptr inbounds nuw i8, ptr %7, i64 %1
56+
%9 = load <vscale x 4 x i16>, ptr %7, align 2
57+
%10 = load <vscale x 4 x i16>, ptr %8, align 2
58+
%11 = zext <vscale x 4 x i16> %9 to <vscale x 4 x i64>
59+
%12 = zext <vscale x 4 x i16> %10 to <vscale x 4 x i64>
60+
%13 = getelementptr inbounds nuw [16 x i16], ptr %2, i64 0, <vscale x 4 x i64> %11
61+
%14 = getelementptr inbounds nuw [16 x i16], ptr %2, i64 0, <vscale x 4 x i64> %12
62+
call void @llvm.experimental.vector.histogram.add.nxv4p0.i16(<vscale x 4 x ptr> %13, i16 1, <vscale x 4 x i1> splat (i1 true))
63+
call void @llvm.experimental.vector.histogram.add.nxv4p0.i16(<vscale x 4 x ptr> %14, i16 1, <vscale x 4 x i1> splat (i1 true))
64+
%15 = add nuw i64 %6, %3
65+
%16 = icmp eq i64 %15, %4
66+
br i1 %16, label %exit, label %loop
67+
68+
exit:
69+
ret void
70+
}

0 commit comments

Comments
 (0)