Skip to content

Commit 360a02c

Browse files
davemgreen authored and aokblast committed
[DAG][AArch64] Ensure that ResNo is correct for uses of Ptr when considering postinc. (llvm#164810)
We might be looking at a different use, for example in the uses of an i32,i64,ch preindex load. Fixes llvm#164775
1 parent 3782145 commit 360a02c

File tree

2 files changed

+78
-1
lines changed

2 files changed

+78
-1
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19993,8 +19993,12 @@ static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
1999319993
// nor a successor of N. Otherwise, if Op is folded that would
1999419994
// create a cycle.
1999519995
unsigned MaxSteps = SelectionDAG::getHasPredecessorMaxSteps();
19996-
for (SDNode *Op : Ptr->users()) {
19996+
for (SDUse &U : Ptr->uses()) {
19997+
if (U.getResNo() != Ptr.getResNo())
19998+
continue;
19999+
1999720000
// Check for #1.
20001+
SDNode *Op = U.getUser();
1999820002
if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
1999920003
continue;
2000020004

Lines changed: 73 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,73 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc < %s -O3 -mtriple=aarch64 | FileCheck %s
3+
4+
; From #164775, this generates a pre-index load feeding a post-index store, that
5+
; was checking the wrong uses for post-inc. It seems quite delicate for it to
6+
; generate this combination at the wrong point to hit the same issue.
7+
8+
@g_260 = dso_local global i16 0
9+
@g_480 = dso_local global i16 0
10+
11+
define i32 @func_1(ptr %l_3253) {
12+
; CHECK-LABEL: func_1:
13+
; CHECK: // %bb.0: // %entry
14+
; CHECK-NEXT: sub sp, sp, #128
15+
; CHECK-NEXT: .cfi_def_cfa_offset 128
16+
; CHECK-NEXT: movi v0.2d, #0000000000000000
17+
; CHECK-NEXT: mov w9, #2 // =0x2
18+
; CHECK-NEXT: mov w10, #96 // =0x60
19+
; CHECK-NEXT: strb wzr, [x9]
20+
; CHECK-NEXT: mov w9, #111 // =0x6f
21+
; CHECK-NEXT: mov x8, xzr
22+
; CHECK-NEXT: str wzr, [x9]
23+
; CHECK-NEXT: mov w9, #80 // =0x50
24+
; CHECK-NEXT: adrp x1, .L_MergedGlobals
25+
; CHECK-NEXT: add x1, x1, :lo12:.L_MergedGlobals
26+
; CHECK-NEXT: strh wzr, [x8]
27+
; CHECK-NEXT: str q0, [x9]
28+
; CHECK-NEXT: mov w9, #48 // =0x30
29+
; CHECK-NEXT: str q0, [x9]
30+
; CHECK-NEXT: mov w9, #32 // =0x20
31+
; CHECK-NEXT: str q0, [x10]
32+
; CHECK-NEXT: mov w10, #64 // =0x40
33+
; CHECK-NEXT: str q0, [x9]
34+
; CHECK-NEXT: mov w9, #16 // =0x10
35+
; CHECK-NEXT: str q0, [x10]
36+
; CHECK-NEXT: str q0, [x9]
37+
; CHECK-NEXT: str q0, [x8]
38+
; CHECK-NEXT: adrp x8, .L_MergedGlobals
39+
; CHECK-NEXT: strb wzr, [x0, #8]
40+
; CHECK-NEXT: strb wzr, [x0, #12]
41+
; CHECK-NEXT: strb wzr, [x0, #16]
42+
; CHECK-NEXT: strb wzr, [x0, #20]
43+
; CHECK-NEXT: mov w0, wzr
44+
; CHECK-NEXT: ldrh wzr, [x8, :lo12:.L_MergedGlobals]
45+
; CHECK-NEXT: ldrh w8, [x1, #4]!
46+
; CHECK-NEXT: sub w8, w8, #1
47+
; CHECK-NEXT: strh w8, [x1]
48+
; CHECK-NEXT: add sp, sp, #128
49+
; CHECK-NEXT: b use
50+
entry:
51+
%l_32531.sroa.3 = alloca [3 x i8], align 4
52+
%l_32531.sroa.4 = alloca [115 x i8], align 4
53+
call void @llvm.lifetime.start.p0(ptr %l_32531.sroa.3)
54+
call void @llvm.lifetime.start.p0(ptr %l_32531.sroa.4)
55+
call void @llvm.memset.p0.i64(ptr null, i8 0, i64 3, i1 false)
56+
call void @llvm.memset.p0.i64(ptr null, i8 0, i64 115, i1 false)
57+
%0 = getelementptr inbounds i8, ptr %l_3253, i64 8
58+
store i8 0, ptr %0, align 4
59+
%1 = getelementptr inbounds i8, ptr %l_3253, i64 12
60+
store i8 0, ptr %1, align 4
61+
%2 = getelementptr inbounds i8, ptr %l_3253, i64 16
62+
store i8 0, ptr %2, align 4
63+
%3 = getelementptr inbounds i8, ptr %l_3253, i64 20
64+
store i8 0, ptr %3, align 4
65+
%4 = load volatile i16, ptr @g_260, align 4
66+
%5 = load i16, ptr @g_480, align 4
67+
%dec.i.i = add i16 %5, -1
68+
store i16 %dec.i.i, ptr @g_480, align 4
69+
%call1 = tail call i32 @use(i32 0, ptr @g_480)
70+
ret i32 %call1
71+
}
72+
73+
declare i32 @use(i32, ptr)

0 commit comments

Comments
 (0)