Skip to content

Commit dae8a51

Browse files
committed
[AArch64] Don't try to sink and(load)
If we sink the and in and(load), CGP can hoist it back again to the load, getting into an infinite loop. This prevents sinking the and in this case. Fixes #122074
1 parent c05fc9b commit dae8a51

File tree

2 files changed

+50
-1
lines changed

2 files changed

+50
-1
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5514,7 +5514,10 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
55145514
NumZExts++;
55155515
}
55165516

5517-
Ops.push_back(&Insert->getOperandUse(1));
5517+
// And(Load) is excluded to prevent CGP getting stuck in a loop of sinking
5518+
// the And, just to hoist it again back to the load.
5519+
if (!match(OperandInstr, m_And(m_Load(m_Value()), m_Value())))
5520+
Ops.push_back(&Insert->getOperandUse(1));
55185521
Ops.push_back(&Shuffle->getOperandUse(0));
55195522
Ops.push_back(&Op);
55205523
}

llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,3 +40,49 @@ vector.body: ; preds = %vector.body, %vecto
4040
store <2 x i32> %3, ptr %4, align 4
4141
br label %vector.body
4242
}
43+
44+
; This test got stuck in a loop hoisting the and to the load, and sinking it back to the mull
45+
define i32 @dup_and_load(ptr %p, i1 %c) {
46+
; CHECK-LABEL: dup_and_load:
47+
; CHECK: // %bb.0: // %for.body.lr.ph
48+
; CHECK-NEXT: mov x8, x0
49+
; CHECK-NEXT: ldrb w0, [x0]
50+
; CHECK-NEXT: tbz w1, #0, .LBB1_3
51+
; CHECK-NEXT: // %bb.1: // %ph
52+
; CHECK-NEXT: dup v0.8h, w0
53+
; CHECK-NEXT: mov w9, wzr
54+
; CHECK-NEXT: .LBB1_2: // %vector.body
55+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
56+
; CHECK-NEXT: ldr d1, [x8]
57+
; CHECK-NEXT: add w9, w9, #1
58+
; CHECK-NEXT: cmp w9, #100
59+
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
60+
; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h
61+
; CHECK-NEXT: umull v1.4s, v0.4h, v1.4h
62+
; CHECK-NEXT: stp q1, q2, [x8]
63+
; CHECK-NEXT: b.lt .LBB1_2
64+
; CHECK-NEXT: .LBB1_3: // %end
65+
; CHECK-NEXT: ret
66+
for.body.lr.ph:
67+
%l = load i32, ptr %p
68+
%conv314 = and i32 %l, 255
69+
br i1 %c, label %ph, label %end
70+
71+
ph:
72+
%broadcast.splatinsert = insertelement <8 x i32> poison, i32 %conv314, i32 0
73+
%broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> poison, <8 x i32> zeroinitializer
74+
br label %vector.body
75+
76+
vector.body: ; preds = %vector.body, %vector.ph
77+
%iv = phi i32 [ 0, %ph ], [ %iv.next, %vector.body ]
78+
%wide.load = load <8 x i8>, ptr %p, align 4
79+
%0 = zext <8 x i8> %wide.load to <8 x i32>
80+
%1 = mul <8 x i32> %broadcast.splat, %0
81+
store <8 x i32> %1, ptr %p, align 4
82+
%iv.next = add i32 %iv, 1
83+
%e = icmp slt i32 %iv.next, 100
84+
br i1 %e, label %vector.body, label %end
85+
86+
end:
87+
ret i32 %conv314
88+
}

0 commit comments

Comments
 (0)