Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5514,7 +5514,10 @@ bool AArch64TTIImpl::isProfitableToSinkOperands(
NumZExts++;
}

Ops.push_back(&Insert->getOperandUse(1));
// And(Load) is excluded to prevent CGP from getting stuck in a loop of
// sinking the And, only to hoist it back up to the load again.
if (!match(OperandInstr, m_And(m_Load(m_Value()), m_Value())))
Ops.push_back(&Insert->getOperandUse(1));
Ops.push_back(&Shuffle->getOperandUse(0));
Ops.push_back(&Op);
}
Expand Down
50 changes: 48 additions & 2 deletions llvm/test/CodeGen/AArch64/aarch64-dup-ext-crash.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ target triple = "aarch64-unknown-linux-gnu"
; here, only that this case no longer causes said crash.
define dso_local i32 @dupext_crashtest(i32 %e) local_unnamed_addr {
; CHECK-LABEL: dupext_crashtest:
; CHECK: // %bb.0: // %for.body.lr.ph
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: dup v0.2s, w0
; CHECK-NEXT: .LBB0_1: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
Expand All @@ -18,7 +18,7 @@ define dso_local i32 @dupext_crashtest(i32 %e) local_unnamed_addr {
; CHECK-NEXT: xtn v1.2s, v1.2d
; CHECK-NEXT: str d1, [x8]
; CHECK-NEXT: b .LBB0_1
for.body.lr.ph:
entry:
%conv314 = zext i32 %e to i64
br label %vector.memcheck

Expand All @@ -40,3 +40,49 @@ vector.body: ; preds = %vector.body, %vecto
store <2 x i32> %3, ptr %4, align 4
br label %vector.body
}

; This test used to get stuck in a loop of hoisting the `and` to the load and then sinking it back to the mull.
define i32 @dup_and_load(ptr %p, i1 %c) {
; CHECK-LABEL: dup_and_load:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov x8, x0
; CHECK-NEXT: ldrb w0, [x0]
; CHECK-NEXT: tbz w1, #0, .LBB1_3
; CHECK-NEXT: // %bb.1: // %ph
; CHECK-NEXT: dup v0.8h, w0
; CHECK-NEXT: mov w9, wzr
; CHECK-NEXT: .LBB1_2: // %vector.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr d1, [x8]
; CHECK-NEXT: add w9, w9, #1
; CHECK-NEXT: cmp w9, #100
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h
; CHECK-NEXT: umull v1.4s, v0.4h, v1.4h
; CHECK-NEXT: stp q1, q2, [x8]
; CHECK-NEXT: b.lt .LBB1_2
; CHECK-NEXT: .LBB1_3: // %end
; CHECK-NEXT: ret
entry:
%l = load i32, ptr %p
%and255 = and i32 %l, 255
br i1 %c, label %ph, label %end

ph:
%broadcast.splatinsert = insertelement <8 x i32> poison, i32 %and255, i32 0
%broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> poison, <8 x i32> zeroinitializer
br label %vector.body

vector.body: ; preds = %vector.body, %ph
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this need to be a loop in order to trigger the crash? Just wondering if it could be a normal block to reduce the lines of IR.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is OK - it was copied from the test above and modified enough to trigger the issue.

%iv = phi i32 [ 0, %ph ], [ %iv.next, %vector.body ]
%wide.load = load <8 x i8>, ptr %p, align 4
%0 = zext <8 x i8> %wide.load to <8 x i32>
%1 = mul <8 x i32> %broadcast.splat, %0
store <8 x i32> %1, ptr %p, align 4
%iv.next = add i32 %iv, 1
%e = icmp slt i32 %iv.next, 100
br i1 %e, label %vector.body, label %end

end:
ret i32 %and255
}
Loading