Skip to content

Commit 144736b

Browse files
authored
[VPlan] Don't fold live ins with both scalar and vector operands (llvm#154067)
If we end up with an extract_element VPInstruction where both operands are live-ins, we will try to fold the live-ins even though the first operand is a vector whilst the live-in is scalar. This fixes it by just returning the live-in used as the vector operand (a scalar that is implicitly broadcast) instead of calling the folder, and removes the handling for insertelement where we aren't able to do the fold. From some quick testing we previously never hit this fold anyway, and were probably just missing test coverage. Fixes llvm#154045
1 parent c24c23d commit 144736b

File tree

2 files changed

+77
-3
lines changed

2 files changed

+77
-3
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -980,10 +980,11 @@ static Value *tryToFoldLiveIns(const VPRecipeBase &R, unsigned Opcode,
980980
return Folder.FoldGEP(IntegerType::getInt8Ty(TypeInfo.getContext()), Ops[0],
981981
Ops[1],
982982
cast<VPRecipeWithIRFlags>(R).getGEPNoWrapFlags());
983-
case Instruction::InsertElement:
984-
return Folder.FoldInsertElement(Ops[0], Ops[1], Ops[2]);
983+
// An extract of a live-in is an extract of a broadcast, so return the
984+
// broadcasted element.
985985
case Instruction::ExtractElement:
986-
return Folder.FoldExtractElement(Ops[0], Ops[1]);
986+
assert(!Ops[0]->getType()->isVectorTy() && "Live-ins should be scalar");
987+
return Ops[0];
987988
}
988989
return nullptr;
989990
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
2+
; RUN: opt -p loop-vectorize -force-vector-width=2 -force-widen-divrem-via-safe-divisor=false -S %s | FileCheck %s
3+
4+
; Make sure we don't try to fold an Instruction::ExtractElement ir<0>, ir<0>,
5+
; since we can't materialize the live-in for the vector operand.
6+
7+
; Test input: the loop below conditionally evaluates `srem i64 0, %x` (only
; when %c is false) and stores the truncated result to %p on every iteration.
define void @pr154045(ptr %p, i1 %c, i64 %x) {
8+
; CHECK-LABEL: define void @pr154045(
9+
; CHECK-SAME: ptr [[P:%.*]], i1 [[C:%.*]], i64 [[X:%.*]]) {
10+
; CHECK-NEXT: [[ENTRY:.*:]]
11+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
12+
; CHECK: [[VECTOR_PH]]:
13+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0
14+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
15+
; CHECK-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
16+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
17+
; CHECK: [[VECTOR_BODY]]:
18+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
19+
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_SREM_IF:.*]], label %[[PRED_SREM_CONTINUE:.*]]
20+
; CHECK: [[PRED_SREM_IF]]:
21+
; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE]]
22+
; CHECK: [[PRED_SREM_CONTINUE]]:
23+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
24+
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_SREM_IF1:.*]], label %[[PRED_SREM_CONTINUE2:.*]]
25+
; CHECK: [[PRED_SREM_IF1]]:
26+
; CHECK-NEXT: br label %[[PRED_SREM_CONTINUE2]]
27+
; CHECK: [[PRED_SREM_CONTINUE2]]:
28+
; CHECK-NEXT: store i32 0, ptr [[P]], align 4
29+
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
30+
; CHECK: [[MIDDLE_BLOCK]]:
31+
; CHECK-NEXT: br label %[[EXIT:.*]]
32+
; CHECK: [[SCALAR_PH]]:
33+
; CHECK-NEXT: br label %[[LOOP:.*]]
34+
; CHECK: [[LOOP]]:
35+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ]
36+
; CHECK-NEXT: br i1 [[C]], label %[[LATCH]], label %[[ELSE:.*]]
37+
; CHECK: [[ELSE]]:
38+
; CHECK-NEXT: [[REM:%.*]] = srem i64 0, [[X]]
39+
; CHECK-NEXT: br label %[[LATCH]]
40+
; CHECK: [[LATCH]]:
41+
; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[REM]], %[[ELSE]] ], [ 0, %[[LOOP]] ]
42+
; CHECK-NEXT: [[PHI_TRUNC:%.*]] = trunc i64 [[PHI]] to i32
43+
; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[PHI_TRUNC]], 0
44+
; CHECK-NEXT: store i32 [[SHL]], ptr [[P]], align 4
45+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
46+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1
47+
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
48+
; CHECK: [[EXIT]]:
49+
; CHECK-NEXT: ret void
50+
;
51+
entry:
52+
br label %loop
53+
54+
loop:
55+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
56+
; The branch condition %c is a function argument, so it has the same value on
; every iteration of the loop.
br i1 %c, label %latch, label %else
57+
58+
else:
59+
; Only reached when %c is false. Both srem operands (the constant 0 and the
; argument %x) are defined outside the loop.
%rem = srem i64 0, %x
60+
br label %latch
61+
62+
latch:
63+
%phi = phi i64 [ %rem, %else ], [ 0, %loop ]
64+
%phi.trunc = trunc i64 %phi to i32
65+
%shl = shl i32 %phi.trunc, 0
66+
store i32 %shl, ptr %p
67+
%iv.next = add i64 %iv, 1
68+
; The pre-increment %iv is compared against 1, so the loop exits after the
; iterations with %iv = 0 and %iv = 1 (two iterations in total).
%exitcond = icmp eq i64 %iv, 1
69+
br i1 %exitcond, label %exit, label %loop
70+
71+
exit:
72+
ret void
73+
}

0 commit comments

Comments
 (0)