Skip to content

Commit 6e68fa9

Browse files
committed
[SLP]Fix PR106909: add a check for unsafe FP operations.
NEON has non-IEEE-compliant denormal flushing, so the compiler should check whether it is safe to vectorize instructions for NEON in non-fast-math mode. Fixes #106909
1 parent 803ab28 commit 6e68fa9

File tree

2 files changed

+32
-4
lines changed

2 files changed

+32
-4
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6584,6 +6584,13 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
65846584
OrdersType &CurrentOrder, SmallVectorImpl<Value *> &PointerOps) const {
65856585
assert(S.MainOp && "Expected instructions with same/alternate opcodes only.");
65866586

6587+
if (S.MainOp->getType()->isFloatingPointTy() &&
6588+
TTI->isFPVectorizationPotentiallyUnsafe() && any_of(VL, [](Value *V) {
6589+
auto *I = dyn_cast<Instruction>(V);
6590+
return I && (I->isBinaryOp() || isa<CallInst>(I)) && !I->isFast();
6591+
}))
6592+
return TreeEntry::NeedToGather;
6593+
65876594
unsigned ShuffleOrOp =
65886595
S.isAltShuffle() ? (unsigned)Instruction::ShuffleVector : S.getOpcode();
65896596
auto *VL0 = cast<Instruction>(S.OpValue);

llvm/test/Transforms/SLPVectorizer/ARM/invalid-fp-operations.ll

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,31 @@
44
define void @test(ptr %a, ptr %b, ptr %r) {
55
; CHECK-LABEL: define void @test(
66
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[R:%.*]]) #[[ATTR0:[0-9]+]] {
7-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[A]], align 4
8-
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[B]], align 4
9-
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP1]], [[TMP2]]
10-
; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[R]], align 4
7+
; CHECK-NEXT: [[V_A_0:%.*]] = load float, ptr [[A]], align 4
8+
; CHECK-NEXT: [[A_1:%.*]] = getelementptr i8, ptr [[A]], i64 4
9+
; CHECK-NEXT: [[V_A_1:%.*]] = load float, ptr [[A_1]], align 4
10+
; CHECK-NEXT: [[A_2:%.*]] = getelementptr i8, ptr [[A]], i64 8
11+
; CHECK-NEXT: [[V_A_2:%.*]] = load float, ptr [[A_2]], align 4
12+
; CHECK-NEXT: [[A_3:%.*]] = getelementptr i8, ptr [[A]], i64 12
13+
; CHECK-NEXT: [[V_A_3:%.*]] = load float, ptr [[A_3]], align 4
14+
; CHECK-NEXT: [[V_B_0:%.*]] = load float, ptr [[B]], align 4
15+
; CHECK-NEXT: [[B_1:%.*]] = getelementptr i8, ptr [[B]], i64 4
16+
; CHECK-NEXT: [[V_B_1:%.*]] = load float, ptr [[B_1]], align 4
17+
; CHECK-NEXT: [[B_2:%.*]] = getelementptr i8, ptr [[B]], i64 8
18+
; CHECK-NEXT: [[V_B_2:%.*]] = load float, ptr [[B_2]], align 4
19+
; CHECK-NEXT: [[B_3:%.*]] = getelementptr i8, ptr [[B]], i64 12
20+
; CHECK-NEXT: [[V_B_3:%.*]] = load float, ptr [[B_3]], align 4
21+
; CHECK-NEXT: [[V_R_0:%.*]] = fadd float [[V_A_0]], [[V_B_0]]
22+
; CHECK-NEXT: [[V_R_1:%.*]] = fadd float [[V_A_1]], [[V_B_1]]
23+
; CHECK-NEXT: [[V_R_2:%.*]] = fadd float [[V_A_2]], [[V_B_2]]
24+
; CHECK-NEXT: [[V_R_3:%.*]] = fadd float [[V_A_3]], [[V_B_3]]
25+
; CHECK-NEXT: store float [[V_R_0]], ptr [[R]], align 4
26+
; CHECK-NEXT: [[R_1:%.*]] = getelementptr i8, ptr [[R]], i64 4
27+
; CHECK-NEXT: store float [[V_R_1]], ptr [[R_1]], align 4
28+
; CHECK-NEXT: [[R_2:%.*]] = getelementptr i8, ptr [[R]], i64 8
29+
; CHECK-NEXT: store float [[V_R_2]], ptr [[R_2]], align 4
30+
; CHECK-NEXT: [[R_3:%.*]] = getelementptr i8, ptr [[R]], i64 12
31+
; CHECK-NEXT: store float [[V_R_3]], ptr [[R_3]], align 4
1132
; CHECK-NEXT: ret void
1233
;
1334
%v.a.0 = load float, ptr %a

0 commit comments

Comments
 (0)