Skip to content

Commit 77adbe6

Browse files
committed
[SLP] fix fast-math requirements for fmin/fmax reductions
a6f0221 enabled intersection of FMF on reduction instructions, so it is safe to ease the check here. There is still some room to improve here - it looks like we have nearly duplicate flags propagation logic inside of the LoopUtils helper, but it is limited to targets that do not form reduction intrinsics (they form the shuffle expansion).
1 parent 1bc8dab commit 77adbe6

File tree

3 files changed

+9
-23
lines changed

3 files changed

+9
-23
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6422,9 +6422,7 @@ class HorizontalReduction {
64226422
// FP min/max are associative except for NaN and -0.0. We do not
64236423
// have to rule out -0.0 here because the intrinsic semantics do not
64246424
// specify a fixed result for it.
6425-
// TODO: This is artificially restricted to fast because the code that
6426-
// creates reductions assumes/produces fast ops.
6427-
return I->getFastMathFlags().isFast();
6425+
return I->getFastMathFlags().noNaNs();
64286426
}
64296427

64306428
return I->isAssociative();

llvm/test/Transforms/SLPVectorizer/X86/fmaxnum.ll

Lines changed: 4 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -361,21 +361,15 @@ define float @reduction_v4f32_fast(float* %p) {
361361
ret float %m3
362362
}
363363

364-
; TODO: This should become a reduce intrinsic.
365-
366364
define float @reduction_v4f32_nnan(float* %p) {
367365
; CHECK-LABEL: @reduction_v4f32_nnan(
368366
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
369367
; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
370368
; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
371-
; CHECK-NEXT: [[T0:%.*]] = load float, float* [[P]], align 4
372-
; CHECK-NEXT: [[T1:%.*]] = load float, float* [[G1]], align 4
373-
; CHECK-NEXT: [[T2:%.*]] = load float, float* [[G2]], align 4
374-
; CHECK-NEXT: [[T3:%.*]] = load float, float* [[G3]], align 4
375-
; CHECK-NEXT: [[M1:%.*]] = tail call nnan float @llvm.maxnum.f32(float [[T1]], float [[T0]])
376-
; CHECK-NEXT: [[M2:%.*]] = tail call nnan float @llvm.maxnum.f32(float [[T2]], float [[M1]])
377-
; CHECK-NEXT: [[M3:%.*]] = tail call nnan float @llvm.maxnum.f32(float [[T3]], float [[M2]])
378-
; CHECK-NEXT: ret float [[M3]]
369+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
370+
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
371+
; CHECK-NEXT: [[TMP3:%.*]] = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP2]])
372+
; CHECK-NEXT: ret float [[TMP3]]
379373
;
380374
%g1 = getelementptr inbounds float, float* %p, i64 1
381375
%g2 = getelementptr inbounds float, float* %p, i64 2

llvm/test/Transforms/SLPVectorizer/X86/fminnum.ll

Lines changed: 4 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -361,21 +361,15 @@ define float @reduction_v4f32_fast(float* %p) {
361361
ret float %m3
362362
}
363363

364-
; TODO: This should become a reduce intrinsic.
365-
366364
define float @reduction_v4f32_nnan(float* %p) {
367365
; CHECK-LABEL: @reduction_v4f32_nnan(
368366
; CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
369367
; CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
370368
; CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds float, float* [[P]], i64 3
371-
; CHECK-NEXT: [[T0:%.*]] = load float, float* [[P]], align 4
372-
; CHECK-NEXT: [[T1:%.*]] = load float, float* [[G1]], align 4
373-
; CHECK-NEXT: [[T2:%.*]] = load float, float* [[G2]], align 4
374-
; CHECK-NEXT: [[T3:%.*]] = load float, float* [[G3]], align 4
375-
; CHECK-NEXT: [[M1:%.*]] = tail call nnan float @llvm.minnum.f32(float [[T1]], float [[T0]])
376-
; CHECK-NEXT: [[M2:%.*]] = tail call nnan float @llvm.minnum.f32(float [[T2]], float [[M1]])
377-
; CHECK-NEXT: [[M3:%.*]] = tail call nnan float @llvm.minnum.f32(float [[T3]], float [[M2]])
378-
; CHECK-NEXT: ret float [[M3]]
369+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to <4 x float>*
370+
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
371+
; CHECK-NEXT: [[TMP3:%.*]] = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP2]])
372+
; CHECK-NEXT: ret float [[TMP3]]
379373
;
380374
%g1 = getelementptr inbounds float, float* %p, i64 1
381375
%g2 = getelementptr inbounds float, float* %p, i64 2

0 commit comments

Comments
 (0)