Skip to content

Commit af4367a

Browse files
authored
[InstCombine] Skip foldFBinOpOfIntCastsFromSign for vector ops (#162804)
Converting a vector float op into a vector int op may be unprofitable, especially for targets where the float op for a given type is legal but the integer op is not. We could of course also try to address this via a reverse transform in the backend, but I don't think it's worth the bother, given that vectors were never the intended use case for this transform in the first place. Fixes #162749.
1 parent 89c9f7e commit af4367a

File tree

3 files changed

+31
-3
lines changed

3 files changed

+31
-3
lines changed

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1690,6 +1690,11 @@ Instruction *InstCombinerImpl::foldFBinOpOfIntCastsFromSign(
16901690
// 2) (fp_binop ({s|u}itofp x), FpC)
16911691
// -> ({s|u}itofp (int_binop x, (fpto{s|u}i FpC)))
16921692
Instruction *InstCombinerImpl::foldFBinOpOfIntCasts(BinaryOperator &BO) {
1693+
// Don't perform the fold on vectors, as the integer operation may be much
1694+
// more expensive than the float operation in that case.
1695+
if (BO.getType()->isVectorTy())
1696+
return nullptr;
1697+
16931698
std::array<Value *, 2> IntOps = {nullptr, nullptr};
16941699
Constant *Op1FpC = nullptr;
16951700
// Check for:

llvm/test/Transforms/InstCombine/add-sitofp.ll

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,12 +99,15 @@ define float @test_3(i32 %a, i32 %b) {
9999
ret float %p
100100
}
101101

102+
; Don't perform the fold on vector operations, as the integer op may be
103+
; much more expensive than the float op in that case.
102104
define <4 x double> @test_4(<4 x i32> %a, <4 x i32> %b) {
103105
; CHECK-LABEL: @test_4(
104106
; CHECK-NEXT: [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 1073741823)
105107
; CHECK-NEXT: [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], splat (i32 1073741823)
106-
; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i32> [[A_AND]], [[B_AND]]
107-
; CHECK-NEXT: [[RES:%.*]] = uitofp nneg <4 x i32> [[TMP1]] to <4 x double>
108+
; CHECK-NEXT: [[A_AND_FP:%.*]] = uitofp nneg <4 x i32> [[A_AND]] to <4 x double>
109+
; CHECK-NEXT: [[B_AND_FP:%.*]] = uitofp nneg <4 x i32> [[B_AND]] to <4 x double>
110+
; CHECK-NEXT: [[RES:%.*]] = fadd <4 x double> [[A_AND_FP]], [[B_AND_FP]]
108111
; CHECK-NEXT: ret <4 x double> [[RES]]
109112
;
110113
; Drop two highest bits to guarantee that %a + %b doesn't overflow

llvm/test/Transforms/InstCombine/binop-itofp.ll

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1063,6 +1063,25 @@ define float @negzero_check_on_constant_for_si_fmul(i1 %c, i1 %.b, ptr %g_2345)
10631063
ret float %mul3.i.i
10641064
}
10651065

1066+
; Don't perform the fold on vector operations, as the integer op may be
1067+
; much more expensive than the float op in that case.
1068+
define <2 x half> @test_ui_ui_i8_mul_vec(<2 x i8> noundef %x_in, <2 x i8> noundef %y_in) {
1069+
; CHECK-LABEL: @test_ui_ui_i8_mul_vec(
1070+
; CHECK-NEXT: [[X:%.*]] = and <2 x i8> [[X_IN:%.*]], splat (i8 15)
1071+
; CHECK-NEXT: [[Y:%.*]] = and <2 x i8> [[Y_IN:%.*]], splat (i8 15)
1072+
; CHECK-NEXT: [[XF:%.*]] = uitofp nneg <2 x i8> [[X]] to <2 x half>
1073+
; CHECK-NEXT: [[YF:%.*]] = uitofp nneg <2 x i8> [[Y]] to <2 x half>
1074+
; CHECK-NEXT: [[R:%.*]] = fmul <2 x half> [[XF]], [[YF]]
1075+
; CHECK-NEXT: ret <2 x half> [[R]]
1076+
;
1077+
%x = and <2 x i8> %x_in, splat (i8 15)
1078+
%y = and <2 x i8> %y_in, splat (i8 15)
1079+
%xf = uitofp <2 x i8> %x to <2 x half>
1080+
%yf = uitofp <2 x i8> %y to <2 x half>
1081+
%r = fmul <2 x half> %xf, %yf
1082+
ret <2 x half> %r
1083+
}
1084+
10661085
define <2 x float> @nonzero_check_on_constant_for_si_fmul_vec_w_poison(i1 %c, i1 %.b, ptr %g_2345) {
10671086
; CHECK-LABEL: @nonzero_check_on_constant_for_si_fmul_vec_w_poison(
10681087
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i32 65529, i32 53264
@@ -1091,8 +1110,9 @@ define <2 x float> @nonzero_check_on_constant_for_si_fmul_nz_vec_w_poison(i1 %c,
10911110
; CHECK-NEXT: [[CONV_I_V:%.*]] = insertelement <2 x i16> poison, i16 [[CONV_I_S]], i64 0
10921111
; CHECK-NEXT: [[CONV_I:%.*]] = shufflevector <2 x i16> [[CONV_I_V]], <2 x i16> poison, <2 x i32> zeroinitializer
10931112
; CHECK-NEXT: [[MUL3_I_I:%.*]] = sitofp <2 x i16> [[CONV_I]] to <2 x float>
1113+
; CHECK-NEXT: [[MUL3_I_I1:%.*]] = fmul <2 x float> [[MUL3_I_I]], <float poison, float 1.000000e+00>
10941114
; CHECK-NEXT: store i32 [[SEL]], ptr [[G_2345:%.*]], align 4
1095-
; CHECK-NEXT: ret <2 x float> [[MUL3_I_I]]
1115+
; CHECK-NEXT: ret <2 x float> [[MUL3_I_I1]]
10961116
;
10971117
%sel = select i1 %c, i32 65529, i32 53264
10981118
%conv.i.s = trunc i32 %sel to i16

0 commit comments

Comments
 (0)