-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[InstCombine] Transform vector.reduce.add and splat into multiplication
#161020
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
fa77c2c
fb492be
e9cc989
a8b32af
d11a108
01eb571
0adef1d
d2f235e
045f0ef
027efe7
8e2c2e5
ff6491b
38ca5ce
78a00dd
b86f985
2dd6052
91e53d4
8c31848
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -64,6 +64,7 @@ | |||||||||
| #include "llvm/Support/KnownBits.h" | ||||||||||
| #include "llvm/Support/KnownFPClass.h" | ||||||||||
| #include "llvm/Support/MathExtras.h" | ||||||||||
| #include "llvm/Support/TypeSize.h" | ||||||||||
| #include "llvm/Support/raw_ostream.h" | ||||||||||
| #include "llvm/Transforms/InstCombine/InstCombiner.h" | ||||||||||
| #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" | ||||||||||
|
|
@@ -3761,6 +3762,27 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { | |||||||||
| return replaceInstUsesWith(CI, Res); | ||||||||||
| } | ||||||||||
| } | ||||||||||
|
|
||||||||||
| // Handle the case where a splat is summed. In that case we have a | ||||||||||
| // multiplication. For example: %2 = insertelement <4 x i32> poison, i32 | ||||||||||
| // %0, i64 0 %3 = shufflevector <4 x i32> %2, poison, <4 x i32> | ||||||||||
| // zeroinitializer %4 = tail call i32 @llvm.vector.reduce.add.v4i32(%3) | ||||||||||
| // => | ||||||||||
| // %2 = mul i32 %0, 4 | ||||||||||
| if (Value *Splat = getSplatValue(Arg)) { | ||||||||||
| // It is only a multiplication if we add the same element over and over. | ||||||||||
| assert(Arg->getType()->isVectorTy() && | ||||||||||
| "The vector.reduce.add intrinsic's argument must be a vector!"); | ||||||||||
spaits marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||
| ElementCount ReducedVectorElementCount = | ||||||||||
| static_cast<VectorType *>(Arg->getType())->getElementCount(); | ||||||||||
|
||||||||||
| static_cast<VectorType *>(Arg->getType())->getElementCount(); | |
| cast<VectorType>(Arg->getType())->getElementCount(); |
And remove the assert: cast<VectorType> already asserts that the type is a vector, so the explicit check is redundant.
spaits marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| Value *Res = | |
| Builder.CreateMul(Splat, ConstantInt::get(SplatType, VectorSize)); | |
| return replaceInstUsesWith(CI, Res); | |
| return BinaryOperator::CreateMul(Splat, ConstantInt::get(SplatType, VectorSize)); |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -308,3 +308,148 @@ define i32 @diff_of_sums_type_mismatch2(<8 x i32> %v0, <4 x i32> %v1) { | |
| %r = sub i32 %r0, %r1 | ||
| ret i32 %r | ||
| } | ||
|
|
||
nikic marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| define i32 @constant_multiplied_at_0(i32 %0) { | ||
| ; CHECK-LABEL: @constant_multiplied_at_0( | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 2 | ||
| ; CHECK-NEXT: ret i32 [[TMP2]] | ||
| ; | ||
| %2 = insertelement <4 x i32> poison, i32 %0, i64 0 | ||
| %3 = shufflevector <4 x i32> %2, <4 x i32> poison, <4 x i32> zeroinitializer | ||
| %4 = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %3) | ||
| ret i32 %4 | ||
| } | ||
|
|
||
| define i64 @constant_multiplied_at_0_64bits(i64 %0) { | ||
| ; CHECK-LABEL: @constant_multiplied_at_0_64bits( | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 2 | ||
| ; CHECK-NEXT: ret i64 [[TMP2]] | ||
| ; | ||
| %2 = insertelement <4 x i64> poison, i64 %0, i64 0 | ||
| %3 = shufflevector <4 x i64> %2, <4 x i64> poison, <4 x i32> zeroinitializer | ||
| %4 = tail call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %3) | ||
| ret i64 %4 | ||
| } | ||
|
|
||
| define i32 @constant_multiplied_at_0_two_pow8(i32 %0) { | ||
| ; CHECK-LABEL: @constant_multiplied_at_0_two_pow8( | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 3 | ||
| ; CHECK-NEXT: ret i32 [[TMP2]] | ||
| ; | ||
| %2 = insertelement <4 x i32> poison, i32 %0, i64 0 | ||
| %3 = shufflevector <4 x i32> %2, <4 x i32> poison, <8 x i32> zeroinitializer | ||
| %4 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %3) | ||
| ret i32 %4 | ||
| } | ||
|
|
||
|
|
||
| define i32 @constant_multiplied_at_0_two_pow16(i32 %0) { | ||
| ; CHECK-LABEL: @constant_multiplied_at_0_two_pow16( | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 4 | ||
| ; CHECK-NEXT: ret i32 [[TMP2]] | ||
| ; | ||
| %2 = insertelement <4 x i32> poison, i32 %0, i64 0 | ||
| %3 = shufflevector <4 x i32> %2, <4 x i32> poison, <16 x i32> zeroinitializer | ||
| %4 = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %3) | ||
| ret i32 %4 | ||
| } | ||
|
|
||
|
|
||
| define i32 @constant_multiplied_at_1(i32 %0) { | ||
| ; CHECK-LABEL: @constant_multiplied_at_1( | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 2 | ||
| ; CHECK-NEXT: ret i32 [[TMP2]] | ||
| ; | ||
| %2 = insertelement <4 x i32> poison, i32 %0, i64 1 | ||
| %3 = shufflevector <4 x i32> %2, <4 x i32> poison, | ||
| <4 x i32> <i32 1, i32 1, i32 1, i32 1> | ||
| %4 = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %3) | ||
| ret i32 %4 | ||
| } | ||
|
|
||
| define i32 @negative_constant_multiplied_at_1(i32 %0) { | ||
| ; CHECK-LABEL: @negative_constant_multiplied_at_1( | ||
| ; CHECK-NEXT: ret i32 poison | ||
| ; | ||
| %2 = insertelement <4 x i32> poison, i32 %0, i64 1 | ||
| %3 = shufflevector <4 x i32> %2, <4 x i32> poison, <4 x i32> zeroinitializer | ||
| %4 = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %3) | ||
| ret i32 %4 | ||
| } | ||
|
|
||
| define i32 @constant_multiplied_non_power_of_2(i32 %0) { | ||
| ; CHECK-LABEL: @constant_multiplied_non_power_of_2( | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP0:%.*]], 6 | ||
| ; CHECK-NEXT: ret i32 [[TMP2]] | ||
| ; | ||
| %2 = insertelement <4 x i32> poison, i32 %0, i64 0 | ||
| %3 = shufflevector <4 x i32> %2, <4 x i32> poison, <6 x i32> zeroinitializer | ||
| %4 = tail call i32 @llvm.vector.reduce.add.v6i32(<6 x i32> %3) | ||
| ret i32 %4 | ||
| } | ||
|
|
||
| define i64 @constant_multiplied_non_power_of_2_i64(i64 %0) { | ||
| ; CHECK-LABEL: @constant_multiplied_non_power_of_2_i64( | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP0:%.*]], 6 | ||
| ; CHECK-NEXT: ret i64 [[TMP2]] | ||
| ; | ||
| %2 = insertelement <4 x i64> poison, i64 %0, i64 0 | ||
| %3 = shufflevector <4 x i64> %2, <4 x i64> poison, <6 x i32> zeroinitializer | ||
| %4 = tail call i64 @llvm.vector.reduce.add.v6i64(<6 x i64> %3) | ||
| ret i64 %4 | ||
| } | ||
|
|
||
| define i1 @constant_multiplied_non_power_of_2_i1(i1 %0) { | ||
| ; CHECK-LABEL: @constant_multiplied_non_power_of_2_i1( | ||
| ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i1> poison, i1 [[TMP0:%.*]], i64 0 | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP6]], <8 x i1> poison, <8 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i1> [[TMP3]] to i8 | ||
| ; CHECK-NEXT: [[TMP5:%.*]] = call range(i8 0, 9) i8 @llvm.ctpop.i8(i8 [[TMP4]]) | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = trunc i8 [[TMP5]] to i1 | ||
| ; CHECK-NEXT: ret i1 [[TMP2]] | ||
| ; | ||
| %2 = insertelement <8 x i1> poison, i1 %0, i32 0 | ||
| %3 = shufflevector <8 x i1> %2, <8 x i1> poison, <8 x i32> zeroinitializer | ||
| %4 = tail call i1 @llvm.vector.reduce.add.v8i1(<8 x i1> %3) | ||
| ret i1 %4 | ||
| } | ||
|
|
||
| define i1 @constant_multiplied_non_power_of_2_i1x4(i1 %0) { | ||
| ; CHECK-LABEL: @constant_multiplied_non_power_of_2_i1x4( | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i1> poison, i1 [[TMP0:%.*]], i64 0 | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> poison, <4 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4 | ||
| ; CHECK-NEXT: [[TMP5:%.*]] = call range(i4 0, 5) i4 @llvm.ctpop.i4(i4 [[TMP4]]) | ||
| ; CHECK-NEXT: [[TMP6:%.*]] = trunc i4 [[TMP5]] to i1 | ||
| ; CHECK-NEXT: ret i1 [[TMP6]] | ||
| ; | ||
| %2 = insertelement <4 x i1> poison, i1 %0, i32 0 | ||
| %3 = shufflevector <4 x i1> %2, <4 x i1> poison, <4 x i32> zeroinitializer | ||
| %4 = tail call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> %3) | ||
| ret i1 %4 | ||
| } | ||
|
|
||
| define i1 @constant_multiplied_non_power_of_2_i1x2(i1 %0) { | ||
| ; CHECK-LABEL: @constant_multiplied_non_power_of_2_i1x2( | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i1> poison, i1 [[TMP0:%.*]], i64 0 | ||
| ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i1> [[TMP2]], <2 x i1> poison, <2 x i32> zeroinitializer | ||
| ; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i1> [[TMP3]] to i2 | ||
| ; CHECK-NEXT: [[TMP5:%.*]] = call range(i2 0, -1) i2 @llvm.ctpop.i2(i2 [[TMP4]]) | ||
| ; CHECK-NEXT: [[TMP6:%.*]] = trunc i2 [[TMP5]] to i1 | ||
|
||
| ; CHECK-NEXT: ret i1 [[TMP6]] | ||
| ; | ||
| %2 = insertelement <2 x i1> poison, i1 %0, i32 0 | ||
| %3 = shufflevector <2 x i1> %2, <2 x i1> poison, <2 x i32> zeroinitializer | ||
| %4 = tail call i1 @llvm.vector.reduce.add.v2i1(<2 x i1> %3) | ||
| ret i1 %4 | ||
| } | ||
|
|
||
| define i2 @constant_multiplied_non_power_of_2_i2x4(i2 %0) { | ||
| ; CHECK-LABEL: @constant_multiplied_non_power_of_2_i2x4( | ||
| ; CHECK-NEXT: ret i2 0 | ||
| ; | ||
| %2 = insertelement <4 x i2> poison, i2 %0, i32 0 | ||
| %3 = shufflevector <4 x i2> %2, <4 x i2> poison, <4 x i32> zeroinitializer | ||
| %4 = tail call i2 @llvm.vector.reduce.add.v4i2(<4 x i2> %3) | ||
| ret i2 %4 | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.