Skip to content

Commit 5e05939

Browse files
committed
merge add and fadd
1 parent 3756060 commit 5e05939

File tree

2 files changed

+73
-21
lines changed

2 files changed

+73
-21
lines changed

llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -73,35 +73,38 @@ static bool isIntrinsicExpansion(Function &F) {
7373
}
7474
return false;
7575
}
76+
static Value *expandVecReduceAdd(CallInst *Orig, Intrinsic::ID IntrinsicId) {
77+
assert(IntrinsicId == Intrinsic::vector_reduce_add ||
78+
IntrinsicId == Intrinsic::vector_reduce_fadd);
7679

77-
static Value *expandVecReduceFAdd(CallInst *Orig) {
78-
// Note: vector_reduce_fadd first argument is a starting value
79-
// Our use doesn't need it, so ignoring argument zero.
80-
Value *X = Orig->getOperand(1);
8180
IRBuilder<> Builder(Orig);
81+
bool IsFAdd = (IntrinsicId == Intrinsic::vector_reduce_fadd);
82+
83+
// Define the addition operation based on the intrinsic ID.
84+
auto AddOp = [&Builder, IsFAdd](Value *Sum, Value *Elt) {
85+
return IsFAdd ? Builder.CreateFAdd(Sum, Elt) : Builder.CreateAdd(Sum, Elt);
86+
};
87+
88+
Value *X = Orig->getOperand(IsFAdd ? 1 : 0);
8289
Type *Ty = X->getType();
8390
auto *XVec = dyn_cast<FixedVectorType>(Ty);
8491
unsigned XVecSize = XVec->getNumElements();
8592
Value *Sum = Builder.CreateExtractElement(X, static_cast<uint64_t>(0));
86-
for (unsigned I = 1; I < XVecSize; I++) {
87-
Value *Elt = Builder.CreateExtractElement(X, I);
88-
Sum = Builder.CreateFAdd(Sum, Elt);
89-
}
90-
return Sum;
91-
}
9293

93-
static Value *expandVecReduceAdd(CallInst *Orig) {
94-
Value *X = Orig->getOperand(0);
95-
IRBuilder<> Builder(Orig);
96-
Type *Ty = X->getType();
97-
auto *XVec = dyn_cast<FixedVectorType>(Ty);
98-
unsigned XVecSize = XVec->getNumElements();
94+
// Fold the fadd start value (operand 0) into Sum, but only when it is a Constant whose isZeroValue() is false; any other start value is not added.
95+
if (IsFAdd) {
96+
llvm::Constant *StartValue =
97+
llvm::dyn_cast<llvm::Constant>(Orig->getOperand(0));
98+
if (StartValue && !StartValue->isZeroValue())
99+
Sum = Builder.CreateFAdd(Sum, StartValue);
100+
}
99101

100-
Value *Sum = Builder.CreateExtractElement(X, static_cast<uint64_t>(0));
102+
// Accumulate the remaining vector elements.
101103
for (unsigned I = 1; I < XVecSize; I++) {
102104
Value *Elt = Builder.CreateExtractElement(X, I);
103-
Sum = Builder.CreateAdd(Sum, Elt);
105+
Sum = AddOp(Sum, Elt);
104106
}
107+
105108
return Sum;
106109
}
107110

@@ -614,10 +617,8 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
614617
Result = expandRadiansIntrinsic(Orig);
615618
break;
616619
case Intrinsic::vector_reduce_add:
617-
Result = expandVecReduceAdd(Orig);
618-
break;
619620
case Intrinsic::vector_reduce_fadd:
620-
Result = expandVecReduceFAdd(Orig);
621+
Result = expandVecReduceAdd(Orig, IntrinsicId);
621622
break;
622623
}
623624
if (Result) {

llvm/test/CodeGen/DirectX/vector_reduce_add.ll

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,21 @@ entry:
1717
ret half %rdx.fadd
1818
}
1919

20+
define noundef half @test_length_half2_start1(<2 x half> noundef %p0) {
21+
; CHECK-LABEL: define noundef half @test_length_half2_start1(
22+
; CHECK-SAME: <2 x half> noundef [[P0:%.*]]) {
23+
; CHECK-NEXT: [[ENTRY:.*:]]
24+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x half> [[P0]], i64 0
25+
; CHECK-NEXT: [[TMP1:%.*]] = fadd half [[TMP0]], 0xH0001
26+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x half> [[P0]], i64 1
27+
; CHECK-NEXT: [[TMP3:%.*]] = fadd half [[TMP1]], [[TMP2]]
28+
; CHECK-NEXT: ret half [[TMP3]]
29+
;
30+
entry:
31+
%rdx.fadd = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0001, <2 x half> %p0)
32+
ret half %rdx.fadd
33+
}
34+
2035
define noundef half @test_length_half3(<3 x half> noundef %p0) {
2136
; CHECK-LABEL: define noundef half @test_length_half3(
2237
; CHECK-SAME: <3 x half> noundef [[P0:%.*]]) {
@@ -81,6 +96,23 @@ entry:
8196
ret float %rdx.fadd
8297
}
8398

99+
define noundef float @test_length_float3_start1(<3 x float> noundef %p0) {
100+
; CHECK-LABEL: define noundef float @test_length_float3_start1(
101+
; CHECK-SAME: <3 x float> noundef [[P0:%.*]]) {
102+
; CHECK-NEXT: [[ENTRY:.*:]]
103+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x float> [[P0]], i64 0
104+
; CHECK-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
105+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x float> [[P0]], i64 1
106+
; CHECK-NEXT: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
107+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x float> [[P0]], i64 2
108+
; CHECK-NEXT: [[TMP5:%.*]] = fadd float [[TMP3]], [[TMP4]]
109+
; CHECK-NEXT: ret float [[TMP5]]
110+
;
111+
entry:
112+
%rdx.fadd = call float @llvm.vector.reduce.fadd.v3f32(float 1.000000e+00, <3 x float> %p0)
113+
ret float %rdx.fadd
114+
}
115+
84116
define noundef float @test_length_float4(<4 x float> noundef %p0) {
85117
; CHECK-LABEL: define noundef float @test_length_float4(
86118
; CHECK-SAME: <4 x float> noundef [[P0:%.*]]) {
@@ -147,6 +179,25 @@ entry:
147179
ret double %rdx.fadd
148180
}
149181

182+
define noundef double @test_length_double4_start1(<4 x double> noundef %p0) {
183+
; CHECK-LABEL: define noundef double @test_length_double4_start1(
184+
; CHECK-SAME: <4 x double> noundef [[P0:%.*]]) {
185+
; CHECK-NEXT: [[ENTRY:.*:]]
186+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x double> [[P0]], i64 0
187+
; CHECK-NEXT: [[TMP1:%.*]] = fadd double [[TMP0]], 1.000000e+00
188+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x double> [[P0]], i64 1
189+
; CHECK-NEXT: [[TMP3:%.*]] = fadd double [[TMP1]], [[TMP2]]
190+
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x double> [[P0]], i64 2
191+
; CHECK-NEXT: [[TMP5:%.*]] = fadd double [[TMP3]], [[TMP4]]
192+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[P0]], i64 3
193+
; CHECK-NEXT: [[TMP7:%.*]] = fadd double [[TMP5]], [[TMP6]]
194+
; CHECK-NEXT: ret double [[TMP7]]
195+
;
196+
entry:
197+
%rdx.fadd = call double @llvm.vector.reduce.fadd.v4f64(double 1.000000e+00, <4 x double> %p0)
198+
ret double %rdx.fadd
199+
}
200+
150201
define noundef i16 @test_length_short2(<2 x i16> noundef %p0) {
151202
; CHECK-LABEL: define noundef i16 @test_length_short2(
152203
; CHECK-SAME: <2 x i16> noundef [[P0:%.*]]) {

0 commit comments

Comments
 (0)