Skip to content

Commit 3756060

Browse files
committed
[DirectX] Add support for vector_reduce_add
1 parent 9919295 commit 3756060

File tree

2 files changed

+331
-0
lines changed

2 files changed

+331
-0
lines changed

llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,44 @@ static bool isIntrinsicExpansion(Function &F) {
6767
case Intrinsic::dx_sign:
6868
case Intrinsic::dx_step:
6969
case Intrinsic::dx_radians:
70+
case Intrinsic::vector_reduce_add:
71+
case Intrinsic::vector_reduce_fadd:
7072
return true;
7173
}
7274
return false;
7375
}
7476

77+
/// Expand a call to llvm.vector.reduce.fadd into scalar IR.
///
/// Operand 0 of the intrinsic is the scalar start value and operand 1 is the
/// vector being reduced. Every lane is extracted and accumulated, in lane
/// order, through a chain of scalar fadd instructions inserted before \p Orig.
///
/// \param Orig the vector_reduce_fadd call to expand.
/// \returns the scalar value holding the reduction result.
static Value *expandVecReduceFAdd(CallInst *Orig) {
  Value *Start = Orig->getOperand(0);
  Value *X = Orig->getOperand(1);
  IRBuilder<> Builder(Orig);

  // cast<> instead of dyn_cast<>: the result is dereferenced unconditionally,
  // so an unexpected operand type should trip the cast assertion rather than
  // become a null-pointer dereference.
  auto *XVec = cast<FixedVectorType>(X->getType());
  unsigned XVecSize = XVec->getNumElements();

  Value *Sum = Builder.CreateExtractElement(X, static_cast<uint64_t>(0));

  // The start value takes part in the reduction, so dropping it is only valid
  // when it is known to be zero. A constant zero is skipped so the common
  // zero-start case keeps its minimal expansion; anything else (a non-zero
  // constant or a runtime value) is folded in ahead of the remaining lanes.
  // NOTE(review): a +0.0 start is not strictly neutral for signed zeros
  // (+0.0 + -0.0 == +0.0) -- confirm skipping it is acceptable here.
  auto *StartC = dyn_cast<Constant>(Start);
  if (!StartC || !StartC->isZeroValue())
    Sum = Builder.CreateFAdd(Start, Sum);

  for (unsigned I = 1; I < XVecSize; I++) {
    Value *Elt = Builder.CreateExtractElement(X, I);
    Sum = Builder.CreateFAdd(Sum, Elt);
  }
  return Sum;
}
92+
93+
/// Expand a call to llvm.vector.reduce.add into scalar IR.
///
/// Operand 0 of the intrinsic is the vector being reduced. Every lane is
/// extracted and accumulated, in lane order, through a chain of scalar add
/// instructions inserted before \p Orig.
///
/// \param Orig the vector_reduce_add call to expand.
/// \returns the scalar value holding the reduction result.
static Value *expandVecReduceAdd(CallInst *Orig) {
  Value *X = Orig->getOperand(0);
  IRBuilder<> Builder(Orig);

  // cast<> instead of dyn_cast<>: the result is dereferenced unconditionally,
  // so an unexpected operand type should trip the cast assertion rather than
  // become a null-pointer dereference.
  auto *XVec = cast<FixedVectorType>(X->getType());
  unsigned XVecSize = XVec->getNumElements();

  // Seed the accumulator with lane 0, then add the remaining lanes in order.
  Value *Sum = Builder.CreateExtractElement(X, static_cast<uint64_t>(0));
  for (unsigned I = 1; I < XVecSize; I++) {
    Value *Elt = Builder.CreateExtractElement(X, I);
    Sum = Builder.CreateAdd(Sum, Elt);
  }
  return Sum;
}
107+
75108
static Value *expandAbs(CallInst *Orig) {
76109
Value *X = Orig->getOperand(0);
77110
IRBuilder<> Builder(Orig);
@@ -580,6 +613,12 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
580613
case Intrinsic::dx_radians:
581614
Result = expandRadiansIntrinsic(Orig);
582615
break;
616+
case Intrinsic::vector_reduce_add:
617+
Result = expandVecReduceAdd(Orig);
618+
break;
619+
case Intrinsic::vector_reduce_fadd:
620+
Result = expandVecReduceFAdd(Orig);
621+
break;
583622
}
584623
if (Result) {
585624
Orig->replaceAllUsesWith(Result);
Lines changed: 292 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,292 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -dxil-intrinsic-expansion < %s | FileCheck %s
3+
4+
; Make sure llvm.vector.reduce.fadd and llvm.vector.reduce.add are expanded into scalar extractelement/fadd/add sequences.
5+
6+
define noundef half @test_length_half2(<2 x half> noundef %p0) {
7+
; CHECK-LABEL: define noundef half @test_length_half2(
8+
; CHECK-SAME: <2 x half> noundef [[P0:%.*]]) {
9+
; CHECK-NEXT: [[ENTRY:.*:]]
10+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x half> [[P0]], i64 0
11+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x half> [[P0]], i64 1
12+
; CHECK-NEXT: [[TMP2:%.*]] = fadd half [[TMP0]], [[TMP1]]
13+
; CHECK-NEXT: ret half [[TMP2]]
14+
;
15+
entry:
16+
%rdx.fadd = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> %p0)
17+
ret half %rdx.fadd
18+
}
19+
20+
define noundef half @test_length_half3(<3 x half> noundef %p0) {
21+
; CHECK-LABEL: define noundef half @test_length_half3(
22+
; CHECK-SAME: <3 x half> noundef [[P0:%.*]]) {
23+
; CHECK-NEXT: [[ENTRY:.*:]]
24+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x half> [[P0]], i64 0
25+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x half> [[P0]], i64 1
26+
; CHECK-NEXT: [[TMP2:%.*]] = fadd half [[TMP0]], [[TMP1]]
27+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x half> [[P0]], i64 2
28+
; CHECK-NEXT: [[TMP4:%.*]] = fadd half [[TMP2]], [[TMP3]]
29+
; CHECK-NEXT: ret half [[TMP4]]
30+
;
31+
entry:
32+
%rdx.fadd = call half @llvm.vector.reduce.fadd.v3f16(half 0xH0000, <3 x half> %p0)
33+
ret half %rdx.fadd
34+
}
35+
36+
define noundef half @test_length_half4(<4 x half> noundef %p0) {
37+
; CHECK-LABEL: define noundef half @test_length_half4(
38+
; CHECK-SAME: <4 x half> noundef [[P0:%.*]]) {
39+
; CHECK-NEXT: [[ENTRY:.*:]]
40+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x half> [[P0]], i64 0
41+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x half> [[P0]], i64 1
42+
; CHECK-NEXT: [[TMP2:%.*]] = fadd half [[TMP0]], [[TMP1]]
43+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x half> [[P0]], i64 2
44+
; CHECK-NEXT: [[TMP4:%.*]] = fadd half [[TMP2]], [[TMP3]]
45+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x half> [[P0]], i64 3
46+
; CHECK-NEXT: [[TMP6:%.*]] = fadd half [[TMP4]], [[TMP5]]
47+
; CHECK-NEXT: ret half [[TMP6]]
48+
;
49+
entry:
50+
%rdx.fadd = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> %p0)
51+
ret half %rdx.fadd
52+
}
53+
54+
define noundef float @test_length_float2(<2 x float> noundef %p0) {
55+
; CHECK-LABEL: define noundef float @test_length_float2(
56+
; CHECK-SAME: <2 x float> noundef [[P0:%.*]]) {
57+
; CHECK-NEXT: [[ENTRY:.*:]]
58+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[P0]], i64 0
59+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[P0]], i64 1
60+
; CHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]]
61+
; CHECK-NEXT: ret float [[TMP2]]
62+
;
63+
entry:
64+
%rdx.fadd = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> %p0)
65+
ret float %rdx.fadd
66+
}
67+
68+
define noundef float @test_length_float3(<3 x float> noundef %p0) {
69+
; CHECK-LABEL: define noundef float @test_length_float3(
70+
; CHECK-SAME: <3 x float> noundef [[P0:%.*]]) {
71+
; CHECK-NEXT: [[ENTRY:.*:]]
72+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x float> [[P0]], i64 0
73+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x float> [[P0]], i64 1
74+
; CHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]]
75+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x float> [[P0]], i64 2
76+
; CHECK-NEXT: [[TMP4:%.*]] = fadd float [[TMP2]], [[TMP3]]
77+
; CHECK-NEXT: ret float [[TMP4]]
78+
;
79+
entry:
80+
%rdx.fadd = call float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> %p0)
81+
ret float %rdx.fadd
82+
}
83+
84+
define noundef float @test_length_float4(<4 x float> noundef %p0) {
85+
; CHECK-LABEL: define noundef float @test_length_float4(
86+
; CHECK-SAME: <4 x float> noundef [[P0:%.*]]) {
87+
; CHECK-NEXT: [[ENTRY:.*:]]
88+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x float> [[P0]], i64 0
89+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[P0]], i64 1
90+
; CHECK-NEXT: [[TMP2:%.*]] = fadd float [[TMP0]], [[TMP1]]
91+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[P0]], i64 2
92+
; CHECK-NEXT: [[TMP4:%.*]] = fadd float [[TMP2]], [[TMP3]]
93+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[P0]], i64 3
94+
; CHECK-NEXT: [[TMP6:%.*]] = fadd float [[TMP4]], [[TMP5]]
95+
; CHECK-NEXT: ret float [[TMP6]]
96+
;
97+
entry:
98+
%rdx.fadd = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> %p0)
99+
ret float %rdx.fadd
100+
}
101+
102+
define noundef double @test_length_double2(<2 x double> noundef %p0) {
103+
; CHECK-LABEL: define noundef double @test_length_double2(
104+
; CHECK-SAME: <2 x double> noundef [[P0:%.*]]) {
105+
; CHECK-NEXT: [[ENTRY:.*:]]
106+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[P0]], i64 0
107+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[P0]], i64 1
108+
; CHECK-NEXT: [[TMP2:%.*]] = fadd double [[TMP0]], [[TMP1]]
109+
; CHECK-NEXT: ret double [[TMP2]]
110+
;
111+
entry:
112+
%rdx.fadd = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> %p0)
113+
ret double %rdx.fadd
114+
}
115+
116+
define noundef double @test_length_double3(<3 x double> noundef %p0) {
117+
; CHECK-LABEL: define noundef double @test_length_double3(
118+
; CHECK-SAME: <3 x double> noundef [[P0:%.*]]) {
119+
; CHECK-NEXT: [[ENTRY:.*:]]
120+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x double> [[P0]], i64 0
121+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x double> [[P0]], i64 1
122+
; CHECK-NEXT: [[TMP2:%.*]] = fadd double [[TMP0]], [[TMP1]]
123+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x double> [[P0]], i64 2
124+
; CHECK-NEXT: [[TMP4:%.*]] = fadd double [[TMP2]], [[TMP3]]
125+
; CHECK-NEXT: ret double [[TMP4]]
126+
;
127+
entry:
128+
%rdx.fadd = call double @llvm.vector.reduce.fadd.v3f64(double 0.000000e+00, <3 x double> %p0)
129+
ret double %rdx.fadd
130+
}
131+
132+
define noundef double @test_length_double4(<4 x double> noundef %p0) {
133+
; CHECK-LABEL: define noundef double @test_length_double4(
134+
; CHECK-SAME: <4 x double> noundef [[P0:%.*]]) {
135+
; CHECK-NEXT: [[ENTRY:.*:]]
136+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x double> [[P0]], i64 0
137+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x double> [[P0]], i64 1
138+
; CHECK-NEXT: [[TMP2:%.*]] = fadd double [[TMP0]], [[TMP1]]
139+
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x double> [[P0]], i64 2
140+
; CHECK-NEXT: [[TMP4:%.*]] = fadd double [[TMP2]], [[TMP3]]
141+
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x double> [[P0]], i64 3
142+
; CHECK-NEXT: [[TMP6:%.*]] = fadd double [[TMP4]], [[TMP5]]
143+
; CHECK-NEXT: ret double [[TMP6]]
144+
;
145+
entry:
146+
%rdx.fadd = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> %p0)
147+
ret double %rdx.fadd
148+
}
149+
150+
define noundef i16 @test_length_short2(<2 x i16> noundef %p0) {
151+
; CHECK-LABEL: define noundef i16 @test_length_short2(
152+
; CHECK-SAME: <2 x i16> noundef [[P0:%.*]]) {
153+
; CHECK-NEXT: [[ENTRY:.*:]]
154+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i16> [[P0]], i64 0
155+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i16> [[P0]], i64 1
156+
; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[TMP0]], [[TMP1]]
157+
; CHECK-NEXT: ret i16 [[TMP2]]
158+
;
159+
entry:
160+
%rdx.add = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %p0)
161+
ret i16 %rdx.add
162+
}
163+
164+
; <3 x i16> integer reduction: expect lane-by-lane extractelement/add.
define noundef i16 @test_length_short3(<3 x i16> noundef %p0) {
; CHECK-LABEL: define noundef i16 @test_length_short3(
; CHECK-SAME: <3 x i16> noundef [[P0:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x i16> [[P0]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i16> [[P0]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x i16> [[P0]], i64 2
; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret i16 [[TMP4]]
;
entry:
  %rdx.add = call i16 @llvm.vector.reduce.add.v3i16(<3 x i16> %p0)
  ret i16 %rdx.add
}
179+
180+
; <4 x i16> integer reduction: expect lane-by-lane extractelement/add.
define noundef i16 @test_length_short4(<4 x i16> noundef %p0) {
; CHECK-LABEL: define noundef i16 @test_length_short4(
; CHECK-SAME: <4 x i16> noundef [[P0:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[P0]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i16> [[P0]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = add i16 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i16> [[P0]], i64 2
; CHECK-NEXT: [[TMP4:%.*]] = add i16 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i16> [[P0]], i64 3
; CHECK-NEXT: [[TMP6:%.*]] = add i16 [[TMP4]], [[TMP5]]
; CHECK-NEXT: ret i16 [[TMP6]]
;
entry:
  %rdx.add = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %p0)
  ret i16 %rdx.add
}
197+
198+
define noundef i32 @test_length_int2(<2 x i32> noundef %p0) {
199+
; CHECK-LABEL: define noundef i32 @test_length_int2(
200+
; CHECK-SAME: <2 x i32> noundef [[P0:%.*]]) {
201+
; CHECK-NEXT: [[ENTRY:.*:]]
202+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i32> [[P0]], i64 0
203+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[P0]], i64 1
204+
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], [[TMP1]]
205+
; CHECK-NEXT: ret i32 [[TMP2]]
206+
;
207+
entry:
208+
%rdx.add = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %p0)
209+
ret i32 %rdx.add
210+
}
211+
212+
; <3 x i32> integer reduction: expect lane-by-lane extractelement/add.
define noundef i32 @test_length_int3(<3 x i32> noundef %p0) {
; CHECK-LABEL: define noundef i32 @test_length_int3(
; CHECK-SAME: <3 x i32> noundef [[P0:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x i32> [[P0]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i32> [[P0]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x i32> [[P0]], i64 2
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret i32 [[TMP4]]
;
entry:
  %rdx.add = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> %p0)
  ret i32 %rdx.add
}
227+
228+
; <4 x i32> integer reduction: expect lane-by-lane extractelement/add.
define noundef i32 @test_length_int4(<4 x i32> noundef %p0) {
; CHECK-LABEL: define noundef i32 @test_length_int4(
; CHECK-SAME: <4 x i32> noundef [[P0:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i32> [[P0]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[P0]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[P0]], i64 2
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[P0]], i64 3
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP4]], [[TMP5]]
; CHECK-NEXT: ret i32 [[TMP6]]
;
entry:
  %rdx.add = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %p0)
  ret i32 %rdx.add
}
245+
246+
define noundef i64 @test_length_int64_2(<2 x i64> noundef %p0) {
247+
; CHECK-LABEL: define noundef i64 @test_length_int64_2(
248+
; CHECK-SAME: <2 x i64> noundef [[P0:%.*]]) {
249+
; CHECK-NEXT: [[ENTRY:.*:]]
250+
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[P0]], i64 0
251+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[P0]], i64 1
252+
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP0]], [[TMP1]]
253+
; CHECK-NEXT: ret i64 [[TMP2]]
254+
;
255+
entry:
256+
%rdx.add = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %p0)
257+
ret i64 %rdx.add
258+
}
259+
260+
; <3 x i64> integer reduction: expect lane-by-lane extractelement/add.
define noundef i64 @test_length_int64_3(<3 x i64> noundef %p0) {
; CHECK-LABEL: define noundef i64 @test_length_int64_3(
; CHECK-SAME: <3 x i64> noundef [[P0:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x i64> [[P0]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i64> [[P0]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x i64> [[P0]], i64 2
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT: ret i64 [[TMP4]]
;
entry:
  %rdx.add = call i64 @llvm.vector.reduce.add.v3i64(<3 x i64> %p0)
  ret i64 %rdx.add
}
275+
276+
; <4 x i64> integer reduction: expect lane-by-lane extractelement/add.
define noundef i64 @test_length_int64_4(<4 x i64> noundef %p0) {
; CHECK-LABEL: define noundef i64 @test_length_int64_4(
; CHECK-SAME: <4 x i64> noundef [[P0:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i64> [[P0]], i64 0
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[P0]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[P0]], i64 2
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[P0]], i64 3
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP4]], [[TMP5]]
; CHECK-NEXT: ret i64 [[TMP6]]
;
entry:
  %rdx.add = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %p0)
  ret i64 %rdx.add
}

0 commit comments

Comments
 (0)