Skip to content

Commit 84ff8f4

Browse files
committed
pre-land reduce.ll
1 parent e391301 commit 84ff8f4

File tree

1 file changed

+298
-0
lines changed
  • llvm/test/Transforms/LowerMatrixIntrinsics

1 file changed

+298
-0
lines changed
Lines changed: 298 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,298 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes='lower-matrix-intrinsics' -S < %s | FileCheck %s

; Integer add-reduction of a volatile 4x2 column-major load: the two 4-wide
; column loads are flattened into one <8 x i32> before the reduce.
define i32 @reduce_add_4x2(ptr %in, ptr %out) {
; CHECK-LABEL: @reduce_add_4x2(
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load volatile <4 x i32>, ptr [[IN:%.*]], align 4
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i32, ptr [[IN]], i64 4
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load volatile <4 x i32>, ptr [[VEC_GEP]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[COL_LOAD]], <4 x i32> [[COL_LOAD1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[REDUCE:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP1]])
; CHECK-NEXT:    ret i32 [[REDUCE]]
;
  %inv = call <8 x i32> @llvm.matrix.column.major.load(ptr %in, i64 4, i1 1, i32 4, i32 2)
  %reduce = call i32 @llvm.vector.reduce.add(<8 x i32> %inv)
  ret i32 %reduce
}
17+
18+
; 8x1 matrix: a single column load, so no shuffle is needed before the reduce.
define i32 @reduce_add_8x1(ptr %in, ptr %out) {
; CHECK-LABEL: @reduce_add_8x1(
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load volatile <8 x i32>, ptr [[IN:%.*]], align 4
; CHECK-NEXT:    [[REDUCE:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[COL_LOAD]])
; CHECK-NEXT:    ret i32 [[REDUCE]]
;
  %inv = call <8 x i32> @llvm.matrix.column.major.load(ptr %in, i64 8, i1 1, i32 8, i32 1)
  %reduce = call i32 @llvm.vector.reduce.add(<8 x i32> %inv)
  ret i32 %reduce
}
28+
29+
; 1x8 matrix: eight single-element column loads re-assembled via a tree of
; shuffles into one <8 x i32> that feeds the reduce.
define i32 @reduce_add_1x8(ptr %in, ptr %out) {
; CHECK-LABEL: @reduce_add_1x8(
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load volatile <1 x i32>, ptr [[IN:%.*]], align 4
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i32, ptr [[IN]], i64 1
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load volatile <1 x i32>, ptr [[VEC_GEP]], align 4
; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr i32, ptr [[IN]], i64 2
; CHECK-NEXT:    [[COL_LOAD3:%.*]] = load volatile <1 x i32>, ptr [[VEC_GEP2]], align 4
; CHECK-NEXT:    [[VEC_GEP4:%.*]] = getelementptr i32, ptr [[IN]], i64 3
; CHECK-NEXT:    [[COL_LOAD5:%.*]] = load volatile <1 x i32>, ptr [[VEC_GEP4]], align 4
; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr i32, ptr [[IN]], i64 4
; CHECK-NEXT:    [[COL_LOAD7:%.*]] = load volatile <1 x i32>, ptr [[VEC_GEP6]], align 4
; CHECK-NEXT:    [[VEC_GEP8:%.*]] = getelementptr i32, ptr [[IN]], i64 5
; CHECK-NEXT:    [[COL_LOAD9:%.*]] = load volatile <1 x i32>, ptr [[VEC_GEP8]], align 4
; CHECK-NEXT:    [[VEC_GEP10:%.*]] = getelementptr i32, ptr [[IN]], i64 6
; CHECK-NEXT:    [[COL_LOAD11:%.*]] = load volatile <1 x i32>, ptr [[VEC_GEP10]], align 4
; CHECK-NEXT:    [[VEC_GEP12:%.*]] = getelementptr i32, ptr [[IN]], i64 7
; CHECK-NEXT:    [[COL_LOAD13:%.*]] = load volatile <1 x i32>, ptr [[VEC_GEP12]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <1 x i32> [[COL_LOAD]], <1 x i32> [[COL_LOAD1]], <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <1 x i32> [[COL_LOAD3]], <1 x i32> [[COL_LOAD5]], <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <1 x i32> [[COL_LOAD7]], <1 x i32> [[COL_LOAD9]], <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <1 x i32> [[COL_LOAD11]], <1 x i32> [[COL_LOAD13]], <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[REDUCE:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP7]])
; CHECK-NEXT:    ret i32 [[REDUCE]]
;
  %inv = call <8 x i32> @llvm.matrix.column.major.load(ptr %in, i64 1, i1 1, i32 1, i32 8)
  %reduce = call i32 @llvm.vector.reduce.add(<8 x i32> %inv)
  ret i32 %reduce
}
60+
61+
; Non-power-of-two element count (1x3): odd trailing column is widened with a
; poison lane before the final concatenating shuffle.
define i32 @reduce_add_1x3(ptr %in, ptr %out) {
; CHECK-LABEL: @reduce_add_1x3(
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load volatile <1 x i32>, ptr [[IN:%.*]], align 4
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i32, ptr [[IN]], i64 1
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load volatile <1 x i32>, ptr [[VEC_GEP]], align 4
; CHECK-NEXT:    [[VEC_GEP2:%.*]] = getelementptr i32, ptr [[IN]], i64 2
; CHECK-NEXT:    [[COL_LOAD3:%.*]] = load volatile <1 x i32>, ptr [[VEC_GEP2]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <1 x i32> [[COL_LOAD]], <1 x i32> [[COL_LOAD1]], <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <1 x i32> [[COL_LOAD3]], <1 x i32> poison, <2 x i32> <i32 0, i32 poison>
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <3 x i32> <i32 0, i32 1, i32 2>
; CHECK-NEXT:    [[REDUCE:%.*]] = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> [[TMP3]])
; CHECK-NEXT:    ret i32 [[REDUCE]]
;
  %inv = call <3 x i32> @llvm.matrix.column.major.load(ptr %in, i64 1, i1 1, i32 1, i32 3)
  %reduce = call i32 @llvm.vector.reduce.add(<3 x i32> %inv)
  ret i32 %reduce
}
78+
79+
; 3x1 matrix: a single 3-wide column load feeds the reduce directly.
define i32 @reduce_add_3x1(ptr %in, ptr %out) {
; CHECK-LABEL: @reduce_add_3x1(
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load volatile <3 x i32>, ptr [[IN:%.*]], align 4
; CHECK-NEXT:    [[REDUCE:%.*]] = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> [[COL_LOAD]])
; CHECK-NEXT:    ret i32 [[REDUCE]]
;
  %inv = call <3 x i32> @llvm.matrix.column.major.load(ptr %in, i64 3, i1 1, i32 3, i32 1)
  %reduce = call i32 @llvm.vector.reduce.add(<3 x i32> %inv)
  ret i32 %reduce
}
89+
90+
; Same 4x2 shape as reduce_add_4x2, exercising the AND reduction.
define i32 @reduce_and(ptr %in, ptr %out) {
; CHECK-LABEL: @reduce_and(
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load volatile <4 x i32>, ptr [[IN:%.*]], align 4
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i32, ptr [[IN]], i64 4
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load volatile <4 x i32>, ptr [[VEC_GEP]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[COL_LOAD]], <4 x i32> [[COL_LOAD1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[REDUCE:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
; CHECK-NEXT:    ret i32 [[REDUCE]]
;
  %inv = call <8 x i32> @llvm.matrix.column.major.load(ptr %in, i64 4, i1 1, i32 4, i32 2)
  %reduce = call i32 @llvm.vector.reduce.and(<8 x i32> %inv)
  ret i32 %reduce
}
103+
104+
; OR reduction over the flattened 4x2 load.
define i32 @reduce_or(ptr %in, ptr %out) {
; CHECK-LABEL: @reduce_or(
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load volatile <4 x i32>, ptr [[IN:%.*]], align 4
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i32, ptr [[IN]], i64 4
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load volatile <4 x i32>, ptr [[VEC_GEP]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[COL_LOAD]], <4 x i32> [[COL_LOAD1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[REDUCE:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP1]])
; CHECK-NEXT:    ret i32 [[REDUCE]]
;
  %inv = call <8 x i32> @llvm.matrix.column.major.load(ptr %in, i64 4, i1 1, i32 4, i32 2)
  %reduce = call i32 @llvm.vector.reduce.or(<8 x i32> %inv)
  ret i32 %reduce
}
117+
118+
; MUL reduction over the flattened 4x2 load.
define i32 @reduce_mul(ptr %in, ptr %out) {
; CHECK-LABEL: @reduce_mul(
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load volatile <4 x i32>, ptr [[IN:%.*]], align 4
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i32, ptr [[IN]], i64 4
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load volatile <4 x i32>, ptr [[VEC_GEP]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[COL_LOAD]], <4 x i32> [[COL_LOAD1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[REDUCE:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP1]])
; CHECK-NEXT:    ret i32 [[REDUCE]]
;
  %inv = call <8 x i32> @llvm.matrix.column.major.load(ptr %in, i64 4, i1 1, i32 4, i32 2)
  %reduce = call i32 @llvm.vector.reduce.mul(<8 x i32> %inv)
  ret i32 %reduce
}
131+
132+
; XOR reduction over the flattened 4x2 load.
define i32 @reduce_xor(ptr %in, ptr %out) {
; CHECK-LABEL: @reduce_xor(
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load volatile <4 x i32>, ptr [[IN:%.*]], align 4
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i32, ptr [[IN]], i64 4
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load volatile <4 x i32>, ptr [[VEC_GEP]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[COL_LOAD]], <4 x i32> [[COL_LOAD1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[REDUCE:%.*]] = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> [[TMP1]])
; CHECK-NEXT:    ret i32 [[REDUCE]]
;
  %inv = call <8 x i32> @llvm.matrix.column.major.load(ptr %in, i64 4, i1 1, i32 4, i32 2)
  %reduce = call i32 @llvm.vector.reduce.xor(<8 x i32> %inv)
  ret i32 %reduce
}
145+
146+
; Strict (ordered) fadd reduction: no fast-math flags, start value 0.0.
define float @reduce_fadd(ptr %in, ptr %out) {
; CHECK-LABEL: @reduce_fadd(
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load volatile <4 x float>, ptr [[IN:%.*]], align 4
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr [[IN]], i64 4
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load volatile <4 x float>, ptr [[VEC_GEP]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[COL_LOAD]], <4 x float> [[COL_LOAD1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[REDUCE:%.*]] = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP1]])
; CHECK-NEXT:    ret float [[REDUCE]]
;
  %inv = call <8 x float> @llvm.matrix.column.major.load(ptr %in, i64 4, i1 1, i32 4, i32 2)
  %reduce = call float @llvm.vector.reduce.fadd(float 0., <8 x float> %inv)
  ret float %reduce
}
159+
160+
; fadd reduction with the reassoc fast-math flag preserved on the lowered call.
define float @reduce_fadd_reassoc(ptr %in, ptr %out) {
; CHECK-LABEL: @reduce_fadd_reassoc(
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load volatile <4 x float>, ptr [[IN:%.*]], align 4
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr [[IN]], i64 4
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load volatile <4 x float>, ptr [[VEC_GEP]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[COL_LOAD]], <4 x float> [[COL_LOAD1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[REDUCE:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP1]])
; CHECK-NEXT:    ret float [[REDUCE]]
;
  %inv = call <8 x float> @llvm.matrix.column.major.load(ptr %in, i64 4, i1 1, i32 4, i32 2)
  %reduce = call reassoc float @llvm.vector.reduce.fadd(float 0., <8 x float> %inv)
  ret float %reduce
}
173+
174+
; fadd reduction with the contract fast-math flag preserved.
define float @reduce_fadd_contract(ptr %in, ptr %out) {
; CHECK-LABEL: @reduce_fadd_contract(
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load volatile <4 x float>, ptr [[IN:%.*]], align 4
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr [[IN]], i64 4
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load volatile <4 x float>, ptr [[VEC_GEP]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[COL_LOAD]], <4 x float> [[COL_LOAD1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[REDUCE:%.*]] = call contract float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP1]])
; CHECK-NEXT:    ret float [[REDUCE]]
;
  %inv = call <8 x float> @llvm.matrix.column.major.load(ptr %in, i64 4, i1 1, i32 4, i32 2)
  %reduce = call contract float @llvm.vector.reduce.fadd(float 0., <8 x float> %inv)
  ret float %reduce
}
187+
188+
; fadd reduction with both reassoc and contract flags preserved.
define float @reduce_fadd_reassoccontract(ptr %in, ptr %out) {
; CHECK-LABEL: @reduce_fadd_reassoccontract(
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load volatile <4 x float>, ptr [[IN:%.*]], align 4
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr [[IN]], i64 4
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load volatile <4 x float>, ptr [[VEC_GEP]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[COL_LOAD]], <4 x float> [[COL_LOAD1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[REDUCE:%.*]] = call reassoc contract float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP1]])
; CHECK-NEXT:    ret float [[REDUCE]]
;
  %inv = call <8 x float> @llvm.matrix.column.major.load(ptr %in, i64 4, i1 1, i32 4, i32 2)
  %reduce = call reassoc contract float @llvm.vector.reduce.fadd(float 0., <8 x float> %inv)
  ret float %reduce
}
201+
202+
; fadd reduction with a non-identity start value (1.0); flags and start value
; must survive lowering unchanged.
define float @reduce_fadd_weirdstart(ptr %in, ptr %out) {
; CHECK-LABEL: @reduce_fadd_weirdstart(
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load volatile <4 x float>, ptr [[IN:%.*]], align 4
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr [[IN]], i64 4
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load volatile <4 x float>, ptr [[VEC_GEP]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[COL_LOAD]], <4 x float> [[COL_LOAD1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[REDUCE:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v8f32(float 1.000000e+00, <8 x float> [[TMP1]])
; CHECK-NEXT:    ret float [[REDUCE]]
;
  %inv = call <8 x float> @llvm.matrix.column.major.load(ptr %in, i64 4, i1 1, i32 4, i32 2)
  %reduce = call reassoc float @llvm.vector.reduce.fadd(float 1., <8 x float> %inv)
  ret float %reduce
}
215+
216+
; fmul reduction with reassoc and the identity start value 1.0.
define float @reduce_fmul_reassoc(ptr %in, ptr %out) {
; CHECK-LABEL: @reduce_fmul_reassoc(
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load volatile <4 x float>, ptr [[IN:%.*]], align 4
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr [[IN]], i64 4
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load volatile <4 x float>, ptr [[VEC_GEP]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[COL_LOAD]], <4 x float> [[COL_LOAD1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[REDUCE:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 1.000000e+00, <8 x float> [[TMP1]])
; CHECK-NEXT:    ret float [[REDUCE]]
;
  %inv = call <8 x float> @llvm.matrix.column.major.load(ptr %in, i64 4, i1 1, i32 4, i32 2)
  %reduce = call reassoc float @llvm.vector.reduce.fmul(float 1., <8 x float> %inv)
  ret float %reduce
}
229+
230+
; fmul reduction with a non-identity start value (0.0); must be preserved.
define float @reduce_fmul_weirdstart(ptr %in, ptr %out) {
; CHECK-LABEL: @reduce_fmul_weirdstart(
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load volatile <4 x float>, ptr [[IN:%.*]], align 4
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr [[IN]], i64 4
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load volatile <4 x float>, ptr [[VEC_GEP]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[COL_LOAD]], <4 x float> [[COL_LOAD1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[REDUCE:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 0.000000e+00, <8 x float> [[TMP1]])
; CHECK-NEXT:    ret float [[REDUCE]]
;
  %inv = call <8 x float> @llvm.matrix.column.major.load(ptr %in, i64 4, i1 1, i32 4, i32 2)
  %reduce = call reassoc float @llvm.vector.reduce.fmul(float 0., <8 x float> %inv)
  ret float %reduce
}
243+
244+
; fmax reduction (IEEE maxNum NaN semantics) over the flattened 4x2 load.
define float @reduce_fmax(ptr %in, ptr %out) {
; CHECK-LABEL: @reduce_fmax(
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load volatile <4 x float>, ptr [[IN:%.*]], align 4
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr [[IN]], i64 4
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load volatile <4 x float>, ptr [[VEC_GEP]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[COL_LOAD]], <4 x float> [[COL_LOAD1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[REDUCE:%.*]] = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> [[TMP1]])
; CHECK-NEXT:    ret float [[REDUCE]]
;
  %inv = call <8 x float> @llvm.matrix.column.major.load(ptr %in, i64 4, i1 1, i32 4, i32 2)
  %reduce = call float @llvm.vector.reduce.fmax(<8 x float> %inv)
  ret float %reduce
}
257+
258+
; fmaximum reduction (NaN-propagating maximum) over the flattened 4x2 load.
define float @reduce_fmaximum(ptr %in, ptr %out) {
; CHECK-LABEL: @reduce_fmaximum(
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load volatile <4 x float>, ptr [[IN:%.*]], align 4
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr [[IN]], i64 4
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load volatile <4 x float>, ptr [[VEC_GEP]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[COL_LOAD]], <4 x float> [[COL_LOAD1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[REDUCE:%.*]] = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> [[TMP1]])
; CHECK-NEXT:    ret float [[REDUCE]]
;
  %inv = call <8 x float> @llvm.matrix.column.major.load(ptr %in, i64 4, i1 1, i32 4, i32 2)
  %reduce = call float @llvm.vector.reduce.fmaximum(<8 x float> %inv)
  ret float %reduce
}
271+
272+
; fmin reduction (IEEE minNum NaN semantics) over the flattened 4x2 load.
define float @reduce_fmin(ptr %in, ptr %out) {
; CHECK-LABEL: @reduce_fmin(
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load volatile <4 x float>, ptr [[IN:%.*]], align 4
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr [[IN]], i64 4
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load volatile <4 x float>, ptr [[VEC_GEP]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[COL_LOAD]], <4 x float> [[COL_LOAD1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[REDUCE:%.*]] = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> [[TMP1]])
; CHECK-NEXT:    ret float [[REDUCE]]
;
  %inv = call <8 x float> @llvm.matrix.column.major.load(ptr %in, i64 4, i1 1, i32 4, i32 2)
  %reduce = call float @llvm.vector.reduce.fmin(<8 x float> %inv)
  ret float %reduce
}
285+
286+
; fminimum reduction (NaN-propagating minimum) over the flattened 4x2 load.
define float @reduce_fminimum(ptr %in, ptr %out) {
; CHECK-LABEL: @reduce_fminimum(
; CHECK-NEXT:    [[COL_LOAD:%.*]] = load volatile <4 x float>, ptr [[IN:%.*]], align 4
; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, ptr [[IN]], i64 4
; CHECK-NEXT:    [[COL_LOAD1:%.*]] = load volatile <4 x float>, ptr [[VEC_GEP]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[COL_LOAD]], <4 x float> [[COL_LOAD1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[REDUCE:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[TMP1]])
; CHECK-NEXT:    ret float [[REDUCE]]
;
  %inv = call <8 x float> @llvm.matrix.column.major.load(ptr %in, i64 4, i1 1, i32 4, i32 2)
  %reduce = call float @llvm.vector.reduce.fminimum(<8 x float> %inv)
  ret float %reduce
}

0 commit comments

Comments
 (0)