; This test is carved out, for sending the patch upstream, from
; iree-amd-aie/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/multi_reduction_to_reduction_sizes_types.mlir

; Ideally the reduction should be as follows (with minor changes for each shape):
; Input1: <32xbf16> and Input2: <32xbf16>
; Extended1<32xf32> = fpext <32xbf16>
; Extended2<32xf32> = fpext <32xbf16>
; Zero<32xf32> = zeroinitializer
; Out1<64xf32> = Concat zero, <Extended1<32xf32>>
; Out2<64xf32> = Concat zero, <Extended2<32xf32>>
; Result<64xf32> = fadd <Out1<64xf32>>, <Out2<64xf32>>
; R1<32xf32>, R2<32xf32> = unmerge <Result<64xf32>>
; R2 is all 0s
; R1<32xbf16> = trunc <R1<32xf32>>

; Reassociable fadd reduction over a <16 x bfloat> vector with start value %1.
;
; Intended checks, currently only described (no CHECK lines exist for them yet):
;   - check the vadd.f
;   - pad checks
;   - checks similar to <32xbf16>
;   - unpad checks
;
; The label below only anchors FileCheck on this function's output; it uses the
; same "name:" form as the label of the <32 x bfloat> test.
; CHECK-LABEL: name: multi_reduction_1d_16_bf16
define bfloat @multi_reduction_1d_16_bf16(<16 x bfloat> %0, bfloat %1) {
  %3 = call reassoc bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat %1, <16 x bfloat> %0)
  ret bfloat %3
}

; Reassociable fadd reduction over a <32 x bfloat> vector with start value %1.
;
; The CHECK lines match, in this order: a vector broadcast of a constant, two
; vector shuffles, bitcasts of the <32 x s16> values, concatenation up to
; <64 x s32>, the G_FADD, the unmerge of the <64 x s32> result, and the
; v32accfloat -> v32bf16 conversion intrinsic.
; CHECK-LABEL: name: multi_reduction_1d_32_bf16
; CHECK: G_CONSTANT i32 0
; CHECK: G_AIE_BROADCAST_VECTOR %{{[0-9]+}}(s32)
; CHECK: G_CONSTANT i32 2
; CHECK: G_CONSTANT i32 3
; CHECK: G_AIE_SHUFFLE_VECTOR %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}(s32)
; CHECK: G_AIE_SHUFFLE_VECTOR %{{[0-9]+}}, %{{[0-9]+}}, %{{[0-9]+}}(s32)
; CHECK: G_BITCAST %{{[0-9]+}}(<32 x s16>)
; CHECK: G_BITCAST %{{[0-9]+}}(<32 x s16>)
; CHECK: G_CONCAT_VECTORS %{{[0-9]+}}(<16 x s32>), %{{[0-9]+}}(<16 x s32>)
; CHECK: G_IMPLICIT_DEF
; CHECK: G_CONCAT_VECTORS %{{[0-9]+}}(<32 x s32>), %{{[0-9]+}}(<32 x s32>)
; CHECK: G_FADD %{{[0-9]+}}, %{{[0-9]+}}
; CHECK: G_UNMERGE_VALUES %{{[0-9]+}}(<64 x s32>)
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2p.v32accfloat.to.v32bf16), %{{[0-9]+}}(<32 x s32>)
define bfloat @multi_reduction_1d_32_bf16(<32 x bfloat> %0, bfloat %1) {
  %3 = call reassoc bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat %1, <32 x bfloat> %0)
  ret bfloat %3
}

; Reassociable fadd reduction over a <64 x bfloat> vector with start value %1.
;
; Converted to chunks of <32 x bf16>.
; Intended checks, currently only described (no CHECK lines exist for them yet):
;   - Check if the input is split into 2 chunks of <32 x bf16>
;   - Check for each chunk similar to <32xbf16> case
;   - Check if both inputs get concatenated to <64xbf16>
;
; The label below anchors FileCheck on this function's output, so CHECK lines
; belonging to an earlier function cannot match inside it.
; CHECK-LABEL: name: multi_reduction_1d_64_bf16
define bfloat @multi_reduction_1d_64_bf16(<64 x bfloat> %0, bfloat %1) {
  %3 = call reassoc bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat %1, <64 x bfloat> %0)
  ret bfloat %3
}