Skip to content

Commit fea2ca8

Browse files
committed
Add more tests
1 parent 2ae5b13 commit fea2ca8

File tree

1 file changed

+65
-0
lines changed

1 file changed

+65
-0
lines changed

mlir/test/Dialect/AMDGPU/amdgpu-fold-memrefs.mlir

Lines changed: 65 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -96,3 +96,68 @@ func.func @test_collapse_shape(%offset_i: index, %offset_j: index) {
9696
: vector<8xf16>, memref<8192xf16>, memref<4096xf16, #gpu_lds_addrspace>
9797
func.return
9898
}
99+
100+
101+
// -----
102+
103+
// Attribute alias used as the memory space of the destination allocation in
// this test; it prints as the plain address space `3` in the expected output.
#gpu_lds_addrspace = 3
104+
105+
106+
// Test: the SOURCE operand of amdgpu.gather_to_lds is a memref.expand_shape
// (8192 -> 64x128) of a fat_raw_buffer function argument. The expected output
// has gather_to_lds read the original 1-D argument directly, indexed by an
// affine.linearize_index of the two offsets with basis (64, 128), while the
// destination operand and its index stay as written.
// CHECK: func @test_expand_shape_src_raw_buffer
107+
// CHECK-SAME: %[[ARG0:.*]]: memref<8192xf16, #amdgpu.address_space<fat_raw_buffer>>, %[[ARG1:.*]]: index, %[[ARG2:.*]]: index
108+
func.func @test_expand_shape_src_raw_buffer(%mem : memref<8192xf16, #amdgpu.address_space<fat_raw_buffer>>, %offset_i: index, %offset_j: index) {
109+
// CHECK: %[[LOCAL:.*]] = memref.alloc() : memref<4096xf16, 3>
110+
// CHECK: %[[C0:.*]] = arith.constant 0 : index
111+
// CHECK: %[[IDXM:.*]] = affine.linearize_index [%[[ARG1]], %[[ARG2]]] by (64, 128) : index
112+
// CHECK: amdgpu.gather_to_lds %[[ARG0]][%[[IDXM]]], %[[LOCAL]][%[[C0]]]
113+
// CHECK-SAME: vector<8xf16>, memref<8192xf16, #amdgpu.address_space<fat_raw_buffer>>, memref<4096xf16, 3>
114+
115+
// Destination buffer: flat 4096 x f16 in the aliased address space.
%alloc = memref.alloc() : memref<4096xf16, #gpu_lds_addrspace>
116+
// 2-D view of the source argument; the expected gather_to_lds above no longer
// goes through this view and uses %mem with a linearized index instead.
%expand_mem = memref.expand_shape %mem [[0, 1]] output_shape [64, 128] : memref<8192xf16, #amdgpu.address_space<fat_raw_buffer>> into memref<64x128xf16, #amdgpu.address_space<fat_raw_buffer>>
117+
118+
%c0 = arith.constant 0 : index
119+
// Source is indexed in 2-D through the view; destination is indexed at 0.
amdgpu.gather_to_lds %expand_mem[%offset_i, %offset_j], %alloc[%c0]
120+
: vector<8xf16>, memref<64x128xf16, #amdgpu.address_space<fat_raw_buffer>>, memref<4096xf16, #gpu_lds_addrspace>
121+
func.return
122+
}
123+
124+
// -----
125+
126+
// Attribute alias used as the memory space of the destination allocation in
// this test; it prints as the plain address space `3` in the expected output.
#gpu_lds_addrspace = 3
127+
128+
// Test: only the DESTINATION operand of amdgpu.gather_to_lds goes through a
// memref.expand_shape (4096 -> 64x64); the source is a plain 1-D allocation.
// The expected output keeps the source index (%offset_i) untouched and replaces
// the 2-D destination indices [%offset_j, 0] with an affine.linearize_index
// with basis (64, 64) applied to the original flat allocation.
// CHECK: func @test_expand_shape_dst_only
129+
// CHECK-SAME: %[[ARG0:.*]]: index, %[[ARG1:.*]]: index
130+
func.func @test_expand_shape_dst_only(%offset_i: index, %offset_j: index) {
131+
// CHECK: %[[LOCAL:.*]] = memref.alloc() : memref<4096xf16, 3>
132+
// CHECK: %[[MEM:.*]] = memref.alloc() : memref<8192xf16>
133+
// CHECK: %[[C0:.*]] = arith.constant 0 : index
134+
// CHECK: %[[IDX_LDS:.*]] = affine.linearize_index [%[[ARG1]], %[[C0]]] by (64, 64) : index
135+
// CHECK: amdgpu.gather_to_lds %[[MEM]][%[[ARG0]]], %[[LOCAL]][%[[IDX_LDS]]]
136+
// CHECK-SAME: vector<8xf16>, memref<8192xf16>, memref<4096xf16, 3>
137+
138+
// Destination buffer: flat 4096 x f16 in the aliased address space.
%alloc = memref.alloc() : memref<4096xf16, #gpu_lds_addrspace>
139+
// Source buffer: flat 8192 x f16 with no explicit memory space.
%mem = memref.alloc() : memref<8192xf16>
140+
// 2-D view of the destination; the expected gather_to_lds above writes to
// %alloc directly with a linearized index instead of using this view.
%expand_alloc = memref.expand_shape %alloc [[0, 1]] output_shape [64, 64] : memref<4096xf16, #gpu_lds_addrspace> into memref<64x64xf16, #gpu_lds_addrspace>
141+
142+
%c0 = arith.constant 0 : index
143+
// Source is indexed in 1-D; destination is indexed in 2-D through the view.
amdgpu.gather_to_lds %mem[%offset_i], %expand_alloc[%offset_j, %c0]
144+
: vector<8xf16>, memref<8192xf16>, memref<64x64xf16, #gpu_lds_addrspace>
145+
func.return
146+
}
147+
148+
// -----
149+
150+
// Attribute alias used as the memory space of the destination allocation in
// this test; it prints as the plain address space `3` in the expected output.
#gpu_lds_addrspace = 3
150+
151+
// Negative test: neither operand of amdgpu.gather_to_lds is produced by a
// reshaping/view op, so there is nothing to fold. The expected output keeps
// the same operands (%mem, the allocation) and the same indices
// (%offset_i, %offset_j) as the input.
// CHECK: func @test_nop
152+
// CHECK-SAME: %[[ARG0:.*]]: memref<8192xf16, #amdgpu.address_space<fat_raw_buffer>>, %[[ARG1:.*]]: index, %[[ARG2:.*]]: index
153+
func.func @test_nop(%mem : memref<8192xf16, #amdgpu.address_space<fat_raw_buffer>>, %offset_i: index, %offset_j: index) {
154+
// CHECK: %[[LOCAL:.*]] = memref.alloc() : memref<4096xf16, 3>
155+
// CHECK: amdgpu.gather_to_lds %[[ARG0]][%[[ARG1]]], %[[LOCAL]][%[[ARG2]]]
156+
// CHECK-SAME: vector<8xf16>, memref<8192xf16, #amdgpu.address_space<fat_raw_buffer>>, memref<4096xf16, 3>
157+
158+
// Destination buffer: flat 4096 x f16 in the aliased address space.
%alloc = memref.alloc() : memref<4096xf16, #gpu_lds_addrspace>
159+
// Both operands are already flat memrefs indexed in 1-D.
amdgpu.gather_to_lds %mem[%offset_i], %alloc[%offset_j]
160+
: vector<8xf16>, memref<8192xf16, #amdgpu.address_space<fat_raw_buffer>>, memref<4096xf16, #gpu_lds_addrspace>
161+
func.return
162+
}

0 commit comments

Comments
 (0)