Skip to content

Commit aecf9ab

Browse files
committed
add test
1 parent 32a6408 commit aecf9ab

File tree

1 file changed

+54
-0
lines changed

1 file changed

+54
-0
lines changed
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
// RUN: imex-opt --insert-gpu-allocs='client-api=opencl is-usm-args=1' %s | FileCheck %s --check-prefix=OPENCL
// RUN: imex-opt --insert-gpu-allocs='client-api=vulkan is-usm-args=1' %s | FileCheck %s --check-prefix=VULKAN

// Exercises the insert-gpu-allocs pass with is-usm-args=1 for the opencl and
// vulkan client APIs. The checks below expect that the memref function
// arguments are used directly (no extra allocation, no copy), and that only
// the locally allocated temporary buffer is rewritten for opencl while it is
// left untouched for vulkan.

// OPENCL-LABEL: func.func @addt
// OPENCL-SAME: %[[arg0:.+]]: memref<2x5xf32>, %[[arg1:.+]]: memref<2x5xf32>, %[[out_buff:.+]]: memref<2x5xf32>
// VULKAN-LABEL: func.func @addt
// VULKAN-SAME: %[[arg0:.+]]: memref<2x5xf32>, %[[arg1:.+]]: memref<2x5xf32>, %[[out_buff:.+]]: memref<2x5xf32>
func.func @addt(%arg0: memref<2x5xf32>, %arg1: memref<2x5xf32>, %out_buff: memref<2x5xf32>) -> memref<2x5xf32> {
  %c0 = arith.constant 0 : index
  %c2 = arith.constant 2 : index
  %c1 = arith.constant 1 : index
  %c5 = arith.constant 5 : index
  // Negative checks for the function arguments: between the function
  // signature and the temporary-buffer allocation there must be no
  // host_shared allocation and no copy (opencl run) ...
  // OPENCL-NOT: gpu.alloc host_shared () : memref<2x5xf32>
  // OPENCL-NOT: memref.copy

  // ... and no plain (attribute-less) host allocation and no copy (vulkan run).
  // VULKAN-NOT: memref.alloc() : memref<2x5xf32>
  // VULKAN-NOT: memref.copy

  %tmp_buff = memref.alloc() {alignment = 128 : i64} : memref<2x5xf32>
  // FileCheck matches text outside of {{...}} literally, so the host
  // allocation is spelled out verbatim in the negative check below.
  // OPENCL-NOT: memref.alloc()
  // OPENCL: %[[MEMREF3:.*]] = gpu.alloc () : memref<2x5xf32>
  // VULKAN: %[[MEMREF3:.*]] = memref.alloc() {alignment = 128 : i64} : memref<2x5xf32>

  %c1_0 = arith.constant 1 : index
  // Grid extents: ceildiv(2 - 0, 1) blocks in x and ceildiv(5 - 0, 1) in y.
  %1 = affine.apply affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>(%c2)[%c0, %c1]
  %2 = affine.apply affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>(%c5)[%c0, %c1]
  gpu.launch blocks(%arg2, %arg3, %arg4) in (%arg8 = %1, %arg9 = %2, %arg10 = %c1_0) threads(%arg5, %arg6, %arg7) in (%arg11 = %c1_0, %arg12 = %c1_0, %arg13 = %c1_0) {
    // Element indices derived from the x/y block ids (id * 1 + 0).
    %3 = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>(%arg2)[%c1, %c0]
    %4 = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>(%arg3)[%c1, %c0]
    %5 = memref.load %arg0[%3, %4] : memref<2x5xf32>
    %6 = memref.load %arg1[%3, %4] : memref<2x5xf32>
    %7 = arith.addf %5, %6 : f32
    // The temporary buffer is written and read back inside the kernel.
    memref.store %7, %tmp_buff[%3, %4] : memref<2x5xf32>

    %8 = memref.load %tmp_buff[%3, %4] : memref<2x5xf32>
    %9 = arith.addf %8, %5 : f32
    memref.store %9, %out_buff[%3, %4] : memref<2x5xf32>

    gpu.terminator
  } {SCFToGPU_visited}

  // The deallocation must follow the kind of allocation checked above.
  // OPENCL-NOT: memref.dealloc %[[MEMREF3]] : memref<2x5xf32>
  // OPENCL: gpu.dealloc %[[MEMREF3]] : memref<2x5xf32>
  // VULKAN: memref.dealloc %[[MEMREF3]] : memref<2x5xf32>
  memref.dealloc %tmp_buff : memref<2x5xf32>

  return %out_buff : memref<2x5xf32>
}

0 commit comments

Comments
 (0)