Skip to content

Commit 041557b

Browse files
committed
add functionality test
1 parent 6feea7f commit 041557b

File tree

3 files changed

+213
-0
lines changed

3 files changed

+213
-0
lines changed
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// RUN: gc-opt %s --gc-gpu-pipeline -split-input-file | FileCheck %s

// CHECK-LABEL: llvm
// Elementwise f16 multiply: C = A * B on 1024x1024 buffers, lowered through
// the GPU pipeline; the test only checks the output reaches the LLVM dialect.
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
  func.func @multiply(%arg0: memref<1024x1024xf16>, %arg1: memref<1024x1024xf16>, %arg2: memref<1024x1024xf16>, %arg3: memref<1024x1024xf16>) {
    %0 = bufferization.to_tensor %arg0 restrict : memref<1024x1024xf16>
    %1 = bufferization.to_tensor %arg1 restrict : memref<1024x1024xf16>
    // NOTE(review): %2 (and %arg2) is never used below — presumably kept to
    // match a fixed runner signature; confirm.
    %2 = bufferization.to_tensor %arg2 restrict : memref<1024x1024xf16>
    %3 = tensor.empty() : tensor<1024x1024xf16>
    %4 = linalg.mul ins(%0, %1 : tensor<1024x1024xf16>, tensor<1024x1024xf16>) outs(%3 : tensor<1024x1024xf16>) -> tensor<1024x1024xf16>
    // Write the result into the caller-provided output buffer %arg3.
    bufferization.materialize_in_destination %4 in restrict writable %arg3 : (tensor<1024x1024xf16>, memref<1024x1024xf16>) -> ()
    return
  }
}

// -----

// CHECK-LABEL: llvm
// Elementwise f16 add: C = A + B on 1024x1024 buffers; same structure as the
// multiply case above, only the linalg named op differs.
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
  func.func @add(%arg0: memref<1024x1024xf16>, %arg1: memref<1024x1024xf16>, %arg2: memref<1024x1024xf16>, %arg3: memref<1024x1024xf16>) {
    %0 = bufferization.to_tensor %arg0 restrict : memref<1024x1024xf16>
    %1 = bufferization.to_tensor %arg1 restrict : memref<1024x1024xf16>
    // NOTE(review): %2 (and %arg2) is unused — presumably a fixed runner
    // signature; confirm.
    %2 = bufferization.to_tensor %arg2 restrict : memref<1024x1024xf16>
    %3 = tensor.empty() : tensor<1024x1024xf16>
    %4 = linalg.add ins(%0, %1 : tensor<1024x1024xf16>, tensor<1024x1024xf16>) outs(%3 : tensor<1024x1024xf16>) -> tensor<1024x1024xf16>
    bufferization.materialize_in_destination %4 in restrict writable %arg3 : (tensor<1024x1024xf16>, memref<1024x1024xf16>) -> ()
    return
  }
}

// -----

// CHECK-LABEL: llvm
// Elementwise f16 subtract: C = A - B on 1024x1024 buffers.
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
  func.func @subtract(%arg0: memref<1024x1024xf16>, %arg1: memref<1024x1024xf16>, %arg2: memref<1024x1024xf16>, %arg3: memref<1024x1024xf16>) {
    %0 = bufferization.to_tensor %arg0 restrict : memref<1024x1024xf16>
    %1 = bufferization.to_tensor %arg1 restrict : memref<1024x1024xf16>
    // NOTE(review): %2 (and %arg2) is unused — presumably a fixed runner
    // signature; confirm.
    %2 = bufferization.to_tensor %arg2 restrict : memref<1024x1024xf16>
    %3 = tensor.empty() : tensor<1024x1024xf16>
    %4 = linalg.sub ins(%0, %1 : tensor<1024x1024xf16>, tensor<1024x1024xf16>) outs(%3 : tensor<1024x1024xf16>) -> tensor<1024x1024xf16>
    bufferization.materialize_in_destination %4 in restrict writable %arg3 : (tensor<1024x1024xf16>, memref<1024x1024xf16>) -> ()
    return
  }
}

// -----

// CHECK-LABEL: llvm
// Elementwise f16 divide: C = A / B on 1024x1024 buffers.
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
  func.func @divide(%arg0: memref<1024x1024xf16>, %arg1: memref<1024x1024xf16>, %arg2: memref<1024x1024xf16>, %arg3: memref<1024x1024xf16>) {
    %0 = bufferization.to_tensor %arg0 restrict : memref<1024x1024xf16>
    %1 = bufferization.to_tensor %arg1 restrict : memref<1024x1024xf16>
    // NOTE(review): %2 (and %arg2) is unused — presumably a fixed runner
    // signature; confirm.
    %2 = bufferization.to_tensor %arg2 restrict : memref<1024x1024xf16>
    %3 = tensor.empty() : tensor<1024x1024xf16>
    %4 = linalg.div ins(%0, %1 : tensor<1024x1024xf16>, tensor<1024x1024xf16>) outs(%3 : tensor<1024x1024xf16>) -> tensor<1024x1024xf16>
    bufferization.materialize_in_destination %4 in restrict writable %arg3 : (tensor<1024x1024xf16>, memref<1024x1024xf16>) -> ()
    return
  }
}
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// RUN: gc-opt %s --gc-gpu-pipeline -split-input-file | FileCheck %s

// CHECK-LABEL: llvm
// f16 matmul with transposed B operand: C = A x B^T, 4096x4096, with the
// accumulator zero-initialized via linalg.fill. Only checks lowering reaches
// the LLVM dialect.
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
  func.func @matmul_f16(%arg0: memref<4096x4096xf16>, %arg1: memref<4096x4096xf16>, %arg2: memref<4096x4096xf16>) {
    %0 = bufferization.to_tensor %arg0 restrict : memref<4096x4096xf16>
    %1 = bufferization.to_tensor %arg1 restrict : memref<4096x4096xf16>
    %2 = tensor.empty() : tensor<4096x4096xf16>
    %cst = arith.constant 0.000000e+00 : f16
    // Zero the accumulator before the matmul.
    %3 = linalg.fill ins(%cst : f16) outs(%2 : tensor<4096x4096xf16>) -> tensor<4096x4096xf16>
    %4 = linalg.matmul_transpose_b ins(%0, %1 : tensor<4096x4096xf16>, tensor<4096x4096xf16>) outs(%3 : tensor<4096x4096xf16>) -> tensor<4096x4096xf16>
    bufferization.materialize_in_destination %4 in restrict writable %arg2 : (tensor<4096x4096xf16>, memref<4096x4096xf16>) -> ()
    return
  }
}

// -----

// CHECK-LABEL: llvm
// Same transposed-B f16 matmul, but with a 521x521 shape that is not a
// multiple of the 32-element tile_size — exercises remainder/corner handling.
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
  func.func @corner_shape_matmul_f16(%arg0: memref<521x521xf16>, %arg1: memref<521x521xf16>, %arg2: memref<521x521xf16>) {
    %0 = bufferization.to_tensor %arg0 restrict : memref<521x521xf16>
    %1 = bufferization.to_tensor %arg1 restrict : memref<521x521xf16>
    %2 = tensor.empty() : tensor<521x521xf16>
    %cst = arith.constant 0.000000e+00 : f16
    %3 = linalg.fill ins(%cst : f16) outs(%2 : tensor<521x521xf16>) -> tensor<521x521xf16>
    %4 = linalg.matmul_transpose_b ins(%0, %1 : tensor<521x521xf16>, tensor<521x521xf16>) outs(%3 : tensor<521x521xf16>) -> tensor<521x521xf16>
    bufferization.materialize_in_destination %4 in restrict writable %arg2 : (tensor<521x521xf16>, memref<521x521xf16>) -> ()
    return
  }
}

// -----

// CHECK-LABEL: llvm
// Transposed-B f16 matmul with a dynamically shaped LHS (?x?); the output's
// dynamic row count is recovered with tensor.dim.
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
  func.func @dynamic_matmul_f16(%arg0: memref<?x?xf16>, %arg1: memref<1024x1024xf16>, %arg2: memref<?x1024xf16>) {
    %0 = bufferization.to_tensor %arg0 restrict : memref<?x?xf16>
    %c0 = arith.constant 0 : index
    // Dynamic row count of the LHS; sizes the ?x1024 result below.
    %dim = tensor.dim %0, %c0 : tensor<?x?xf16>
    %c1 = arith.constant 1 : index
    // NOTE(review): %dim_0 is computed but never used below.
    %dim_0 = tensor.dim %0, %c1 : tensor<?x?xf16>
    %1 = bufferization.to_tensor %arg1 restrict : memref<1024x1024xf16>
    %2 = tensor.empty(%dim) : tensor<?x1024xf16>
    %cst = arith.constant 0.000000e+00 : f16
    %3 = linalg.fill ins(%cst : f16) outs(%2 : tensor<?x1024xf16>) -> tensor<?x1024xf16>
    %4 = linalg.matmul_transpose_b ins(%0, %1 : tensor<?x?xf16>, tensor<1024x1024xf16>) outs(%3 : tensor<?x1024xf16>) -> tensor<?x1024xf16>
    bufferization.materialize_in_destination %4 in restrict writable %arg2 : (tensor<?x1024xf16>, memref<?x1024xf16>) -> ()
    return
  }
}

// -----

// CHECK-LABEL: llvm
// bf16 variant of the transposed-B matmul (4096x4096).
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
  func.func @matmul_bf16(%arg0: memref<4096x4096xbf16>, %arg1: memref<4096x4096xbf16>, %arg2: memref<4096x4096xbf16>) {
    %0 = bufferization.to_tensor %arg0 restrict : memref<4096x4096xbf16>
    %1 = bufferization.to_tensor %arg1 restrict : memref<4096x4096xbf16>
    %2 = tensor.empty() : tensor<4096x4096xbf16>
    %cst = arith.constant 0.000000e+00 : bf16
    %3 = linalg.fill ins(%cst : bf16) outs(%2 : tensor<4096x4096xbf16>) -> tensor<4096x4096xbf16>
    %4 = linalg.matmul_transpose_b ins(%0, %1 : tensor<4096x4096xbf16>, tensor<4096x4096xbf16>) outs(%3 : tensor<4096x4096xbf16>) -> tensor<4096x4096xbf16>
    bufferization.materialize_in_destination %4 in restrict writable %arg2 : (tensor<4096x4096xbf16>, memref<4096x4096xbf16>) -> ()
    return
  }
}

// -----

// CHECK-LABEL: llvm
// f32 variant of the transposed-B matmul (4096x4096).
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
  func.func @matmul_f32(%arg0: memref<4096x4096xf32>, %arg1: memref<4096x4096xf32>, %arg2: memref<4096x4096xf32>) {
    %0 = bufferization.to_tensor %arg0 restrict : memref<4096x4096xf32>
    %1 = bufferization.to_tensor %arg1 restrict : memref<4096x4096xf32>
    %2 = tensor.empty() : tensor<4096x4096xf32>
    %cst = arith.constant 0.000000e+00 : f32
    %3 = linalg.fill ins(%cst : f32) outs(%2 : tensor<4096x4096xf32>) -> tensor<4096x4096xf32>
    %4 = linalg.matmul_transpose_b ins(%0, %1 : tensor<4096x4096xf32>, tensor<4096x4096xf32>) outs(%3 : tensor<4096x4096xf32>) -> tensor<4096x4096xf32>
    bufferization.materialize_in_destination %4 in restrict writable %arg2 : (tensor<4096x4096xf32>, memref<4096x4096xf32>) -> ()
    return
  }
}
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
// RUN: gc-opt %s --gc-gpu-pipeline -split-input-file | FileCheck %s

// CHECK-LABEL: llvm
// ReLU on f16: max(x, 0) built from linalg.fill(0) + linalg.max; only checks
// lowering reaches the LLVM dialect.
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
  func.func @relu_f16(%arg0: memref<1024x1024xf16>, %arg1: memref<1024x1024xf16>) {
    %0 = bufferization.to_tensor %arg0 restrict : memref<1024x1024xf16>
    %1 = tensor.empty() : tensor<1024x1024xf16>
    %cst = arith.constant 0.000000e+00 : f16
    // All-zero tensor used as the second max operand.
    %2 = linalg.fill ins(%cst : f16) outs(%1 : tensor<1024x1024xf16>) -> tensor<1024x1024xf16>
    %3 = linalg.max ins(%0, %2 : tensor<1024x1024xf16>, tensor<1024x1024xf16>) outs(%1 : tensor<1024x1024xf16>) -> tensor<1024x1024xf16>
    bufferization.materialize_in_destination %3 in restrict writable %arg1 : (tensor<1024x1024xf16>, memref<1024x1024xf16>) -> ()
    return
  }
}

// -----

// CHECK-LABEL: llvm
// ReLU with fully dynamic shape (?x? f16); both dims are recovered with
// tensor.dim to size the zero tensor.
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
  func.func @dynamic_relu(%arg0: memref<?x?xf16>, %arg1: memref<?x?xf16>) {
    %0 = bufferization.to_tensor %arg0 restrict : memref<?x?xf16>
    %c0 = arith.constant 0 : index
    %dim = tensor.dim %0, %c0 : tensor<?x?xf16>
    %c1 = arith.constant 1 : index
    %dim_0 = tensor.dim %0, %c1 : tensor<?x?xf16>
    %1 = tensor.empty(%dim, %dim_0) : tensor<?x?xf16>
    %cst = arith.constant 0.000000e+00 : f16
    %2 = linalg.fill ins(%cst : f16) outs(%1 : tensor<?x?xf16>) -> tensor<?x?xf16>
    %3 = linalg.max ins(%0, %2 : tensor<?x?xf16>, tensor<?x?xf16>) outs(%1 : tensor<?x?xf16>) -> tensor<?x?xf16>
    bufferization.materialize_in_destination %3 in restrict writable %arg1 : (tensor<?x?xf16>, memref<?x?xf16>) -> ()
    return
  }
}

// -----

// CHECK-LABEL: llvm
// bf16 variant of the ReLU test.
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
  func.func @relu_bf16(%arg0: memref<1024x1024xbf16>, %arg1: memref<1024x1024xbf16>) {
    %0 = bufferization.to_tensor %arg0 restrict : memref<1024x1024xbf16>
    %1 = tensor.empty() : tensor<1024x1024xbf16>
    %cst = arith.constant 0.000000e+00 : bf16
    %2 = linalg.fill ins(%cst : bf16) outs(%1 : tensor<1024x1024xbf16>) -> tensor<1024x1024xbf16>
    %3 = linalg.max ins(%0, %2 : tensor<1024x1024xbf16>, tensor<1024x1024xbf16>) outs(%1 : tensor<1024x1024xbf16>) -> tensor<1024x1024xbf16>
    bufferization.materialize_in_destination %3 in restrict writable %arg1 : (tensor<1024x1024xbf16>, memref<1024x1024xbf16>) -> ()
    return
  }
}

// -----

// CHECK-LABEL: llvm
// f32 variant of the ReLU test.
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
  func.func @relu_f32(%arg0: memref<1024x1024xf32>, %arg1: memref<1024x1024xf32>) {
    %0 = bufferization.to_tensor %arg0 restrict : memref<1024x1024xf32>
    %1 = tensor.empty() : tensor<1024x1024xf32>
    %cst = arith.constant 0.000000e+00 : f32
    %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1024x1024xf32>) -> tensor<1024x1024xf32>
    %3 = linalg.max ins(%0, %2 : tensor<1024x1024xf32>, tensor<1024x1024xf32>) outs(%1 : tensor<1024x1024xf32>) -> tensor<1024x1024xf32>
    bufferization.materialize_in_destination %3 in restrict writable %arg1 : (tensor<1024x1024xf32>, memref<1024x1024xf32>) -> ()
    return
  }
}

// -----

// CHECK-LABEL: llvm
// f32 ReLU with a 1061x1061 shape that is not a multiple of the 32-element
// tile_size — exercises remainder/corner handling.
module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"CPU" : #dlti.target_device_spec<#dlti.dl_entry<"tile_size", 32 : i32>>>} {
  func.func @relu_f32_corner_shape(%arg0: memref<1061x1061xf32>, %arg1: memref<1061x1061xf32>) {
    %0 = bufferization.to_tensor %arg0 restrict : memref<1061x1061xf32>
    %1 = tensor.empty() : tensor<1061x1061xf32>
    %cst = arith.constant 0.000000e+00 : f32
    %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1061x1061xf32>) -> tensor<1061x1061xf32>
    %3 = linalg.max ins(%0, %2 : tensor<1061x1061xf32>, tensor<1061x1061xf32>) outs(%1 : tensor<1061x1061xf32>) -> tensor<1061x1061xf32>
    bufferization.materialize_in_destination %3 in restrict writable %arg1 : (tensor<1061x1061xf32>, memref<1061x1061xf32>) -> ()
    return
  }
}
77+

0 commit comments

Comments
 (0)