11// RUN: gc-opt %s --gc-gpu-pipeline -split-input-file | FileCheck %s
22
33// CHECK-LABEL: llvm
4- module @fragment_name attributes {" #dlti.sys_spec" = #dlti.target_system_spec <" CPU " : #dlti.target_device_spec <#dlti.dl_entry <" tile_size " , 32 : i32 >>>} {
4+ module @fragment_name attributes {" #dlti.sys_spec" = #dlti.target_system_spec <" GPU " : #dlti.target_device_spec <#dlti.dl_entry <" num_exec_units " , 448 : i32 >, #dlti.dl_entry < " num_exec_units_per_slice " , 32 : i32 >, #dlti.dl_entry < " num_threads_per_eu " , 8 : i32 >, #dlti.dl_entry < " L1_cache_size_in_bytes " , 67108864 : i32 >, #dlti.dl_entry < " max_vector_op_width " , 256 : i32 >, #dlti.dl_entry < " max_work_group_size " , 1024 : i32 >>>} {
55 func.func @matmul_f16 (%arg0: memref <4096 x4096 xf16 >, %arg1: memref <4096 x4096 xf16 >, %arg2: memref <4096 x4096 xf16 >) {
66 %0 = bufferization.to_tensor %arg0 restrict : memref <4096 x4096 xf16 >
77 %1 = bufferization.to_tensor %arg1 restrict : memref <4096 x4096 xf16 >
@@ -16,7 +16,7 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C
1616
1717// -----
1818// CHECK-LABEL: llvm
19- module @fragment_name attributes {" #dlti.sys_spec" = #dlti.target_system_spec <" CPU " : #dlti.target_device_spec <#dlti.dl_entry <" tile_size " , 32 : i32 >>>} {
19+ module @fragment_name attributes {" #dlti.sys_spec" = #dlti.target_system_spec <" GPU " : #dlti.target_device_spec <#dlti.dl_entry <" num_exec_units " , 448 : i32 >, #dlti.dl_entry < " num_exec_units_per_slice " , 32 : i32 >, #dlti.dl_entry < " num_threads_per_eu " , 8 : i32 >, #dlti.dl_entry < " L1_cache_size_in_bytes " , 67108864 : i32 >, #dlti.dl_entry < " max_vector_op_width " , 256 : i32 >, #dlti.dl_entry < " max_work_group_size " , 1024 : i32 >>>} {
2020 func.func @corner_shape_matmul_f16 (%arg0: memref <521 x521 xf16 >, %arg1: memref <521 x521 xf16 >, %arg2: memref <521 x521 xf16 >) {
2121 %0 = bufferization.to_tensor %arg0 restrict : memref <521 x521 xf16 >
2222 %1 = bufferization.to_tensor %arg1 restrict : memref <521 x521 xf16 >
@@ -31,7 +31,7 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C
3131
3232// -----
3333// CHECK-LABEL: llvm
34- module @fragment_name attributes {" #dlti.sys_spec" = #dlti.target_system_spec <" CPU " : #dlti.target_device_spec <#dlti.dl_entry <" tile_size " , 32 : i32 >>>} {
34+ module @fragment_name attributes {" #dlti.sys_spec" = #dlti.target_system_spec <" GPU " : #dlti.target_device_spec <#dlti.dl_entry <" num_exec_units " , 448 : i32 >, #dlti.dl_entry < " num_exec_units_per_slice " , 32 : i32 >, #dlti.dl_entry < " num_threads_per_eu " , 8 : i32 >, #dlti.dl_entry < " L1_cache_size_in_bytes " , 67108864 : i32 >, #dlti.dl_entry < " max_vector_op_width " , 256 : i32 >, #dlti.dl_entry < " max_work_group_size " , 1024 : i32 >>>} {
3535 func.func @dynamic_matmul_f16 (%arg0: memref <?x?xf16 >, %arg1: memref <1024 x1024 xf16 >, %arg2: memref <?x1024 xf16 >) {
3636 %0 = bufferization.to_tensor %arg0 restrict : memref <?x?xf16 >
3737 %c0 = arith.constant 0 : index
@@ -50,7 +50,7 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C
5050
5151// -----
5252// CHECK-LABEL: llvm
53- module @fragment_name attributes {" #dlti.sys_spec" = #dlti.target_system_spec <" CPU " : #dlti.target_device_spec <#dlti.dl_entry <" tile_size " , 32 : i32 >>>} {
53+ module @fragment_name attributes {" #dlti.sys_spec" = #dlti.target_system_spec <" GPU " : #dlti.target_device_spec <#dlti.dl_entry <" num_exec_units " , 448 : i32 >, #dlti.dl_entry < " num_exec_units_per_slice " , 32 : i32 >, #dlti.dl_entry < " num_threads_per_eu " , 8 : i32 >, #dlti.dl_entry < " L1_cache_size_in_bytes " , 67108864 : i32 >, #dlti.dl_entry < " max_vector_op_width " , 256 : i32 >, #dlti.dl_entry < " max_work_group_size " , 1024 : i32 >>>} {
5454 func.func @matmul_bf16 (%arg0: memref <4096 x4096 xbf16 >, %arg1: memref <4096 x4096 xbf16 >, %arg2: memref <4096 x4096 xbf16 >) {
5555 %0 = bufferization.to_tensor %arg0 restrict : memref <4096 x4096 xbf16 >
5656 %1 = bufferization.to_tensor %arg1 restrict : memref <4096 x4096 xbf16 >
@@ -65,7 +65,7 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C
6565
6666// -----
6767// CHECK-LABEL: llvm
68- module @fragment_name attributes {" #dlti.sys_spec" = #dlti.target_system_spec <" CPU " : #dlti.target_device_spec <#dlti.dl_entry <" tile_size " , 32 : i32 >>>} {
68+ module @fragment_name attributes {" #dlti.sys_spec" = #dlti.target_system_spec <" GPU " : #dlti.target_device_spec <#dlti.dl_entry <" num_exec_units " , 448 : i32 >, #dlti.dl_entry < " num_exec_units_per_slice " , 32 : i32 >, #dlti.dl_entry < " num_threads_per_eu " , 8 : i32 >, #dlti.dl_entry < " L1_cache_size_in_bytes " , 67108864 : i32 >, #dlti.dl_entry < " max_vector_op_width " , 256 : i32 >, #dlti.dl_entry < " max_work_group_size " , 1024 : i32 >>>} {
6969 func.func @matmul_f32 (%arg0: memref <4096 x4096 xf32 >, %arg1: memref <4096 x4096 xf32 >, %arg2: memref <4096 x4096 xf32 >) {
7070 %0 = bufferization.to_tensor %arg0 restrict : memref <4096 x4096 xf32 >
7171 %1 = bufferization.to_tensor %arg1 restrict : memref <4096 x4096 xf32 >
@@ -76,4 +76,4 @@ module @fragment_name attributes {"#dlti.sys_spec" = #dlti.target_system_spec<"C
7676 bufferization.materialize_in_destination %4 in restrict writable %arg2 : (tensor <4096 x4096 xf32 >, memref <4096 x4096 xf32 >) -> ()
7777 return
7878 }
79- }
79+ }
0 commit comments