@@ -41,15 +41,13 @@ module @relu attributes {gpu.container_module} {
4141 memref.copy %arg0 , %memref : memref <4 x5 xf32 > to memref <4 x5 xf32 >
4242 %memref_0 = gpu.alloc host_shared () : memref <4 x5 xi1 >
4343 %2 = gpu.wait async
44-
45- %3 = gpu.launch_func async [%2 ] @test_kernel ::@test_kernel blocks in (%c4 , %c5 , %c1 ) threads in (%c1 , %c1 , %c1 ) args (%memref : memref <4 x5 xf32 >, %cst : f32 , %memref_0 : memref <4 x5 xi1 >)
46-
44+ %3 = gpu.launch_func async [%2 ] @test_kernel ::@test_kernel blocks in (%c4 , %c5 , %c1 ) threads in (%c1 , %c1 , %c1 )
45+ args (%memref : memref <4 x5 xf32 >, %cst : f32 , %memref_0 : memref <4 x5 xi1 >)
4746 gpu.wait [%3 ]
4847 %memref_1 = gpu.alloc host_shared () : memref <4 x5 xf32 >
4948 %4 = gpu.wait async
50-
51- %5 = gpu.launch_func async [%4 ] @test_kernel_0 ::@test_kernel blocks in (%c4 , %c5 , %c1 ) threads in (%c1 , %c1 , %c1 ) args (%memref_0 : memref <4 x5 xi1 >, %memref : memref <4 x5 xf32 >, %cst : f32 ,
52-
49+ %5 = gpu.launch_func async [%4 ] @test_kernel_0 ::@test_kernel blocks in (%c4 , %c5 , %c1 ) threads in (%c1 , %c1 , %c1 )
50+ args (%memref_0 : memref <4 x5 xi1 >, %memref : memref <4 x5 xf32 >, %cst : f32 ,
5351 %memref_1 : memref <4 x5 xf32 >)
5452 gpu.wait [%5 ]
5553 %alloc = memref.alloc () : memref <4 x5 xf32 >
@@ -60,8 +58,10 @@ module @relu attributes {gpu.container_module} {
6058 %9 = gpu.dealloc async [%8 ] %memref : memref <4 x5 xf32 >
6159 return %alloc : memref <4 x5 xf32 >
6260 }
63- gpu.module @test_kernel attributes {spirv.target_env = #spirv.target_env <#spirv.vce <v1.0 , [Addresses , Int64 , Int8 , Kernel ], []>, api =OpenCL , #spirv.resource_limits <>>} {
64- gpu.func @test_kernel (%arg0: memref <4 x5 xf32 >, %arg1: f32 , %arg2: memref <4 x5 xi1 >) kernel attributes {gpu.known_block_size = array<i32 : 1 , 1 , 1 >, gpu.known_grid_size = array<i32 : 4 , 5 , 1 >, spirv.entry_point_abi = #spirv.entry_point_abi <>} {
61+ gpu.module @test_kernel
62+ attributes {spirv.target_env = #spirv.target_env <#spirv.vce <v1.0 , [Addresses , Int64 , Int8 , Kernel ], []>, api =OpenCL , #spirv.resource_limits <>>} {
63+ gpu.func @test_kernel (%arg0: memref <4 x5 xf32 >, %arg1: f32 , %arg2: memref <4 x5 xi1 >) kernel
64+ attributes {gpu.known_block_size = array<i32 : 1 , 1 , 1 >, gpu.known_grid_size = array<i32 : 4 , 5 , 1 >, spirv.entry_point_abi = #spirv.entry_point_abi <>} {
6565 %0 = gpu.block_id x
6666 %1 = gpu.block_id y
6767 %2 = memref.load %arg0 [%0 , %1 ] : memref <4 x5 xf32 >
@@ -70,8 +70,10 @@ module @relu attributes {gpu.container_module} {
7070 gpu.return
7171 }
7272 }
73- gpu.module @test_kernel_0 attributes {spirv.target_env = #spirv.target_env <#spirv.vce <v1.0 , [Addresses , Int64 , Int8 , Kernel ], []>, api =OpenCL , #spirv.resource_limits <>>} {
74- gpu.func @test_kernel (%arg0: memref <4 x5 xi1 >, %arg1: memref <4 x5 xf32 >, %arg2: f32 , %arg3: memref <4 x5 xf32 >) kernel attributes {gpu.known_block_size = array<i32 : 1 , 1 , 1 >, gpu.known_grid_size = array<i32 : 4 , 5 , 1 >, spirv.entry_point_abi = #spirv.entry_point_abi <>} {
73+ gpu.module @test_kernel_0
74+ attributes {spirv.target_env = #spirv.target_env <#spirv.vce <v1.0 , [Addresses , Int64 , Int8 , Kernel ], []>, api =OpenCL , #spirv.resource_limits <>>} {
75+ gpu.func @test_kernel (%arg0: memref <4 x5 xi1 >, %arg1: memref <4 x5 xf32 >, %arg2: f32 , %arg3: memref <4 x5 xf32 >) kernel
76+ attributes {gpu.known_block_size = array<i32 : 1 , 1 , 1 >, gpu.known_grid_size = array<i32 : 4 , 5 , 1 >, spirv.entry_point_abi = #spirv.entry_point_abi <>} {
7577 %0 = gpu.block_id x
7678 %1 = gpu.block_id y
7779 %2 = memref.load %arg0 [%0 , %1 ] : memref <4 x5 xi1 >
0 commit comments