|
1 | | -// RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-cf-to-llvm,convert-arith-to-llvm,convert-math-to-llvm,convert-func-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,finalize-memref-to-llvm,reconcile-unrealized-casts)' \ |
| 1 | +// RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,reconcile-unrealized-casts)' \ |
2 | 2 | // RUN: | mlir-runner \ |
3 | | -// RUN: --shared-libs=%mlir_sycl_runtime \ |
| 3 | +// RUN: --shared-libs=%mlir_levelzero_runtime \ |
4 | 4 | // RUN: --shared-libs=%mlir_runner_utils \ |
5 | 5 | // RUN: --entry-point-result=void \ |
6 | 6 | // RUN: | FileCheck %s |
@@ -41,11 +41,16 @@ module @relu attributes {gpu.container_module} { |
41 | 41 | memref.copy %arg0, %memref : memref<4x5xf32> to memref<4x5xf32> |
42 | 42 | %memref_0 = gpu.alloc host_shared () : memref<4x5xi1> |
43 | 43 | %2 = gpu.wait async |
| 44 | + |
44 | 45 | %3 = gpu.launch_func async [%2] @test_kernel::@test_kernel blocks in (%c4, %c5, %c1) threads in (%c1, %c1, %c1) args(%memref : memref<4x5xf32>, %cst : f32, %memref_0 : memref<4x5xi1>) |
| 46 | + |
45 | 47 | gpu.wait [%3] |
46 | 48 | %memref_1 = gpu.alloc host_shared () : memref<4x5xf32> |
47 | 49 | %4 = gpu.wait async |
48 | | - %5 = gpu.launch_func async [%4] @test_kernel_0::@test_kernel blocks in (%c4, %c5, %c1) threads in (%c1, %c1, %c1) args(%memref_0 : memref<4x5xi1>, %memref : memref<4x5xf32>, %cst : f32, %memref_1 : memref<4x5xf32>) |
| 50 | + |
| 51 | + %5 = gpu.launch_func async [%4] @test_kernel_0::@test_kernel blocks in (%c4, %c5, %c1) threads in (%c1, %c1, %c1) args(%memref_0 : memref<4x5xi1>, %memref : memref<4x5xf32>, %cst : f32, |
| 52 | + |
| 53 | + %memref_1 : memref<4x5xf32>) |
49 | 54 | gpu.wait [%5] |
50 | 55 | %alloc = memref.alloc() : memref<4x5xf32> |
51 | 56 | memref.copy %memref_1, %alloc : memref<4x5xf32> to memref<4x5xf32> |
|
0 commit comments