// RUN: %python_executable %imex_runner --requires=l0-runtime -i %s --pass-pipeline-file=%p/spirv-to-llvm.pp \
// RUN: --runner imex-cpu-runner -e main \
// RUN: --entry-point-result=void \
// RUN: --shared-libs=%irunner_utils,%mlir_runner_utils,%mlir_c_runner_utils,%levelzero_runtime --filecheck
// RUN: %python_executable %imex_runner --requires=sycl-runtime -i %s --pass-pipeline-file=%p/spirv-to-llvm.pp \
// RUN: --runner imex-cpu-runner -e main \
// RUN: --entry-point-result=void \
// RUN: --shared-libs=%irunner_utils,%mlir_runner_utils,%mlir_c_runner_utils,%sycl_runtime --filecheck

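// Broadcasts a 3-element constant vector across the columns of a 3x4 result on
// the GPU and checks the output against a precomputed reference.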
module @broadcast_non_numpy attributes {gpu.container_module} {
  memref.global "private" constant @__constant_3xf32 : memref<3xf32> = dense<[1.000000e+00, 2.000000e+00, 3.000000e+00]>
  memref.global "private" constant @__constant_3x4xf32_ref_result : memref<3x4xf32> = dense<[[1.0, 1.0, 1.0, 1.0], [2.0, 2.0, 2.0, 2.0], [3.0, 3.0, 3.0, 3.0]]>
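  // Host-side driver: stages the input in host-shared memory, zero-fills a
  // scratch 3x4 buffer via @test_kernel, then performs the broadcast via
  // @test_kernel_0 (result row i replicates input element i).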
  func.func @test(%arg0: memref<3xf32>) -> memref<3x4xf32> attributes {llvm.emit_c_interface} {
    %c4 = arith.constant 4 : index
    %c3 = arith.constant 3 : index
    %cst = arith.constant 0.000000e+00 : f32
    %c1 = arith.constant 1 : index
    %memref = gpu.alloc host_shared () : memref<3xf32>
    memref.copy %arg0, %memref : memref<3xf32> to memref<3xf32>
    %memref_0 = gpu.alloc () : memref<3x4xf32>
    gpu.launch_func @test_kernel::@test_kernel blocks in (%c3, %c4, %c1) threads in (%c1, %c1, %c1) args(%cst : f32, %memref_0 : memref<3x4xf32>)
    %memref_1 = gpu.alloc host_shared () : memref<3x4xf32>
    gpu.launch_func @test_kernel_0::@test_kernel blocks in (%c3, %c4, %c1) threads in (%c1, %c1, %c1) args(%memref : memref<3xf32>, %memref_1 : memref<3x4xf32>)
    gpu.dealloc %memref_0 : memref<3x4xf32>
    gpu.dealloc %memref : memref<3xf32>
    return %memref_1 : memref<3x4xf32>
  }
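  // SPIR-V version of gpu.module @test_kernel below: each workgroup stores the
  // f32 argument at the linearized offset bx * 4 + by of the 12-element array.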
  spirv.module @__spv__test_kernel Physical64 OpenCL requires #spirv.vce<v1.0, [Int64, Kernel, Addresses], []> attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Float16Buffer, Int64, Int16, Int8, Kernel, Linkage, Vector16, GenericPointer, Groups, Float16, Float64, AtomicFloat32AddEXT, ExpectAssumeKHR], [SPV_EXT_shader_atomic_float_add, SPV_KHR_expect_assume]>, api=OpenCL, #spirv.resource_limits<>>} {
    spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi64>, Input>
    spirv.func @test_kernel(%arg0: f32, %arg1: !spirv.ptr<!spirv.array<12 x f32>, CrossWorkgroup>) "None" attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 3, 4, 1>, workgroup_attributions = 0 : i64} {
      %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi64>, Input>
      %0 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi64>
      %1 = spirv.CompositeExtract %0[0 : i32] : vector<3xi64>
      %__builtin_var_WorkgroupId___addr_0 = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi64>, Input>
      %2 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr_0 : vector<3xi64>
      %3 = spirv.CompositeExtract %2[1 : i32] : vector<3xi64>
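      // Linearize (bx, by) into the flat offset bx * 4 + by and store there.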
      %cst0_i64 = spirv.Constant 0 : i64
      %cst4_i64 = spirv.Constant 4 : i64
      %4 = spirv.IMul %cst4_i64, %1 : i64
      %5 = spirv.IAdd %cst0_i64, %4 : i64
      %cst1_i64 = spirv.Constant 1 : i64
      %6 = spirv.IMul %cst1_i64, %3 : i64
      %7 = spirv.IAdd %5, %6 : i64
      %8 = spirv.AccessChain %arg1[%7] : !spirv.ptr<!spirv.array<12 x f32>, CrossWorkgroup>, i64
      spirv.Store "CrossWorkgroup" %8, %arg0 : f32
      spirv.Return
    }
    spirv.EntryPoint "Kernel" @test_kernel, @__builtin_var_WorkgroupId__
  }
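  // GPU-dialect form of the fill kernel: one workgroup per output element.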
  gpu.module @test_kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Float16Buffer, Int64, Int16, Int8, Kernel, Linkage, Vector16, GenericPointer, Groups, Float16, Float64, AtomicFloat32AddEXT, ExpectAssumeKHR], [SPV_EXT_shader_atomic_float_add, SPV_KHR_expect_assume]>, api=OpenCL, #spirv.resource_limits<>>} {
    gpu.func @test_kernel(%arg0: f32, %arg1: memref<3x4xf32>) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 3, 4, 1>, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
      %0 = gpu.block_id x
      %1 = gpu.block_id y
      memref.store %arg0, %arg1[%0, %1] : memref<3x4xf32>
      gpu.return
    }
  }
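  // SPIR-V version of gpu.module @test_kernel_0 below: loads input[bx] and
  // stores it at the linearized offset bx * 4 + by of the output.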
  spirv.module @__spv__test_kernel_0 Physical64 OpenCL requires #spirv.vce<v1.0, [Int64, Kernel, Addresses], []> attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Float16Buffer, Int64, Int16, Int8, Kernel, Linkage, Vector16, GenericPointer, Groups, Float16, Float64, AtomicFloat32AddEXT, ExpectAssumeKHR], [SPV_EXT_shader_atomic_float_add, SPV_KHR_expect_assume]>, api=OpenCL, #spirv.resource_limits<>>} {
    spirv.GlobalVariable @__builtin_var_WorkgroupId__ built_in("WorkgroupId") : !spirv.ptr<vector<3xi64>, Input>
    spirv.func @test_kernel(%arg0: !spirv.ptr<!spirv.array<3 x f32>, CrossWorkgroup>, %arg1: !spirv.ptr<!spirv.array<12 x f32>, CrossWorkgroup>) "None" attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 3, 4, 1>, workgroup_attributions = 0 : i64} {
      %__builtin_var_WorkgroupId___addr = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi64>, Input>
      %0 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr : vector<3xi64>
      %1 = spirv.CompositeExtract %0[0 : i32] : vector<3xi64>
      %__builtin_var_WorkgroupId___addr_0 = spirv.mlir.addressof @__builtin_var_WorkgroupId__ : !spirv.ptr<vector<3xi64>, Input>
      %2 = spirv.Load "Input" %__builtin_var_WorkgroupId___addr_0 : vector<3xi64>
      %3 = spirv.CompositeExtract %2[1 : i32] : vector<3xi64>
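      // Compute the flat offset bx into the 3-element input and load it.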
      %cst0_i64 = spirv.Constant 0 : i64
      %cst1_i64 = spirv.Constant 1 : i64
      %4 = spirv.IMul %cst1_i64, %1 : i64
      %5 = spirv.IAdd %cst0_i64, %4 : i64
      %6 = spirv.AccessChain %arg0[%5] : !spirv.ptr<!spirv.array<3 x f32>, CrossWorkgroup>, i64
      %7 = spirv.Load "CrossWorkgroup" %6 : f32
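      // Store the loaded value at the flat output offset bx * 4 + by.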
      %cst0_i64_1 = spirv.Constant 0 : i64
      %cst4_i64 = spirv.Constant 4 : i64
      %8 = spirv.IMul %cst4_i64, %1 : i64
      %9 = spirv.IAdd %cst0_i64_1, %8 : i64
      %cst1_i64_2 = spirv.Constant 1 : i64
      %10 = spirv.IMul %cst1_i64_2, %3 : i64
      %11 = spirv.IAdd %9, %10 : i64
      %12 = spirv.AccessChain %arg1[%11] : !spirv.ptr<!spirv.array<12 x f32>, CrossWorkgroup>, i64
      spirv.Store "CrossWorkgroup" %12, %7 : f32
      spirv.Return
    }
    spirv.EntryPoint "Kernel" @test_kernel, @__builtin_var_WorkgroupId__
  }
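  // GPU-dialect form of the broadcast kernel.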
  gpu.module @test_kernel_0 attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Float16Buffer, Int64, Int16, Int8, Kernel, Linkage, Vector16, GenericPointer, Groups, Float16, Float64, AtomicFloat32AddEXT, ExpectAssumeKHR], [SPV_EXT_shader_atomic_float_add, SPV_KHR_expect_assume]>, api=OpenCL, #spirv.resource_limits<>>} {
    gpu.func @test_kernel(%arg0: memref<3xf32>, %arg1: memref<3x4xf32>) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 3, 4, 1>, spirv.entry_point_abi = #spirv.entry_point_abi<>} {
      %0 = gpu.block_id x
      %1 = gpu.block_id y
      %2 = memref.load %arg0[%0] : memref<3xf32>
      memref.store %2, %arg1[%0, %1] : memref<3x4xf32>
      gpu.return
    }
  }
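  // Test harness: runs the broadcast 100 times, comparing each result against
  // the reference and printing it; FileCheck verifies the allclose report.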
  func.func @main() attributes {llvm.emit_c_interface} {
    %c0 = arith.constant 0 : index
    %c1 = arith.constant 1 : index
    %c100 = arith.constant 100 : index

    %0 = memref.get_global @__constant_3xf32 : memref<3xf32>
    %ref_result = memref.get_global @__constant_3x4xf32_ref_result : memref<3x4xf32>
    %unranked_ref_result = memref.cast %ref_result : memref<3x4xf32> to memref<*xf32>

    scf.for %arg0 = %c0 to %c100 step %c1 {
      %1 = func.call @test(%0) : (memref<3xf32>) -> memref<3x4xf32>
      %cast = memref.cast %1 : memref<3x4xf32> to memref<*xf32>
      func.call @printAllcloseF32(%cast, %unranked_ref_result) : (memref<*xf32>, memref<*xf32>) -> ()
      func.call @printMemrefF32(%cast) : (memref<*xf32>) -> ()
      // CHECK: [ALLCLOSE: TRUE]
    }
    return
  }
  func.func private @printAllcloseF32(memref<*xf32>, memref<*xf32>) attributes {llvm.emit_c_interface}
  func.func private @printMemrefF32(memref<*xf32>) attributes {llvm.emit_c_interface}
}