// RUN: gc-opt %s --gc-gpu-pipeline | FileCheck %s

// Checks that the gc-gpu-pipeline lowers GPU device-memory management
// (gpu.alloc / gpu.memcpy / gpu.dealloc) and gpu.launch inside a
// gpu.container_module into calls to the gcGpuOcl* OpenCL runtime wrappers,
// emits the lazily-initialized kernel-pointer global together with its
// create/get helper functions, and registers a module destructor that
// releases the cached kernel via gcGpuOclKernelDestroy.
module @test attributes {gpu.container_module} {
  llvm.func @entry(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: i64, %arg3: i64, %arg4: i64, %arg5: i64, %arg6: i64, %arg7: !llvm.ptr, %arg8: !llvm.ptr, %arg9: i64) attributes {llvm.emit_c_interface} {
    // Reassemble a 64x64xf32 memref descriptor from the raw C-interface
    // arguments (allocated ptr, aligned ptr, offset, 2 sizes, 2 strides).
    %0 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
    %1 = llvm.insertvalue %arg0, %0[0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
    %2 = llvm.insertvalue %arg1, %1[1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
    %3 = llvm.insertvalue %arg2, %2[2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
    %4 = llvm.insertvalue %arg3, %3[3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
    %5 = llvm.insertvalue %arg4, %4[4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
    %6 = llvm.insertvalue %arg5, %5[3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
    %7 = llvm.insertvalue %arg6, %6[4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
    %8 = builtin.unrealized_conversion_cast %7 : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to memref<64x64xf32>
    // Host-shared device buffer: expected to lower to gcGpuOclMallocShared,
    // and the copies below to gcGpuOclMemcpy (64*64*4 = 16384 bytes).
    %gpu_mem = gpu.alloc host_shared () : memref<64x64xf32>
    gpu.memcpy %gpu_mem, %8 : memref<64x64xf32>, memref<64x64xf32>
    %9 = llvm.mlir.constant(32 : index) : i64
    %10 = builtin.unrealized_conversion_cast %9 : i64 to index
    %11 = llvm.mlir.constant(2 : index) : i64
    %12 = builtin.unrealized_conversion_cast %11 : i64 to index
    %13 = llvm.mlir.constant(1 : index) : i64
    %14 = builtin.unrealized_conversion_cast %13 : i64 to index

    %floaat = llvm.mlir.constant(1.1 : f32) : f32
    %a_ptr_as_idx = memref.extract_aligned_pointer_as_index %gpu_mem : memref<64x64xf32> -> index
    %a_ptr_as_i64 = arith.index_cast %a_ptr_as_idx : index to i64
    %a_ptr = llvm.inttoptr %a_ptr_as_i64 : i64 to !llvm.ptr
    %a_ptr_casted = llvm.addrspacecast %a_ptr : !llvm.ptr to !llvm.ptr<1>

    // 2x2x2 grid of single-thread blocks; the body stores one f32 through
    // the addrspace(1) pointer. Expected to become gcGpuOclKernelLaunch on
    // the lazily created kernel.
    gpu.launch blocks(%arg10, %arg11, %arg12) in (%arg16 = %12, %arg17 = %12, %arg18 = %12) threads(%arg13, %arg14, %arg15) in (%arg19 = %14, %arg20 = %14, %arg21 = %14) {
      llvm.store %floaat, %a_ptr_casted : f32, !llvm.ptr<1>
      gpu.terminator
    }
    gpu.memcpy %8, %gpu_mem : memref<64x64xf32>, memref<64x64xf32>
    gpu.dealloc %gpu_mem : memref<64x64xf32>
    llvm.return
  }
}

// CHECK: llvm.mlir.global internal constant @gcGpuOclKernel_entry_kernel_SPIRV
// CHECK: llvm.mlir.global internal constant @gcGpuOclKernel_entry_kernel_Name
// CHECK: llvm.mlir.global internal @gcGpuOclKernel_entry_kernel_Ptr

// CHECK: llvm.func @createGcGpuOclKernel_entry_kernel([[CTX:%.+]]: !llvm.ptr) -> !llvm.ptr
// CHECK: [[NEW_PTR:%.+]] = llvm.call @gcGpuOclKernelCreate([[CTX]]
// CHECK: [[ZERO:%.+]] = llvm.mlir.zero
// CHECK: [[PTR_ADDR:%.+]] = llvm.mlir.addressof @gcGpuOclKernel_entry_kernel_Ptr
// CHECK: [[CMPXCHG:%.+]] = llvm.cmpxchg [[PTR_ADDR]], [[ZERO]], [[NEW_PTR]]
// CHECK: [[FLAG:%.+]] = llvm.extractvalue [[CMPXCHG]][1]
// CHECK: llvm.cond_br [[FLAG]], [[BB1:\^.+]], [[BB2:\^.+]]
// CHECK: [[BB1]]:
// CHECK: llvm.return [[NEW_PTR]]
// CHECK: [[BB2]]:
// CHECK: [[ONE:%.+]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: [[ARRAY:%.+]] = llvm.alloca [[ONE]]
// CHECK: [[ADDR:%.+]] = llvm.getelementptr [[ARRAY]]
// CHECK: llvm.store [[NEW_PTR]], [[ADDR]]
// CHECK: llvm.call @gcGpuOclKernelDestroy([[ONE]], [[ARRAY]])
// CHECK: [[OLD_PTR:%.+]] = llvm.extractvalue [[CMPXCHG]][0]
// CHECK: llvm.return [[OLD_PTR]]

// CHECK: llvm.func internal @getGcGpuOclKernel_entry_kernel([[CTX:%.+]]: !llvm.ptr) -> !llvm.ptr attributes {always_inline}
// CHECK: [[ZERO:%.+]] = llvm.mlir.zero
// CHECK: [[PTR_ADDR:%.+]] = llvm.mlir.addressof @gcGpuOclKernel_entry_kernel_Ptr
// CHECK: [[PTR:%.+]] = llvm.load [[PTR_ADDR]]
// CHECK: [[ICMP:%.+]] = llvm.icmp "eq" [[PTR]], [[ZERO]]
// CHECK: llvm.cond_br [[ICMP]], [[BB1:\^.+]], [[BB2:\^.+]]
// CHECK: [[BB1]]:
// CHECK: [[NEW_PTR:%.+]] = llvm.call @createGcGpuOclKernel_entry_kernel([[CTX]])
// CHECK: llvm.return [[NEW_PTR]]
// CHECK: [[BB2]]:
// CHECK: llvm.return [[PTR]]

// CHECK: llvm.func @entry(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: i64, %arg3: i64, %arg4: i64, %arg5: i64, %arg6: i64, [[CTX:%.+]]: !llvm.ptr, %arg8: !llvm.ptr, %arg9: i64)
// CHECK: [[SIZE:%.+]] = llvm.mlir.constant(16384 : i64) : i64
// CHECK: llvm.call @gcGpuOclMallocShared([[CTX]], [[SIZE]])
// CHECK: [[SIZE:%.+]] = llvm.mlir.constant(16384 : i64) : i64
// CHECK: [[SRC:%.+]] = llvm.extractvalue
// CHECK: [[DST:%.+]] = llvm.extractvalue [[GPU_MEMREF:%.+]][1]
// CHECK: llvm.call @gcGpuOclMemcpy([[CTX]], [[SRC]], [[DST]], [[SIZE]])
// CHECK: [[KERNEL:%.+]] = llvm.call @getGcGpuOclKernel_entry_kernel([[CTX:%.+]]) : (!llvm.ptr) -> !llvm.ptr
// CHECK: llvm.call @gcGpuOclKernelLaunch([[CTX]], [[KERNEL]],
// CHECK: [[SIZE:%.+]] = llvm.mlir.constant(16384 : i64) : i64
// CHECK: [[SRC:%.+]] = llvm.extractvalue [[GPU_MEMREF:%.+]][1]
// CHECK: [[DST:%.+]] = llvm.extractvalue
// CHECK: llvm.call @gcGpuOclMemcpy([[CTX]], [[SRC]], [[DST]], [[SIZE]])
// CHECK: [[GPU_PTR:%.+]] = llvm.extractvalue [[GPU_MEMREF:%.+]][0]
// CHECK: llvm.call @gcGpuOclDealloc([[CTX]], [[GPU_PTR]])

// CHECK: llvm.func @gcGpuOclKernelCreate
// CHECK: llvm.func @gcGpuOclKernelDestroy
// CHECK: llvm.func @gcGpuOclKernelLaunch


// CHECK: llvm.func @gcGpuOclModuleDestructor()
// CHECK: llvm.fence acquire
// CHECK: [[PTR_ADDR:%.+]] = llvm.mlir.addressof @gcGpuOclKernel_entry_kernel_Ptr
// CHECK: [[PTR:%.+]] = llvm.load [[PTR_ADDR]]
// CHECK: [[ONE:%.+]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: [[ARRAY:%.+]] = llvm.alloca [[ONE]]
// CHECK: [[ADDR:%.+]] = llvm.getelementptr [[ARRAY]]
// CHECK: llvm.store [[PTR]], [[ADDR]]
// CHECK: llvm.call @gcGpuOclKernelDestroy([[ONE]], [[ARRAY]])