|
| 1 | +// RUN: imex-opt --insert-gpu-allocs='client-api=opencl is-usm-args=1' %s | FileCheck %s --check-prefix=OPENCL |
| 2 | +// RUN: imex-opt --insert-gpu-allocs='client-api=vulkan is-usm-args=1' %s | FileCheck %s --check-prefix=VULKAN |
| 3 | + |
| 4 | +// OPENCL-LABEL: func.func @addt |
| 5 | +// OPENCL-SAME: %[[arg0:.+]]: memref<2x5xf32>, %[[arg1:.+]]: memref<2x5xf32>, %[[out_buff:.+]]: memref<2x5xf32> |
| 6 | +// VULKAN-LABEL: func.func @addt |
| 7 | +// VULKAN-SAME: %[[arg0:.+]]: memref<2x5xf32>, %[[arg1:.+]]: memref<2x5xf32>, %[[out_buff:.+]]: memref<2x5xf32> |
| 8 | +func.func @addt(%arg0: memref<2x5xf32>, %arg1: memref<2x5xf32>, %out_buff: memref<2x5xf32>) -> memref<2x5xf32> { |
| 9 | + %c0 = arith.constant 0 : index |
| 10 | + %c2 = arith.constant 2 : index |
| 11 | + %c1 = arith.constant 1 : index |
| 12 | + %c5 = arith.constant 5 : index |
| 13 | + // OPENCL-NOT: %[[MEMREF0:.*]] = gpu.alloc host_shared () : memref<2x5xf32> |
| 14 | + // OPENCL-NOT: %[[MEMREF1:.*]] = gpu.alloc host_shared () : memref<2x5xf32> |
| 15 | + // OPENCL-NOT: memref.copy |
| 16 | + // OPENCL-NOT: %[[MEMREF2:.*]] = gpu.alloc host_shared () : memref<2x5xf32> |
| 17 | + // OPENCL-NOT: memref.copy |
| 18 | + |
| 19 | + // VULKAN-NOT: %[[MEMREF0:.*]] = memref.alloc() : memref<2x5xf32> |
| 20 | + // VULKAN-NOT: %[[MEMREF1:.*]] = memref.alloc() : memref<2x5xf32> |
| 21 | + // VULKAN-NOT: memref.copy |
| 22 | + // VULKAN-NOT: %[[MEMREF2:.*]] = memref.alloc() : memref<2x5xf32> |
| 23 | + // VULKAN-NOT: memref.copy |
| 24 | + // Scratch buffer used inside gpu.launch: the opencl run expects it rewritten to gpu.alloc (with no memref.alloc left behind), the vulkan run expects it kept as memref.alloc. |
| 25 | + %tmp_buff = memref.alloc() {alignment = 128 : i64} : memref<2x5xf32> |
| 26 | + // OPENCL-NOT: memref.alloc() |
| 27 | + // OPENCL: %[[MEMREF3:.*]] = gpu.alloc () : memref<2x5xf32> |
| 28 | + // VULKAN: %[[MEMREF3:.*]] = memref.alloc() {alignment = 128 : i64} : memref<2x5xf32> |
| 29 | + |
| 30 | + %c1_0 = arith.constant 1 : index |
| 31 | + %1 = affine.apply affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>(%c2)[%c0, %c1] |
| 32 | + %2 = affine.apply affine_map<(d0)[s0, s1] -> ((d0 - s0) ceildiv s1)>(%c5)[%c0, %c1] |
| 33 | + gpu.launch blocks(%arg2, %arg3, %arg4) in (%arg8 = %1, %arg9 = %2, %arg10 = %c1_0) threads(%arg5, %arg6, %arg7) in (%arg11 = %c1_0, %arg12 = %c1_0, %arg13 = %c1_0) { |
| 34 | + %3 = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>(%arg2)[%c1, %c0] |
| 35 | + %4 = affine.apply affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)>(%arg3)[%c1, %c0] |
| 36 | + %5 = memref.load %arg0[%3, %4] : memref<2x5xf32> |
| 37 | + %6 = memref.load %arg1[%3, %4] : memref<2x5xf32> |
| 38 | + %7 = arith.addf %5, %6 : f32 |
| 39 | + memref.store %7, %tmp_buff[%3, %4] : memref<2x5xf32> |
| 40 | + |
| 41 | + %8 = memref.load %tmp_buff[%3, %4] : memref<2x5xf32> |
| 42 | + %9 = arith.addf %8, %5 : f32 |
| 43 | + memref.store %9, %out_buff[%3, %4] : memref<2x5xf32> |
| 44 | + |
| 45 | + gpu.terminator |
| 46 | + } {SCFToGPU_visited} |
| 47 | + // The deallocation is expected to mirror whichever allocator each client API ended up with for the scratch buffer. |
| 48 | + // OPENCL-NOT: memref.dealloc %[[MEMREF3]] : memref<2x5xf32> |
| 49 | + // OPENCL: gpu.dealloc %[[MEMREF3]] : memref<2x5xf32> |
| 50 | + // VULKAN: memref.dealloc %[[MEMREF3]] : memref<2x5xf32> |
| 51 | + memref.dealloc %tmp_buff : memref<2x5xf32> |
| 52 | + |
| 53 | + return %out_buff : memref<2x5xf32> |
| 54 | +} |
0 commit comments