// RUN: gc-gpu-runner --shared-libs=%mlir_runner_utils %s | FileCheck %s

// Round-trips one 8x16xf32 tile through xevm.blockload2d / xevm.blockstore2d
// and prints the destination buffer so FileCheck can verify the copy.

module {

// Copies the 8x16xf32 tile in %src into %dst with a single 2D block load
// followed by a single 2D block store, and returns %dst.
//
// NOTE(review): the XeVM block ops sit in a plain func.func rather than a
// gpu.func kernel - confirm gc-gpu-runner offloads this function as intended.
func.func @load_store(%src: memref<8x16xf32>, %dst: memref<8x16xf32>) -> memref<8x16xf32> {
  // Sentinel written to dst[0, 0]. The block store below covers the whole
  // 8x16 tile, so this value must NOT survive in the printed output.
  %constant = arith.constant 1.23 : f32
  %c0 = arith.constant 0 : index
  memref.store %constant, %dst[%c0, %c0] : memref<8x16xf32>

  // Turn the source memref into a raw pointer in address space 1, which is
  // the pointer type the XeVM block ops below are declared with.
  %src_ptr_as_idx = memref.extract_aligned_pointer_as_index %src : memref<8x16xf32> -> index
  %src_ptr_as_i64 = arith.index_cast %src_ptr_as_idx : index to i64
  %src_ptr_generic = llvm.inttoptr %src_ptr_as_i64 : i64 to !llvm.ptr
  %src_ptr = llvm.addrspacecast %src_ptr_generic : !llvm.ptr to !llvm.ptr<1>

  // Surface description shared by the load and the store. Per the XeVM op
  // documentation, width and pitch are given in bytes and height in rows, so
  // an 8x16xf32 row-major buffer is 64 bytes wide, 8 rows tall, 64-byte pitch.
  %base_width = arith.constant 64 : i32   // 16 elements * 4 bytes
  %base_height = arith.constant 8 : i32   // number of rows
  %base_pitch = arith.constant 64 : i32   // row stride in bytes (dense rows)
  // Tile origin inside the surface, in elements.
  %x = arith.constant 0 : i32
  %y = arith.constant 0 : i32

  // Load the full 16-wide, 8-tall tile of 32-bit elements as raw i32 bits.
  %loaded = xevm.blockload2d %src_ptr, %base_width, %base_height, %base_pitch, %x, %y {elem_size_in_bits=32, tile_width=16, tile_height=8, v_blocks=1, transpose=false, vnni_transform=false, l1_cache_control=Default, l3_cache_control=Default} : (!llvm.ptr<1>, i32, i32, i32, i32, i32) -> vector<8xi32>

  // Same pointer conversion for the destination buffer.
  %dst_ptr_as_idx = memref.extract_aligned_pointer_as_index %dst : memref<8x16xf32> -> index
  %dst_ptr_as_i64 = arith.index_cast %dst_ptr_as_idx : index to i64
  %dst_ptr_generic = llvm.inttoptr %dst_ptr_as_i64 : i64 to !llvm.ptr
  %dst_ptr = llvm.addrspacecast %dst_ptr_generic : !llvm.ptr to !llvm.ptr<1>

  // Write the loaded tile back at the same origin in the destination surface.
  xevm.blockstore2d %dst_ptr, %base_width, %base_height, %base_pitch, %x, %y, %loaded {elem_size_in_bits=32, tile_width=16, tile_height=8, v_blocks=1, l1_cache_control=Default, l3_cache_control=Default} : (!llvm.ptr<1>, i32, i32, i32, i32, i32, vector<8xi32>)

  return %dst : memref<8x16xf32>
}

// Test driver: fills the source buffer with a known pattern, runs
// @load_store, and prints the destination buffer for FileCheck.
func.func @main() {
  %src = memref.alloc() : memref<8x16xf32>
  %dst = memref.alloc() : memref<8x16xf32>

  // Initialise src[i][j] = 16 * i + j so every element is distinct; a
  // misplaced, transposed or partial copy then shows up in the output.
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c8 = arith.constant 8 : index
  %c16 = arith.constant 16 : index
  scf.for %i = %c0 to %c8 step %c1 {
    scf.for %j = %c0 to %c16 step %c1 {
      %row_base = arith.muli %i, %c16 : index
      %linear = arith.addi %row_base, %j : index
      %linear_i32 = arith.index_cast %linear : index to i32
      %value = arith.sitofp %linear_i32 : i32 to f32
      memref.store %value, %src[%i, %j] : memref<8x16xf32>
    }
  }

  %gpu_res = call @load_store(%src, %dst) : (memref<8x16xf32>, memref<8x16xf32>) -> memref<8x16xf32>
  %cast = memref.cast %gpu_res : memref<8x16xf32> to memref<*xf32>
  call @printMemrefF32(%cast) : (memref<*xf32>) -> ()

  // The destination must equal the source pattern; in particular the 1.23
  // sentinel at [0, 0] must have been overwritten by 0.
  // CHECK: Unranked Memref base@ = 0x{{[0-9a-f]+}}
  // CHECK-NEXT: [0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   10,   11,   12,   13,   14,   15],
  // CHECK-NEXT: [16,   17,   18,   19,   20,   21,   22,   23,   24,   25,   26,   27,   28,   29,   30,   31],
  // CHECK-NEXT: [32,   33,   34,   35,   36,   37,   38,   39,   40,   41,   42,   43,   44,   45,   46,   47],
  // CHECK-NEXT: [48,   49,   50,   51,   52,   53,   54,   55,   56,   57,   58,   59,   60,   61,   62,   63],
  // CHECK-NEXT: [64,   65,   66,   67,   68,   69,   70,   71,   72,   73,   74,   75,   76,   77,   78,   79],
  // CHECK-NEXT: [80,   81,   82,   83,   84,   85,   86,   87,   88,   89,   90,   91,   92,   93,   94,   95],
  // CHECK-NEXT: [96,   97,   98,   99,   100,   101,   102,   103,   104,   105,   106,   107,   108,   109,   110,   111],
  // CHECK-NEXT: [112,   113,   114,   115,   116,   117,   118,   119,   120,   121,   122,   123,   124,   125,   126,   127]

  // %gpu_res is the %dst buffer returned by @load_store, so freeing %src
  // and %dst releases everything allocated here.
  memref.dealloc %src : memref<8x16xf32>
  memref.dealloc %dst : memref<8x16xf32>
  return
}

// Declaration only; the definition is expected to come from the shared
// library passed on the RUN line via --shared-libs.
func.func private @printMemrefF32(%ptr : memref<*xf32>)

}
// 0 commit comments