Skip to content

Commit 81bb8bc

Browse files
committed
update test files.
1 parent 4ed2006 commit 81bb8bc

File tree

2 files changed

+26
-23
lines changed

2 files changed

+26
-23
lines changed

mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
// test pass doesn't set up the GPU address space conversions.
1010

1111
#gpu_global_addrspace = 1
12-
#gpu_lds_addrspace = 3
1312

1413
// CHECK-LABEL: func @fat_raw_buffer_cast
1514
func.func @fat_raw_buffer_cast(%buf: memref<8xi32, #gpu_global_addrspace>) -> memref<8xi32, #amdgpu.address_space<fat_raw_buffer>> {
@@ -462,25 +461,3 @@ func.func @sched_barrier() {
462461
amdgpu.sched_barrier allow = <valu|all_vmem>
463462
func.return
464463
}
465-
466-
// CHECK-LABEL: func @global_load_to_rocdl_f32
467-
// CHECK-SAME: (%[[ARG0:.*]]: memref<128x72xf32, 1>)
468-
func.func @global_load_to_rocdl_f32(%global : memref<128x72xf32, #gpu_global_addrspace>) {
469-
%c0 = arith.constant 0 : index
470-
%c12 = arith.constant 12 : index
471-
%c32 = arith.constant 32 : index
472-
%alloc = memref.alloc() : memref<64x64xf32, #gpu_lds_addrspace>
473-
// GFX942: %[[GLOBAL_DESC:.*]] = builtin.unrealized_conversion_cast %arg0 : memref<128x72xf32, 1> to !llvm.struct<(ptr<1>, ptr<1>, i64, array<2 x i64>, array<2 x i64>)>
474-
// GFX942: %[[ALLOC:.*]] = memref.alloc() : memref<64x64xf32, 3>
475-
// GFX942: %[[LDS_DESC:.*]] = builtin.unrealized_conversion_cast %[[ALLOC]] : memref<64x64xf32, 3> to !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
476-
// GFX942: %[[GLOBAL_BASE:.*]] = llvm.extractvalue %[[GLOBAL_DESC]][1] : !llvm.struct<(ptr<1>, ptr<1>, i64, array<2 x i64>, array<2 x i64>)>
477-
// GFX942: %[[LDS_BASE:.*]] = llvm.extractvalue %[[LDS_DESC]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
478-
// GFX942: %[[GLOBAL_PTR:.*]] = llvm.getelementptr %[[GLOBAL_BASE]]
479-
// GFX942: %[[LDS_PTR:.*]] = llvm.getelementptr %[[LDS_BASE]]
480-
// GFX942: %[[C4:.*]] = llvm.mlir.constant(4 : i32) : i32
481-
// GFX942: %[[C0:.*]] = llvm.mlir.constant(0 : i32) : i32
482-
// GFX942: %[[C0_2:.*]] = llvm.mlir.constant(0 : i32) : i32
483-
// GFX942: rocdl.global.load.lds %[[GLOBAL_PTR]], %[[LDS_PTR]], %[[C4]], %[[C0]], %[[C0_2]]
484-
amdgpu.gather_to_lds %global[%c12, %c0], %alloc[%c32, %c0] {transferType = f32} : memref<128x72xf32, #gpu_global_addrspace>, memref<64x64xf32, #gpu_lds_addrspace>
485-
func.return
486-
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx942 | FileCheck %s
2+
3+
#gpu_global_addrspace = 1
4+
#gpu_lds_addrspace = 3
5+
6+
// CHECK-LABEL: func @global_load_to_rocdl_f32
7+
// CHECK-SAME: (%[[ARG0:.*]]: memref<128x72xf32, 1>)
8+
func.func @global_load_to_rocdl_f32(%global : memref<128x72xf32, #gpu_global_addrspace>) {
9+
%c0 = arith.constant 0 : index
10+
%c12 = arith.constant 12 : index
11+
%c32 = arith.constant 32 : index
12+
%alloc = memref.alloc() : memref<64x64xf32, #gpu_lds_addrspace>
13+
// CHECK: %[[GLOBAL_DESC:.*]] = builtin.unrealized_conversion_cast %[[ARG0]] : memref<128x72xf32, 1> to !llvm.struct<(ptr<1>, ptr<1>, i64, array<2 x i64>, array<2 x i64>)>
14+
// CHECK: %[[ALLOC:.*]] = memref.alloc() : memref<64x64xf32, 3>
15+
// CHECK: %[[LDS_DESC:.*]] = builtin.unrealized_conversion_cast %[[ALLOC]] : memref<64x64xf32, 3> to !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)>
16+
// CHECK: %[[GLOBAL_BASE:.*]] = llvm.extractvalue %[[GLOBAL_DESC]][1]
17+
// CHECK: %[[GLOBAL_PTR:.*]] = llvm.getelementptr %[[GLOBAL_BASE]]
18+
// CHECK: %[[LDS_BASE:.*]] = llvm.extractvalue %[[LDS_DESC]][1]
19+
// CHECK: %[[LDS_PTR:.*]] = llvm.getelementptr %[[LDS_BASE]]
20+
// CHECK: %[[C4:.*]] = llvm.mlir.constant(4 : i32) : i32
21+
// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i32) : i32
22+
// CHECK: %[[C0_2:.*]] = llvm.mlir.constant(0 : i32) : i32
23+
// CHECK: rocdl.global.load.lds %[[GLOBAL_PTR]], %[[LDS_PTR]], %[[C4]], %[[C0]], %[[C0_2]]
24+
amdgpu.gather_to_lds %global[%c12, %c0], %alloc[%c32, %c0] {transferType = f32} : memref<128x72xf32, #gpu_global_addrspace>, memref<64x64xf32, #gpu_lds_addrspace>
25+
func.return
26+
}

0 commit comments

Comments
 (0)