|
9 | 9 | // test pass doesn't set up the GPU address space conversions. |
10 | 10 |
|
11 | 11 | #gpu_global_addrspace = 1 |
12 | | -#gpu_lds_addrspace = 3 |
13 | 12 |
|
14 | 13 | // CHECK-LABEL: func @fat_raw_buffer_cast |
15 | 14 | func.func @fat_raw_buffer_cast(%buf: memref<8xi32, #gpu_global_addrspace>) -> memref<8xi32, #amdgpu.address_space<fat_raw_buffer>> { |
@@ -462,25 +461,3 @@ func.func @sched_barrier() { |
462 | 461 | amdgpu.sched_barrier allow = <valu|all_vmem> |
463 | 462 | func.return |
464 | 463 | } |
465 | | - |
466 | | -// CHECK-LABEL: func @global_load_to_rocdl_f32 |
467 | | -// CHECK-SAME: (%[[ARG0:.*]]: memref<128x72xf32, 1>) |
468 | | -func.func @global_load_to_rocdl_f32(%global : memref<128x72xf32, #gpu_global_addrspace>) { |
469 | | - %c0 = arith.constant 0 : index |
470 | | - %c12 = arith.constant 12 : index |
471 | | - %c32 = arith.constant 32 : index |
472 | | - %alloc = memref.alloc() : memref<64x64xf32, #gpu_lds_addrspace> |
473 | | - // GFX942: %[[GLOBAL_DESC:.*]] = builtin.unrealized_conversion_cast %arg0 : memref<128x72xf32, 1> to !llvm.struct<(ptr<1>, ptr<1>, i64, array<2 x i64>, array<2 x i64>)> |
474 | | - // GFX942: %[[ALLOC:.*]] = memref.alloc() : memref<64x64xf32, 3> |
475 | | - // GFX942: %[[LDS_DESC:.*]] = builtin.unrealized_conversion_cast %[[ALLOC]] : memref<64x64xf32, 3> to !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> |
476 | | - // GFX942: %[[GLOBAL_BASE:.*]] = llvm.extractvalue %[[GLOBAL_DESC]][1] : !llvm.struct<(ptr<1>, ptr<1>, i64, array<2 x i64>, array<2 x i64>)> |
477 | | - // GFX942: %[[LDS_BASE:.*]] = llvm.extractvalue %[[LDS_DESC]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<2 x i64>, array<2 x i64>)> |
478 | | - // GFX942: %[[GLOBAL_PTR:.*]] = llvm.getelementptr %[[GLOBAL_BASE]] |
479 | | - // GFX942: %[[LDS_PTR:.*]] = llvm.getelementptr %[[LDS_BASE]] |
480 | | - // GFX942: %[[C4:.*]] = llvm.mlir.constant(4 : i32) : i32 |
481 | | - // GFX942: %[[C0:.*]] = llvm.mlir.constant(0 : i32) : i32 |
482 | | - // GFX942: %[[C0_2:.*]] = llvm.mlir.constant(0 : i32) : i32 |
483 | | - // GFX942: rocdl.global.load.lds %[[GLOBAL_PTR]], %[[LDS_PTR]], %[[C4]], %[[C0]], %[[C0_2]] |
484 | | - amdgpu.gather_to_lds %global[%c12, %c0], %alloc[%c32, %c0] {transferType = f32} : memref<128x72xf32, #gpu_global_addrspace>, memref<64x64xf32, #gpu_lds_addrspace> |
485 | | - func.return |
486 | | -} |
0 commit comments