@@ -23,7 +23,7 @@ func.func @fat_raw_buffer_cast(%buf: memref<8xi32, #gpu_global_addrspace>) -> me
2323 // RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
2424 // CHECK: %[[rsrc:.*]] = rocdl.make.buffer.rsrc %[[base]], %[[strideArg]], %[[numRecords]], %[[flags]]
2525 // CHECK: %[[fatBuf:.*]] = llvm.addrspacecast %[[rsrc]] : !llvm.ptr<8> to !llvm.ptr<7>
26- // CHECK: %[[ret0:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<7>, ptr<7>, i64, array<1 x i64>, array<1 x i64>)>
26+ // CHECK: %[[ret0:.*]] = llvm.mlir.poison : !llvm.struct<(ptr<7>, ptr<7>, i64, array<1 x i64>, array<1 x i64>)>
2727 // CHECK: %[[ret1:.*]] = llvm.insertvalue %[[fatBuf]], %[[ret0]][0]
2828 // CHECK: %[[ret2:.*]] = llvm.insertvalue %[[fatBuf]], %[[ret1]][1]
2929 // CHECK: %[[ret3:.*]] = llvm.insertvalue %[[offset]], %[[ret2]][2]
@@ -34,6 +34,26 @@ func.func @fat_raw_buffer_cast(%buf: memref<8xi32, #gpu_global_addrspace>) -> me
3434 return %ret : memref <8 xi32 , #amdgpu.address_space <fat_raw_buffer >>
3535}
3636
// Rank-0 case of amdgpu.fat_raw_buffer_cast: the operand is memref<i32, ...>,
// so both the source and the result descriptors are three-field structs
// (two pointers and an i64 offset) with no size or stride arrays.
37+ // CHECK-LABEL: func @fat_raw_buffer_cast_0d
38+ func.func @fat_raw_buffer_cast_0d (%buf: memref <i32 , #gpu_global_addrspace >) -> memref <i32 , #amdgpu.address_space <fat_raw_buffer >> {
39+ // CHECK: %[[desc:.*]] = builtin.unrealized_conversion_cast %{{.*}} : memref<i32, 1> to !llvm.struct<(ptr<1>, ptr<1>, i64)>
// Field 1 of the source descriptor feeds the resource base and field 2 is
// captured as the offset. These four values are matched order-independently.
// NOTE(review): numRecords is expected to be 4, presumably the byte size of
// the single i32 element; confirm against the lowering.
40+ // CHECK-DAG: %[[base:.*]] = llvm.extractvalue %[[desc]][1]
41+ // CHECK-DAG: %[[offset:.*]] = llvm.extractvalue %[[desc]][2]
42+ // CHECK-DAG: %[[numRecords:.*]] = llvm.mlir.constant(4 : i32) : i32
43+ // CHECK-DAG: %[[strideArg:.*]] = llvm.mlir.constant(0 : i16) : i16
// The flags constant differs between the GFX9 and RDNA check prefixes, so it
// is matched under those prefixes instead of the common one.
44+ // GFX9: %[[flags:.*]] = llvm.mlir.constant(159744 : i32)
45+ // RDNA: %[[flags:.*]] = llvm.mlir.constant(822243328 : i32)
46+ // CHECK: %[[rsrc:.*]] = rocdl.make.buffer.rsrc %[[base]], %[[strideArg]], %[[numRecords]], %[[flags]]
47+ // CHECK: %[[fatBuf:.*]] = llvm.addrspacecast %[[rsrc]] : !llvm.ptr<8> to !llvm.ptr<7>
// The result descriptor starts from poison; the fat pointer is inserted into
// both pointer fields and the source offset is carried over into field 2.
48+ // CHECK: %[[ret0:.*]] = llvm.mlir.poison : !llvm.struct<(ptr<7>, ptr<7>, i64)>
49+ // CHECK: %[[ret1:.*]] = llvm.insertvalue %[[fatBuf]], %[[ret0]][0]
50+ // CHECK: %[[ret2:.*]] = llvm.insertvalue %[[fatBuf]], %[[ret1]][1]
51+ // CHECK: %[[ret3:.*]] = llvm.insertvalue %[[offset]], %[[ret2]][2]
52+ // CHECK: builtin.unrealized_conversion_cast %[[ret3]]
53+ %ret = amdgpu.fat_raw_buffer_cast %buf : memref <i32 , #gpu_global_addrspace > to memref <i32 , #amdgpu.address_space <fat_raw_buffer >>
54+ return %ret : memref <i32 , #amdgpu.address_space <fat_raw_buffer >>
55+ }
56+
3757// CHECK-LABEL: func @fat_raw_buffer_cast_dyn_size_offset
3858func.func @fat_raw_buffer_cast_dyn_size_offset (%buf: memref <?xi32 , strided <[1 ], offset : ?>, #gpu_global_addrspace >) -> memref <?xi32 , strided <[1 ], offset : ?>, #amdgpu.address_space <fat_raw_buffer >> {
3959 // CHECK: %[[size0:.*]] = llvm.extractvalue %{{.*}}[3, 0]
0 commit comments