Skip to content

Commit d3208e6

Browse files
committed
[amdgpu][mlir] Add type information to the make_dma_base lowering.
1 parent af068e0 commit d3208e6

File tree

2 files changed

+14
-6
lines changed

2 files changed

+14
-6
lines changed

mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2311,6 +2311,10 @@ struct AMDGPUMakeDmaBaseLowering
23112311
LLVM::TruncOp::create(rewriter, loc, i32, first57BitsOfGlobalAddr);
23122312
Value highHalf = LLVM::TruncOp::create(rewriter, loc, i32, shift);
23132313

2314+
Value typeMask = createI32Constant(rewriter, loc, 2 << 30);
2315+
Value highHalfPlusType =
2316+
LLVM::OrOp::create(rewriter, loc, highHalf, typeMask);
2317+
23142318
Value c0 = createI32Constant(rewriter, loc, 0);
23152319
Value c1 = createI32Constant(rewriter, loc, 1);
23162320
Value c2 = createI32Constant(rewriter, loc, 2);
@@ -2322,7 +2326,8 @@ struct AMDGPUMakeDmaBaseLowering
23222326
result = LLVM::InsertElementOp::create(rewriter, loc, result,
23232327
castForLdsAddr, c1);
23242328
result = LLVM::InsertElementOp::create(rewriter, loc, result, lowHalf, c2);
2325-
result = LLVM::InsertElementOp::create(rewriter, loc, result, highHalf, c3);
2329+
result = LLVM::InsertElementOp::create(rewriter, loc, result,
2330+
highHalfPlusType, c3);
23262331

23272332
rewriter.replaceOp(op, result);
23282333
return success();

mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -192,16 +192,19 @@ func.func @make_dma_base(%idx: index, %mem: memref<8xi32, #gpu_global_addrspace>
192192
// CHECK-DAG: %[[MEM_INT_LOW:.+]] = llvm.trunc %[[MEM_INT_LOW_57]] : i64 to i32
193193
// CHECK-DAG: %[[MEM_INT_HIGH:.+]] = llvm.trunc %[[SHIFT]] : i64 to i32
194194

195+
// CHECK-DAG: %[[TYPE_MASK:.+]] = llvm.mlir.constant(-2147483648 : i32)
196+
// CHECK: %[[MEM_INT_HIGH_TYPE:.+]] = llvm.or %[[MEM_INT_HIGH]], %[[TYPE_MASK]]
197+
195198
// CHECK-DAG: %[[C0:.+]] = llvm.mlir.constant(0 : i32) : i32
196199
// CHECK-DAG: %[[C1:.+]] = llvm.mlir.constant(1 : i32) : i32
197200
// CHECK-DAG: %[[C2:.+]] = llvm.mlir.constant(2 : i32) : i32
198201
// CHECK-DAG: %[[C3:.+]] = llvm.mlir.constant(3 : i32) : i32
199202

200-
// CHECK: %[[V4I32_1_0:.+]] = llvm.mlir.poison : vector<4xi32>
201-
// CHECK: %[[V4I32_1_1:.+]] = llvm.insertelement %[[C1]], %[[V4I32_1_0]][%[[C0]] : i32]
202-
// CHECK: %[[V4I32_1_2:.+]] = llvm.insertelement %[[SMEM_INT]], %[[V4I32_1_1]][%[[C1]] : i32]
203-
// CHECK: %[[V4I32_1_3:.+]] = llvm.insertelement %[[MEM_INT_LOW]], %[[V4I32_1_2]][%[[C2]] : i32]
204-
// CHECK: %[[V4I32_1_4:.+]] = llvm.insertelement %[[MEM_INT_HIGH]], %[[V4I32_1_3]][%[[C3]] : i32]
203+
// CHECK: %[[V4I32_0_0:.+]] = llvm.mlir.poison : vector<4xi32>
204+
// CHECK: %[[V4I32_0_1:.+]] = llvm.insertelement %[[C1]], %[[V4I32_0_0]][%[[C0]] : i32]
205+
// CHECK: %[[V4I32_0_2:.+]] = llvm.insertelement %[[SMEM_INT]], %[[V4I32_0_1]][%[[C1]] : i32]
206+
// CHECK: %[[V4I32_0_3:.+]] = llvm.insertelement %[[MEM_INT_LOW]], %[[V4I32_0_2]][%[[C2]] : i32]
207+
// CHECK: %[[V4I32_0_4:.+]] = llvm.insertelement %[[MEM_INT_HIGH_TYPE]], %[[V4I32_0_3]][%[[C3]] : i32]
205208

206209
%0 = amdgpu.make_dma_base %smem[%idx], %mem[%idx] : memref<8xi32, #gpu_lds_addrspace>, memref<8xi32, #gpu_global_addrspace> -> !amdgpu.tdm_base<i32>
207210

0 commit comments

Comments
 (0)