Skip to content

Commit 7955fe0

Browse files
committed
Correction about rank 2: drop the dgroup2/dgroup3 results from the make_dma_descriptor lowering
1 parent eeb008a commit 7955fe0

File tree

2 files changed

+2
-27
lines changed

2 files changed

+2
-27
lines changed

mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp

Lines changed: 1 addition & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -2659,24 +2659,8 @@ struct AMDGPUMakeDmaDescriptorLowering
26592659

26602660
Value dgroup0 = this->getDGroup0(adaptor);
26612661
Value dgroup1 = this->getDGroup1(op, adaptor, rewriter, loc, consts);
2662-
Value undefV4I32 = LLVM::UndefOp::create(rewriter, loc, v4i32);
2663-
Value dgroup2 = undefV4I32;
2664-
Value dgroup3 = undefV4I32;
2665-
2666-
if (op.getRank() == 2) {
2667-
Value nullConstant = createI32Constant(rewriter, loc, 0x7c);
2668-
dgroup2 = LLVM::InsertElementOp::create(rewriter, loc, dgroup2,
2669-
nullConstant, consts[0]);
2670-
dgroup2 = LLVM::InsertElementOp::create(rewriter, loc, dgroup2, consts[0],
2671-
consts[1]);
2672-
dgroup2 = LLVM::InsertElementOp::create(rewriter, loc, dgroup2, consts[0],
2673-
consts[2]);
2674-
dgroup2 = LLVM::InsertElementOp::create(rewriter, loc, dgroup2, consts[0],
2675-
consts[3]);
2676-
dgroup3 = dgroup2;
2677-
}
26782662

2679-
SmallVector<Value> results = {dgroup0, dgroup1, dgroup2, dgroup3};
2663+
SmallVector<Value> results = {dgroup0, dgroup1};
26802664
rewriter.replaceOpWithMultiple(op, {results});
26812665
return success();
26822666
}

mlir/test/Conversion/AMDGPUToROCDL/gfx1250.mlir

Lines changed: 1 addition & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -302,16 +302,7 @@ func.func @make_dma_descriptor(%base: !amdgpu.tdm_base<i32>) -> !amdgpu.tdm_desc
302302
// CHECK: %[[DGROUP1_6:.+]] = llvm.insertelement %[[SGPR6]], %[[DGROUP1_5]][%[[C6]] : i32]
303303
// CHECK: %[[DGROUP1:.+]] = llvm.insertelement %[[SGPR7]], %[[DGROUP1_6]][%[[C7]] : i32]
304304

305-
// CHECK-DAG: %[[V4I32:.+]] = llvm.mlir.undef : vector<4xi32>
306-
307-
// CHECK-DAG: %[[NULL:.+]] = llvm.mlir.constant(124 : i32)
308-
309-
// CHECK: %[[NULL_GROUP_0:.+]] = llvm.insertelement %[[NULL]], %[[V4I32]][%[[C0]] : i32]
310-
// CHECK: %[[NULL_GROUP_1:.+]] = llvm.insertelement %[[C0]], %[[NULL_GROUP_0]][%[[C1]] : i32]
311-
// CHECK: %[[NULL_GROUP_2:.+]] = llvm.insertelement %[[C0]], %[[NULL_GROUP_1]][%[[C2]] : i32]
312-
// CHECK: %[[NULL_GROUP:.+]] = llvm.insertelement %[[C0]], %[[NULL_GROUP_2]][%[[C3]] : i32]
313-
314-
// CHECK: %[[DGROUPS:.+]] = builtin.unrealized_conversion_cast %[[DGROUP0]], %[[DGROUP1]], %[[NULL_GROUP]], %[[NULL_GROUP]] : vector<4xi32>, vector<8xi32>, vector<4xi32>, vector<4xi32> to !amdgpu.tdm_descriptor
305+
// CHECK: %[[DGROUPS:.+]] = builtin.unrealized_conversion_cast %[[DGROUP0]], %[[DGROUP1]] : vector<4xi32>, vector<8xi32> to !amdgpu.tdm_descriptor
315306
%descriptor = amdgpu.make_dma_descriptor %base globalSize [128, 64] globalStride [64, 1] sharedSize [128, 64] : !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
316307
func.return %descriptor : !amdgpu.tdm_descriptor
317308
}

0 commit comments

Comments
 (0)