Skip to content

Commit d68db39

Browse files
committed
Final touch.
1 parent 1a40d6c commit d68db39

File tree

4 files changed

+11
-9
lines changed

4 files changed

+11
-9
lines changed

mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -784,7 +784,8 @@ def AMDGPU_GatherToLDSOp :
784784
* `$srcIndices`: indices into `$src` to read from for this thread.
785785
* `$dst`: LDS memory memref to write to.
786786
* `$dstIndices`: base indices into `$dst` to write to for the subgroup of this thread.
787-
number of subgroup size of elements will be written contiguously to `$dst[$dstIndices]`.
787+
The elements gathered by the subgroup will be written contiguously, in order of lane ID,
788+
starting at `$dst[$dstIndices]`.
788789
* `$transferType`: type of the data to be transferred by each thread. This is used to determine
789790
the size of the data to be transferred and the number of threads in the subgroup.
790791
The transfer type must be a scalar type or a vector type with a single element type.

mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -925,11 +925,12 @@ struct GatherToLDSOpLowering : public ConvertOpToLLVMPattern<GatherToLDSOp> {
925925
// `global_load_lds` instructions.
926926
size_t loadWidth;
927927
Type transferType = op.getTransferType();
928-
if (auto transferVectorType = dyn_cast<VectorType>(transferType))
928+
if (auto transferVectorType = dyn_cast<VectorType>(transferType)) {
929929
loadWidth = transferVectorType.getNumElements() *
930-
transferVectorType.getElementTypeBitWidth() / 8;
931-
else
930+
(transferVectorType.getElementTypeBitWidth() / 8);
931+
} else {
932932
loadWidth = transferType.getIntOrFloatBitWidth() / 8;
933+
}
933934

934935
// Currently only 1, 2, and 4 byte loads are supported.
935936
if (loadWidth != 1 && loadWidth != 2 && loadWidth != 4)

mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -117,17 +117,17 @@ LogicalResult FatRawBufferCastOp::verify() {
117117
static bool hasGlobalMemorySpace(Attribute memorySpace) {
118118
if (!memorySpace)
119119
return true;
120-
if (auto intMemorySpace = llvm::dyn_cast<IntegerAttr>(memorySpace))
120+
if (auto intMemorySpace = dyn_cast<IntegerAttr>(memorySpace))
121121
return intMemorySpace.getInt() == 0 || intMemorySpace.getInt() == 1;
122-
if (auto gpuMemorySpace = llvm::dyn_cast<gpu::AddressSpaceAttr>(memorySpace))
122+
if (auto gpuMemorySpace = dyn_cast<gpu::AddressSpaceAttr>(memorySpace))
123123
return gpuMemorySpace.getValue() == gpu::AddressSpace::Global;
124124
return false;
125125
}
126126

127127
static bool hasWorkgroupMemorySpace(Attribute memorySpace) {
128-
if (auto intMemorySpace = llvm::dyn_cast<IntegerAttr>(memorySpace))
128+
if (auto intMemorySpace = dyn_cast<IntegerAttr>(memorySpace))
129129
return intMemorySpace.getInt() == 3;
130-
if (auto gpuMemorySpace = llvm::dyn_cast<gpu::AddressSpaceAttr>(memorySpace))
130+
if (auto gpuMemorySpace = dyn_cast<gpu::AddressSpaceAttr>(memorySpace))
131131
return gpuMemorySpace.getValue() == gpu::AddressSpace::Workgroup;
132132
return false;
133133
}

mlir/test/Conversion/AMDGPUToROCDL/load_lds.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,4 +148,4 @@ func.func @global_load_to_rocdl_dynamic_indices(%global : memref<512xi32, #gpu_g
148148
amdgpu.gather_to_lds %global[%src_idx], %alloc[%dst_idx, %c0]
149149
: i32, memref<512xi32, #gpu_global_addrspace>, memref<4x64xi32, #gpu_lds_addrspace>
150150
func.return
151-
}
151+
}

0 commit comments

Comments
 (0)