Skip to content

Commit ad77f7a

Browse files
[mlir][AMDGPU] Improving DPP implementation of subgroup reduce
Signed-off-by: Muzammiluddin Syed <[email protected]>
1 parent 848c6ba commit ad77f7a

File tree

9 files changed

+431
-154
lines changed

9 files changed

+431
-154
lines changed

mlir/include/mlir/Dialect/GPU/Transforms/Passes.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,13 +74,15 @@ void populateGpuLowerClusteredSubgroupReduceToShufflePatterns(
7474
/// `subgroupSize` lanes. Applicable only to AMD GPUs.
7575
void populateGpuLowerSubgroupReduceToDPPPatterns(RewritePatternSet &patterns,
7676
unsigned subgroupSize,
77+
unsigned shuffleBitwidth,
7778
amdgpu::Chipset chipset,
7879
PatternBenefit benefit = 1);
7980

8081
/// Disjoint counterpart of `populateGpuLowerSubgroupReduceToDPPPatterns`
8182
/// that only matches `gpu.subgroup_reduce` ops with a `cluster_size`.
8283
void populateGpuLowerClusteredSubgroupReduceToDPPPatterns(
83-
RewritePatternSet &patterns, unsigned subgroupSize, amdgpu::Chipset chipset,
84+
RewritePatternSet &patterns, unsigned subgroupSize,
85+
unsigned shuffleBitwidth, amdgpu::Chipset chipset,
8486
PatternBenefit benefit = 1);
8587

8688
/// Collect all patterns to rewrite ops within the GPU dialect.

mlir/include/mlir/Dialect/GPU/Utils/GPUUtils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ class LaunchOp;
2929

3030
/// Returns the matching vector combining kind.
3131
vector::CombiningKind convertReductionKind(gpu::AllReduceOperation mode);
32+
/// Returns the matching gpu allreduce mode.
33+
gpu::AllReduceOperation convertReductionMode(vector::CombiningKind kind);
3234
} // namespace gpu
3335

3436
/// Get a gpu.func created from outlining the region of a gpu.launch op with the
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
//===- ReductionUtils.h - Reduction Utilities -------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef MLIR_DIALECT_GPU_TRANSFORMS_REDUCTIONUTILS_H_
10+
#define MLIR_DIALECT_GPU_TRANSFORMS_REDUCTIONUTILS_H_
11+
12+
#include "mlir/Dialect/Affine/IR/AffineOps.h"
13+
#include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h"
14+
#include "mlir/Dialect/AMDGPU/Utils/Chipset.h"
15+
#include "mlir/Dialect/Arith/IR/Arith.h"
16+
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
17+
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
18+
#include "mlir/IR/PatternMatch.h"
19+
#include "mlir/IR/Value.h"
20+
21+
namespace mlir {
22+
23+
struct ClusterInfo {
24+
unsigned clusterStride;
25+
unsigned clusterSize;
26+
unsigned subgroupSize;
27+
};
28+
29+
FailureOr<ClusterInfo> getAndValidateClusterInfo(gpu::SubgroupReduceOp op,
30+
unsigned subgroupSize);
31+
32+
FailureOr<Value>
33+
createSubgroupDPPReduction(PatternRewriter &rewriter, gpu::SubgroupReduceOp op,
34+
Value input, gpu::AllReduceOperation mode,
35+
const ClusterInfo &ci, amdgpu::Chipset chipset,
36+
function_ref<Value(Value)> packFn,
37+
function_ref<Value(Value)> unpackFn);
38+
39+
} // namespace mlir
40+
41+
#endif // MLIR_DIALECT_GPU_TRANSFORMS_REDUCTIONUTILS_H_

mlir/lib/Dialect/GPU/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ add_mlir_dialect_library(MLIRGPUTransforms
5353

5454
LINK_LIBS PUBLIC
5555
MLIRAffineUtils
56+
MLIRAMDGPUDialect
5657
MLIRArithDialect
5758
MLIRAsyncDialect
5859
MLIRBufferizationDialect

0 commit comments

Comments
 (0)