Commit 206adb3

Address PR feedback
Signed-off-by: Lukas Sommer <[email protected]>
1 parent cd47c3e commit 206adb3

3 files changed: 105 additions, 62 deletions

SPIRVSubgroupOps.h (new file)

Lines changed: 80 additions & 0 deletions
@@ -0,0 +1,80 @@
+//===- SPIRVSubgroupOps.h - Mapping for SPIR-V Reduction --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the mapping from operations in the 'arith' dialect to the
+// corresponding SPIR-V subgroup reduction operations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TRITONINTELGPUTOLLVM_SPIRVSUBGROUPOPS_H
+#define TRITONINTELGPUTOLLVM_SPIRVSUBGROUPOPS_H
+
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/SPIRV/IR/SPIRVOps.h"
+
+using namespace mlir;
+
+namespace mlir::triton::intel {
+
+template <typename OpTy> struct SPIRVArithmeticGroupOp {};
+
+template <> struct SPIRVArithmeticGroupOp<arith::AddFOp> {
+  using type = spirv::GroupNonUniformFAddOp;
+};
+template <> struct SPIRVArithmeticGroupOp<arith::AddIOp> {
+  using type = spirv::GroupNonUniformIAddOp;
+};
+template <> struct SPIRVArithmeticGroupOp<arith::MulFOp> {
+  using type = spirv::GroupNonUniformFMulOp;
+};
+template <> struct SPIRVArithmeticGroupOp<arith::MulIOp> {
+  using type = spirv::GroupNonUniformIMulOp;
+};
+template <> struct SPIRVArithmeticGroupOp<arith::MaxNumFOp> {
+  using type = spirv::GroupNonUniformFMaxOp;
+};
+template <> struct SPIRVArithmeticGroupOp<arith::MinNumFOp> {
+  using type = spirv::GroupNonUniformFMinOp;
+};
+
+template <typename OpTy>
+using SPIRVArithmeticGroupOpTy = typename SPIRVArithmeticGroupOp<OpTy>::type;
+
+template <typename OpTy> struct SPIRVBitwiseGroupOp {};
+
+template <> struct SPIRVBitwiseGroupOp<arith::AndIOp> {
+  using type = spirv::GroupNonUniformBitwiseAndOp;
+};
+template <> struct SPIRVBitwiseGroupOp<arith::OrIOp> {
+  using type = spirv::GroupNonUniformBitwiseOrOp;
+};
+template <> struct SPIRVBitwiseGroupOp<arith::XOrIOp> {
+  using type = spirv::GroupNonUniformBitwiseXorOp;
+};
+
+template <typename OpTy>
+using SPIRVBitwiseGroupOpTy = typename SPIRVBitwiseGroupOp<OpTy>::type;
+
+template <typename OpTy> struct SPIRVLogicalGroupOp {};
+
+template <> struct SPIRVLogicalGroupOp<arith::AndIOp> {
+  using type = spirv::GroupNonUniformLogicalAndOp;
+};
+template <> struct SPIRVLogicalGroupOp<arith::OrIOp> {
+  using type = spirv::GroupNonUniformLogicalOrOp;
+};
+template <> struct SPIRVLogicalGroupOp<arith::XOrIOp> {
+  using type = spirv::GroupNonUniformLogicalXorOp;
+};
+
+template <typename OpTy>
+using SPIRVLogicalGroupOpTy = typename SPIRVLogicalGroupOp<OpTy>::type;
+
+} // namespace mlir::triton::intel
+
+#endif // TRITONINTELGPUTOLLVM_SPIRVSUBGROUPOPS_H
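
For orientation, the traits above are meant to be looked up from inside an llvm::TypeSwitch over the combine operation, with decltype recovering the concrete arith op type in a single generic case. The following sketch is illustrative only: the helper name emitSubgroupReduce is hypothetical and not part of this commit (the actual call sites are createSPIRVGroupOp in TargetInfo.cpp and replaceOpWithNewOp in TritonOpsToLLVM.cpp), but the builder arguments mirror the ones used there.

// Hypothetical usage sketch for SPIRVSubgroupOps.h; not part of the commit.
#include "SPIRVSubgroupOps.h"
#include "llvm/ADT/TypeSwitch.h"

using namespace mlir;

static Value emitSubgroupReduce(OpBuilder &builder, Location loc,
                                Operation *combineOp, Value acc) {
  Type resultType = acc.getType();
  return llvm::TypeSwitch<Operation *, Value>(combineOp)
      .Case<arith::AddFOp, arith::AddIOp, arith::MulFOp, arith::MulIOp,
            arith::MaxNumFOp, arith::MinNumFOp>([&](auto op) {
        // decltype(op) is the concrete arith op matched by this case; the
        // trait maps it to the corresponding spirv::GroupNonUniform* op.
        using GroupOp = triton::intel::SPIRVArithmeticGroupOpTy<decltype(op)>;
        return builder
            .create<GroupOp>(loc, resultType, spirv::Scope::Subgroup,
                             spirv::GroupOperation::Reduce, acc,
                             /*clusterSize=*/Value())
            .getResult();
      })
      // A null Value signals an unsupported combine op to the caller.
      .Default([](Operation *) { return Value(); });
}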

third_party/intel/lib/TritonIntelGPUToLLVM/TargetInfo.cpp

Lines changed: 16 additions & 43 deletions
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "TargetInfo.h"
+#include "SPIRVSubgroupOps.h"
 #include "Utility.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVOps.h"
 #include "llvm/ADT/TypeSwitch.h"
@@ -133,46 +134,21 @@ Value warpReduceHelper(RewriterBase &rewriter, Location loc, Value acc,
                        unsigned warpSize) {
   auto resultType = reduceOp->getResult(0).getType();
   Value warpReduce =
-      llvm::TypeSwitch<mlir::Operation *, Value>(reduceOp)
-          .Case<arith::AddFOp>([&](auto) {
-            return createSPIRVGroupOp<spirv::GroupNonUniformFAddOp>(
-                rewriter, loc, resultType, acc, numLanesToReduce, warpSize);
-          })
-          .Case<arith::AddIOp>([&](auto) {
-            return createSPIRVGroupOp<spirv::GroupNonUniformIAddOp>(
-                rewriter, loc, resultType, acc, numLanesToReduce, warpSize);
-          })
-          .Case<arith::MulFOp>([&](auto) {
-            return createSPIRVGroupOp<spirv::GroupNonUniformFMulOp>(
-                rewriter, loc, resultType, acc, numLanesToReduce, warpSize);
-          })
-          .Case<arith::MulIOp>([&](auto) {
-            return createSPIRVGroupOp<spirv::GroupNonUniformIMulOp>(
-                rewriter, loc, resultType, acc, numLanesToReduce, warpSize);
-          })
-          .Case<arith::MaxNumFOp>([&](auto) {
-            return createSPIRVGroupOp<spirv::GroupNonUniformFMaxOp>(
-                rewriter, loc, resultType, acc, numLanesToReduce, warpSize);
-          })
-          .Case<arith::MinNumFOp>([&](auto) {
-            return createSPIRVGroupOp<spirv::GroupNonUniformFMinOp>(
-                rewriter, loc, resultType, acc, numLanesToReduce, warpSize);
-          })
-          .Case<arith::AndIOp>([&](auto) {
-            return createSPIRVGroupOp<spirv::GroupNonUniformBitwiseAndOp>(
-                rewriter, loc, resultType, acc, numLanesToReduce, warpSize);
-          })
-          .Case<arith::OrIOp>([&](auto) {
-            return createSPIRVGroupOp<spirv::GroupNonUniformBitwiseOrOp>(
+      TypeSwitch<mlir::Operation *, Value>(reduceOp)
+          .Case<arith::AddFOp, arith::AddIOp, arith::MulFOp, arith::MulIOp,
+                arith::MaxNumFOp, arith::MinNumFOp>([&](auto groupOp) {
+            return createSPIRVGroupOp<
+                SPIRVArithmeticGroupOpTy<decltype(groupOp)>>(
                 rewriter, loc, resultType, acc, numLanesToReduce, warpSize);
           })
-          .Case<arith::XOrIOp>([&](auto) {
-            return createSPIRVGroupOp<spirv::GroupNonUniformBitwiseXorOp>(
+          .Case<arith::AndIOp, arith::OrIOp, arith::XOrIOp>([&](auto groupOp) {
+            if (resultType.isInteger(1)) {
+              return createSPIRVGroupOp<
+                  SPIRVLogicalGroupOpTy<decltype(groupOp)>>(
+                  rewriter, loc, resultType, acc, numLanesToReduce, warpSize);
+            }
+            return createSPIRVGroupOp<SPIRVBitwiseGroupOpTy<decltype(groupOp)>>(
                 rewriter, loc, resultType, acc, numLanesToReduce, warpSize);
-          })
-          .Default([](auto) {
-            llvm_unreachable("Unsupported reduction");
-            return Value();
           });
   return warpReduce;
 }
@@ -206,12 +182,9 @@ bool TargetInfo::warpReduce(RewriterBase &rewriter, Location loc,
       reduceOp->getOperand(1) != block.getArgument(1))
     return false;
 
-  auto supportedOp =
-      llvm::TypeSwitch<mlir::Operation *, bool>(reduceOp)
-          .Case<arith::AddFOp, arith::AddIOp, arith::MulFOp, arith::MulIOp,
-                arith::MaxNumFOp, arith::MinNumFOp, arith::AndIOp, arith::OrIOp,
-                arith::XOrIOp>([&](auto) { return true; })
-          .Default([](auto) { return false; });
+  auto supportedOp = isa<arith::AddFOp, arith::AddIOp, arith::MulFOp,
+                         arith::MulIOp, arith::MaxNumFOp, arith::MinNumFOp,
+                         arith::AndIOp, arith::OrIOp, arith::XOrIOp>(reduceOp);
 
   if (!supportedOp)
     return false;
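
One detail worth flagging in the hunk above: for i1 accumulators, arith.andi, arith.ori and arith.xori are lowered to the GroupNonUniformLogical* ops instead of the Bitwise* ones, since the SPIR-V logical group operations act on booleans while the bitwise variants expect ordinary integers. A minimal standalone sketch of that dispatch (the helper name reduceBoolOrInt is hypothetical, and the includes from the sketch after SPIRVSubgroupOps.h are assumed):

// Hypothetical helper illustrating the i1 special case; not part of the commit.
template <typename ArithOpTy>
static Value reduceBoolOrInt(OpBuilder &builder, Location loc, Type resultType,
                             Value acc) {
  if (resultType.isInteger(1)) {
    // i1 maps to a SPIR-V boolean, so emit the Logical* group op.
    using LogicalOp = triton::intel::SPIRVLogicalGroupOpTy<ArithOpTy>;
    return builder
        .create<LogicalOp>(loc, resultType, spirv::Scope::Subgroup,
                           spirv::GroupOperation::Reduce, acc, Value())
        .getResult();
  }
  // Wider integer types use the Bitwise* group op.
  using BitwiseOp = triton::intel::SPIRVBitwiseGroupOpTy<ArithOpTy>;
  return builder
      .create<BitwiseOp>(loc, resultType, spirv::Scope::Subgroup,
                         spirv::GroupOperation::Reduce, acc, Value())
      .getResult();
}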

third_party/intel/lib/TritonIntelGPUToLLVM/TritonOpsToLLVM.cpp

Lines changed: 9 additions & 19 deletions
@@ -1,5 +1,6 @@
 #include "Dialect/TritonIntelGPU/IR/Utils.h"
 #include "PatternTritonGPUOpToLLVM.h"
+#include "SPIRVSubgroupOps.h"
 
 #include "intel/include/Dialect/TritonGEN/IR/TritonGENDialect.h"
 #include "intel/include/Dialect/TritonIntelGPU/IR/Dialect.h"
@@ -583,28 +584,17 @@ class ReduceOpConversion : public ConvertTritonGPUOpToLLVMPattern<ReduceOp> {
     Operation *combine = &*combineOp.front().getOperations().begin();
 
     // FIXME: support all possible reduction modes
-    using AllReduceOperation = mlir::gpu::AllReduceOperation;
-    AllReduceOperation redKind;
-    if (isa<arith::AddFOp>(combine))
-      replaceWithSPIRVOp<mlir::spirv::GroupNonUniformFAddOp>(op, adaptor,
-                                                             rewriter);
-    else if (isa<arith::MaxNumFOp>(combine))
-      replaceWithSPIRVOp<mlir::spirv::GroupNonUniformFMaxOp>(op, adaptor,
-                                                             rewriter);
-    else
-      llvm_unreachable("Unhandled reduction kind");
+    TypeSwitch<Operation *>(combine).Case<arith::AddFOp, arith::MaxNumFOp>(
+        [&](auto reduce) {
+          rewriter.replaceOpWithNewOp<
+              intel::SPIRVArithmeticGroupOpTy<decltype(reduce)>>(
+              op, typeConverter->convertType(op.getType(0)),
+              spirv::Scope::Subgroup, spirv::GroupOperation::Reduce,
+              adaptor.getSrcs()[0], Value());
+        });
 
     return success();
   }
-
-private:
-  template <typename ReplaceOp>
-  void replaceWithSPIRVOp(ReduceOp op, ReduceOpAdaptor adaptor,
-                          ConversionPatternRewriter &rewriter) const {
-    rewriter.replaceOpWithNewOp<ReplaceOp>(
-        op, typeConverter->convertType(op.getType(0)), spirv::Scope::Subgroup,
-        spirv::GroupOperation::Reduce, adaptor.getSrcs()[0], Value());
-  }
 };
 
 class TransposedReduceOpConversion