Make mxfpScaleBf16 private to UpcastMXFPToLLVM.cpp

leonling-ll · leonling-ll · commit b36c35edd7dc · 2024-12-06T18:29:40.000Z
diff --git a/third_party/intel/lib/TritonIntelGPUToLLVM/UpcastMXFPToLLVM.cpp b/third_party/intel/lib/TritonIntelGPUToLLVM/UpcastMXFPToLLVM.cpp
@@ -17,6 +17,24 @@ using namespace mlir::triton::gpu;
 
 namespace {
 
+static Value mxfpScaleBf16(ConversionPatternRewriter &rewriter, Location loc,
+                           Value v, Value scale) {
+  Value vBf16 = bitcast(v, bf16_ty);
+  Value nanBf16 = bitcast(i16_val(0x7fff), bf16_ty);
+  Value scaleIsNan = icmp_eq(scale, i8_val(0xff));
+  Value scaleBf16 = bitcast(shl(zext(i16_ty, scale), i16_val(7)), bf16_ty);
+
+  Value v0 = mlir::triton::intel::convertBf16ToFp32(loc, rewriter, vBf16);
+  Value v1 = mlir::triton::intel::convertBf16ToFp32(loc, rewriter, scaleBf16);
+  auto result = rewriter.create<LLVM::FMulOp>(loc, f32_ty, v0, v1);
+  auto undefRounding = static_cast<mlir::triton::RoundingMode>(-1);
+  Value scaledBf16 = mlir::triton::intel::convertFp32ToBf16(
+      loc, rewriter, result, undefRounding);
+  // The product is computed in f32 above rather than as a direct bf16 fmul.
+  // A scale of 0xff denotes NaN per the mxfp specification; return NaN then.
+  return select(scaleIsNan, nanBf16, scaledBf16);
+};
+
 class UpcastMXFPOpPattern : public ConvertOpToLLVMPattern<UpcastMXFPOp> {
 private:
   const TargetInfoBase &targetInfo;
@@ -48,8 +66,8 @@ class UpcastMXFPOpPattern : public ConvertOpToLLVMPattern<UpcastMXFPOp> {
 
     for (auto [i, scaleVal] : llvm::enumerate(scaleVals)) {
       for (int j = 0; j < 32; ++j) {
-        xVals[32 * i + j] = LLVM::intel::mxfpScaleBf16(
-            rewriter, loc, xVals[32 * i + j], scaleVal);
+        xVals[32 * i + j] =
+            mxfpScaleBf16(rewriter, loc, xVals[32 * i + j], scaleVal);
       }
     }
 
diff --git a/third_party/intel/lib/TritonIntelGPUToLLVM/Utility.cpp b/third_party/intel/lib/TritonIntelGPUToLLVM/Utility.cpp
@@ -159,21 +159,4 @@ LLVM::LLVMFuncOp getSpirvPrintfDeclaration(RewriterBase &rewriter) {
   return printFunc;
 }
 
-Value mxfpScaleBf16(ConversionPatternRewriter &rewriter, Location loc, Value v,
-                    Value scale) {
-  Value vBf16 = bitcast(v, bf16_ty);
-  Value nanBf16 = bitcast(i16_val(0x7fff), bf16_ty);
-  Value scaleIsNan = icmp_eq(scale, i8_val(0xff));
-  Value scaleBf16 = bitcast(shl(zext(i16_ty, scale), i16_val(7)), bf16_ty);
-
-  Value v0 = mlir::triton::intel::convertBf16ToFp32(loc, rewriter, vBf16);
-  Value v1 = mlir::triton::intel::convertBf16ToFp32(loc, rewriter, scaleBf16);
-  auto result = rewriter.create<LLVM::FMulOp>(loc, f32_ty, v0, v1);
-  auto undefRounding = static_cast<mlir::triton::RoundingMode>(-1);
-  Value scaledBf16 = mlir::triton::intel::convertFp32ToBf16(
-      loc, rewriter, result, undefRounding);
-  // Value scaledBf16 = fmul(vBf16, scaleBf16);
-  // Account for NaN in the scale as per the mxfp specification.
-  return select(scaleIsNan, nanBf16, scaledBf16);
-};
 } // namespace mlir::LLVM::intel
diff --git a/third_party/intel/lib/TritonIntelGPUToLLVM/Utility.h b/third_party/intel/lib/TritonIntelGPUToLLVM/Utility.h
@@ -127,8 +127,6 @@ static Value getModuleWarpSize(RewriterBase &rewriter, Location loc) {
   return i32_val(triton::gpu::TritonGPUDialect::getThreadsPerWarp(mod));
 }
 
-Value mxfpScaleBf16(ConversionPatternRewriter &rewriter, Location loc, Value v,
-                    Value scale);
 } // namespace mlir::LLVM::intel
 
 // -----------------------------------------------------------------------

Original file line number	Diff line number	Diff line change
`@@ -127,8 +127,6 @@ static Value getModuleWarpSize(RewriterBase &rewriter, Location loc) {`
`127`	`127`	`return i32_val(triton::gpu::TritonGPUDialect::getThreadsPerWarp(mod));`
`128`	`128`	`}`
`129`	`129`
`130`		`-Value mxfpScaleBf16(ConversionPatternRewriter &rewriter, Location loc, Value v,`
`131`		`- Value scale);`
`132`	`130`	`} // namespace mlir::LLVM::intel`
`133`	`131`
`134`	`132`	`// -----------------------------------------------------------------------`