-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[MLIR][ROCDL] Add math.clampf -> rocdl.fmed3 conversion #163259
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[MLIR][ROCDL] Add math.clampf -> rocdl.fmed3 conversion #163259
Conversation
Signed-off-by: Keshav Vinayak Jha <[email protected]>
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-gpu Author: Keshav Vinayak Jha (keshavvinayak01) Changes: Added pattern for lowering `Math::ClampFOp` to `ROCDL::FMED3`. Solves #15072. Patch is 52.62 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/163259.diff 5 Files Affected:
diff --git a/mlir/include/mlir/Conversion/MathToROCDL/MathToROCDL.h b/mlir/include/mlir/Conversion/MathToROCDL/MathToROCDL.h
index 46573e7966ccc..770f257d89bd5 100644
--- a/mlir/include/mlir/Conversion/MathToROCDL/MathToROCDL.h
+++ b/mlir/include/mlir/Conversion/MathToROCDL/MathToROCDL.h
@@ -9,6 +9,7 @@
#define MLIR_CONVERSION_MATHTOROCDL_MATHTOROCDL_H_
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Dialect/AMDGPU/Utils/Chipset.h"
#include "mlir/IR/PatternMatch.h"
#include <memory>
@@ -20,7 +21,8 @@ class Pass;
/// Populate the given list with patterns that convert from Math to ROCDL calls.
void populateMathToROCDLConversionPatterns(const LLVMTypeConverter &converter,
- RewritePatternSet &patterns);
+ RewritePatternSet &patterns,
+ amdgpu::Chipset chipset);
} // namespace mlir
#endif // MLIR_CONVERSION_MATHTOROCDL_MATHTOROCDL_H_
diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td
index 3c18ecc753d0f..c3fd397e258ae 100644
--- a/mlir/include/mlir/Conversion/Passes.td
+++ b/mlir/include/mlir/Conversion/Passes.td
@@ -755,6 +755,14 @@ def ConvertMathToLibmPass : Pass<"convert-math-to-libm", "ModuleOp"> {
"func::FuncDialect",
"vector::VectorDialect",
];
+ let options = [
+ Option<"chipset", "chipset", "std::string",
+
+
+ /*default=*/"\"gfx000\"",
+ "Chipset that these operations will run on">
+ ];
+
}
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index b215211e131d4..c03f3a5d3889c 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -484,5 +484,5 @@ void mlir::populateGpuToROCDLConversionPatterns(
GPUSubgroupBroadcastOpToROCDL>(converter);
patterns.add<GPUSubgroupSizeOpToROCDL>(converter, chipset);
- populateMathToROCDLConversionPatterns(converter, patterns);
+ populateMathToROCDLConversionPatterns(converter, patterns, chipset);
}
diff --git a/mlir/lib/Conversion/MathToROCDL/MathToROCDL.cpp b/mlir/lib/Conversion/MathToROCDL/MathToROCDL.cpp
index df219f3ff4f6e..ceb3d22c6bd59 100644
--- a/mlir/lib/Conversion/MathToROCDL/MathToROCDL.cpp
+++ b/mlir/lib/Conversion/MathToROCDL/MathToROCDL.cpp
@@ -10,6 +10,7 @@
#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Dialect/AMDGPU/Utils/Chipset.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
@@ -42,8 +43,39 @@ static void populateOpPatterns(const LLVMTypeConverter &converter,
f32ApproxFunc, f16Func);
}
+/// Conversion pattern that rewrites `math::ClampFOp` into `ROCDL::FMed3Op`.
+///
+/// The rewrite is only performed when the target chipset's major version is 9
+/// or higher; for older chipsets the pattern reports a match failure instead.
+struct ClampFOpConversion final
+    : public ConvertOpToLLVMPattern<math::ClampFOp> {
+  using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern;
+
+  /// Builds the pattern for `converter`, remembering the `chipset` that the
+  /// generated code is meant to run on.
+  ClampFOpConversion(const LLVMTypeConverter &converter,
+                     amdgpu::Chipset chipset)
+      : ConvertOpToLLVMPattern<math::ClampFOp>(converter), chipset(chipset) {}
+
+  LogicalResult
+  matchAndRewrite(math::ClampFOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    // V_MED3_F16/F32 only exists in gfx9+ architectures.
+    if (chipset.majorVersion < 9) {
+      return rewriter.notifyMatchFailure(
+          op, ("pre-gfx9 (gfx" + std::to_string(chipset.majorVersion) +
+               "): V_MED3_F16 / V_MED3_F32 not supported."));
+    }
+    // Take the operands from the adaptor rather than from `op`: inside a
+    // conversion pattern the adaptor carries the operands as remapped by the
+    // conversion driver, whereas `op` still refers to the original values.
+    rewriter.replaceOpWithNewOp<ROCDL::FMed3Op>(
+        op, op.getType(), adaptor.getValue(), adaptor.getMin(),
+        adaptor.getMax());
+    return success();
+  }
+
+  /// Chipset used to decide whether the fmed3 lowering may be applied.
+  amdgpu::Chipset chipset;
+};
+
+/// Registers the conversion patterns that need to know the target chipset.
+/// At present this is only `ClampFOpConversion`.
+static void addChipsetDependentPatterns(const LLVMTypeConverter &converter,
+                                        RewritePatternSet &patterns,
+                                        amdgpu::Chipset chipset) {
+  patterns.add<ClampFOpConversion>(converter, chipset);
+}
+
void mlir::populateMathToROCDLConversionPatterns(
- const LLVMTypeConverter &converter, RewritePatternSet &patterns) {
+ const LLVMTypeConverter &converter, RewritePatternSet &patterns,
+ amdgpu::Chipset chipset) {
// Handled by mathToLLVM: math::AbsIOp
// Handled by mathToLLVM: math::AbsFOp
// Handled by mathToLLVM: math::CopySignOp
@@ -118,27 +150,31 @@ void mlir::populateMathToROCDLConversionPatterns(
// worth creating a separate pass for it.
populateOpPatterns<arith::RemFOp>(converter, patterns, "__ocml_fmod_f32",
"__ocml_fmod_f64", "__ocml_fmod_f16");
+
+ addChipsetDependentPatterns(converter, patterns, chipset);
}
-namespace {
-struct ConvertMathToROCDLPass
- : public impl::ConvertMathToROCDLBase<ConvertMathToROCDLPass> {
- ConvertMathToROCDLPass() = default;
+struct ConvertMathToROCDLPass final
+ : impl::ConvertMathToROCDLBase<ConvertMathToROCDLPass> {
+ using impl::ConvertMathToROCDLBase<
+ ConvertMathToROCDLPass>::ConvertMathToROCDLBase;
+
void runOnOperation() override;
};
-} // namespace
void ConvertMathToROCDLPass::runOnOperation() {
auto m = getOperation();
MLIRContext *ctx = m.getContext();
+ FailureOr<amdgpu::Chipset> maybeChipset = amdgpu::Chipset::parse(chipset);
RewritePatternSet patterns(&getContext());
LowerToLLVMOptions options(ctx, DataLayout(m));
LLVMTypeConverter converter(ctx, options);
- populateMathToROCDLConversionPatterns(converter, patterns);
+ populateMathToROCDLConversionPatterns(converter, patterns, *maybeChipset);
ConversionTarget target(getContext());
- target.addLegalDialect<BuiltinDialect, func::FuncDialect,
- vector::VectorDialect, LLVM::LLVMDialect>();
+ target
+ .addLegalDialect<BuiltinDialect, func::FuncDialect, vector::VectorDialect,
+ LLVM::LLVMDialect, ROCDL::ROCDLDialect>();
target.addIllegalOp<LLVM::CosOp, LLVM::ExpOp, LLVM::Exp2Op, LLVM::FAbsOp,
LLVM::FCeilOp, LLVM::FFloorOp, LLVM::FRemOp, LLVM::LogOp,
LLVM::Log10Op, LLVM::Log2Op, LLVM::PowOp, LLVM::SinOp,
diff --git a/mlir/test/Conversion/MathToROCDL/math-to-rocdl.mlir b/mlir/test/Conversion/MathToROCDL/math-to-rocdl.mlir
index dbff23339d8b3..29851e2de5cb2 100644
--- a/mlir/test/Conversion/MathToROCDL/math-to-rocdl.mlir
+++ b/mlir/test/Conversion/MathToROCDL/math-to-rocdl.mlir
@@ -1,18 +1,40 @@
-// RUN: mlir-opt %s -convert-math-to-rocdl -allow-unregistered-dialect -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -allow-unregistered-dialect -split-input-file \
+// RUN:   -pass-pipeline='builtin.module(convert-math-to-rocdl{chipset=gfx803})' \
+// RUN:   | FileCheck %s --check-prefix=PRE9
+// RUN: mlir-opt %s -allow-unregistered-dialect -split-input-file \
+// RUN:   -pass-pipeline='builtin.module(convert-math-to-rocdl{chipset=gfx942})' \
+// RUN:   | FileCheck %s --check-prefix=POST9
module @test_module {
// CHECK: llvm.func @__ocml_fmod_f16(f16, f16) -> f16
// CHECK: llvm.func @__ocml_fmod_f32(f32, f32) -> f32
// CHECK: llvm.func @__ocml_fmod_f64(f64, f64) -> f64
// CHECK-LABEL: func @arith_remf
- func.func @arith_remf(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
- %result16 = arith.remf %arg_f16, %arg_f16 : f16
- // CHECK: llvm.call @__ocml_fmod_f16(%{{.*}}, %{{.*}}) : (f16, f16) -> f16
- %result32 = arith.remf %arg_f32, %arg_f32 : f32
- // CHECK: llvm.call @__ocml_fmod_f32(%{{.*}}, %{{.*}}) : (f32, f32) -> f32
- %result64 = arith.remf %arg_f64, %arg_f64 : f64
- // CHECK: llvm.call @__ocml_fmod_f64(%{{.*}}, %{{.*}}) : (f64, f64) -> f64
- func.return %result16, %result32, %result64 : f16, f32, f64
+ func.func @arith_remf(% arg_f16
+ : f16, % arg_f32
+ : f32, % arg_f64
+ : f64)
+ ->(f16, f32, f64) {
+ % result16 = arith.remf % arg_f16,
+ %
+ arg_f16 : f16
+ // CHECK: llvm.call @__ocml_fmod_f16(%{{.*}}, %{{.*}}) :
+ // (f16, f16) -> f16
+ %
+ result32 = arith.remf % arg_f32,
+ %
+ arg_f32 : f32
+ // CHECK: llvm.call @__ocml_fmod_f32(%{{.*}}, %{{.*}}) :
+ // (f32, f32) -> f32
+ %
+ result64 = arith.remf % arg_f64,
+ %
+ arg_f64 : f64
+ // CHECK: llvm.call @__ocml_fmod_f64(%{{.*}}, %{{.*}}) :
+ // (f64, f64) -> f64
+ func.return %
+ result16,
+ % result32, % result64 : f16, f32, f64
}
}
@@ -23,14 +45,28 @@ module @test_module {
// CHECK: llvm.func @__ocml_acos_f32(f32) -> f32
// CHECK: llvm.func @__ocml_acos_f64(f64) -> f64
// CHECK-LABEL: func @math_acos
- func.func @math_acos(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
- %result16 = math.acos %arg_f16 : f16
- // CHECK: llvm.call @__ocml_acos_f16(%{{.*}}) : (f16) -> f16
- %result32 = math.acos %arg_f32 : f32
- // CHECK: llvm.call @__ocml_acos_f32(%{{.*}}) : (f32) -> f32
- %result64 = math.acos %arg_f64 : f64
- // CHECK: llvm.call @__ocml_acos_f64(%{{.*}}) : (f64) -> f64
- func.return %result16, %result32, %result64 : f16, f32, f64
+ func.func @math_acos(% arg_f16
+ : f16, % arg_f32
+ : f32, % arg_f64
+ : f64)
+ ->(f16, f32, f64) {
+ % result16 = math.acos %
+ arg_f16
+ : f16
+ // CHECK: llvm.call @__ocml_acos_f16(%{{.*}}) : (f16) -> f16
+ %
+ result32 = math.acos %
+ arg_f32
+ : f32
+ // CHECK: llvm.call @__ocml_acos_f32(%{{.*}}) : (f32) -> f32
+ %
+ result64 = math.acos %
+ arg_f64
+ : f64
+ // CHECK: llvm.call @__ocml_acos_f64(%{{.*}}) : (f64) -> f64
+ func.return %
+ result16,
+ % result32, % result64 : f16, f32, f64
}
}
@@ -41,14 +77,28 @@ module @test_module {
// CHECK: llvm.func @__ocml_acosh_f32(f32) -> f32
// CHECK: llvm.func @__ocml_acosh_f64(f64) -> f64
// CHECK-LABEL: func @math_acosh
- func.func @math_acosh(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
- %result16 = math.acosh %arg_f16 : f16
- // CHECK: llvm.call @__ocml_acosh_f16(%{{.*}}) : (f16) -> f16
- %result32 = math.acosh %arg_f32 : f32
- // CHECK: llvm.call @__ocml_acosh_f32(%{{.*}}) : (f32) -> f32
- %result64 = math.acosh %arg_f64 : f64
- // CHECK: llvm.call @__ocml_acosh_f64(%{{.*}}) : (f64) -> f64
- func.return %result16, %result32, %result64 : f16, f32, f64
+ func.func @math_acosh(% arg_f16
+ : f16, % arg_f32
+ : f32, % arg_f64
+ : f64)
+ ->(f16, f32, f64) {
+ % result16 = math.acosh %
+ arg_f16
+ : f16
+ // CHECK: llvm.call @__ocml_acosh_f16(%{{.*}}) : (f16) -> f16
+ %
+ result32 = math.acosh %
+ arg_f32
+ : f32
+ // CHECK: llvm.call @__ocml_acosh_f32(%{{.*}}) : (f32) -> f32
+ %
+ result64 = math.acosh %
+ arg_f64
+ : f64
+ // CHECK: llvm.call @__ocml_acosh_f64(%{{.*}}) : (f64) -> f64
+ func.return %
+ result16,
+ % result32, % result64 : f16, f32, f64
}
}
@@ -59,14 +109,28 @@ module @test_module {
// CHECK: llvm.func @__ocml_asin_f32(f32) -> f32
// CHECK: llvm.func @__ocml_asin_f64(f64) -> f64
// CHECK-LABEL: func @math_asin
- func.func @math_asin(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
- %result16 = math.asin %arg_f16 : f16
- // CHECK: llvm.call @__ocml_asin_f16(%{{.*}}) : (f16) -> f16
- %result32 = math.asin %arg_f32 : f32
- // CHECK: llvm.call @__ocml_asin_f32(%{{.*}}) : (f32) -> f32
- %result64 = math.asin %arg_f64 : f64
- // CHECK: llvm.call @__ocml_asin_f64(%{{.*}}) : (f64) -> f64
- func.return %result16, %result32, %result64 : f16, f32, f64
+ func.func @math_asin(% arg_f16
+ : f16, % arg_f32
+ : f32, % arg_f64
+ : f64)
+ ->(f16, f32, f64) {
+ % result16 = math.asin %
+ arg_f16
+ : f16
+ // CHECK: llvm.call @__ocml_asin_f16(%{{.*}}) : (f16) -> f16
+ %
+ result32 = math.asin %
+ arg_f32
+ : f32
+ // CHECK: llvm.call @__ocml_asin_f32(%{{.*}}) : (f32) -> f32
+ %
+ result64 = math.asin %
+ arg_f64
+ : f64
+ // CHECK: llvm.call @__ocml_asin_f64(%{{.*}}) : (f64) -> f64
+ func.return %
+ result16,
+ % result32, % result64 : f16, f32, f64
}
}
@@ -77,14 +141,28 @@ module @test_module {
// CHECK: llvm.func @__ocml_asinh_f32(f32) -> f32
// CHECK: llvm.func @__ocml_asinh_f64(f64) -> f64
// CHECK-LABEL: func @math_asinh
- func.func @math_asinh(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
- %result16 = math.asinh %arg_f16 : f16
- // CHECK: llvm.call @__ocml_asinh_f16(%{{.*}}) : (f16) -> f16
- %result32 = math.asinh %arg_f32 : f32
- // CHECK: llvm.call @__ocml_asinh_f32(%{{.*}}) : (f32) -> f32
- %result64 = math.asinh %arg_f64 : f64
- // CHECK: llvm.call @__ocml_asinh_f64(%{{.*}}) : (f64) -> f64
- func.return %result16, %result32, %result64 : f16, f32, f64
+ func.func @math_asinh(% arg_f16
+ : f16, % arg_f32
+ : f32, % arg_f64
+ : f64)
+ ->(f16, f32, f64) {
+ % result16 = math.asinh %
+ arg_f16
+ : f16
+ // CHECK: llvm.call @__ocml_asinh_f16(%{{.*}}) : (f16) -> f16
+ %
+ result32 = math.asinh %
+ arg_f32
+ : f32
+ // CHECK: llvm.call @__ocml_asinh_f32(%{{.*}}) : (f32) -> f32
+ %
+ result64 = math.asinh %
+ arg_f64
+ : f64
+ // CHECK: llvm.call @__ocml_asinh_f64(%{{.*}}) : (f64) -> f64
+ func.return %
+ result16,
+ % result32, % result64 : f16, f32, f64
}
}
@@ -95,14 +173,28 @@ module @test_module {
// CHECK: llvm.func @__ocml_atan_f32(f32) -> f32
// CHECK: llvm.func @__ocml_atan_f64(f64) -> f64
// CHECK-LABEL: func @math_atan
- func.func @math_atan(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
- %result16 = math.atan %arg_f16 : f16
- // CHECK: llvm.call @__ocml_atan_f16(%{{.*}}) : (f16) -> f16
- %result32 = math.atan %arg_f32 : f32
- // CHECK: llvm.call @__ocml_atan_f32(%{{.*}}) : (f32) -> f32
- %result64 = math.atan %arg_f64 : f64
- // CHECK: llvm.call @__ocml_atan_f64(%{{.*}}) : (f64) -> f64
- func.return %result16, %result32, %result64 : f16, f32, f64
+ func.func @math_atan(% arg_f16
+ : f16, % arg_f32
+ : f32, % arg_f64
+ : f64)
+ ->(f16, f32, f64) {
+ % result16 = math.atan %
+ arg_f16
+ : f16
+ // CHECK: llvm.call @__ocml_atan_f16(%{{.*}}) : (f16) -> f16
+ %
+ result32 = math.atan %
+ arg_f32
+ : f32
+ // CHECK: llvm.call @__ocml_atan_f32(%{{.*}}) : (f32) -> f32
+ %
+ result64 = math.atan %
+ arg_f64
+ : f64
+ // CHECK: llvm.call @__ocml_atan_f64(%{{.*}}) : (f64) -> f64
+ func.return %
+ result16,
+ % result32, % result64 : f16, f32, f64
}
}
@@ -113,14 +205,28 @@ module @test_module {
// CHECK: llvm.func @__ocml_atanh_f32(f32) -> f32
// CHECK: llvm.func @__ocml_atanh_f64(f64) -> f64
// CHECK-LABEL: func @math_atanh
- func.func @math_atanh(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
- %result16 = math.atanh %arg_f16 : f16
- // CHECK: llvm.call @__ocml_atanh_f16(%{{.*}}) : (f16) -> f16
- %result32 = math.atanh %arg_f32 : f32
- // CHECK: llvm.call @__ocml_atanh_f32(%{{.*}}) : (f32) -> f32
- %result64 = math.atanh %arg_f64 : f64
- // CHECK: llvm.call @__ocml_atanh_f64(%{{.*}}) : (f64) -> f64
- func.return %result16, %result32, %result64 : f16, f32, f64
+ func.func @math_atanh(% arg_f16
+ : f16, % arg_f32
+ : f32, % arg_f64
+ : f64)
+ ->(f16, f32, f64) {
+ % result16 = math.atanh %
+ arg_f16
+ : f16
+ // CHECK: llvm.call @__ocml_atanh_f16(%{{.*}}) : (f16) -> f16
+ %
+ result32 = math.atanh %
+ arg_f32
+ : f32
+ // CHECK: llvm.call @__ocml_atanh_f32(%{{.*}}) : (f32) -> f32
+ %
+ result64 = math.atanh %
+ arg_f64
+ : f64
+ // CHECK: llvm.call @__ocml_atanh_f64(%{{.*}}) : (f64) -> f64
+ func.return %
+ result16,
+ % result32, % result64 : f16, f32, f64
}
}
@@ -131,14 +237,31 @@ module @test_module {
// CHECK: llvm.func @__ocml_atan2_f32(f32, f32) -> f32
// CHECK: llvm.func @__ocml_atan2_f64(f64, f64) -> f64
// CHECK-LABEL: func @math_atan2
- func.func @math_atan2(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
- %result16 = math.atan2 %arg_f16, %arg_f16 : f16
- // CHECK: llvm.call @__ocml_atan2_f16(%{{.*}}, %{{.*}}) : (f16, f16) -> f16
- %result32 = math.atan2 %arg_f32, %arg_f32 : f32
- // CHECK: llvm.call @__ocml_atan2_f32(%{{.*}}, %{{.*}}) : (f32, f32) -> f32
- %result64 = math.atan2 %arg_f64, %arg_f64 : f64
- // CHECK: llvm.call @__ocml_atan2_f64(%{{.*}}, %{{.*}}) : (f64, f64) -> f64
- func.return %result16, %result32, %result64 : f16, f32, f64
+ func.func @math_atan2(% arg_f16
+ : f16, % arg_f32
+ : f32, % arg_f64
+ : f64)
+ ->(f16, f32, f64) {
+ % result16 = math.atan2 % arg_f16,
+ %
+ arg_f16 : f16
+ // CHECK: llvm.call @__ocml_atan2_f16(%{{.*}}, %{{.*}}) :
+ // (f16, f16) -> f16
+ %
+ result32 = math.atan2 % arg_f32,
+ %
+ arg_f32 : f32
+ // CHECK: llvm.call @__ocml_atan2_f32(%{{.*}}, %{{.*}}) :
+ // (f32, f32) -> f32
+ %
+ result64 = math.atan2 % arg_f64,
+ %
+ arg_f64 : f64
+ // CHECK: llvm.call @__ocml_atan2_f64(%{{.*}}, %{{.*}})
+ // : (f64, f64) -> f64
+ func.return %
+ result16,
+ % result32, % result64 : f16, f32, f64
}
}
@@ -149,14 +272,28 @@ module @test_module {
// CHECK: llvm.func @__ocml_cbrt_f32(f32) -> f32
// CHECK: llvm.func @__ocml_cbrt_f64(f64) -> f64
// CHECK-LABEL: func @math_cbrt
- func.func @math_cbrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
- %result16 = math.cbrt %arg_f16 : f16
- // CHECK: llvm.call @__ocml_cbrt_f16(%{{.*}}) : (f16) -> f16
- %result32 = math.cbrt %arg_f32 : f32
- // CHECK: llvm.call @__ocml_cbrt_f32(%{{.*}}) : (f32) -> f32
- %result64 = math.cbrt %arg_f64 : f64
- // CHECK: llvm.call @__ocml_cbrt_f64(%{{.*}}) : (f64) -> f64
- func.return %result16, %result32, %result64 : f16, f32, f64
+ func.func @math_cbrt(% arg_f16
+ : f16, % arg_f32
+ : f32, % arg_f64
+ : f64)
+ ->(f16, f32, f64) {
+ % result16 = math.cbrt %
+ arg_f16
+ : f16
+ // CHECK: llvm.call @__ocml_cbrt_f16(%{{.*}}) : (f16) -> f16
+ %
+ result32 = math.cbrt %
+ arg_f32
+ : f32
+ // CHECK: llvm.call @__ocml_cbrt_f32(%{{.*}}) : (f...
[truncated]
|
Signed-off-by: Keshav Vinayak Jha <[email protected]>
To reference @krzysz00 comments:
Applied changes as per other suggestions. |
Signed-off-by: Keshav Vinayak Jha <[email protected]>
1. Added lit test for 1D and 2D vectors 2. Added unrolling support for ND inputs Signed-off-by: Keshav Vinayak Jha <[email protected]>
Signed-off-by: Keshav Vinayak Jha <[email protected]>
Signed-off-by: Keshav Vinayak Jha <[email protected]>
Signed-off-by: Keshav Vinayak Jha <[email protected]>
Signed-off-by: Keshav Vinayak Jha <[email protected]>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider LLVM::detail::vectorOneToOneRewrite
to make the pattern shorter?
But this looks fundamentally fine
Signed-off-by: Keshav Vinayak Jha <[email protected]>
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/205/builds/24997 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/204/builds/25020 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/203/builds/26208 Here is the relevant piece of the build log for the reference
|
)" This reverts commit 1e6df64.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/138/builds/20429 Here is the relevant piece of the build log for the reference
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I reverted the commit because it caused a build bot failure, but also this PR shouldn't have been merged, as not all my comments had been addressed.
if (!chipset.empty()) { | ||
FailureOr<amdgpu::Chipset> maybeChipset = amdgpu::Chipset::parse(chipset); | ||
if (failed(maybeChipset)) { | ||
return signalPassFailure(); | ||
} | ||
populateMathToROCDLConversionPatterns(converter, patterns, *maybeChipset); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is wrong, the call to populateMathToROCDLConversionPatterns
shouldn't be guarded by an if.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
…rsion" (#163447) Reverts llvm/llvm-project#163259. Reverting due to missing link libraries causing failures in shared build bots.
Added Pattern for lowering `Math::ClampFOp` to `ROCDL::FMED3`. Also added `chipset` option to `MathToRocdl` pass to check for arch support ISA instructions Solves [llvm#15072](llvm#157052) Reapplies llvm#160100 --------- Signed-off-by: Keshav Vinayak Jha <[email protected]>
…m#163447) Reverts llvm#163259. Reverting due to missing link libraries causing failures in shared build bots.
Added a pattern for lowering `Math::ClampFOp` to `ROCDL::FMED3`.
Also added a `chipset` option to the `MathToRocdl` pass to check whether the target architecture supports the required ISA instructions.
Solves #15072
Reapplies #160100