-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[MLIR] Add cpow support in ComplexToROCDLLibraryCalls #153183
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
57181a2
9f01454
dce85c3
0cebae8
1325a56
a677ee9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1276,6 +1276,28 @@ mlir::Value genComplexMathOp(fir::FirOpBuilder &builder, mlir::Location loc, | |
| return result; | ||
| } | ||
|
|
||
| mlir::Value genComplexPow(fir::FirOpBuilder &builder, mlir::Location loc, | ||
| const MathOperation &mathOp, | ||
| mlir::FunctionType mathLibFuncType, | ||
| llvm::ArrayRef<mlir::Value> args) { | ||
| bool canUseApprox = mlir::arith::bitEnumContainsAny( | ||
| builder.getFastMathFlags(), mlir::arith::FastMathFlags::afn); | ||
| bool isAMDGPU = fir::getTargetTriple(builder.getModule()).isAMDGCN(); | ||
| if (!forceMlirComplex && !canUseApprox && !isAMDGPU) | ||
| return genLibCall(builder, loc, mathOp, mathLibFuncType, args); | ||
|
|
||
| auto complexTy = mlir::cast<mlir::ComplexType>(mathLibFuncType.getInput(0)); | ||
| auto realTy = complexTy.getElementType(); | ||
| mlir::Value realExp = builder.createConvert(loc, realTy, args[1]); | ||
| mlir::Value zero = builder.createRealConstant(loc, realTy, 0); | ||
| mlir::Value complexExp = | ||
| builder.create<mlir::complex::CreateOp>(loc, complexTy, realExp, zero); | ||
| mlir::Value result = | ||
| builder.create<mlir::complex::PowOp>(loc, args[0], complexExp); | ||
| result = builder.createConvert(loc, mathLibFuncType.getResult(0), result); | ||
| return result; | ||
| } | ||
|
|
||
| /// Mapping between mathematical intrinsic operations and MLIR operations | ||
| /// of some appropriate dialect (math, complex, etc.) or libm calls. | ||
| /// TODO: support remaining Fortran math intrinsics. | ||
|
|
@@ -1625,15 +1647,19 @@ static constexpr MathOperation mathOperations[] = { | |
| genFuncType<Ty::Real<16>, Ty::Real<16>, Ty::Integer<8>>, | ||
| genMathOp<mlir::math::FPowIOp>}, | ||
| {"pow", RTNAME_STRING(cpowi), | ||
|
||
| genFuncType<Ty::Complex<4>, Ty::Complex<4>, Ty::Integer<4>>, genLibCall}, | ||
| genFuncType<Ty::Complex<4>, Ty::Complex<4>, Ty::Integer<4>>, | ||
| genComplexPow}, | ||
|
||
| {"pow", RTNAME_STRING(zpowi), | ||
| genFuncType<Ty::Complex<8>, Ty::Complex<8>, Ty::Integer<4>>, genLibCall}, | ||
| genFuncType<Ty::Complex<8>, Ty::Complex<8>, Ty::Integer<4>>, | ||
| genComplexPow}, | ||
| {"pow", RTNAME_STRING(cqpowi), FuncTypeComplex16Complex16Integer4, | ||
| genLibF128Call}, | ||
| {"pow", RTNAME_STRING(cpowk), | ||
| genFuncType<Ty::Complex<4>, Ty::Complex<4>, Ty::Integer<8>>, genLibCall}, | ||
| genFuncType<Ty::Complex<4>, Ty::Complex<4>, Ty::Integer<8>>, | ||
| genComplexPow}, | ||
| {"pow", RTNAME_STRING(zpowk), | ||
| genFuncType<Ty::Complex<8>, Ty::Complex<8>, Ty::Integer<8>>, genLibCall}, | ||
| genFuncType<Ty::Complex<8>, Ty::Complex<8>, Ty::Integer<8>>, | ||
| genComplexPow}, | ||
| {"pow", RTNAME_STRING(cqpowk), FuncTypeComplex16Complex16Integer8, | ||
| genLibF128Call}, | ||
| {"remainder", "remainderf", | ||
|
|
@@ -4032,21 +4058,20 @@ void IntrinsicLibrary::genExecuteCommandLine( | |
| mlir::Value waitAddr = fir::getBase(wait); | ||
| mlir::Value waitIsPresentAtRuntime = | ||
| builder.genIsNotNullAddr(loc, waitAddr); | ||
| waitBool = builder | ||
| .genIfOp(loc, {i1Ty}, waitIsPresentAtRuntime, | ||
| /*withElseRegion=*/true) | ||
| .genThen([&]() { | ||
| auto waitLoad = | ||
| fir::LoadOp::create(builder, loc, waitAddr); | ||
| mlir::Value cast = | ||
| builder.createConvert(loc, i1Ty, waitLoad); | ||
| fir::ResultOp::create(builder, loc, cast); | ||
| }) | ||
| .genElse([&]() { | ||
| mlir::Value trueVal = builder.createBool(loc, true); | ||
| fir::ResultOp::create(builder, loc, trueVal); | ||
| }) | ||
| .getResults()[0]; | ||
| waitBool = | ||
| builder | ||
| .genIfOp(loc, {i1Ty}, waitIsPresentAtRuntime, | ||
| /*withElseRegion=*/true) | ||
| .genThen([&]() { | ||
| auto waitLoad = fir::LoadOp::create(builder, loc, waitAddr); | ||
| mlir::Value cast = builder.createConvert(loc, i1Ty, waitLoad); | ||
| fir::ResultOp::create(builder, loc, cast); | ||
| }) | ||
| .genElse([&]() { | ||
| mlir::Value trueVal = builder.createBool(loc, true); | ||
| fir::ResultOp::create(builder, loc, trueVal); | ||
| }) | ||
| .getResults()[0]; | ||
| } | ||
|
|
||
| mlir::Value exitstatBox = | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,21 +1,27 @@ | ||
| ! REQUIRES: amdgpu-registered-target | ||
| ! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-fir -flang-deprecated-no-hlfir %s -o - | FileCheck %s | ||
| ! RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-fir %s -o - | FileCheck %s | ||
|
|
||
| ! CHECK-LABEL: func @_QPcabsf_test( | ||
| ! CHECK: complex.abs | ||
| ! CHECK-NOT: fir.call @cabsf | ||
| subroutine cabsf_test(a, b) | ||
| complex :: a | ||
| real :: b | ||
| b = abs(a) | ||
| end subroutine | ||
|
|
||
| ! CHECK-LABEL: func @_QPcabsf_test( | ||
| ! CHECK: complex.abs | ||
| ! CHECK-NOT: fir.call @cabsf | ||
|
|
||
| ! CHECK-LABEL: func @_QPcexpf_test( | ||
| ! CHECK: complex.exp | ||
| ! CHECK-NOT: fir.call @cexpf | ||
| subroutine cexpf_test(a, b) | ||
| complex :: a, b | ||
| b = exp(a) | ||
| end subroutine | ||
|
|
||
| ! CHECK-LABEL: func @_QPcexpf_test( | ||
| ! CHECK: complex.exp | ||
| ! CHECK-NOT: fir.call @cexpf | ||
| ! CHECK-LABEL: func @_QPpow_test( | ||
| ! CHECK: complex.pow | ||
| ! CHECK-NOT: fir.call @_FortranAcpowi | ||
| subroutine pow_test(a, b, c) | ||
| complex :: a, b, c | ||
| a = b**c | ||
| end subroutine pow_test |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please make sure that `complex.pow` is only generated when `isAMDGPU` is true; otherwise, I would expect performance regressions in `afn` compilations.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.