Skip to content

Commit 8aa64ed

Browse files
authored
[Flang] Add -ffast-real-mod and direct code for MOD on REAL types (#160660)
This patch adds direct code-gen support for a faster MOD intrinsic for REAL types. Flang has maintained, and continues to maintain, a high-precision implementation of the MOD intrinsic as part of the Fortran runtime. With the -ffast-real-mod flag, users can opt out of the Fortran runtime call and instead have the compiler generate faster inline code, at the expense of potentially risking bit cancelation, because the compiler then uses the MOD formula as specified by ISO Fortran.
1 parent 031fb74 commit 8aa64ed

File tree

8 files changed

+144
-4
lines changed

8 files changed

+144
-4
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2750,6 +2750,9 @@ def fno_unsafe_math_optimizations : Flag<["-"], "fno-unsafe-math-optimizations">
27502750
Group<f_Group>;
27512751
def fassociative_math : Flag<["-"], "fassociative-math">, Visibility<[ClangOption, FlangOption]>, Group<f_Group>;
27522752
def fno_associative_math : Flag<["-"], "fno-associative-math">, Visibility<[ClangOption, FlangOption]>, Group<f_Group>;
2753+
def fno_fast_real_mod : Flag<["-"], "fno-fast-real-mod">,
2754+
Group<f_Group>, Visibility<[FlangOption, FC1Option]>,
2755+
HelpText<"Disable optimization of MOD for REAL types in presence of -ffast-math">;
27532756
defm reciprocal_math : BoolFOption<"reciprocal-math",
27542757
LangOpts<"AllowRecip">, DefaultFalse,
27552758
PosFlag<SetTrue, [], [ClangOption, CC1Option, FC1Option, FlangOption],

clang/lib/Driver/ToolChains/Flang.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -822,6 +822,9 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args,
822822
complexRangeKindToStr(Range)));
823823
}
824824

825+
if (Args.hasArg(options::OPT_fno_fast_real_mod))
826+
CmdArgs.push_back("-fno-fast-real-mod");
827+
825828
if (!HonorINFs && !HonorNaNs && AssociativeMath && ReciprocalMath &&
826829
ApproxFunc && !SignedZeros &&
827830
(FPContract == "fast" || FPContract.empty())) {

flang/include/flang/Support/LangOptions.def

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@ LANGOPT(OpenMPNoThreadState, 1, 0)
6060
LANGOPT(OpenMPNoNestedParallelism, 1, 0)
6161
/// Use SIMD only OpenMP support.
6262
LANGOPT(OpenMPSimd, 1, false)
63-
63+
/// Disable fast (inline) MOD operations for REAL; set by -fno-fast-real-mod.
64+
LANGOPT(NoFastRealMod, 1, false)
6465
LANGOPT(VScaleMin, 32, 0) ///< Minimum vscale range value
6566
LANGOPT(VScaleMax, 32, 0) ///< Maximum vscale range value
6667

flang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1425,6 +1425,9 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc,
14251425
opts.setFPContractMode(Fortran::common::LangOptions::FPM_Fast);
14261426
}
14271427

1428+
if (args.hasArg(clang::driver::options::OPT_fno_fast_real_mod))
1429+
opts.NoFastRealMod = true;
1430+
14281431
return true;
14291432
}
14301433

flang/lib/Frontend/FrontendActions.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,14 @@ bool CodeGenAction::beginSourceFileAction() {
277277
ci.getInvocation().getLangOpts().OpenMPVersion);
278278
}
279279

280+
if (ci.getInvocation().getLangOpts().NoFastRealMod) {
281+
mlir::ModuleOp mod = lb.getModule();
282+
mod.getOperation()->setAttr(
283+
mlir::StringAttr::get(mod.getContext(),
284+
llvm::Twine{"fir.no_fast_real_mod"}),
285+
mlir::BoolAttr::get(mod.getContext(), true));
286+
}
287+
280288
// Create a parse tree and lower it to FIR
281289
parseAndLowerTree(ci, lb);
282290

flang/lib/Optimizer/Builder/IntrinsicCall.cpp

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6989,8 +6989,33 @@ mlir::Value IntrinsicLibrary::genMergeBits(mlir::Type resultType,
69896989
}
69906990

69916991
// MOD
6992+
static mlir::Value genFastMod(fir::FirOpBuilder &builder, mlir::Location loc,
6993+
mlir::Value a, mlir::Value p) {
6994+
auto fastmathFlags = mlir::arith::FastMathFlags::contract;
6995+
auto fastmathAttr =
6996+
mlir::arith::FastMathFlagsAttr::get(builder.getContext(), fastmathFlags);
6997+
mlir::Value divResult =
6998+
mlir::arith::DivFOp::create(builder, loc, a, p, fastmathAttr);
6999+
mlir::Type intType = builder.getIntegerType(
7000+
a.getType().getIntOrFloatBitWidth(), /*signed=*/true);
7001+
mlir::Value intResult = builder.createConvert(loc, intType, divResult);
7002+
mlir::Value cnvResult = builder.createConvert(loc, a.getType(), intResult);
7003+
mlir::Value mulResult =
7004+
mlir::arith::MulFOp::create(builder, loc, cnvResult, p, fastmathAttr);
7005+
mlir::Value subResult =
7006+
mlir::arith::SubFOp::create(builder, loc, a, mulResult, fastmathAttr);
7007+
return subResult;
7008+
}
7009+
69927010
mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType,
69937011
llvm::ArrayRef<mlir::Value> args) {
7012+
auto mod = builder.getModule();
7013+
bool dontUseFastRealMod = false;
7014+
bool canUseApprox = mlir::arith::bitEnumContainsAny(
7015+
builder.getFastMathFlags(), mlir::arith::FastMathFlags::afn);
7016+
if (auto attr = mod->getAttrOfType<mlir::BoolAttr>("fir.no_fast_real_mod"))
7017+
dontUseFastRealMod = attr.getValue();
7018+
69947019
assert(args.size() == 2);
69957020
if (resultType.isUnsignedInteger()) {
69967021
mlir::Type signlessType = mlir::IntegerType::get(
@@ -7002,9 +7027,16 @@ mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType,
70027027
if (mlir::isa<mlir::IntegerType>(resultType))
70037028
return mlir::arith::RemSIOp::create(builder, loc, args[0], args[1]);
70047029

7005-
// Use runtime.
7006-
return builder.createConvert(
7007-
loc, resultType, fir::runtime::genMod(builder, loc, args[0], args[1]));
7030+
if (resultType.isFloat() && canUseApprox && !dontUseFastRealMod) {
7031+
// Treat MOD as an approximate function and code-gen inline code
7032+
// instead of calling into the Fortran runtime library.
7033+
return builder.createConvert(loc, resultType,
7034+
genFastMod(builder, loc, args[0], args[1]));
7035+
} else {
7036+
// Use runtime.
7037+
return builder.createConvert(
7038+
loc, resultType, fir::runtime::genMod(builder, loc, args[0], args[1]));
7039+
}
70087040
}
70097041

70107042
// MODULO
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
! RUN: %flang -fno-fast-real-mod -### -c %s 2>&1 | FileCheck %s -check-prefix CHECK-NO-FAST-REAL-MOD
2+
3+
! CHECK-NO-FAST-REAL-MOD: "-fno-fast-real-mod"
4+
5+
program test
6+
! nothing to be done in here
7+
end program test
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
! RUN: %flang_fc1 -ffast-math -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK%if target=x86_64{{.*}} %{,CHECK-KIND10%}%if flang-supports-f128-math %{,CHECK-KIND16%}
2+
! RUN: %flang_fc1 -ffast-math -fno-fast-real-mod -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK-NFRM%if target=x86_64{{.*}} %{,CHECK-NFRM-KIND10%}%if flang-supports-f128-math %{,CHECK-NFRM-KIND16%}
3+
4+
! TODO: verify that the fir.no_fast_real_mod attribute is absent when -fno-fast-real-mod is not given
5+
! CHECK-NFRM: module attributes {{{.*}}fir.no_fast_real_mod = true{{.*}}}
6+
7+
! CHECK-LABEL: @_QPmod_real4
8+
subroutine mod_real4(r, a, p)
9+
implicit none
10+
real(kind=4) :: r, a, p
11+
! CHECK: %[[A:.*]] = fir.declare{{.*}}a"
12+
! CHECK: %[[P:.*]] = fir.declare{{.*}}p"
13+
! CHECK: %[[R:.*]] = fir.declare{{.*}}r"
14+
! CHECK: %[[A_LOAD:.*]] = fir.load %[[A]]
15+
! CHECK: %[[P_LOAD:.*]] = fir.load %[[P]]
16+
! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f32
17+
! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f32) -> si32
18+
! CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si32) -> f32
19+
! CHECK: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f32
20+
! CHECK: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f32
21+
! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref<f32>
22+
! CHECK-NFRM: fir.call @_FortranAModReal4(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f32, f32, !fir.ref<i8>, i32) -> f32
23+
r = mod(a, p)
24+
end subroutine mod_real4
25+
26+
! CHECK-LABEL: @_QPmod_real8
27+
subroutine mod_real8(r, a, p)
28+
implicit none
29+
real(kind=8) :: r, a, p
30+
! CHECK: %[[A:.*]] = fir.declare{{.*}}a"
31+
! CHECK: %[[P:.*]] = fir.declare{{.*}}p"
32+
! CHECK: %[[R:.*]] = fir.declare{{.*}}r"
33+
! CHECK: %[[A_LOAD:.*]] = fir.load %[[A]]
34+
! CHECK: %[[P_LOAD:.*]] = fir.load %[[P]]
35+
! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f64
36+
! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f64) -> si64
37+
! CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si64) -> f64
38+
! CHECK: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f64
39+
! CHECK: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f64
40+
! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref<f64>
41+
! CHECK-NFRM: fir.call @_FortranAModReal8(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f64, f64, !fir.ref<i8>, i32) -> f64
42+
r = mod(a, p)
43+
end subroutine mod_real8
44+
45+
! CHECK-LABEL: @_QPmod_real10
46+
subroutine mod_real10(r, a, p)
47+
implicit none
48+
integer, parameter :: kind10 = merge(10, 4, selected_real_kind(p=18).eq.10)
49+
real(kind=kind10) :: r, a, p
50+
! CHECK-KIND10: %[[A:.*]] = fir.declare{{.*}}a"
51+
! CHECK-KIND10: %[[P:.*]] = fir.declare{{.*}}p"
52+
! CHECK-KIND10: %[[R:.*]] = fir.declare{{.*}}r"
53+
! CHECK-KIND10: %[[A_LOAD:.*]] = fir.load %[[A]]
54+
! CHECK-KIND10: %[[P_LOAD:.*]] = fir.load %[[P]]
55+
! CHECK-KIND10: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f80
56+
! CHECK-KIND10: %[[CV1:.*]] = fir.convert %[[DIV]] : (f80) -> si80
57+
! CHECK-KIND10: %[[CV2:.*]] = fir.convert %[[CV1]] : (si80) -> f80
58+
! CHECK-KIND10: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f80
59+
! CHECK-KIND10: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f80
60+
! CHECK-KIND10: fir.store %[[SUB]] to %[[R]] : !fir.ref<f80>
61+
! CHECK-NFRM-KIND10: fir.call @_FortranAModReal10(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f80, f80, !fir.ref<i8>, i32) -> f80
62+
r = mod(a, p)
63+
end subroutine mod_real10
64+
65+
! CHECK-LABEL: @_QPmod_real16
66+
subroutine mod_real16(r, a, p)
67+
implicit none
68+
integer, parameter :: kind16 = merge(16, 4, selected_real_kind(p=33).eq.16)
69+
real(kind=kind16) :: r, a, p
70+
! CHECK-KIND16: %[[A:.*]] = fir.declare{{.*}}a"
71+
! CHECK-KIND16: %[[P:.*]] = fir.declare{{.*}}p"
72+
! CHECK-KIND16: %[[R:.*]] = fir.declare{{.*}}r"
73+
! CHECK-KIND16: %[[A_LOAD:.*]] = fir.load %[[A]]
74+
! CHECK-KIND16: %[[P_LOAD:.*]] = fir.load %[[P]]
75+
! CHECK-KIND16: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f128
76+
! CHECK-KIND16: %[[CV1:.*]] = fir.convert %[[DIV]] : (f128) -> si128
77+
! CHECK-KIND16: %[[CV2:.*]] = fir.convert %[[CV1]] : (si128) -> f128
78+
! CHECK-KIND16: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f128
79+
! CHECK-KIND16: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f128
80+
! CHECK-KIND16: fir.store %[[SUB]] to %[[R]] : !fir.ref<f128>
81+
! CHECK-NFRM-KIND16: fir.call @_FortranAModReal16(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f128, f128, !fir.ref<i8>, i32) -> f128
82+
r = mod(a, p)
83+
end subroutine mod_real16

0 commit comments

Comments
 (0)