Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -2750,6 +2750,7 @@ def fno_unsafe_math_optimizations : Flag<["-"], "fno-unsafe-math-optimizations">
Group<f_Group>;
def fassociative_math : Flag<["-"], "fassociative-math">, Visibility<[ClangOption, FlangOption]>, Group<f_Group>;
def fno_associative_math : Flag<["-"], "fno-associative-math">, Visibility<[ClangOption, FlangOption]>, Group<f_Group>;
def ffast_real_mod : Flag<["-"], "ffast-real-mod">, Visibility<[FlangOption, FC1Option]>, Group<f_Group>;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please also add -fno-fast-real-mod?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have added it my local copy for now and push it with my next update.

defm reciprocal_math : BoolFOption<"reciprocal-math",
LangOpts<"AllowRecip">, DefaultFalse,
PosFlag<SetTrue, [], [ClangOption, CC1Option, FC1Option, FlangOption],
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Driver/ToolChains/Flang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,9 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args,

if (ReciprocalMath)
CmdArgs.push_back("-freciprocal-math");

if (Args.hasArg(options::OPT_ffast_real_mod))
CmdArgs.push_back("-ffast-real-mod");
}

static void renderRemarksOptions(const ArgList &Args, ArgStringList &CmdArgs,
Expand Down
3 changes: 2 additions & 1 deletion flang/include/flang/Support/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ LANGOPT(OpenMPNoThreadState, 1, 0)
LANGOPT(OpenMPNoNestedParallelism, 1, 0)
/// Use SIMD only OpenMP support.
LANGOPT(OpenMPSimd, 1, false)

/// Enable fast MOD operations for REAL
LANGOPT(FastRealMod, 1, false)
LANGOPT(VScaleMin, 32, 0) ///< Minimum vscale range value
LANGOPT(VScaleMax, 32, 0) ///< Maximum vscale range value

Expand Down
3 changes: 3 additions & 0 deletions flang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1424,6 +1424,9 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc,
opts.setFPContractMode(Fortran::common::LangOptions::FPM_Fast);
}

if (args.hasArg(clang::driver::options::OPT_ffast_real_mod))
opts.FastRealMod = true;

return true;
}

Expand Down
8 changes: 8 additions & 0 deletions flang/lib/Frontend/FrontendActions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,14 @@ bool CodeGenAction::beginSourceFileAction() {
ci.getInvocation().getLangOpts().OpenMPVersion);
}

if (ci.getInvocation().getLangOpts().FastRealMod) {
mlir::ModuleOp mod = lb.getModule();
mod.getOperation()->setAttr(
mlir::StringAttr::get(mod.getContext(),
llvm::Twine{"fir.fast_real_mod"}),
mlir::BoolAttr::get(mod.getContext(), true));
}

// Create a parse tree and lower it to FIR
parseAndLowerTree(ci, lb);

Expand Down
36 changes: 33 additions & 3 deletions flang/lib/Optimizer/Builder/IntrinsicCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7009,8 +7009,31 @@ mlir::Value IntrinsicLibrary::genMergeBits(mlir::Type resultType,
}

// MOD
static mlir::Value genFastMod(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value a, mlir::Value p) {
auto fastmathFlags = mlir::arith::FastMathFlags::contract;
auto fastmathAttr =
mlir::arith::FastMathFlagsAttr::get(builder.getContext(), fastmathFlags);
mlir::Value divResult =
mlir::arith::DivFOp::create(builder, loc, a, p, fastmathAttr);
mlir::Type intType = builder.getIntegerType(
a.getType().getIntOrFloatBitWidth(), /*signed=*/true);
mlir::Value intResult = builder.createConvert(loc, intType, divResult);
mlir::Value cnvResult = builder.createConvert(loc, a.getType(), intResult);
mlir::Value mulResult =
mlir::arith::MulFOp::create(builder, loc, cnvResult, p, fastmathAttr);
mlir::Value subResult =
mlir::arith::SubFOp::create(builder, loc, a, mulResult, fastmathAttr);
return subResult;
}

mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args) {
auto mod = builder.getModule();
bool useFastRealMod = false;
if (auto attr = mod->getAttrOfType<mlir::BoolAttr>("fir.fast_real_mod"))
useFastRealMod = attr.getValue();

assert(args.size() == 2);
if (resultType.isUnsignedInteger()) {
mlir::Type signlessType = mlir::IntegerType::get(
Expand All @@ -7022,9 +7045,16 @@ mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType,
if (mlir::isa<mlir::IntegerType>(resultType))
return mlir::arith::RemSIOp::create(builder, loc, args[0], args[1]);

// Use runtime.
return builder.createConvert(
loc, resultType, fir::runtime::genMod(builder, loc, args[0], args[1]));
if (useFastRealMod && resultType.isFloat()) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we can enable the fast MOD under afn FastMathFlag. Is there a reason to control it via a separate option?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was doing that to avoid issues with precision loss that could have come in with the other math functions. After I spoke to the engineers, who are working on the applications, it seems they'd be fine with moving this under the afn flag and do the optimization as part of -ffast-math which is enabled for this application.

However, I'm tempted to keep this as a separate flag and simply enable this MOD optimization it when -ffast-math is present. That makes it more controllable w.r.t. to the other math optimizations. What do you think?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It sounds okay to me. I would recommend enabling this optimization whenever afn is set (i.e. under -ffast-math or under -fapprox-func), and allow to override this with -fno-fast-real-mod.

// If fast MOD for REAL has been requested, generate less precise,
// but faster code directly.
return builder.createConvert(loc, resultType,
genFastMod(builder, loc, args[0], args[1]));
} else {
// Use runtime.
return builder.createConvert(
loc, resultType, fir::runtime::genMod(builder, loc, args[0], args[1]));
}
}

// MODULO
Expand Down
77 changes: 77 additions & 0 deletions flang/test/Lower/Intrinsics/fast-real-mod.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
! RUN: %flang_fc1 -ffast-real-mod -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK%if target=x86_64{{.*}} %{,CHECK-KIND10%}%if flang-supports-f128-math %{,CHECK-KIND16%}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think using kinds which are not enabled for a particular target is a compile error. Please could you put the checks for kind 10 and kind 16 in separate files and skip the whole thing if the types are not supported.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was trying to follow what mod.f90 did. Alas, I failed to do everything that test did. Now the test in this PR does it the same way. Hope that's OK and an acceptable way how I have addressed your comment.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That test is using lines like

integer, parameter :: kind16 = merge(16, 4, selected_real_kind(p=33).eq.16)
real(kind16) :: r, a, p

This make sure that on systems that don't support real(16) it will instead use real(4).


! CHECK: module attributes {{{.*}}fir.fast_real_mod = true{{.*}}}

! CHECK-LABEL: @_QPmod_real4
subroutine mod_real4(r, a, p)
implicit none
real(kind=4) :: r, a, p
! CHECK: %[[A:.*]] = fir.declare{{.*}}a"
! CHECK: %[[P:.*]] = fir.declare{{.*}}p"
! CHECK: %[[R:.*]] = fir.declare{{.*}}r"
! CHECK: %[[A_LOAD:.*]] = fir.load %[[A]]
! CHECK: %[[P_LOAD:.*]] = fir.load %[[P]]
! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f32
! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f32) -> si32
! CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si32) -> f32
! CHECK: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f32
! CHECK: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f32
! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref<f32>
r = mod(a, p)
end subroutine mod_real4

! CHECK-LABEL: @_QPmod_real8
subroutine mod_real8(r, a, p)
implicit none
real(kind=8) :: r, a, p
! CHECK: %[[A:.*]] = fir.declare{{.*}}a"
! CHECK: %[[P:.*]] = fir.declare{{.*}}p"
! CHECK: %[[R:.*]] = fir.declare{{.*}}r"
! CHECK: %[[A_LOAD:.*]] = fir.load %[[A]]
! CHECK: %[[P_LOAD:.*]] = fir.load %[[P]]
! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f64
! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f64) -> si64
! CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si64) -> f64
! CHECK: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f64
! CHECK: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f64
! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref<f64>
r = mod(a, p)
end subroutine mod_real8

! CHECK-LABEL: @_QPmod_real10
subroutine mod_real10(r, a, p)
implicit none
integer, parameter :: kind10 = merge(10, 4, selected_real_kind(p=18).eq.10)
real(kind=kind10) :: r, a, p
! CHECK-KIND10: %[[A:.*]] = fir.declare{{.*}}a"
! CHECK-KIND10: %[[P:.*]] = fir.declare{{.*}}p"
! CHECK-KIND10: %[[R:.*]] = fir.declare{{.*}}r"
! CHECK-KIND10: %[[A_LOAD:.*]] = fir.load %[[A]]
! CHECK-KIND10: %[[P_LOAD:.*]] = fir.load %[[P]]
! CHECK-KIND10: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f80
! CHECK-KIND10: %[[CV1:.*]] = fir.convert %[[DIV]] : (f80) -> si80
! CHECK-KIND10: %[[CV2:.*]] = fir.convert %[[CV1]] : (si80) -> f80
! CHECK-KIND10: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f80
! CHECK-KIND10: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f80
! CHECK-KIND10: fir.store %[[SUB]] to %[[R]] : !fir.ref<f80>
r = mod(a, p)
end subroutine mod_real10

! CHECK-LABEL: @_QPmod_real16
subroutine mod_real16(r, a, p)
implicit none
integer, parameter :: kind16 = merge(16, 4, selected_real_kind(p=33).eq.16)
real(kind=kind16) :: r, a, p
! CHECK-KIND16: %[[A:.*]] = fir.declare{{.*}}a"
! CHECK-KIND16: %[[P:.*]] = fir.declare{{.*}}p"
! CHECK-KIND16: %[[R:.*]] = fir.declare{{.*}}r"
! CHECK-KIND16: %[[A_LOAD:.*]] = fir.load %[[A]]
! CHECK-KIND16: %[[P_LOAD:.*]] = fir.load %[[P]]
! CHECK-KIND16: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f128
! CHECK-KIND16: %[[CV1:.*]] = fir.convert %[[DIV]] : (f128) -> si128
! CHECK-KIND16: %[[CV2:.*]] = fir.convert %[[CV1]] : (si128) -> f128
! CHECK-KIND16: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f128
! CHECK-KIND16: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f128
! CHECK-KIND16: fir.store %[[SUB]] to %[[R]] : !fir.ref<f128>
r = mod(a, p)
end subroutine mod_real16