Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -2750,6 +2750,7 @@ def fno_unsafe_math_optimizations : Flag<["-"], "fno-unsafe-math-optimizations">
Group<f_Group>;
def fassociative_math : Flag<["-"], "fassociative-math">, Visibility<[ClangOption, FlangOption]>, Group<f_Group>;
def fno_associative_math : Flag<["-"], "fno-associative-math">, Visibility<[ClangOption, FlangOption]>, Group<f_Group>;
def ffast_real_mod : Flag<["-"], "ffast-real-mod">, Visibility<[FlangOption, FC1Option]>, Group<f_Group>;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please also add -fno-fast-real-mod?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have added it my local copy for now and push it with my next update.

defm reciprocal_math : BoolFOption<"reciprocal-math",
LangOpts<"AllowRecip">, DefaultFalse,
PosFlag<SetTrue, [], [ClangOption, CC1Option, FC1Option, FlangOption],
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Driver/ToolChains/Flang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,9 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args,

if (ReciprocalMath)
CmdArgs.push_back("-freciprocal-math");

if (Args.hasArg(options::OPT_ffast_real_mod))
CmdArgs.push_back("-ffast-real-mod");
}

static void renderRemarksOptions(const ArgList &Args, ArgStringList &CmdArgs,
Expand Down
3 changes: 2 additions & 1 deletion flang/include/flang/Support/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ LANGOPT(OpenMPNoThreadState, 1, 0)
LANGOPT(OpenMPNoNestedParallelism, 1, 0)
/// Use SIMD only OpenMP support.
LANGOPT(OpenMPSimd, 1, false)

/// Enable fast MOD operations for REAL
LANGOPT(FastRealMod, 1, false)
LANGOPT(VScaleMin, 32, 0) ///< Minimum vscale range value
LANGOPT(VScaleMax, 32, 0) ///< Maximum vscale range value

Expand Down
4 changes: 4 additions & 0 deletions flang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1424,6 +1424,10 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc,
opts.setFPContractMode(Fortran::common::LangOptions::FPM_Fast);
}

if (args.hasArg(clang::driver::options::OPT_ffast_real_mod)) {
opts.FastRealMod = true;
}

return true;
}

Expand Down
8 changes: 8 additions & 0 deletions flang/lib/Frontend/FrontendActions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,14 @@ bool CodeGenAction::beginSourceFileAction() {
ci.getInvocation().getLangOpts().OpenMPVersion);
}

if (ci.getInvocation().getLangOpts().FastRealMod) {
auto mod = lb.getModule();
mod.getOperation()->setAttr(
mlir::StringAttr::get(mod.getContext(),
llvm::Twine{"fir.fast_real_mod"}),
mlir::BoolAttr::get(mod.getContext(), true));
}

// Create a parse tree and lower it to FIR
parseAndLowerTree(ci, lb);

Expand Down
37 changes: 34 additions & 3 deletions flang/lib/Optimizer/Builder/IntrinsicCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7009,8 +7009,30 @@ mlir::Value IntrinsicLibrary::genMergeBits(mlir::Type resultType,
}

// MOD
static mlir::Value genFastMod(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value a, mlir::Value p) {
auto fastmathFlags = mlir::arith::FastMathFlags::contract;
auto fastmathAttr =
mlir::arith::FastMathFlagsAttr::get(builder.getContext(), fastmathFlags);
mlir::Value divResult = mlir::arith::DivFOp::create(builder, loc, a, p, fastmathAttr);
mlir::Type intType = builder.getIntegerType(
a.getType().getIntOrFloatBitWidth(), /*signed=*/true);
mlir::Value intResult = builder.createConvert(loc, intType, divResult);
mlir::Value cnvResult = builder.createConvert(loc, a.getType(), intResult);
mlir::Value mulResult =
mlir::arith::MulFOp::create(builder, loc, cnvResult, p, fastmathAttr);
mlir::Value subResult =
mlir::arith::SubFOp::create(builder, loc, a, mulResult, fastmathAttr);
return subResult;
}

mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args) {
auto mod = builder.getModule();
bool useFastRealMod = false;
if (auto attr = mod->getAttrOfType<mlir::BoolAttr>("fir.fast_real_mod"))
useFastRealMod = attr.getValue();

assert(args.size() == 2);
if (resultType.isUnsignedInteger()) {
mlir::Type signlessType = mlir::IntegerType::get(
Expand All @@ -7022,9 +7044,18 @@ mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType,
if (mlir::isa<mlir::IntegerType>(resultType))
return mlir::arith::RemSIOp::create(builder, loc, args[0], args[1]);

// Use runtime.
return builder.createConvert(
loc, resultType, fir::runtime::genMod(builder, loc, args[0], args[1]));
if (useFastRealMod) {
// If fast MOD for REAL has been requested, generate less precise,
// but faster code directly.
assert(resultType.isFloat() &&
"non floating-point type hit for fast real MOD");
return builder.createConvert(loc, resultType,
genFastMod(builder, loc, args[0], args[1]));
} else {
// Use runtime.
return builder.createConvert(
loc, resultType, fir::runtime::genMod(builder, loc, args[0], args[1]));
}
}

// MODULO
Expand Down
75 changes: 75 additions & 0 deletions flang/test/Lower/Intrinsics/fast-real-mod.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
! RUN: %flang_fc1 -ffast-real-mod -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK%if target=x86_64{{.*}} %{,CHECK-KIND10%}%if flang-supports-f128-math %{,CHECK-KIND16%}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think using kinds which are not enabled for a particular target is a compile error. Please could you put the checks for kind 10 and kind 16 in separate files and skip the whole thing if the types are not supported.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was trying to follow what mod.f90 did. Alas, I failed to do everything that test did. Now the test in this PR does it the same way. Hope that's OK and an acceptable way how I have addressed your comment.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That test is using lines like

integer, parameter :: kind16 = merge(16, 4, selected_real_kind(p=33).eq.16)
real(kind16) :: r, a, p

This make sure that on systems that don't support real(16) it will instead use real(4).


! CHECK: module attributes {{{.*}}fir.fast_real_mod = true{{.*}}}

! CHECK-LABEL: @_QPmod_real4
subroutine mod_real4(r, a, p)
implicit none
real(kind=4) :: r, a, p
! CHECK: %[[A:.*]] = fir.declare{{.*}}a"
! CHECK: %[[P:.*]] = fir.declare{{.*}}p"
! CHECK: %[[R:.*]] = fir.declare{{.*}}r"
! CHECK: %[[A_LOAD:.*]] = fir.load %[[A]]
! CHECK: %[[P_LOAD:.*]] = fir.load %[[P]]
! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f32
! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f32) -> si32
! CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si32) -> f32
! CHECK: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f32
! CHECK: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f32
! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref<f32>
r = mod(a, p)
end subroutine mod_real4

! CHECK-LABEL: @_QPmod_real8
subroutine mod_real8(r, a, p)
implicit none
real(kind=8) :: r, a, p
! CHECK: %[[A:.*]] = fir.declare{{.*}}a"
! CHECK: %[[P:.*]] = fir.declare{{.*}}p"
! CHECK: %[[R:.*]] = fir.declare{{.*}}r"
! CHECK: %[[A_LOAD:.*]] = fir.load %[[A]]
! CHECK: %[[P_LOAD:.*]] = fir.load %[[P]]
! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f64
! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f64) -> si64
! CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si64) -> f64
! CHECK: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f64
! CHECK: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f64
! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref<f64>
r = mod(a, p)
end subroutine mod_real8

! CHECK-LABEL: @_QPmod_real10
subroutine mod_real10(r, a, p)
implicit none
real(kind=10) :: r, a, p
! CHECK-KIND10: %[[A:.*]] = fir.declare{{.*}}a"
! CHECK-KIND10: %[[P:.*]] = fir.declare{{.*}}p"
! CHECK-KIND10: %[[R:.*]] = fir.declare{{.*}}r"
! CHECK-KIND10: %[[A_LOAD:.*]] = fir.load %[[A]]
! CHECK-KIND10: %[[P_LOAD:.*]] = fir.load %[[P]]
! CHECK-KIND10: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f80
! CHECK-KIND10: %[[CV1:.*]] = fir.convert %[[DIV]] : (f80) -> si80
! CHECK-KIND10: %[[CV2:.*]] = fir.convert %[[CV1]] : (si80) -> f80
! CHECK-KIND10: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f80
! CHECK-KIND10: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f80
! CHECK-KIND10: fir.store %[[SUB]] to %[[R]] : !fir.ref<f80>
r = mod(a, p)
end subroutine mod_real10

! CHECK-LABEL: @_QPmod_real16
subroutine mod_real16(r, a, p)
implicit none
real(kind=16) :: r, a, p
! CHECK-KIND16: %[[A:.*]] = fir.declare{{.*}}a"
! CHECK-KIND16: %[[P:.*]] = fir.declare{{.*}}p"
! CHECK-KIND16: %[[R:.*]] = fir.declare{{.*}}r"
! CHECK-KIND16: %[[A_LOAD:.*]] = fir.load %[[A]]
! CHECK-KIND16: %[[P_LOAD:.*]] = fir.load %[[P]]
! CHECK-KIND16: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f128
! CHECK-KIND16: %[[CV1:.*]] = fir.convert %[[DIV]] : (f128) -> si128
! CHECK-KIND16: %[[CV2:.*]] = fir.convert %[[CV1]] : (si128) -> f128
! CHECK-KIND16: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f128
! CHECK-KIND16: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f128
! CHECK-KIND16: fir.store %[[SUB]] to %[[R]] : !fir.ref<f128>
r = mod(a, p)
end subroutine mod_real16
Loading