Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -2778,6 +2778,9 @@ def fno_unsafe_math_optimizations : Flag<["-"], "fno-unsafe-math-optimizations">
Group<f_Group>;
def fassociative_math : Flag<["-"], "fassociative-math">, Visibility<[ClangOption, FlangOption]>, Group<f_Group>;
def fno_associative_math : Flag<["-"], "fno-associative-math">, Visibility<[ClangOption, FlangOption]>, Group<f_Group>;
// -ffast-real-mod: requests the optimized handling of the MOD intrinsic for
// REAL operands. Visibility is limited to FlangOption and FC1Option.
def ffast_real_mod : Flag<["-"], "ffast-real-mod">,
Group<f_Group>, Visibility<[FlangOption, FC1Option]>,
HelpText<"Enable optimization of MOD for REAL types">;
// -fno-fast-real-mod: negative counterpart of -ffast-real-mod; per its help
// text it turns the REAL MOD optimization off even when -ffast-math is given.
def fno_fast_real_mod : Flag<["-"], "fno-fast-real-mod">,
Group<f_Group>, Visibility<[FlangOption, FC1Option]>,
HelpText<"Disable optimization of MOD for REAL types in presence of -ffast-math">;
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Driver/ToolChains/Flang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -822,6 +822,8 @@ static void addFloatingPointOptions(const Driver &D, const ArgList &Args,
complexRangeKindToStr(Range)));
}

// Forward only the last of -ffast-real-mod / -fno-fast-real-mod. The frontend
// resolves the pair with "last one wins"; pushing both flags unconditionally
// would always emit -fno-fast-real-mod last and therefore make it win
// regardless of the order the user wrote on the command line.
if (const llvm::opt::Arg *A =
        Args.getLastArg(options::OPT_ffast_real_mod,
                        options::OPT_fno_fast_real_mod))
  CmdArgs.push_back(A->getOption().matches(options::OPT_ffast_real_mod)
                        ? "-ffast-real-mod"
                        : "-fno-fast-real-mod");

Expand Down
2 changes: 1 addition & 1 deletion flang/include/flang/Support/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ LANGOPT(OpenMPNoNestedParallelism, 1, 0)
/// Use SIMD only OpenMP support.
LANGOPT(OpenMPSimd, 1, false)
/// Enable fast MOD operations for REAL
LANGOPT(NoFastRealMod, 1, false)
LANGOPT(FastRealMod, 1, false)
LANGOPT(VScaleMin, 32, 0) ///< Minimum vscale range value
LANGOPT(VScaleMax, 32, 0) ///< Maximum vscale range value

Expand Down
11 changes: 9 additions & 2 deletions flang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1428,11 +1428,18 @@ static bool parseFloatingPointArgs(CompilerInvocation &invoc,
opts.ReciprocalMath = true;
opts.ApproxFunc = true;
opts.NoSignedZeros = true;
opts.FastRealMod = true;
opts.setFPContractMode(Fortran::common::LangOptions::FPM_Fast);
}

if (args.hasArg(clang::driver::options::OPT_fno_fast_real_mod))
opts.NoFastRealMod = true;
if (llvm::opt::Arg *arg =
args.getLastArg(clang::driver::options::OPT_ffast_real_mod,
clang::driver::options::OPT_fno_fast_real_mod)) {
if (arg->getOption().matches(clang::driver::options::OPT_ffast_real_mod))
opts.FastRealMod = true;
if (arg->getOption().matches(clang::driver::options::OPT_fno_fast_real_mod))
opts.FastRealMod = false;
}

return true;
}
Expand Down
4 changes: 2 additions & 2 deletions flang/lib/Frontend/FrontendActions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -277,11 +277,11 @@ bool CodeGenAction::beginSourceFileAction() {
ci.getInvocation().getLangOpts().OpenMPVersion);
}

if (ci.getInvocation().getLangOpts().NoFastRealMod) {
if (ci.getInvocation().getLangOpts().FastRealMod) {
mlir::ModuleOp mod = lb.getModule();
mod.getOperation()->setAttr(
mlir::StringAttr::get(mod.getContext(),
llvm::Twine{"fir.no_fast_real_mod"}),
llvm::Twine{"fir.fast_real_mod"}),
mlir::BoolAttr::get(mod.getContext(), true));
}

Expand Down
10 changes: 4 additions & 6 deletions flang/lib/Optimizer/Builder/IntrinsicCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6509,11 +6509,9 @@ static mlir::Value genFastMod(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args) {
auto mod = builder.getModule();
bool dontUseFastRealMod = false;
bool canUseApprox = mlir::arith::bitEnumContainsAny(
builder.getFastMathFlags(), mlir::arith::FastMathFlags::afn);
if (auto attr = mod->getAttrOfType<mlir::BoolAttr>("fir.no_fast_real_mod"))
dontUseFastRealMod = attr.getValue();
bool useFastRealMod = false;
if (auto attr = mod->getAttrOfType<mlir::BoolAttr>("fir.fast_real_mod"))
useFastRealMod = attr.getValue();

assert(args.size() == 2);
if (resultType.isUnsignedInteger()) {
Expand All @@ -6526,7 +6524,7 @@ mlir::Value IntrinsicLibrary::genMod(mlir::Type resultType,
if (mlir::isa<mlir::IntegerType>(resultType))
return mlir::arith::RemSIOp::create(builder, loc, args[0], args[1]);

if (resultType.isFloat() && canUseApprox && !dontUseFastRealMod) {
if (resultType.isFloat() && useFastRealMod) {
// Treat MOD as an approximate function and code-gen inline code
// instead of calling into the Fortran runtime library.
return builder.createConvert(loc, resultType,
Expand Down
2 changes: 2 additions & 0 deletions flang/test/Driver/fast-real-mod.f90
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
! RUN: %flang -ffast-real-mod -### -c %s 2>&1 | FileCheck %s -check-prefix CHECK-FAST-REAL-MOD
! RUN: %flang -fno-fast-real-mod -### -c %s 2>&1 | FileCheck %s -check-prefix CHECK-NO-FAST-REAL-MOD
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It may be a good idea to add tests that check what happens when both -ffast-real-mod and -fno-fast-real-mod appear on the command line. The last to appear should "win".

Since these options are related to -ffast-math, it would also be good to check that they work as expected in the presence of -ffast-math.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point! That's another thing that I can add to the test.


! CHECK-FAST-REAL-MOD: "-ffast-real-mod"
! CHECK-NO-FAST-REAL-MOD: "-fno-fast-real-mod"

program test
Expand Down
141 changes: 94 additions & 47 deletions flang/test/Lower/Intrinsics/fast-real-mod.f90
Original file line number Diff line number Diff line change
@@ -1,24 +1,38 @@
! RUN: %flang_fc1 -ffast-math -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK%if target=x86_64{{.*}} %{,CHECK-KIND10%}%if flang-supports-f128-math %{,CHECK-KIND16%}
! RUN: %flang_fc1 -ffast-real-mod -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK-FRM%if target=x86_64{{.*}} %{,CHECK-FRM-KIND10%}%if flang-supports-f128-math %{,CHECK-FRM-KIND16%}
! RUN: %flang_fc1 -ffast-real-mod -fno-fast-real-mod -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK-NFRM%if target=x86_64{{.*}} %{,CHECK-NFRM-KIND10%}%if flang-supports-f128-math %{,CHECK-NFRM-KIND16%}
! RUN: %flang_fc1 -fno-fast-real-mod -ffast-real-mod -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK-FRM%if target=x86_64{{.*}} %{,CHECK-FRM-KIND10%}%if flang-supports-f128-math %{,CHECK-FRM-KIND16%}
! RUN: %flang_fc1 -ffast-math -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK-FM%if target=x86_64{{.*}} %{,CHECK-FM-KIND10%}%if flang-supports-f128-math %{,CHECK-FM-KIND16%}
! RUN: %flang_fc1 -ffast-math -fno-fast-real-mod -emit-mlir -o - %s | FileCheck %s --check-prefixes=CHECK-NFRM%if target=x86_64{{.*}} %{,CHECK-NFRM-KIND10%}%if flang-supports-f128-math %{,CHECK-NFRM-KIND16%}

! TODO: add a check line verifying that fir.fast_real_mod is not present.
! CHECK-NFRM: module attributes {{{.*}}fir.no_fast_real_mod = true{{.*}}}
! CHECK-FM: module attributes {{{.*}}fir.fast_real_mod = true{{.*}}}
! CHECK-FRM: module attributes {{{.*}}fir.fast_real_mod = true{{.*}}}

! CHECK-LABEL: @_QPmod_real4
subroutine mod_real4(r, a, p)
implicit none
real(kind=4) :: r, a, p
! CHECK: %[[A:.*]] = fir.declare{{.*}}a"
! CHECK: %[[P:.*]] = fir.declare{{.*}}p"
! CHECK: %[[R:.*]] = fir.declare{{.*}}r"
! CHECK: %[[A_LOAD:.*]] = fir.load %[[A]]
! CHECK: %[[P_LOAD:.*]] = fir.load %[[P]]
! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f32
! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f32) -> si32
! CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si32) -> f32
! CHECK: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f32
! CHECK: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f32
! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref<f32>
! CHECK-FRM: %[[A:.*]] = fir.declare{{.*}}a"
! CHECK-FRM: %[[P:.*]] = fir.declare{{.*}}p"
! CHECK-FRM: %[[R:.*]] = fir.declare{{.*}}r"
! CHECK-FRM: %[[A_LOAD:.*]] = fir.load %[[A]]
! CHECK-FRM: %[[P_LOAD:.*]] = fir.load %[[P]]
! CHECK-FRM: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f32
! CHECK-FRM: %[[CV1:.*]] = fir.convert %[[DIV]] : (f32) -> si32
! CHECK-FRM: %[[CV2:.*]] = fir.convert %[[CV1]] : (si32) -> f32
! CHECK-FRM: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f32
! CHECK-FRM: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f32
! CHECK-FRM: fir.store %[[SUB]] to %[[R]] : !fir.ref<f32>
! CHECK-FM: %[[A:.*]] = fir.declare{{.*}}a"
! CHECK-FM: %[[P:.*]] = fir.declare{{.*}}p"
! CHECK-FM: %[[R:.*]] = fir.declare{{.*}}r"
! CHECK-FM: %[[A_LOAD:.*]] = fir.load %[[A]]
! CHECK-FM: %[[P_LOAD:.*]] = fir.load %[[P]]
! CHECK-FM: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f32
! CHECK-FM: %[[CV1:.*]] = fir.convert %[[DIV]] : (f32) -> si32
! CHECK-FM: %[[CV2:.*]] = fir.convert %[[CV1]] : (si32) -> f32
! CHECK-FM: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f32
! CHECK-FM: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f32
! CHECK-FM: fir.store %[[SUB]] to %[[R]] : !fir.ref<f32>
! CHECK-NFRM: fir.call @_FortranAModReal4(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f32, f32, !fir.ref<i8>, i32) -> f32
r = mod(a, p)
end subroutine mod_real4
Expand All @@ -27,17 +41,28 @@ end subroutine mod_real4
subroutine mod_real8(r, a, p)
implicit none
real(kind=8) :: r, a, p
! CHECK: %[[A:.*]] = fir.declare{{.*}}a"
! CHECK: %[[P:.*]] = fir.declare{{.*}}p"
! CHECK: %[[R:.*]] = fir.declare{{.*}}r"
! CHECK: %[[A_LOAD:.*]] = fir.load %[[A]]
! CHECK: %[[P_LOAD:.*]] = fir.load %[[P]]
! CHECK: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f64
! CHECK: %[[CV1:.*]] = fir.convert %[[DIV]] : (f64) -> si64
! CHECK: %[[CV2:.*]] = fir.convert %[[CV1]] : (si64) -> f64
! CHECK: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f64
! CHECK: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f64
! CHECK: fir.store %[[SUB]] to %[[R]] : !fir.ref<f64>
! CHECK-FRM: %[[A:.*]] = fir.declare{{.*}}a"
! CHECK-FRM: %[[P:.*]] = fir.declare{{.*}}p"
! CHECK-FRM: %[[R:.*]] = fir.declare{{.*}}r"
! CHECK-FRM: %[[A_LOAD:.*]] = fir.load %[[A]]
! CHECK-FRM: %[[P_LOAD:.*]] = fir.load %[[P]]
! CHECK-FRM: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f64
! CHECK-FRM: %[[CV1:.*]] = fir.convert %[[DIV]] : (f64) -> si64
! CHECK-FRM: %[[CV2:.*]] = fir.convert %[[CV1]] : (si64) -> f64
! CHECK-FRM: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f64
! CHECK-FRM: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f64
! CHECK-FRM: fir.store %[[SUB]] to %[[R]] : !fir.ref<f64>
! CHECK-FM: %[[A:.*]] = fir.declare{{.*}}a"
! CHECK-FM: %[[P:.*]] = fir.declare{{.*}}p"
! CHECK-FM: %[[R:.*]] = fir.declare{{.*}}r"
! CHECK-FM: %[[A_LOAD:.*]] = fir.load %[[A]]
! CHECK-FM: %[[P_LOAD:.*]] = fir.load %[[P]]
! CHECK-FM: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f64
! CHECK-FM: %[[CV1:.*]] = fir.convert %[[DIV]] : (f64) -> si64
! CHECK-FM: %[[CV2:.*]] = fir.convert %[[CV1]] : (si64) -> f64
! CHECK-FM: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f64
! CHECK-FM: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f64
! CHECK-FM: fir.store %[[SUB]] to %[[R]] : !fir.ref<f64>
! CHECK-NFRM: fir.call @_FortranAModReal8(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f64, f64, !fir.ref<i8>, i32) -> f64
r = mod(a, p)
end subroutine mod_real8
Expand All @@ -47,17 +72,28 @@ subroutine mod_real10(r, a, p)
implicit none
integer, parameter :: kind10 = merge(10, 4, selected_real_kind(p=18).eq.10)
real(kind=kind10) :: r, a, p
! CHECK-KIND10: %[[A:.*]] = fir.declare{{.*}}a"
! CHECK-KIND10: %[[P:.*]] = fir.declare{{.*}}p"
! CHECK-KIND10: %[[R:.*]] = fir.declare{{.*}}r"
! CHECK-KIND10: %[[A_LOAD:.*]] = fir.load %[[A]]
! CHECK-KIND10: %[[P_LOAD:.*]] = fir.load %[[P]]
! CHECK-KIND10: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f80
! CHECK-KIND10: %[[CV1:.*]] = fir.convert %[[DIV]] : (f80) -> si80
! CHECK-KIND10: %[[CV2:.*]] = fir.convert %[[CV1]] : (si80) -> f80
! CHECK-KIND10: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f80
! CHECK-KIND10: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f80
! CHECK-KIND10: fir.store %[[SUB]] to %[[R]] : !fir.ref<f80>
! CHECK-FRM-KIND10: %[[A:.*]] = fir.declare{{.*}}a"
! CHECK-FRM-KIND10: %[[P:.*]] = fir.declare{{.*}}p"
! CHECK-FRM-KIND10: %[[R:.*]] = fir.declare{{.*}}r"
! CHECK-FRM-KIND10: %[[A_LOAD:.*]] = fir.load %[[A]]
! CHECK-FRM-KIND10: %[[P_LOAD:.*]] = fir.load %[[P]]
! CHECK-FRM-KIND10: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f80
! CHECK-FRM-KIND10: %[[CV1:.*]] = fir.convert %[[DIV]] : (f80) -> si80
! CHECK-FRM-KIND10: %[[CV2:.*]] = fir.convert %[[CV1]] : (si80) -> f80
! CHECK-FRM-KIND10: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f80
! CHECK-FRM-KIND10: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f80
! CHECK-FRM-KIND10: fir.store %[[SUB]] to %[[R]] : !fir.ref<f80>
! CHECK-FM-KIND10: %[[A:.*]] = fir.declare{{.*}}a"
! CHECK-FM-KIND10: %[[P:.*]] = fir.declare{{.*}}p"
! CHECK-FM-KIND10: %[[R:.*]] = fir.declare{{.*}}r"
! CHECK-FM-KIND10: %[[A_LOAD:.*]] = fir.load %[[A]]
! CHECK-FM-KIND10: %[[P_LOAD:.*]] = fir.load %[[P]]
! CHECK-FM-KIND10: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f80
! CHECK-FM-KIND10: %[[CV1:.*]] = fir.convert %[[DIV]] : (f80) -> si80
! CHECK-FM-KIND10: %[[CV2:.*]] = fir.convert %[[CV1]] : (si80) -> f80
! CHECK-FM-KIND10: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f80
! CHECK-FM-KIND10: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f80
! CHECK-FM-KIND10: fir.store %[[SUB]] to %[[R]] : !fir.ref<f80>
! CHECK-NFRM-KIND10: fir.call @_FortranAModReal10(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f80, f80, !fir.ref<i8>, i32) -> f80
r = mod(a, p)
end subroutine mod_real10
Expand All @@ -67,17 +103,28 @@ subroutine mod_real16(r, a, p)
implicit none
integer, parameter :: kind16 = merge(16, 4, selected_real_kind(p=33).eq.16)
real(kind=kind16) :: r, a, p
! CHECK-KIND16: %[[A:.*]] = fir.declare{{.*}}a"
! CHECK-KIND16: %[[P:.*]] = fir.declare{{.*}}p"
! CHECK-KIND16: %[[R:.*]] = fir.declare{{.*}}r"
! CHECK-KIND16: %[[A_LOAD:.*]] = fir.load %[[A]]
! CHECK-KIND16: %[[P_LOAD:.*]] = fir.load %[[P]]
! CHECK-KIND16: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f128
! CHECK-KIND16: %[[CV1:.*]] = fir.convert %[[DIV]] : (f128) -> si128
! CHECK-KIND16: %[[CV2:.*]] = fir.convert %[[CV1]] : (si128) -> f128
! CHECK-KIND16: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f128
! CHECK-KIND16: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f128
! CHECK-KIND16: fir.store %[[SUB]] to %[[R]] : !fir.ref<f128>
! CHECK-FRM-KIND16: %[[A:.*]] = fir.declare{{.*}}a"
! CHECK-FRM-KIND16: %[[P:.*]] = fir.declare{{.*}}p"
! CHECK-FRM-KIND16: %[[R:.*]] = fir.declare{{.*}}r"
! CHECK-FRM-KIND16: %[[A_LOAD:.*]] = fir.load %[[A]]
! CHECK-FRM-KIND16: %[[P_LOAD:.*]] = fir.load %[[P]]
! CHECK-FRM-KIND16: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<contract> : f128
! CHECK-FRM-KIND16: %[[CV1:.*]] = fir.convert %[[DIV]] : (f128) -> si128
! CHECK-FRM-KIND16: %[[CV2:.*]] = fir.convert %[[CV1]] : (si128) -> f128
! CHECK-FRM-KIND16: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<contract> : f128
! CHECK-FRM-KIND16: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<contract> : f128
! CHECK-FRM-KIND16: fir.store %[[SUB]] to %[[R]] : !fir.ref<f128>
! CHECK-FM-KIND16: %[[A:.*]] = fir.declare{{.*}}a"
! CHECK-FM-KIND16: %[[P:.*]] = fir.declare{{.*}}p"
! CHECK-FM-KIND16: %[[R:.*]] = fir.declare{{.*}}r"
! CHECK-FM-KIND16: %[[A_LOAD:.*]] = fir.load %[[A]]
! CHECK-FM-KIND16: %[[P_LOAD:.*]] = fir.load %[[P]]
! CHECK-FM-KIND16: %[[DIV:.*]] = arith.divf %[[A_LOAD]], %[[P_LOAD]] fastmath<fast> : f128
! CHECK-FM-KIND16: %[[CV1:.*]] = fir.convert %[[DIV]] : (f128) -> si128
! CHECK-FM-KIND16: %[[CV2:.*]] = fir.convert %[[CV1]] : (si128) -> f128
! CHECK-FM-KIND16: %[[MUL:.*]] = arith.mulf %[[CV2]], %[[P_LOAD]] fastmath<fast> : f128
! CHECK-FM-KIND16: %[[SUB:.*]] = arith.subf %[[A_LOAD]], %[[MUL]] fastmath<fast> : f128
! CHECK-FM-KIND16: fir.store %[[SUB]] to %[[R]] : !fir.ref<f128>
! CHECK-NFRM-KIND16: fir.call @_FortranAModReal16(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (f128, f128, !fir.ref<i8>, i32) -> f128
r = mod(a, p)
end subroutine mod_real16