[flang] Fold ERFC_SCALED (llvm#112287)

klausler · DanielCChen · commit a47b24ac83bb · 2024-10-16T10:48:13.000-04:00
Move the ErfcScaled template function from the runtime into a new header
file in flang/include/Common, then use it in constant folding to
implement folding for the erfc_scaled() intrinsic function.
diff --git a/flang/include/flang/Common/erfc-scaled.h b/flang/include/flang/Common/erfc-scaled.h
@@ -0,0 +1,116 @@
+//===-- include/flang/Common/erfc-scaled.h-----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_COMMON_ERFC_SCALED_H_
+#define FORTRAN_COMMON_ERFC_SCALED_H_
+
+namespace Fortran::common {
+template <typename T> inline T ErfcScaled(T arg) {
+  // Coefficients for approximation to erfc in the first interval.
+  static const T a[5] = {3.16112374387056560e00, 1.13864154151050156e02,
+      3.77485237685302021e02, 3.20937758913846947e03, 1.85777706184603153e-1};
+  static const T b[4] = {2.36012909523441209e01, 2.44024637934444173e02,
+      1.28261652607737228e03, 2.84423683343917062e03};
+
+  // Coefficients for approximation to erfc in the second interval.
+  static const T c[9] = {5.64188496988670089e-1, 8.88314979438837594e00,
+      6.61191906371416295e01, 2.98635138197400131e02, 8.81952221241769090e02,
+      1.71204761263407058e03, 2.05107837782607147e03, 1.23033935479799725e03,
+      2.15311535474403846e-8};
+  static const T d[8] = {1.57449261107098347e01, 1.17693950891312499e02,
+      5.37181101862009858e02, 1.62138957456669019e03, 3.29079923573345963e03,
+      4.36261909014324716e03, 3.43936767414372164e03, 1.23033935480374942e03};
+
+  // Coefficients for approximation to erfc in the third interval.
+  static const T p[6] = {3.05326634961232344e-1, 3.60344899949804439e-1,
+      1.25781726111229246e-1, 1.60837851487422766e-2, 6.58749161529837803e-4,
+      1.63153871373020978e-2};
+  static const T q[5] = {2.56852019228982242e00, 1.87295284992346047e00,
+      5.27905102951428412e-1, 6.05183413124413191e-2, 2.33520497626869185e-3};
+
+  constexpr T sqrtpi{1.7724538509078120380404576221783883301349L};
+  constexpr T rsqrtpi{0.5641895835477562869480794515607725858440L};
+  constexpr T epsilonby2{std::numeric_limits<T>::epsilon() * 0.5};
+  constexpr T xneg{-26.628e0};
+  constexpr T xhuge{6.71e7};
+  constexpr T thresh{0.46875e0};
+  constexpr T zero{0.0};
+  constexpr T one{1.0};
+  constexpr T four{4.0};
+  constexpr T sixteen{16.0};
+  constexpr T xmax{1.0 / (sqrtpi * std::numeric_limits<T>::min())};
+  static_assert(xmax > xhuge, "xmax must be greater than xhuge");
+
+  T ysq;
+  T xnum;
+  T xden;
+  T del;
+  T result;
+
+  auto x{arg};
+  auto y{std::fabs(x)};
+
+  if (y <= thresh) {
+    // evaluate erf for  |x| <= 0.46875
+    ysq = zero;
+    if (y > epsilonby2) {
+      ysq = y * y;
+    }
+    xnum = a[4] * ysq;
+    xden = ysq;
+    for (int i{0}; i < 3; i++) {
+      xnum = (xnum + a[i]) * ysq;
+      xden = (xden + b[i]) * ysq;
+    }
+    result = x * (xnum + a[3]) / (xden + b[3]);
+    result = one - result;
+    result = std::exp(ysq) * result;
+    return result;
+  } else if (y <= four) {
+    //  evaluate erfc for 0.46875 < |x| <= 4.0
+    xnum = c[8] * y;
+    xden = y;
+    for (int i{0}; i < 7; ++i) {
+      xnum = (xnum + c[i]) * y;
+      xden = (xden + d[i]) * y;
+    }
+    result = (xnum + c[7]) / (xden + d[7]);
+  } else {
+    //  evaluate erfc for |x| > 4.0
+    result = zero;
+    if (y >= xhuge) {
+      if (y < xmax) {
+        result = rsqrtpi / y;
+      }
+    } else {
+      ysq = one / (y * y);
+      xnum = p[5] * ysq;
+      xden = ysq;
+      for (int i{0}; i < 4; ++i) {
+        xnum = (xnum + p[i]) * ysq;
+        xden = (xden + q[i]) * ysq;
+      }
+      result = ysq * (xnum + p[4]) / (xden + q[4]);
+      result = (rsqrtpi - result) / y;
+    }
+  }
+  //  fix up for negative argument, erf, etc.
+  if (x < zero) {
+    if (x < xneg) {
+      result = std::numeric_limits<T>::max();
+    } else {
+      ysq = trunc(x * sixteen) / sixteen;
+      del = (x - ysq) * (x + ysq);
+      y = std::exp((ysq * ysq)) * std::exp((del));
+      result = (y + y) - result;
+    }
+  }
+  return result;
+}
+} // namespace Fortran::common
+#endif // FORTRAN_COMMON_ERFC_SCALED_H_
diff --git a/flang/lib/Evaluate/intrinsics-library.cpp b/flang/lib/Evaluate/intrinsics-library.cpp
@@ -14,6 +14,7 @@
 #include "flang/Evaluate/intrinsics-library.h"
 #include "fold-implementation.h"
 #include "host.h"
+#include "flang/Common/erfc-scaled.h"
 #include "flang/Common/static-multimap-view.h"
 #include "flang/Evaluate/expression.h"
 #include <cfloat>
@@ -231,6 +232,7 @@ struct HostRuntimeLibrary<HostT, LibraryVersion::Libm> {
       FolderFactory<F, F{std::cosh}>::Create("cosh"),
       FolderFactory<F, F{std::erf}>::Create("erf"),
       FolderFactory<F, F{std::erfc}>::Create("erfc"),
+      FolderFactory<F, F{common::ErfcScaled}>::Create("erfc_scaled"),
       FolderFactory<F, F{std::exp}>::Create("exp"),
       FolderFactory<F, F{std::tgamma}>::Create("gamma"),
       FolderFactory<F, F{std::log}>::Create("log"),
diff --git a/flang/runtime/numeric-templates.h b/flang/runtime/numeric-templates.h
@@ -21,6 +21,7 @@
 #include "terminator.h"
 #include "tools.h"
 #include "flang/Common/api-attrs.h"
+#include "flang/Common/erfc-scaled.h"
 #include "flang/Common/float128.h"
 #include <cstdint>
 #include <limits>
@@ -362,106 +363,7 @@ template <int PREC, typename T> inline RT_API_ATTRS T Spacing(T x) {
 
 // ERFC_SCALED (16.9.71)
 template <typename T> inline RT_API_ATTRS T ErfcScaled(T arg) {
-  // Coefficients for approximation to erfc in the first interval.
-  static const T a[5] = {3.16112374387056560e00, 1.13864154151050156e02,
-      3.77485237685302021e02, 3.20937758913846947e03, 1.85777706184603153e-1};
-  static const T b[4] = {2.36012909523441209e01, 2.44024637934444173e02,
-      1.28261652607737228e03, 2.84423683343917062e03};
-
-  // Coefficients for approximation to erfc in the second interval.
-  static const T c[9] = {5.64188496988670089e-1, 8.88314979438837594e00,
-      6.61191906371416295e01, 2.98635138197400131e02, 8.81952221241769090e02,
-      1.71204761263407058e03, 2.05107837782607147e03, 1.23033935479799725e03,
-      2.15311535474403846e-8};
-  static const T d[8] = {1.57449261107098347e01, 1.17693950891312499e02,
-      5.37181101862009858e02, 1.62138957456669019e03, 3.29079923573345963e03,
-      4.36261909014324716e03, 3.43936767414372164e03, 1.23033935480374942e03};
-
-  // Coefficients for approximation to erfc in the third interval.
-  static const T p[6] = {3.05326634961232344e-1, 3.60344899949804439e-1,
-      1.25781726111229246e-1, 1.60837851487422766e-2, 6.58749161529837803e-4,
-      1.63153871373020978e-2};
-  static const T q[5] = {2.56852019228982242e00, 1.87295284992346047e00,
-      5.27905102951428412e-1, 6.05183413124413191e-2, 2.33520497626869185e-3};
-
-  constexpr T sqrtpi{1.7724538509078120380404576221783883301349L};
-  constexpr T rsqrtpi{0.5641895835477562869480794515607725858440L};
-  constexpr T epsilonby2{std::numeric_limits<T>::epsilon() * 0.5};
-  constexpr T xneg{-26.628e0};
-  constexpr T xhuge{6.71e7};
-  constexpr T thresh{0.46875e0};
-  constexpr T zero{0.0};
-  constexpr T one{1.0};
-  constexpr T four{4.0};
-  constexpr T sixteen{16.0};
-  constexpr T xmax{1.0 / (sqrtpi * std::numeric_limits<T>::min())};
-  static_assert(xmax > xhuge, "xmax must be greater than xhuge");
-
-  T ysq;
-  T xnum;
-  T xden;
-  T del;
-  T result;
-
-  auto x{arg};
-  auto y{std::fabs(x)};
-
-  if (y <= thresh) {
-    // evaluate erf for  |x| <= 0.46875
-    ysq = zero;
-    if (y > epsilonby2) {
-      ysq = y * y;
-    }
-    xnum = a[4] * ysq;
-    xden = ysq;
-    for (int i{0}; i < 3; i++) {
-      xnum = (xnum + a[i]) * ysq;
-      xden = (xden + b[i]) * ysq;
-    }
-    result = x * (xnum + a[3]) / (xden + b[3]);
-    result = one - result;
-    result = std::exp(ysq) * result;
-    return result;
-  } else if (y <= four) {
-    //  evaluate erfc for 0.46875 < |x| <= 4.0
-    xnum = c[8] * y;
-    xden = y;
-    for (int i{0}; i < 7; ++i) {
-      xnum = (xnum + c[i]) * y;
-      xden = (xden + d[i]) * y;
-    }
-    result = (xnum + c[7]) / (xden + d[7]);
-  } else {
-    //  evaluate erfc for |x| > 4.0
-    result = zero;
-    if (y >= xhuge) {
-      if (y < xmax) {
-        result = rsqrtpi / y;
-      }
-    } else {
-      ysq = one / (y * y);
-      xnum = p[5] * ysq;
-      xden = ysq;
-      for (int i{0}; i < 4; ++i) {
-        xnum = (xnum + p[i]) * ysq;
-        xden = (xden + q[i]) * ysq;
-      }
-      result = ysq * (xnum + p[4]) / (xden + q[4]);
-      result = (rsqrtpi - result) / y;
-    }
-  }
-  //  fix up for negative argument, erf, etc.
-  if (x < zero) {
-    if (x < xneg) {
-      result = std::numeric_limits<T>::max();
-    } else {
-      ysq = trunc(x * sixteen) / sixteen;
-      del = (x - ysq) * (x + ysq);
-      y = std::exp((ysq * ysq)) * std::exp((del));
-      result = (y + y) - result;
-    }
-  }
-  return result;
+  return common::ErfcScaled(arg);
 }
 
 } // namespace Fortran::runtime
diff --git a/flang/test/Evaluate/fold-erfc-scaled.f90 b/flang/test/Evaluate/fold-erfc-scaled.f90
@@ -0,0 +1,7 @@
+! RUN: %python %S/test_folding.py %s %flang_fc1
+module m
+  real(4), parameter :: x20_4 = erfc_scaled(20._4)
+  logical, parameter :: t20_4 = x20_4 == 0.02817435003817081451416015625_4
+  real(8), parameter :: x20_8 = erfc_scaled(20._8)
+  logical, parameter :: t20_8 = x20_8 == 0.0281743487410513193669459042212110944092273712158203125_8
+end