diff --git a/llvm/include/llvm/Transforms/HipStdPar/HipStdPar.h b/llvm/include/llvm/Transforms/HipStdPar/HipStdPar.h
index 20850ba4c02d2..a9a370b27988a 100644
--- a/llvm/include/llvm/Transforms/HipStdPar/HipStdPar.h
+++ b/llvm/include/llvm/Transforms/HipStdPar/HipStdPar.h
@@ -41,6 +41,13 @@ class HipStdParAllocationInterpositionPass
   static bool isRequired() { return true; }
 };
 
+class HipStdParMathFixupPass : public PassInfoMixin<HipStdParMathFixupPass> {
+public:
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+
+  static bool isRequired() { return true; }
+};
+
 } // namespace llvm
 
 #endif // LLVM_TRANSFORMS_HIPSTDPAR_HIPSTDPAR_H
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index fd895830cc030..1b111dc20d35c 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -84,6 +84,7 @@ MODULE_PASS("global-merge-func", GlobalMergeFuncPass())
 MODULE_PASS("globalopt", GlobalOptPass())
 MODULE_PASS("globalsplit", GlobalSplitPass())
 MODULE_PASS("hipstdpar-interpose-alloc", HipStdParAllocationInterpositionPass())
+MODULE_PASS("hipstdpar-math-fixup", HipStdParMathFixupPass())
 MODULE_PASS("hipstdpar-select-accelerator-code",
             HipStdParAcceleratorCodeSelectionPass())
 MODULE_PASS("hotcoldsplit", HotColdSplittingPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index c865082a1dcea..38f9ee58f78ed 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -836,8 +836,10 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
           // When we are not using -fgpu-rdc, we can run accelerator code
           // selection relatively early, but still after linking to prevent
           // eager removal of potentially reachable symbols.
-          if (EnableHipStdPar)
+          if (EnableHipStdPar) {
+            PM.addPass(HipStdParMathFixupPass());
             PM.addPass(HipStdParAcceleratorCodeSelectionPass());
+          }
           PM.addPass(AMDGPUPrintfRuntimeBindingPass());
         }
 
@@ -916,8 +918,10 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
         // selection after linking to prevent, otherwise we end up removing
         // potentially reachable symbols that were exported as external in other
         // modules.
-        if (EnableHipStdPar)
+        if (EnableHipStdPar) {
+          PM.addPass(HipStdParMathFixupPass());
           PM.addPass(HipStdParAcceleratorCodeSelectionPass());
+        }
         // We want to support the -lto-partitions=N option as "best effort".
         // For that, we need to lower LDS earlier in the pipeline before the
         // module is partitioned for codegen.
diff --git a/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp b/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp
index b3910c42b4c74..d895cd7d78bf6 100644
--- a/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp
+++ b/llvm/lib/Transforms/HipStdPar/HipStdPar.cpp
@@ -37,6 +37,16 @@
 //    memory that ends up in one of the runtime equivalents, since this can
 //    happen if e.g. a library that was compiled without interposition returns
 //    an allocation that can be validly passed to `free`.
+//
+// 3. MathFixup (required): Some accelerators might have an incomplete
+//    implementation for the intrinsics used to implement some of the math
+//    functions in <cmath> / their corresponding libcall lowerings. Since this
+//    can vary quite significantly between accelerators, we replace calls to a
+//    set of intrinsics / lib functions known to be problematic with calls to a
+//    HIPSTDPAR specific forwarding layer, which gives an uniform interface for
+//    accelerators to implement in their own runtime components. This pass
+//    should run before AcceleratorCodeSelection so as to prevent the spurious
+//    removal of the HIPSTDPAR specific forwarding functions.
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/HipStdPar/HipStdPar.h"
@@ -49,6 +59,7 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
@@ -519,3 +530,110 @@ HipStdParAllocationInterpositionPass::run(Module &M, ModuleAnalysisManager&) {
 
   return PreservedAnalyses::none();
 }
+
+static constexpr std::pair<StringLiteral, StringLiteral> MathLibToHipStdPar[]{
+    {"acosh", "__hipstdpar_acosh_f64"},
+    {"acoshf", "__hipstdpar_acosh_f32"},
+    {"asinh", "__hipstdpar_asinh_f64"},
+    {"asinhf", "__hipstdpar_asinh_f32"},
+    {"atanh", "__hipstdpar_atanh_f64"},
+    {"atanhf", "__hipstdpar_atanh_f32"},
+    {"cbrt", "__hipstdpar_cbrt_f64"},
+    {"cbrtf", "__hipstdpar_cbrt_f32"},
+    {"erf", "__hipstdpar_erf_f64"},
+    {"erff", "__hipstdpar_erf_f32"},
+    {"erfc", "__hipstdpar_erfc_f64"},
+    {"erfcf", "__hipstdpar_erfc_f32"},
+    {"fdim", "__hipstdpar_fdim_f64"},
+    {"fdimf", "__hipstdpar_fdim_f32"},
+    {"expm1", "__hipstdpar_expm1_f64"},
+    {"expm1f", "__hipstdpar_expm1_f32"},
+    {"hypot", "__hipstdpar_hypot_f64"},
+    {"hypotf", "__hipstdpar_hypot_f32"},
+    {"ilogb", "__hipstdpar_ilogb_f64"},
+    {"ilogbf", "__hipstdpar_ilogb_f32"},
+    {"lgamma", "__hipstdpar_lgamma_f64"},
+    {"lgammaf", "__hipstdpar_lgamma_f32"},
+    {"log1p", "__hipstdpar_log1p_f64"},
+    {"log1pf", "__hipstdpar_log1p_f32"},
+    {"logb", "__hipstdpar_logb_f64"},
+    {"logbf", "__hipstdpar_logb_f32"},
+    {"nextafter", "__hipstdpar_nextafter_f64"},
+    {"nextafterf", "__hipstdpar_nextafter_f32"},
+    {"nexttoward", "__hipstdpar_nexttoward_f64"},
+    {"nexttowardf", "__hipstdpar_nexttoward_f32"},
+    {"remainder", "__hipstdpar_remainder_f64"},
+    {"remainderf", "__hipstdpar_remainder_f32"},
+    {"remquo", "__hipstdpar_remquo_f64"},
+    {"remquof", "__hipstdpar_remquo_f32"},
+    {"scalbln", "__hipstdpar_scalbln_f64"},
+    {"scalblnf", "__hipstdpar_scalbln_f32"},
+    {"scalbn", "__hipstdpar_scalbn_f64"},
+    {"scalbnf", "__hipstdpar_scalbn_f32"},
+    {"tgamma", "__hipstdpar_tgamma_f64"},
+    {"tgammaf", "__hipstdpar_tgamma_f32"}};
+
+PreservedAnalyses HipStdParMathFixupPass::run(Module &M,
+                                              ModuleAnalysisManager &) {
+  if (M.empty())
+    return PreservedAnalyses::all();
+
+  SmallVector<std::pair<Function *, std::string>> ToReplace;
+  for (auto &&F : M) {
+    if (!F.hasName())
+      continue;
+
+    StringRef N = F.getName();
+    Intrinsic::ID ID = F.getIntrinsicID();
+
+    switch (ID) {
+    case Intrinsic::not_intrinsic: {
+      auto It =
+          find_if(MathLibToHipStdPar, [&](auto &&M) { return M.first == N; });
+      if (It == std::cend(MathLibToHipStdPar))
+        continue;
+      ToReplace.emplace_back(&F, It->second);
+      break;
+    }
+    case Intrinsic::acos:
+    case Intrinsic::asin:
+    case Intrinsic::atan:
+    case Intrinsic::atan2:
+    case Intrinsic::cosh:
+    case Intrinsic::modf:
+    case Intrinsic::sinh:
+    case Intrinsic::tan:
+    case Intrinsic::tanh:
+      break;
+    default: {
+      if (F.getReturnType()->isDoubleTy()) {
+        switch (ID) {
+        case Intrinsic::cos:
+        case Intrinsic::exp:
+        case Intrinsic::exp2:
+        case Intrinsic::log:
+        case Intrinsic::log10:
+        case Intrinsic::log2:
+        case Intrinsic::pow:
+        case Intrinsic::sin:
+          break;
+        default:
+          continue;
+        }
+        break;
+      }
+      continue;
+    }
+    }
+
+    ToReplace.emplace_back(&F, N);
+    llvm::replace(ToReplace.back().second, '.', '_');
+    StringRef Prefix = "llvm";
+    ToReplace.back().second.replace(0, Prefix.size(), "__hipstdpar");
+  }
+  for (auto &&[F, NewF] : ToReplace)
+    F->replaceAllUsesWith(
+        M.getOrInsertFunction(NewF, F->getFunctionType()).getCallee());
+
+  return PreservedAnalyses::none();
+}
diff --git a/llvm/test/Transforms/HipStdPar/math-fixup.ll b/llvm/test/Transforms/HipStdPar/math-fixup.ll
new file mode 100644
index 0000000000000..2c4622cb1f603
--- /dev/null
+++ b/llvm/test/Transforms/HipStdPar/math-fixup.ll
@@ -0,0 +1,548 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=hipstdpar-math-fixup %s | FileCheck %s
+
+define void @test_acos(double %dbl, float %flt) {
+; CHECK-LABEL: define void @test_acos(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_acos_f64(double [[DBL]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_acos_f32(float [[FLT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @llvm.acos.f64(double %dbl)
+  %1 = call float @llvm.acos.f32(float %flt)
+  ret void
+}
+
+define void @test_acosh(double %dbl, float %flt) {
+; CHECK-LABEL: define void @test_acosh(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_acosh_f64(double [[DBL]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_acosh_f32(float [[FLT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @acosh(double %dbl)
+  %1 = call float @acoshf(float %flt)
+  ret void
+}
+
+define void @test_asin(double %dbl, float %flt) {
+; CHECK-LABEL: define void @test_asin(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_asin_f64(double [[DBL]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_asin_f32(float [[FLT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @llvm.asin.f64(double %dbl)
+  %1 = call float @llvm.asin.f32(float %flt)
+  ret void
+}
+
+define void @test_asinh(double %dbl, float %flt) {
+; CHECK-LABEL: define void @test_asinh(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_asinh_f64(double [[DBL]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_asinh_f32(float [[FLT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @asinh(double %dbl)
+  %1 = call float @asinhf(float %flt)
+  ret void
+}
+
+define void @test_atan(double %dbl, float %flt) {
+; CHECK-LABEL: define void @test_atan(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_atan_f64(double [[DBL]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_atan_f32(float [[FLT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @llvm.atan.f64(double %dbl)
+  %1 = call float @llvm.atan.f32(float %flt)
+  ret void
+}
+
+define void @test_atanh(double %dbl, float %flt) {
+; CHECK-LABEL: define void @test_atanh(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_atanh_f64(double [[DBL]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_atanh_f32(float [[FLT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @atanh(double %dbl)
+  %1 = call float @atanhf(float %flt)
+  ret void
+}
+
+define void @test_atan2(double %dbl, float %flt) {
+; CHECK-LABEL: define void @test_atan2(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_atan2_f64(double [[DBL]], double [[DBL]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_atan2_f32(float [[FLT]], float [[FLT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @llvm.atan2.f64(double %dbl, double %dbl)
+  %1 = call float @llvm.atan2.f32(float %flt, float %flt)
+  ret void
+}
+
+define void @test_cbrt(double %dbl, float %flt) {
+; CHECK-LABEL: define void @test_cbrt(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_cbrt_f64(double [[DBL]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_cbrt_f32(float [[FLT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @cbrt(double %dbl)
+  %1 = call float @cbrtf(float %flt)
+  ret void
+}
+
+define void @test_cos(double %dbl) {
+; CHECK-LABEL: define void @test_cos(
+; CHECK-SAME: double [[DBL:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_cos_f64(double [[DBL]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @llvm.cos.f64(double %dbl)
+  ret void
+}
+
+define void @test_cosh(double %dbl, float %flt) {
+; CHECK-LABEL: define void @test_cosh(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_cosh_f64(double [[DBL]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_cosh_f32(float [[FLT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @llvm.cosh.f64(double %dbl)
+  %1 = call float @llvm.cosh.f32(float %flt)
+  ret void
+}
+
+define void @test_erf(double %dbl, float %flt) {
+; CHECK-LABEL: define void @test_erf(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_erf_f64(double [[DBL]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_erf_f32(float [[FLT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @erf(double %dbl)
+  %1 = call float @erff(float %flt)
+  ret void
+}
+
+define void @test_erfc(double %dbl, float %flt) {
+; CHECK-LABEL: define void @test_erfc(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_erfc_f64(double [[DBL]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_erfc_f32(float [[FLT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @erfc(double %dbl)
+  %1 = call float @erfcf(float %flt)
+  ret void
+}
+
+define void @test_exp(double %dbl) {
+; CHECK-LABEL: define void @test_exp(
+; CHECK-SAME: double [[DBL:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_exp_f64(double [[DBL]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @llvm.exp.f64(double %dbl)
+  ret void
+}
+
+define void @test_exp2(double %dbl) {
+; CHECK-LABEL: define void @test_exp2(
+; CHECK-SAME: double [[DBL:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_exp2_f64(double [[DBL]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @llvm.exp2.f64(double %dbl)
+  ret void
+}
+
+define void @test_expm1(double %dbl, float %flt) {
+; CHECK-LABEL: define void @test_expm1(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_expm1_f64(double [[DBL]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_expm1_f32(float [[FLT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @expm1(double %dbl)
+  %1 = call float @expm1f(float %flt)
+  ret void
+}
+
+define void @test_fdim(double %dbl, float %flt) {
+; CHECK-LABEL: define void @test_fdim(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_fdim_f64(double [[DBL]], double [[DBL]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_fdim_f32(float [[FLT]], float [[FLT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @fdim(double %dbl, double %dbl)
+  %1 = call float @fdimf(float %flt, float %flt)
+  ret void
+}
+
+define void @test_hypot(double %dbl, float %flt) {
+; CHECK-LABEL: define void @test_hypot(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_hypot_f64(double [[DBL]], double [[DBL]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_hypot_f32(float [[FLT]], float [[FLT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @hypot(double %dbl, double %dbl)
+  %1 = call float @hypotf(float %flt, float %flt)
+  ret void
+}
+
+define void @test_lgamma(double %dbl, float %flt) {
+; CHECK-LABEL: define void @test_lgamma(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_lgamma_f64(double [[DBL]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_lgamma_f32(float [[FLT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @lgamma(double %dbl)
+  %1 = call float @lgammaf(float %flt)
+  ret void
+}
+
+define void @test_log(double %dbl) {
+; CHECK-LABEL: define void @test_log(
+; CHECK-SAME: double [[DBL:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_log_f64(double [[DBL]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @llvm.log.f64(double %dbl)
+  ret void
+}
+
+define void @test_log10(double %dbl) {
+; CHECK-LABEL: define void @test_log10(
+; CHECK-SAME: double [[DBL:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_log10_f64(double [[DBL]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @llvm.log10.f64(double %dbl)
+  ret void
+}
+
+define void @test_log2(double %dbl) {
+; CHECK-LABEL: define void @test_log2(
+; CHECK-SAME: double [[DBL:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_log2_f64(double [[DBL]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @llvm.log2.f64(double %dbl)
+  ret void
+}
+
+define void @test_log1p(double %dbl, float %flt) {
+; CHECK-LABEL: define void @test_log1p(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_log1p_f64(double [[DBL]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_log1p_f32(float [[FLT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @log1p(double %dbl)
+  %1 = call float @log1pf(float %flt)
+  ret void
+}
+
+define void @test_modf(double %dbl, float %flt, ptr %pdbl, ptr %pflt) {
+; CHECK-LABEL: define void @test_modf(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]], ptr [[PDBL:%.*]], ptr [[PFLT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call { double, double } @__hipstdpar_modf_f64(double [[DBL]])
+; CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { double, double } [[TMP0]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { double, double } [[TMP0]], 1
+; CHECK-NEXT:    store double [[TMP2]], ptr [[PDBL]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call { float, float } @__hipstdpar_modf_f32(float [[FLT]])
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { float, float } [[TMP3]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { float, float } [[TMP3]], 1
+; CHECK-NEXT:    store float [[TMP5]], ptr [[PFLT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = tail call { double, double } @llvm.modf.f64(double %dbl)
+  %1 = extractvalue { double, double } %0, 0
+  %2 = extractvalue { double, double } %0, 1
+  store double %2, ptr %pdbl, align 8
+  %3 = tail call { float, float } @llvm.modf.f32(float %flt)
+  %4 = extractvalue { float, float } %3, 0
+  %5 = extractvalue { float, float } %3, 1
+  store float %5, ptr %pflt, align 4
+  ret void
+}
+
+define void @test_pow(double %dbl) {
+; CHECK-LABEL: define void @test_pow(
+; CHECK-SAME: double [[DBL:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_pow_f64(double [[DBL]], double [[DBL]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @llvm.pow.f64(double %dbl, double %dbl)
+  ret void
+}
+
+define void @test_remainder(double %dbl, float %flt) {
+; CHECK-LABEL: define void @test_remainder(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_remainder_f64(double [[DBL]], double [[DBL]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_remainder_f32(float [[FLT]], float [[FLT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @remainder(double %dbl, double %dbl)
+  %1 = call float @remainderf(float %flt, float %flt)
+  ret void
+}
+
+define void @test_remquo(double %dbl, float %flt, ptr %p) {
+; CHECK-LABEL: define void @test_remquo(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_remquo_f64(double [[DBL]], double [[DBL]], ptr [[P]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_remquo_f32(float [[FLT]], float [[FLT]], ptr [[P]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @remquo(double %dbl, double %dbl, ptr %p)
+  %1 = call float @remquof(float %flt, float %flt, ptr %p)
+  ret void
+}
+
+define void @test_sin(double %dbl) {
+; CHECK-LABEL: define void @test_sin(
+; CHECK-SAME: double [[DBL:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_sin_f64(double [[DBL]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @llvm.sin.f64(double %dbl)
+  ret void
+}
+
+define void @test_sinh(double %dbl, float %flt) {
+; CHECK-LABEL: define void @test_sinh(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_sinh_f64(double [[DBL]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_sinh_f32(float [[FLT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @llvm.sinh.f64(double %dbl)
+  %1 = call float @llvm.sinh.f32(float %flt)
+  ret void
+}
+
+define void @test_tan(double %dbl, float %flt) {
+; CHECK-LABEL: define void @test_tan(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_tan_f64(double [[DBL]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_tan_f32(float [[FLT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @llvm.tan.f64(double %dbl)
+  %1 = call float @llvm.tan.f32(float %flt)
+  ret void
+}
+
+define void @test_tanh(double %dbl, float %flt) {
+; CHECK-LABEL: define void @test_tanh(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_tanh_f64(double [[DBL]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_tanh_f32(float [[FLT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @llvm.tanh.f64(double %dbl)
+  %1 = call float @llvm.tanh.f32(float %flt)
+  ret void
+}
+
+define void @test_tgamma(double %dbl, float %flt) {
+; CHECK-LABEL: define void @test_tgamma(
+; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_tgamma_f64(double [[DBL]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_tgamma_f32(float [[FLT]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = call double @tgamma(double %dbl)
+  %1 = call float @tgammaf(float %flt)
+  ret void
+}
+
+@globdbl = global double 4.200000e+01
+@globflt = global float 4.200000e+01
+
+define void @global_args() {
+; CHECK-LABEL: define void @global_args() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[DBL:%.*]] = load double, ptr @globdbl, align 8
+; CHECK-NEXT:    [[FLT:%.*]] = load float, ptr @globflt, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = call double @__hipstdpar_remquo_f64(double [[DBL]], double [[DBL]], ptr @globdbl)
+; CHECK-NEXT:    [[TMP1:%.*]] = call float @__hipstdpar_remquo_f32(float [[FLT]], float [[FLT]], ptr @globflt)
+; CHECK-NEXT:    ret void
+;
+entry:
+  %dbl = load double, ptr @globdbl
+  %flt = load float, ptr @globflt
+  %1 = call double @remquo(double %dbl, double %dbl, ptr @globdbl)
+  %2 = call float @remquof(float %flt, float %flt, ptr @globflt)
+  ret void
+}
+
+declare hidden double @remainder(double, double)
+
+declare hidden float @remainderf(float, float)
+
+declare hidden double @remquo(double, double, ptr)
+
+declare hidden float @remquof(float, float, ptr)
+
+declare hidden double @fdim(double, double)
+
+declare hidden float @fdimf(float, float)
+
+declare double @llvm.exp.f64(double)
+
+declare float @llvm.exp.f32(float)
+
+declare double @llvm.exp2.f64(double)
+
+declare float @llvm.exp2.f32(float)
+
+declare hidden double @expm1(double)
+
+declare hidden float @expm1f(float)
+
+declare double @llvm.log.f64(double)
+
+declare double @llvm.log10.f64(double)
+
+declare double @llvm.log2.f64(double)
+
+declare hidden double @log1p(double)
+
+declare hidden float @log1pf(float)
+
+declare { float, float } @llvm.modf.f32(float)
+
+declare { double, double } @llvm.modf.f64(double)
+
+declare double @llvm.pow.f64(double, double)
+
+declare hidden double @cbrt(double)
+
+declare hidden float @cbrtf(float)
+
+declare hidden double @hypot(double, double)
+
+declare hidden float @hypotf(float, float)
+
+declare double @llvm.sin.f64(double)
+
+declare double @llvm.cos.f64(double)
+
+declare double @llvm.tan.f64(double)
+
+declare double @llvm.asin.f64(double)
+
+declare double @llvm.acos.f64(double)
+
+declare double @llvm.atan.f64(double)
+
+declare double @llvm.atan2.f64(double, double)
+
+declare double @llvm.sinh.f64(double)
+
+declare double @llvm.cosh.f64(double)
+
+declare double @llvm.tanh.f64(double)
+
+declare hidden double @asinh(double)
+
+declare hidden float @asinhf(float)
+
+declare hidden double @acosh(double)
+
+declare hidden float @acoshf(float)
+
+declare hidden double @atanh(double)
+
+declare hidden float @atanhf(float)
+
+declare hidden double @erf(double)
+
+declare hidden float @erff(float)
+
+declare hidden double @erfc(double)
+
+declare hidden float @erfcf(float)
+
+declare hidden double @tgamma(double)
+
+declare hidden float @tgammaf(float)
+
+declare hidden double @lgamma(double)
+
+declare hidden float @lgammaf(float)