diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 554dbaff2ce0d..3497dd53e190d 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9017,6 +9017,10 @@ def err_global_call_not_config : Error<
 def err_ref_bad_target : Error<
   "reference to %select{__device__|__global__|__host__|__host__ __device__}0 "
   "%select{function|variable}1 %2 in %select{__device__|__global__|__host__|__host__ __device__}3 function">;
+def warn_target_specfier_ignored : Warning<
+  "target attribute has been ignored for overload resolution; "
+  "move the target attribute to the beginning of the declaration to use it for overload resolution">,
+  InGroup<IgnoredAttributes>;
 def note_cuda_const_var_unpromoted : Note<
   "const variable cannot be emitted on device side due to dynamic initialization">;
 def note_cuda_host_var : Note<
diff --git a/clang/include/clang/Sema/SemaCUDA.h b/clang/include/clang/Sema/SemaCUDA.h
index 63dc3f4da240b..26d839db7fe6c 100644
--- a/clang/include/clang/Sema/SemaCUDA.h
+++ b/clang/include/clang/Sema/SemaCUDA.h
@@ -104,6 +104,8 @@ class SemaCUDA : public SemaBase {
   CUDAFunctionTarget IdentifyTarget(const FunctionDecl *D,
                                     bool IgnoreImplicitHDAttr = false);
   CUDAFunctionTarget IdentifyTarget(const ParsedAttributesView &Attrs);
+  CUDAFunctionTarget IdentifyTarget(
+      const SmallVectorImpl<AttributeCommonInfo::Kind> &AttrKinds);

   enum CUDAVariableTarget {
     CVT_Device,  /// Emitted on device side with a shadow variable on host side
@@ -120,21 +122,44 @@ class SemaCUDA : public SemaBase {
     CTCK_Unknown,       /// Unknown context
     CTCK_InitGlobalVar, /// Function called during global variable
                         /// initialization
+    CTCK_Declaration,   /// Function called in a declaration specifier or
+                        /// declarator outside of other contexts, usually in
+                        /// template arguments.
   };

   /// Define the current global CUDA host/device context where a function may be
   /// called. Only used when a function is called outside of any functions.
-  struct CUDATargetContext {
-    CUDAFunctionTarget Target = CUDAFunctionTarget::HostDevice;
+  class CUDATargetContext {
+  public:
     CUDATargetContextKind Kind = CTCK_Unknown;
-    Decl *D = nullptr;
+
+    CUDATargetContext() = default;
+
+    CUDATargetContext(SemaCUDA *S, CUDATargetContextKind Kind,
+                      CUDAFunctionTarget Target)
+        : Kind(Kind), S(S), Target(Target) {}
+
+    CUDAFunctionTarget getTarget();
+
+    /// If this is a CTCK_Declaration context, update the Target based on Attrs.
+    /// No-op otherwise.
+    /// Issues a diagnostic if the target changes after it has been queried
+    /// before.
+    void tryRegisterTargetAttrs(const ParsedAttributesView &Attrs);
+
+  private:
+    SemaCUDA *S = nullptr;
+    CUDAFunctionTarget Target = CUDAFunctionTarget::HostDevice;
+    SmallVector<AttributeCommonInfo::Kind> AttrKinds;
+    bool TargetQueried = false;
   } CurCUDATargetCtx;

   struct CUDATargetContextRAII {
     SemaCUDA &S;
     SemaCUDA::CUDATargetContext SavedCtx;
     CUDATargetContextRAII(SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K,
-                          Decl *D);
+                          Decl *D = nullptr);
     ~CUDATargetContextRAII() { S.CurCUDATargetCtx = SavedCtx; }
   };
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index a8a9d3f3f5b08..e6270b252cda8 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -311,6 +311,9 @@ void Parser::ParseGNUAttributes(ParsedAttributes &Attrs,
   }

   Attrs.Range = SourceRange(StartLoc, EndLoc);
+
+  if (Actions.getLangOpts().CUDA)
+    Actions.CUDA().CurCUDATargetCtx.tryRegisterTargetAttrs(Attrs);
 }

 /// Determine whether the given attribute has an identifier argument.
@@ -1003,6 +1006,9 @@ void Parser::ParseMicrosoftDeclSpecs(ParsedAttributes &Attrs) { } Attrs.Range = SourceRange(StartLoc, EndLoc); + + if (Actions.getLangOpts().CUDA) + Actions.CUDA().CurCUDATargetCtx.tryRegisterTargetAttrs(Attrs); } void Parser::ParseMicrosoftTypeAttributes(ParsedAttributes &attrs) { @@ -2023,6 +2029,13 @@ Parser::DeclGroupPtrTy Parser::ParseDeclaration(DeclaratorContext Context, // parsing c none objective-c decls. ObjCDeclContextSwitch ObjCDC(*this); + SemaCUDA::CUDATargetContextRAII CTCRAII(Actions.CUDA(), + SemaCUDA::CTCK_Declaration); + if (Actions.getLangOpts().CUDA) { + Actions.CUDA().CurCUDATargetCtx.tryRegisterTargetAttrs(DeclAttrs); + Actions.CUDA().CurCUDATargetCtx.tryRegisterTargetAttrs(DeclSpecAttrs); + } + Decl *SingleDecl = nullptr; switch (Tok.getKind()) { case tok::kw_template: diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index aac89d910bbc8..0001073104333 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -27,6 +27,7 @@ #include "clang/Sema/EnterExpressionEvaluationContext.h" #include "clang/Sema/ParsedTemplate.h" #include "clang/Sema/Scope.h" +#include "clang/Sema/SemaCUDA.h" #include "clang/Sema/SemaCodeCompletion.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/TimeProfiler.h" @@ -2852,6 +2853,11 @@ Parser::DeclGroupPtrTy Parser::ParseCXXClassMemberDeclaration( ParsedTemplateInfo &TemplateInfo, ParsingDeclRAIIObject *TemplateDiags) { assert(getLangOpts().CPlusPlus && "ParseCXXClassMemberDeclaration should only be called in C++ mode"); + SemaCUDA::CUDATargetContextRAII CTCRAII(Actions.CUDA(), + SemaCUDA::CTCK_Declaration); + if (Actions.getLangOpts().CUDA) + Actions.CUDA().CurCUDATargetCtx.tryRegisterTargetAttrs(AccessAttrs); + if (Tok.is(tok::at)) { if (getLangOpts().ObjC && NextToken().isObjCAtKeyword(tok::objc_defs)) Diag(Tok, diag::err_at_defs_cxx); diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index 
04c2f1d380bc4..b7bc11964e968 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -21,6 +21,7 @@ #include "clang/Sema/DeclSpec.h" #include "clang/Sema/ParsedTemplate.h" #include "clang/Sema/Scope.h" +#include "clang/Sema/SemaCUDA.h" #include "clang/Sema/SemaCodeCompletion.h" #include "llvm/Support/Path.h" #include "llvm/Support/TimeProfiler.h" @@ -1133,6 +1134,13 @@ bool Parser::isStartOfFunctionDefinition(const ParsingDeclarator &Declarator) { Parser::DeclGroupPtrTy Parser::ParseDeclOrFunctionDefInternal( ParsedAttributes &Attrs, ParsedAttributes &DeclSpecAttrs, ParsingDeclSpec &DS, AccessSpecifier AS) { + SemaCUDA::CUDATargetContextRAII CTCRAII(Actions.CUDA(), + SemaCUDA::CTCK_Declaration); + if (Actions.getLangOpts().CUDA) { + Actions.CUDA().CurCUDATargetCtx.tryRegisterTargetAttrs(Attrs); + Actions.CUDA().CurCUDATargetCtx.tryRegisterTargetAttrs(DeclSpecAttrs); + } + // Because we assume that the DeclSpec has not yet been initialised, we simply // overwrite the source range and attribute the provided leading declspec // attributes. diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index ec37c0df56c67..e4c7c1c2c57d8 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -18,6 +18,7 @@ #include "clang/Basic/TargetInfo.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/Lookup.h" +#include "clang/Sema/ParsedAttr.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/Sema.h" #include "clang/Sema/SemaDiagnostic.h" @@ -68,13 +69,28 @@ ExprResult SemaCUDA::ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc, /*IsExecConfig=*/true); } -CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) { +namespace { + +// This iterator adaptor enables sharing a IdentifyTarget implementation for +// ParsedAttributesView and for vectors of AttributeCommonInfo::Kind. 
+struct AttrKindIterator
+    : llvm::iterator_adaptor_base<
+          AttrKindIterator, ParsedAttributesView::const_iterator,
+          std::random_access_iterator_tag, clang::AttributeCommonInfo::Kind> {
+  AttrKindIterator() : iterator_adaptor_base(nullptr) {}
+  AttrKindIterator(ParsedAttributesView::const_iterator I)
+      : iterator_adaptor_base(I) {}
+  clang::AttributeCommonInfo::Kind operator*() const { return I->getKind(); }
+};
+
+template <typename AKIterRange>
+CUDAFunctionTarget IdentifyTargetImpl(const AKIterRange &AttrKinds) {
   bool HasHostAttr = false;
   bool HasDeviceAttr = false;
   bool HasGlobalAttr = false;
   bool HasInvalidTargetAttr = false;
-  for (const ParsedAttr &AL : Attrs) {
-    switch (AL.getKind()) {
+  for (const auto &AK : AttrKinds) {
+    switch (AK) {
     case ParsedAttr::AT_CUDAGlobal:
       HasGlobalAttr = true;
       break;
@@ -107,6 +123,18 @@ CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) {
   return CUDAFunctionTarget::Host;
 }

+} // namespace
+
+CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) {
+  return IdentifyTargetImpl(make_range(AttrKindIterator(Attrs.begin()),
+                                       AttrKindIterator(Attrs.end())));
+}
+
+CUDAFunctionTarget SemaCUDA::IdentifyTarget(
+    const SmallVectorImpl<AttributeCommonInfo::Kind> &AttrKinds) {
+  return IdentifyTargetImpl(AttrKinds);
+}
+
 template <typename A>
 static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
   return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
@@ -115,20 +143,60 @@ static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
   });
 }

+CUDAFunctionTarget SemaCUDA::CUDATargetContext::getTarget() {
+  TargetQueried = true;
+  return Target;
+}
+
+void SemaCUDA::CUDATargetContext::tryRegisterTargetAttrs(
+    const ParsedAttributesView &Attrs) {
+  if (Kind != CTCK_Declaration)
+    return;
+  for (const auto &A : Attrs) {
+    auto AK = A.getKind();
+    switch (AK) {
+    case ParsedAttr::AT_CUDAGlobal:
+    case ParsedAttr::AT_CUDAHost:
+    case ParsedAttr::AT_CUDADevice:
+    case ParsedAttr::AT_CUDAInvalidTarget:
+      break;
+    default:
+      continue;
+    }
+    AttrKinds.push_back(AK);
+    CUDAFunctionTarget NewTarget = S->IdentifyTarget(AttrKinds);
+    if (TargetQueried && (NewTarget != Target))
+      S->Diag(A.getLoc(), diag::warn_target_specfier_ignored);
+    Target = NewTarget;
+  }
+}
+
 SemaCUDA::CUDATargetContextRAII::CUDATargetContextRAII(
     SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K, Decl *D)
     : S(S_) {
   SavedCtx = S.CurCUDATargetCtx;
-  assert(K == SemaCUDA::CTCK_InitGlobalVar);
-  auto *VD = dyn_cast_or_null<VarDecl>(D);
-  if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
-    auto Target = CUDAFunctionTarget::Host;
-    if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
-         !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
-        hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true) ||
-        hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true))
-      Target = CUDAFunctionTarget::Device;
-    S.CurCUDATargetCtx = {Target, K, VD};
+
+  switch (K) {
+  case SemaCUDA::CTCK_InitGlobalVar: {
+    auto *VD = dyn_cast_or_null<VarDecl>(D);
+    if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
+      auto Target = CUDAFunctionTarget::Host;
+      if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
+           !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
+          hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true) ||
+          hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true))
+        Target = CUDAFunctionTarget::Device;
+      S.CurCUDATargetCtx = CUDATargetContext(&S, K, Target);
+    }
+    break;
+  }
+  case SemaCUDA::CTCK_Declaration:
+    // The target is updated once relevant attributes are parsed. Initialize
+    // with the target used if no attributes are present: Host.
+    S.CurCUDATargetCtx = CUDATargetContext(&S, K, CUDAFunctionTarget::Host);
+    break;
+  default:
+    llvm_unreachable("unexpected context kind");
   }
 }

@@ -137,7 +205,7 @@ CUDAFunctionTarget SemaCUDA::IdentifyTarget(const FunctionDecl *D,
                                             bool IgnoreImplicitHDAttr) {
   // Code that lives outside a function gets the target from CurCUDATargetCtx.
   if (D == nullptr)
-    return CurCUDATargetCtx.Target;
+    return CurCUDATargetCtx.getTarget();

   if (D->hasAttr<CUDAInvalidTargetAttr>())
     return CUDAFunctionTarget::InvalidTarget;
@@ -232,7 +300,7 @@ SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
   // trivial ctor/dtor without device attr to be used. Non-trivial ctor/dtor
   // will be diagnosed by checkAllowedInitializer.
   if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar &&
-      CurCUDATargetCtx.Target == CUDAFunctionTarget::Device &&
+      CurCUDATargetCtx.getTarget() == CUDAFunctionTarget::Device &&
      (isa<CXXConstructorDecl>(Callee) || isa<CXXDestructorDecl>(Callee)))
     return CFP_HostDevice;

@@ -297,8 +365,16 @@ SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
       (CallerTarget == CUDAFunctionTarget::Device &&
        CalleeTarget == CUDAFunctionTarget::Host) ||
       (CallerTarget == CUDAFunctionTarget::Global &&
-       CalleeTarget == CUDAFunctionTarget::Host))
+       CalleeTarget == CUDAFunctionTarget::Host)) {
+    // In declaration contexts outside of function bodies and variable
+    // initializers, tolerate mismatched function targets as long as they are
+    // not codegened.
+    if (CurCUDATargetCtx.Kind == CTCK_Declaration &&
+        !this->SemaRef.getCurFunctionDecl(/*AllowLambda=*/true))
+      return CFP_WrongSide;
+
     return CFP_Never;
+  }

   llvm_unreachable("All cases should've been handled by now.");
 }
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 52f640eb96b73..e3703c2c735fe 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -10747,7 +10747,7 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc,
         llvm::any_of(Candidates, [&](OverloadCandidate *Cand) {
           // Check viable function only.
return Cand->Viable && Cand->Function && - S.CUDA().IdentifyPreference(Caller, Cand->Function) == + S.CUDA().IdentifyPreference(Caller, Cand->Function) >= SemaCUDA::CFP_SameSide; }); if (ContainsSameSideCandidate) { diff --git a/clang/test/SemaCUDA/target-overloads-availability-warnings.cu b/clang/test/SemaCUDA/target-overloads-availability-warnings.cu new file mode 100644 index 0000000000000..93498c6fe22e1 --- /dev/null +++ b/clang/test/SemaCUDA/target-overloads-availability-warnings.cu @@ -0,0 +1,199 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify=expected,onhost %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify=expected,ondevice %s + +template struct my_enable_if {}; + +template struct my_enable_if { + typedef T type; +}; + +__attribute__((host, device)) void use(int x); + +// For 'OverloadFunHostDepr', the host overload is deprecated, the device overload is not. +__attribute__((device)) constexpr int OverloadFunHostDepr(void) { return 1; } +__attribute__((host, deprecated("Host variant"))) constexpr int OverloadFunHostDepr(void) { return 1; } // expected-note 0+ {{has been explicitly marked deprecated here}} + + +// For 'OverloadFunDeviceDepr', the device overload is deprecated, the host overload is not. +__attribute__((device, deprecated("Device variant"))) constexpr int OverloadFunDeviceDepr(void) { return 1; } // expected-note 0+ {{has been explicitly marked deprecated here}} +__attribute__((host)) constexpr int OverloadFunDeviceDepr(void) { return 1; } + + +// For 'TemplateOverloadFun', the host overload is deprecated, the device overload is not. +template +__attribute__((device)) constexpr T TemplateOverloadFun(void) { return 1; } + +template +__attribute__((host, deprecated("Host variant"))) constexpr T TemplateOverloadFun(void) { return 1; } // expected-note 0+ {{has been explicitly marked deprecated here}} + + +// There is only a device overload, and it is deprecated. 
+__attribute__((device, deprecated)) constexpr int // expected-note 0+ {{has been explicitly marked deprecated here}} +DeviceOnlyFunDeprecated(void) { return 1; } + +// There is only a host overload, and it is deprecated. +__attribute__((host, deprecated)) constexpr int // expected-note 0+ {{has been explicitly marked deprecated here}} +HostOnlyFunDeprecated(void) { return 1; } + +class FunSelector { +public: + // This should use the non-deprecated device overload. + template __attribute__((device)) + auto devicefun(void) -> typename my_enable_if<(X == OverloadFunHostDepr()), int>::type { + return 1; + } + + // This should use the non-deprecated device overload. + template __attribute__((device)) + auto devicefun(void) -> typename my_enable_if<(X != OverloadFunHostDepr()), int>::type { + return 0; + } + + // This should use the deprecated device overload. + template __attribute__((device)) + auto devicefun_wrong(void) -> typename my_enable_if<(X == OverloadFunDeviceDepr()), int>::type { // expected-warning {{'OverloadFunDeviceDepr' is deprecated: Device variant}} + return 1; + } + + // This should use the deprecated device overload. + template __attribute__((device)) + auto devicefun_wrong(void) -> typename my_enable_if<(X != OverloadFunDeviceDepr()), int>::type { // expected-warning {{'OverloadFunDeviceDepr' is deprecated: Device variant}} + return 0; + } + + // This should use the non-deprecated host overload. + template __attribute__((host)) + auto hostfun(void) -> typename my_enable_if<(X == OverloadFunDeviceDepr()), int>::type { + return 1; + } + + // This should use the non-deprecated host overload. + template __attribute__((host)) + auto hostfun(void) -> typename my_enable_if<(X != OverloadFunDeviceDepr()), int>::type { + return 0; + } + + // This should use the deprecated host overload. 
+ template __attribute__((host)) + auto hostfun_wrong(void) -> typename my_enable_if<(X == OverloadFunHostDepr()), int>::type { // expected-warning {{'OverloadFunHostDepr' is deprecated: Host variant}} + return 1; + } + + // This should use the deprecated host overload. + template __attribute__((host)) + auto hostfun_wrong(void) -> typename my_enable_if<(X != OverloadFunHostDepr()), int>::type { // expected-warning {{'OverloadFunHostDepr' is deprecated: Host variant}} + return 0; + } +}; + + +// These should not be diagnosed since the device overload of +// OverloadFunHostDepr is not deprecated: +__attribute__((device)) my_enable_if<(OverloadFunHostDepr() > 0), int>::type +DeviceUserOverloadFunHostDepr1(void) { return 2; } + +__attribute__((device)) my_enable_if<(OverloadFunHostDepr() > 0), int>::type constexpr +DeviceUserOverloadFunHostDeprConstexpr(void) { return 2; } + + +// Analogously for OverloadFunDeviceDepr: +__attribute__((host)) my_enable_if<(OverloadFunDeviceDepr() > 0), int>::type +DeviceUserOverloadFunDeviceDepr1(void) { return 2; } + +my_enable_if<(OverloadFunDeviceDepr() > 0), int>::type __attribute__((host)) +DeviceUserOverloadFunDeviceDepr2(void) { return 2; } + +__attribute__((host)) my_enable_if<(OverloadFunDeviceDepr() > 0), int>::type constexpr +DeviceUserOverloadFunDeviceDeprConstexpr(void) { return 2; } + + +// Actual uses of the deprecated overloads should be diagnosed: +__attribute__((host, device)) my_enable_if<(OverloadFunHostDepr() > 0), int>::type // onhost-warning {{'OverloadFunHostDepr' is deprecated: Host variant}} +HostDeviceUserOverloadFunHostDepr(void) { return 3; } + +__attribute__((host)) my_enable_if<(OverloadFunHostDepr() > 0), int>::type constexpr // expected-warning {{'OverloadFunHostDepr' is deprecated: Host variant}} +HostUserOverloadFunHostDeprConstexpr(void) { return 3; } + +__attribute__((device)) my_enable_if<(OverloadFunDeviceDepr() > 0), int>::type constexpr // expected-warning {{'OverloadFunDeviceDepr' is 
deprecated: Device variant}} +HostUserOverloadFunDeviceDeprConstexpr(void) { return 3; } + + +// Making the offending decl a template shouldn't change anything: +__attribute__((host)) my_enable_if<(TemplateOverloadFun() > 0), int>::type // expected-warning {{'TemplateOverloadFun' is deprecated: Host variant}} +HostUserTemplateOverloadFun(void) { return 3; } + +__attribute__((device)) my_enable_if<(TemplateOverloadFun() > 0), int>::type +DeviceUserTemplateOverloadFun(void) { return 3; } + + +__attribute__((device, deprecated)) constexpr int DeviceVarConstDepr = 1; // expected-note 0+ {{has been explicitly marked deprecated here}} + +// Diagnostics for uses in function bodies should work as expected: +__attribute__((host)) void HostUser(void) { + use(DeviceVarConstDepr); // expected-warning {{'DeviceVarConstDepr' is deprecated}} + use(HostOnlyFunDeprecated()); // expected-warning {{'HostOnlyFunDeprecated' is deprecated}} + use(OverloadFunHostDepr()); // expected-warning {{'OverloadFunHostDepr' is deprecated: Host variant}} + use(TemplateOverloadFun()); // expected-warning {{'TemplateOverloadFun' is deprecated: Host variant}} + + use(OverloadFunDeviceDepr()); +} + +__attribute__((device)) void DeviceUser(void) { + use(DeviceVarConstDepr); // expected-warning {{'DeviceVarConstDepr' is deprecated}} + use(DeviceOnlyFunDeprecated()); // expected-warning {{'DeviceOnlyFunDeprecated' is deprecated}} + use(OverloadFunDeviceDepr()); // expected-warning {{'OverloadFunDeviceDepr' is deprecated: Device variant}} + + use(OverloadFunHostDepr()); + use(TemplateOverloadFun()); +} + + +// Template functions outside of classes: + +// This should use the non-deprecated device overload. +template __attribute__((device)) +auto devicefun(void) -> typename my_enable_if<(X == OverloadFunHostDepr()), int>::type { + return 1; +} + +// This should use the non-deprecated device overload. 
+template __attribute__((device)) +auto devicefun(void) -> typename my_enable_if<(X != OverloadFunHostDepr()), int>::type { + return 0; +} + +// This should use the deprecated device overload. +template __attribute__((device)) +auto devicefun_wrong(void) -> typename my_enable_if<(X == OverloadFunDeviceDepr()), int>::type { // expected-warning {{'OverloadFunDeviceDepr' is deprecated: Device variant}} + return 1; +} + +// This should use the deprecated device overload. +template __attribute__((device)) +auto devicefun_wrong(void) -> typename my_enable_if<(X != OverloadFunDeviceDepr()), int>::type { // expected-warning {{'OverloadFunDeviceDepr' is deprecated: Device variant}} + return 0; +} + +// This should use the non-deprecated host overload. +template __attribute__((host)) +auto hostfun(void) -> typename my_enable_if<(X == OverloadFunDeviceDepr()), int>::type { + return 1; +} + +// This should use the non-deprecated host overload. +template __attribute__((host)) +auto hostfun(void) -> typename my_enable_if<(X != OverloadFunDeviceDepr()), int>::type { + return 0; +} + +// This should use the deprecated host overload. +template __attribute__((host)) +auto hostfun_wrong(void) -> typename my_enable_if<(X == OverloadFunHostDepr()), int>::type { // expected-warning {{'OverloadFunHostDepr' is deprecated: Host variant}} + return 1; +} + +// This should use the deprecated host overload. 
+template __attribute__((host)) +auto hostfun_wrong(void) -> typename my_enable_if<(X != OverloadFunHostDepr()), int>::type { // expected-warning {{'OverloadFunHostDepr' is deprecated: Host variant}} + return 0; +} diff --git a/clang/test/SemaCUDA/target-overloads-in-function-prototypes.cu b/clang/test/SemaCUDA/target-overloads-in-function-prototypes.cu new file mode 100644 index 0000000000000..0d8f9aa8676b5 --- /dev/null +++ b/clang/test/SemaCUDA/target-overloads-in-function-prototypes.cu @@ -0,0 +1,703 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify=expected,onhost %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify=expected,ondevice %s + + +// Tests to ensure that functions with host and device overloads in that are +// called outside of function bodies and variable initializers, e.g., in +// template arguments are resolved with respect to the declaration to which they +// belong. + +// Opaque types used for tests: +struct DeviceTy {}; +struct HostTy {}; +struct HostDeviceTy {}; +struct TemplateTy {}; + +struct TrueTy { static const bool value = true; }; +struct FalseTy { static const bool value = false; }; + +// Select one of two types based on a boolean condition. +template struct select_type {}; +template struct select_type { typedef T type; }; +template struct select_type { typedef F type; }; + +template struct check : public select_type { }; + +// Check if two types are the same. +template struct is_same : public FalseTy { }; +template struct is_same : public TrueTy { }; + +// A static assertion that fails at compile time if the expression E does not +// have type T. +#define ASSERT_HAS_TYPE(E, T) static_assert(is_same::value); + + +// is_on_device() is true when called in a device context and false if called in a host context. 
+__attribute__((host)) constexpr bool is_on_device(void) { return false; } +__attribute__((device)) constexpr bool is_on_device(void) { return true; } + + +// this type depends on whether it occurs in host or device code +#define targetdep_t select_type::type + +// Defines and typedefs with different values in host and device compilation. +#ifdef __CUDA_ARCH__ +#define CurrentTarget DEVICE +typedef DeviceTy CurrentTargetTy; +typedef DeviceTy TemplateIfHostTy; +#else +#define CurrentTarget HOST +typedef HostTy CurrentTargetTy; +typedef TemplateTy TemplateIfHostTy; +#endif + + + +// targetdep_t in function declarations should depend on the target of the +// declared function. +__attribute__((device)) targetdep_t decl_ret_early_device(void); +ASSERT_HAS_TYPE(decl_ret_early_device(), DeviceTy) + +__attribute__((host)) targetdep_t decl_ret_early_host(void); +ASSERT_HAS_TYPE(decl_ret_early_host(), HostTy) + +__attribute__((host,device)) targetdep_t decl_ret_early_host_device(void); +ASSERT_HAS_TYPE(decl_ret_early_host_device(), CurrentTargetTy) + +// If the function target is specified too late and can therefore not be +// considered for overload resolution in targetdep_t, warn. +targetdep_t __attribute__((device)) decl_ret_late_device(void); // expected-warning {{target attribute has been ignored for overload resolution}} +ASSERT_HAS_TYPE(decl_ret_late_device(), HostTy) + +// No warning necessary if the ignored attribute doesn't change the result. +targetdep_t __attribute__((host)) decl_ret_late_host(void); +ASSERT_HAS_TYPE(decl_ret_late_host(), HostTy) + +targetdep_t __attribute__((host,device)) decl_ret_late_host_device(void); // expected-warning {{target attribute has been ignored for overload resolution}} +ASSERT_HAS_TYPE(decl_ret_late_host_device(), HostTy) + +// An odd way of writing this, but it's possible. 
+__attribute__((device)) targetdep_t __attribute__((host)) decl_ret_early_device_late_host(void); // expected-warning {{target attribute has been ignored for overload resolution}} +ASSERT_HAS_TYPE(decl_ret_early_device_late_host(), DeviceTy) + + +// The same for function definitions and parameter types: +__attribute__((device)) targetdep_t ret_early_device(targetdep_t x) { + ASSERT_HAS_TYPE(ret_early_device({}), DeviceTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; +} + +__attribute__((host)) targetdep_t ret_early_host(targetdep_t x) { + ASSERT_HAS_TYPE(ret_early_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; +} + +__attribute__((host, device)) targetdep_t ret_early_hostdevice(targetdep_t x) { + ASSERT_HAS_TYPE(ret_early_hostdevice({}), CurrentTargetTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + +// The parameter is still after the attribute, so it needs no warning. +targetdep_t __attribute__((device)) // expected-warning {{target attribute has been ignored for overload resolution}} +ret_late_device(targetdep_t x) { + ASSERT_HAS_TYPE(ret_late_device({}), HostTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; +} + +targetdep_t __attribute__((host, device)) // expected-warning {{target attribute has been ignored for overload resolution}} +ret_late_hostdevice(targetdep_t x) { + ASSERT_HAS_TYPE(ret_late_hostdevice({}), HostTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + +targetdep_t __attribute__((host)) ret_late_host(targetdep_t x) { + ASSERT_HAS_TYPE(ret_late_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; +} + +__attribute__((device)) targetdep_t __attribute__((host)) // expected-warning {{target attribute has been ignored for overload resolution}} +ret_early_device_late_host(targetdep_t x) { + ASSERT_HAS_TYPE(ret_early_device_late_host({}), DeviceTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + +// The attribute is even later, so we can't choose the expected overload. 
+targetdep_t ret_verylate_device(targetdep_t x) __attribute__((device)) { // expected-warning {{target attribute has been ignored for overload resolution}} + ASSERT_HAS_TYPE(ret_verylate_device({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; +} + +// It's possible to get two different wrong types: +targetdep_t __attribute__((device)) // expected-warning {{target attribute has been ignored for overload resolution}} +ret_late_device_verylate_host(targetdep_t x) __attribute__((host)) { // expected-warning {{target attribute has been ignored for overload resolution}} + ASSERT_HAS_TYPE(ret_late_device_verylate_host({}), HostTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; +} + + +// Increasingly unusual ways to specify a return type: + +// The attribute is specified much earlier than the overload happens, works as +// expected. +__attribute__((device)) auto autoret_early_device(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(autoret_early_device({}), DeviceTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; +} + +// The attribute is specified much earlier than the overload happens, works as +// expected. +__attribute__((host)) auto autoret_early_host(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(autoret_early_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; +} + +// The attribute is specified much earlier than the overload happens, works as +// expected. +__attribute__((host,device)) auto autoret_early_hostdevice(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(autoret_early_hostdevice({}), CurrentTargetTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + + +// The attribute is still specified earlier than the overload happens, works as +// expected. +auto __attribute__((device)) autoret_late_device(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(autoret_late_device({}), DeviceTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; +} + +// The attribute is still specified earlier than the overload happens, works as +// expected. 
+auto __attribute__((host)) autoret_late_host(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(autoret_late_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; +} + +// The attribute is still specified earlier than the overload happens, works as +// expected. +auto __attribute__((host,device)) autoret_late_hostdevice(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(autoret_late_hostdevice({}), CurrentTargetTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + + +// There should be no problem if the return type is inferred from an expression in the body: +auto __attribute__((device)) fullauto_device(targetdep_t x) { + ASSERT_HAS_TYPE(x, DeviceTy) + return (targetdep_t)(x); +} +ASSERT_HAS_TYPE(fullauto_device({}), DeviceTy) + +auto __attribute__((host)) fullauto_host(targetdep_t x) { + ASSERT_HAS_TYPE(x, HostTy) + return (targetdep_t)(x); +} +ASSERT_HAS_TYPE(fullauto_host({}), HostTy) + +// The return type is as expected, but the argument type precedes the attribute, +// so we don't get the right type for it. 
+auto fullauto_verylate_device(targetdep_t x) __attribute__((device)) { // expected-warning {{target attribute has been ignored for overload resolution}} + ASSERT_HAS_TYPE(x, HostTy) + return targetdep_t(); +} +ASSERT_HAS_TYPE(fullauto_verylate_device({}), DeviceTy) + +auto fullauto_verylate_host(targetdep_t x) __attribute__((host)) { + ASSERT_HAS_TYPE(x, HostTy) + return targetdep_t(); +} +ASSERT_HAS_TYPE(fullauto_verylate_host({}), HostTy) + + +// MS __declspec syntax: +__declspec(__device__) targetdep_t ms_ret_early_device(targetdep_t x) { + ASSERT_HAS_TYPE(ms_ret_early_device({}), DeviceTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; +} + +__declspec(__host__) targetdep_t ms_ret_early_host(targetdep_t x) { + ASSERT_HAS_TYPE(ms_ret_early_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; +} + +__declspec(__host__) __declspec(__device__) targetdep_t ms_ret_early_hostdevice(targetdep_t x) { + ASSERT_HAS_TYPE(ms_ret_early_hostdevice({}), CurrentTargetTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + +targetdep_t __declspec(__device__) ms_ret_late_device(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}} + ASSERT_HAS_TYPE(ms_ret_late_device({}), HostTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; +} + +targetdep_t __declspec(__host__) ms_ret_late_host(targetdep_t x) { + ASSERT_HAS_TYPE(ms_ret_late_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; +} + +targetdep_t __declspec(__host__) __declspec(__device__) ms_ret_late_hostdevice(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}} + ASSERT_HAS_TYPE(ms_ret_late_hostdevice({}), HostTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + +__declspec(__device__) targetdep_t __declspec(__host__) ms_ret_early_device_late_host(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}} + ASSERT_HAS_TYPE(ms_ret_early_device_late_host({}), DeviceTy) + 
ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + +__declspec(__device__) auto ms_autoret_early_device(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(ms_autoret_early_device({}), DeviceTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; +} + +__declspec(__host__) auto ms_autoret_early_host(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(ms_autoret_early_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; +} + +__declspec(__host__) __declspec(__device__) auto ms_autoret_early_hostdevice(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(ms_autoret_early_hostdevice({}), CurrentTargetTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + + +auto __declspec(__device__) ms_autoret_late_device(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(ms_autoret_late_device({}), DeviceTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; +} + +auto __declspec(__host__) ms_autoret_late_host(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(ms_autoret_late_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; +} + +auto __declspec(__host__) __declspec(__device__) ms_autoret_late_hostdevice(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(ms_autoret_late_hostdevice({}), CurrentTargetTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + + +// Class/Struct member functions: + +struct MethodTests { + __attribute__((device)) targetdep_t ret_early_device(targetdep_t x) { + ASSERT_HAS_TYPE(ret_early_device({}), DeviceTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; + } + + __attribute__((host)) targetdep_t ret_early_host(targetdep_t x) { + ASSERT_HAS_TYPE(ret_early_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; + } + + __attribute__((host,device)) targetdep_t ret_early_hostdevice(targetdep_t x) { + ASSERT_HAS_TYPE(ret_early_hostdevice({}), CurrentTargetTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; + } + + __attribute__((device)) auto autoret_early_device(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(autoret_early_device({}), DeviceTy) + 
ASSERT_HAS_TYPE(x, DeviceTy) + return {}; + } + __attribute__((host)) auto autoret_early_host(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(autoret_early_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; + } + + __attribute__((host,device)) auto autoret_early_hostdevice(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(autoret_early_hostdevice({}), CurrentTargetTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; + } + + + // Overloaded call happens in return type, attribute is after that. + targetdep_t __attribute__((device)) ret_late_device(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}} + ASSERT_HAS_TYPE(ret_late_device({}), HostTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; + } + + targetdep_t __attribute__((host)) ret_late_host(targetdep_t x) { + ASSERT_HAS_TYPE(ret_late_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; + } + + targetdep_t __attribute__((host,device)) ret_late_hostdevice(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}} + ASSERT_HAS_TYPE(ret_late_hostdevice({}), HostTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; + } + + + // Member declarations (tested in the 'tests' function further below): + __attribute__((device)) targetdep_t decl_ret_early_device(void); + __attribute__((host)) targetdep_t decl_ret_early_host(void); + __attribute__((host,device)) targetdep_t decl_ret_early_hostdevice(void); + targetdep_t __attribute__((device)) decl_ret_late_device(void); // expected-warning {{target attribute has been ignored for overload resolution}} + targetdep_t __attribute__((host)) decl_ret_late_host(void); + targetdep_t __attribute__((host,device)) decl_ret_late_hostdevice(void); // expected-warning {{target attribute has been ignored for overload resolution}} + + // for out of line definitions: + __attribute__((device)) targetdep_t ool_ret_early_device(targetdep_t x); + __attribute__((host)) targetdep_t 
ool_ret_early_host(targetdep_t x); + __attribute__((host,device)) targetdep_t ool_ret_early_hostdevice(targetdep_t x); + targetdep_t __attribute__((device)) ool_ret_late_device(targetdep_t x); // expected-warning {{target attribute has been ignored for overload resolution}} + targetdep_t __attribute__((host)) ool_ret_late_host(targetdep_t x); + targetdep_t __attribute__((host,device)) ool_ret_late_hostdevice(targetdep_t x); // expected-warning {{target attribute has been ignored for overload resolution}} + +}; + +__attribute__((device)) targetdep_t MethodTests::ool_ret_early_device(targetdep_t x) { + ASSERT_HAS_TYPE(ool_ret_early_device({}), DeviceTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; +} + +__attribute__((host)) targetdep_t MethodTests::ool_ret_early_host(targetdep_t x) { + ASSERT_HAS_TYPE(ool_ret_early_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; +} + +__attribute__((host,device)) targetdep_t MethodTests::ool_ret_early_hostdevice(targetdep_t x) { + ASSERT_HAS_TYPE(ool_ret_early_hostdevice({}), CurrentTargetTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + +targetdep_t __attribute__((device)) MethodTests::ool_ret_late_device(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}} + ASSERT_HAS_TYPE(ool_ret_late_device({}), HostTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; +} + +targetdep_t __attribute__((host)) MethodTests::ool_ret_late_host(targetdep_t x) { + ASSERT_HAS_TYPE(ool_ret_late_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; +} + +targetdep_t __attribute__((host,device)) MethodTests::ool_ret_late_hostdevice(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}} + ASSERT_HAS_TYPE(ool_ret_late_hostdevice({}), HostTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + + +// members of templated structs should also work. 
+template <int N>
+struct TemplateMethodTests {
+  __attribute__((device)) targetdep_t ret_early_device(targetdep_t x) {
+    ASSERT_HAS_TYPE(ret_early_device({}), DeviceTy)
+    ASSERT_HAS_TYPE(x, DeviceTy)
+    return {};
+  }
+
+  __attribute__((host)) targetdep_t ret_early_host(targetdep_t x) {
+    ASSERT_HAS_TYPE(ret_early_host({}), HostTy)
+    ASSERT_HAS_TYPE(x, HostTy)
+    return {};
+  }
+
+  __attribute__((host,device)) targetdep_t ret_early_hostdevice(targetdep_t x) {
+    ASSERT_HAS_TYPE(ret_early_hostdevice({}), CurrentTargetTy)
+    ASSERT_HAS_TYPE(x, CurrentTargetTy)
+    return {};
+  }
+
+  __attribute__((device)) auto autoret_early_device(targetdep_t x) -> targetdep_t {
+    ASSERT_HAS_TYPE(autoret_early_device({}), DeviceTy)
+    ASSERT_HAS_TYPE(x, DeviceTy)
+    return {};
+  }
+
+  __attribute__((host)) auto autoret_early_host(targetdep_t x) -> targetdep_t {
+    ASSERT_HAS_TYPE(autoret_early_host({}), HostTy)
+    ASSERT_HAS_TYPE(x, HostTy)
+    return {};
+  }
+
+  __attribute__((host,device)) auto autoret_early_hostdevice(targetdep_t x) -> targetdep_t {
+    ASSERT_HAS_TYPE(autoret_early_hostdevice({}), CurrentTargetTy)
+    ASSERT_HAS_TYPE(x, CurrentTargetTy)
+    return {};
+  }
+
+  targetdep_t __attribute__((device)) ret_late_device(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}}
+    ASSERT_HAS_TYPE(ret_late_device({}), HostTy)
+    ASSERT_HAS_TYPE(x, DeviceTy)
+    return {};
+  }
+
+  targetdep_t __attribute__((host)) ret_late_host(targetdep_t x) {
+    ASSERT_HAS_TYPE(ret_late_host({}), HostTy)
+    ASSERT_HAS_TYPE(x, HostTy)
+    return {};
+  }
+
+  targetdep_t __attribute__((host,device)) ret_late_hostdevice(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}}
+    ASSERT_HAS_TYPE(ret_late_hostdevice({}), HostTy)
+    ASSERT_HAS_TYPE(x, CurrentTargetTy)
+    return {};
+  }
+
+
+  __attribute__((device)) targetdep_t decl_ret_early_device(void);
+  __attribute__((host)) targetdep_t decl_ret_early_host(void);
+  __attribute__((host,device)) 
targetdep_t decl_ret_early_hostdevice(void); + + targetdep_t __attribute__((device)) decl_ret_late_device(void); // expected-warning {{target attribute has been ignored for overload resolution}} + targetdep_t __attribute__((host)) decl_ret_late_host(void); + targetdep_t __attribute__((host,device)) decl_ret_late_hostdevice(void); // expected-warning {{target attribute has been ignored for overload resolution}} +}; + +void tests(void) { + MethodTests mt; + + ASSERT_HAS_TYPE(mt.ret_early_device({}), DeviceTy) + ASSERT_HAS_TYPE(mt.ret_early_host({}), HostTy) + ASSERT_HAS_TYPE(mt.ret_early_hostdevice({}), CurrentTargetTy) + + ASSERT_HAS_TYPE(mt.autoret_early_device({}), DeviceTy) + ASSERT_HAS_TYPE(mt.autoret_early_host({}), HostTy) + ASSERT_HAS_TYPE(mt.autoret_early_hostdevice({}), CurrentTargetTy) + + // The target attribute is too late to be considered: + ASSERT_HAS_TYPE(mt.ret_late_device({}), HostTy) + ASSERT_HAS_TYPE(mt.ret_late_host({}), HostTy) + ASSERT_HAS_TYPE(mt.ret_late_hostdevice({}), HostTy) + + ASSERT_HAS_TYPE(mt.decl_ret_early_device(), DeviceTy) + ASSERT_HAS_TYPE(mt.decl_ret_early_host(), HostTy) + ASSERT_HAS_TYPE(mt.decl_ret_early_hostdevice(), CurrentTargetTy) + + // The target attribute is too late to be considered: + ASSERT_HAS_TYPE(mt.decl_ret_late_device(), HostTy) + ASSERT_HAS_TYPE(mt.decl_ret_late_host(), HostTy) + ASSERT_HAS_TYPE(mt.decl_ret_late_hostdevice(), HostTy) + + TemplateMethodTests<42> tmt; + ASSERT_HAS_TYPE(tmt.ret_early_device({}), DeviceTy) + ASSERT_HAS_TYPE(tmt.ret_early_host({}), HostTy) + ASSERT_HAS_TYPE(tmt.ret_early_hostdevice({}), CurrentTargetTy) + + ASSERT_HAS_TYPE(tmt.autoret_early_device({}), DeviceTy) + ASSERT_HAS_TYPE(tmt.autoret_early_host({}), HostTy) + ASSERT_HAS_TYPE(tmt.autoret_early_hostdevice({}), CurrentTargetTy) + + ASSERT_HAS_TYPE(tmt.ret_late_device({}), HostTy) + ASSERT_HAS_TYPE(tmt.ret_late_host({}), HostTy) + ASSERT_HAS_TYPE(tmt.ret_late_hostdevice({}), HostTy) + + 
ASSERT_HAS_TYPE(tmt.decl_ret_early_device(), DeviceTy)
+  ASSERT_HAS_TYPE(tmt.decl_ret_early_host(), HostTy)
+  ASSERT_HAS_TYPE(tmt.decl_ret_early_hostdevice(), CurrentTargetTy)
+
+  ASSERT_HAS_TYPE(tmt.decl_ret_late_device(), HostTy)
+  ASSERT_HAS_TYPE(tmt.decl_ret_late_host(), HostTy)
+  ASSERT_HAS_TYPE(tmt.decl_ret_late_hostdevice(), HostTy)
+}
+
+
+// global variables:
+__attribute__((device)) targetdep_t var_early_device = {};
+ASSERT_HAS_TYPE(var_early_device, DeviceTy)
+
+targetdep_t var_early_host = {};
+ASSERT_HAS_TYPE(var_early_host, HostTy)
+
+targetdep_t __attribute__((device)) var_late_device = {}; // expected-warning {{target attribute has been ignored for overload resolution}}
+ASSERT_HAS_TYPE(var_late_device, HostTy)
+
+
+// Tests for the overload candidate ordering compared to templates:
+
+enum Candidate {
+  TEMPLATE,
+  HOST,
+  DEVICE,
+  HOSTDEVICE,
+};
+
+// (1.) If the overloaded functions are constexpr
+
+// (1.a) Prefer fitting overloads.
+template <class T> constexpr Candidate ce_template_vs_H_D_functions(T arg) { return TEMPLATE; }
+__attribute__((device)) constexpr Candidate ce_template_vs_H_D_functions(float arg) { return DEVICE; }
+__attribute__((host)) constexpr Candidate ce_template_vs_H_D_functions(float arg) { return HOST; }
+
+__attribute__((device)) check::type
+test_ce_template_vs_H_D_functions_for_device() {
+  return TrueTy();
+}
+
+__attribute__((host)) check::type
+test_ce_template_vs_H_D_functions_for_host() {
+  return TrueTy();
+}
+
+__attribute__((host,device)) check::type
+test_ce_template_vs_H_D_functions_for_hd() {
+  return TrueTy();
+}
+
+
+// (1.b) Always prefer an HD candidate over a template candidate. 
+template <class T> constexpr Candidate ce_template_vs_HD_function(T arg) { return TEMPLATE; }
+__attribute__((host, device)) constexpr Candidate ce_template_vs_HD_function(float arg) { return HOSTDEVICE; }
+
+__attribute__((device)) check::type
+test_ce_template_vs_HD_function_for_device() {
+  return TrueTy();
+}
+
+__attribute__((host)) check::type
+test_ce_template_vs_HD_function_for_host() {
+  return TrueTy();
+}
+
+__attribute__((host,device)) check::type
+test_ce_template_vs_HD_function_for_hd() {
+  return TrueTy();
+}
+
+
+// (1.c) Even wrong-sided calls are okay if the called function is constexpr, so
+// prefer the device overload over the template.
+template <class T> constexpr Candidate ce_template_vs_D_function(T arg) { return TEMPLATE; }
+__attribute__((device)) constexpr Candidate ce_template_vs_D_function(float arg) { return DEVICE; }
+
+__attribute__((host)) check::type
+test_ce_template_vs_D_function_for_host() {
+  return TrueTy();
+}
+
+__attribute__((device)) check::type
+test_ce_template_vs_D_function_for_device() {
+  return TrueTy();
+}
+
+__attribute__((host,device)) check::type
+test_ce_template_vs_D_function_for_hd() {
+  return TrueTy();
+}
+
+
+// (2.) If the overloaded functions are NOT constexpr
+
+// (2.a) Prefer fitting overloads.
+template <class T> TemplateTy template_vs_H_D_functions(T arg) { return {}; }
+__attribute__((device)) DeviceTy template_vs_H_D_functions(float arg) { return {}; }
+__attribute__((host)) HostTy template_vs_H_D_functions(float arg) { return {}; }
+
+__attribute__((device)) check::value>::type
+test_template_vs_H_D_functions_for_device() {
+  return TrueTy{};
+}
+
+__attribute__((host)) check::value>::type
+test_template_vs_H_D_functions_for_host() {
+  return TrueTy{};
+}
+
+__attribute__((host,device)) check::value>::type
+test_template_vs_H_D_functions_for_hd() {
+  return TrueTy{};
+}
+
+// (2.b) Always prefer an HD candidate over a template candidate. 
+template <class T> TemplateTy template_vs_HD_function(T arg) { return {}; }
+__attribute__((host,device)) HostDeviceTy template_vs_HD_function(float arg) { return {}; }
+
+__attribute__((device)) check::value>::type
+test_template_vs_HD_function_for_device() {
+  return TrueTy{};
+}
+
+__attribute__((host)) check::value>::type
+test_template_vs_HD_function_for_host() {
+  return TrueTy{};
+}
+
+__attribute__((host,device)) check::value>::type
+test_template_vs_HD_function_for_hd() {
+  return TrueTy{};
+}
+
+
+// (2.c) For non-constexpr functions, prefer a sameside or native template
+// function over a wrongside non-template function:
+template <class T> TemplateTy template_vs_D_function(T arg) { return {}; }
+__attribute__((device)) DeviceTy template_vs_D_function(float arg) { return {}; }
+
+__attribute__((host,device)) check::value>::type
+test_template_vs_D_function_for_hd() {
+  return TrueTy{};
+}
+
+__attribute__((device)) check::value>::type
+test_template_vs_D_function_for_device() {
+  return TrueTy{};
+}
+
+__attribute__((host)) check::value>::type
+test_template_vs_D_function_for_host() {
+  return TrueTy{};
+}
+
+
+// If only a wrongside function is available, it is selected.
+__attribute__((device)) DeviceTy only_D_function(float arg) { return {}; }
+
+__attribute__((host)) check::value>::type
+test_only_D_function_for_host() {
+  return TrueTy{};
+}
+
+// Default arguments for template parameters occur before the target attribute,
+// so we can't identify the "right" overload for them.
+template <class T = targetdep_t>
+__attribute__((device)) // expected-warning {{target attribute has been ignored for overload resolution}}
+T use_in_template_default_arg(void) {
+  return HostTy{};
+}
+
+__attribute__((device))
+void test_use_in_template(void) {
+  use_in_template_default_arg<>();
+}