diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 554dbaff2ce0d..3497dd53e190d 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9017,6 +9017,10 @@ def err_global_call_not_config : Error<
 def err_ref_bad_target : Error<
   "reference to %select{__device__|__global__|__host__|__host__ __device__}0 "
   "%select{function|variable}1 %2 in %select{__device__|__global__|__host__|__host__ __device__}3 function">;
+def warn_target_specfier_ignored : Warning<
+  "target attribute has been ignored for overload resolution; "
+  "move the target attribute to the beginning of the declaration to use it for overload resolution">,
+  InGroup<IgnoredAttributes>;
 def note_cuda_const_var_unpromoted : Note<
   "const variable cannot be emitted on device side due to dynamic initialization">;
 def note_cuda_host_var : Note<
diff --git a/clang/include/clang/Sema/SemaCUDA.h b/clang/include/clang/Sema/SemaCUDA.h
index 63dc3f4da240b..26d839db7fe6c 100644
--- a/clang/include/clang/Sema/SemaCUDA.h
+++ b/clang/include/clang/Sema/SemaCUDA.h
@@ -104,6 +104,8 @@ class SemaCUDA : public SemaBase {
   CUDAFunctionTarget IdentifyTarget(const FunctionDecl *D,
                                     bool IgnoreImplicitHDAttr = false);
   CUDAFunctionTarget IdentifyTarget(const ParsedAttributesView &Attrs);
+  CUDAFunctionTarget IdentifyTarget(
+      const SmallVectorImpl<AttributeCommonInfo::Kind> &AttrKinds);

   enum CUDAVariableTarget {
     CVT_Device,  /// Emitted on device side with a shadow variable on host side
@@ -120,21 +122,44 @@ class SemaCUDA : public SemaBase {
     CTCK_Unknown,       /// Unknown context
     CTCK_InitGlobalVar, /// Function called during global variable
                         /// initialization
+    CTCK_Declaration,   /// Function called in a declaration specifier or
+                        /// declarator outside of other contexts, usually in
+                        /// template arguments.
   };

   /// Define the current global CUDA host/device context where a function may be
   /// called. Only used when a function is called outside of any functions.
-  struct CUDATargetContext {
-    CUDAFunctionTarget Target = CUDAFunctionTarget::HostDevice;
+  class CUDATargetContext {
+  public:
     CUDATargetContextKind Kind = CTCK_Unknown;
-    Decl *D = nullptr;
+
+    CUDATargetContext() = default;
+
+    CUDATargetContext(SemaCUDA *S, CUDATargetContextKind Kind,
+                      CUDAFunctionTarget Target)
+        : Kind(Kind), S(S), Target(Target) {}
+
+    CUDAFunctionTarget getTarget();
+
+    /// If this is a CTCK_Declaration context, update the Target based on Attrs.
+    /// No-op otherwise.
+    /// Issues a diagnostic if the target changes after it has been queried
+    /// before.
+    void tryRegisterTargetAttrs(const ParsedAttributesView &Attrs);
+
+  private:
+    SemaCUDA *S = nullptr;
+    CUDAFunctionTarget Target = CUDAFunctionTarget::HostDevice;
+    SmallVector<AttributeCommonInfo::Kind> AttrKinds;
+    bool TargetQueried = false;
   } CurCUDATargetCtx;

   struct CUDATargetContextRAII {
     SemaCUDA &S;
     SemaCUDA::CUDATargetContext SavedCtx;
     CUDATargetContextRAII(SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K,
-                          Decl *D);
+                          Decl *D = nullptr);
     ~CUDATargetContextRAII() { S.CurCUDATargetCtx = SavedCtx; }
   };
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index a8a9d3f3f5b08..e6270b252cda8 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -311,6 +311,9 @@ void Parser::ParseGNUAttributes(ParsedAttributes &Attrs,
   }

   Attrs.Range = SourceRange(StartLoc, EndLoc);
+
+  if (Actions.getLangOpts().CUDA)
+    Actions.CUDA().CurCUDATargetCtx.tryRegisterTargetAttrs(Attrs);
 }

 /// Determine whether the given attribute has an identifier argument.
@@ -1003,6 +1006,9 @@ void Parser::ParseMicrosoftDeclSpecs(ParsedAttributes &Attrs) { } Attrs.Range = SourceRange(StartLoc, EndLoc); + + if (Actions.getLangOpts().CUDA) + Actions.CUDA().CurCUDATargetCtx.tryRegisterTargetAttrs(Attrs); } void Parser::ParseMicrosoftTypeAttributes(ParsedAttributes &attrs) { @@ -2023,6 +2029,13 @@ Parser::DeclGroupPtrTy Parser::ParseDeclaration(DeclaratorContext Context, // parsing c none objective-c decls. ObjCDeclContextSwitch ObjCDC(*this); + SemaCUDA::CUDATargetContextRAII CTCRAII(Actions.CUDA(), + SemaCUDA::CTCK_Declaration); + if (Actions.getLangOpts().CUDA) { + Actions.CUDA().CurCUDATargetCtx.tryRegisterTargetAttrs(DeclAttrs); + Actions.CUDA().CurCUDATargetCtx.tryRegisterTargetAttrs(DeclSpecAttrs); + } + Decl *SingleDecl = nullptr; switch (Tok.getKind()) { case tok::kw_template: diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index aac89d910bbc8..0001073104333 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -27,6 +27,7 @@ #include "clang/Sema/EnterExpressionEvaluationContext.h" #include "clang/Sema/ParsedTemplate.h" #include "clang/Sema/Scope.h" +#include "clang/Sema/SemaCUDA.h" #include "clang/Sema/SemaCodeCompletion.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/TimeProfiler.h" @@ -2852,6 +2853,11 @@ Parser::DeclGroupPtrTy Parser::ParseCXXClassMemberDeclaration( ParsedTemplateInfo &TemplateInfo, ParsingDeclRAIIObject *TemplateDiags) { assert(getLangOpts().CPlusPlus && "ParseCXXClassMemberDeclaration should only be called in C++ mode"); + SemaCUDA::CUDATargetContextRAII CTCRAII(Actions.CUDA(), + SemaCUDA::CTCK_Declaration); + if (Actions.getLangOpts().CUDA) + Actions.CUDA().CurCUDATargetCtx.tryRegisterTargetAttrs(AccessAttrs); + if (Tok.is(tok::at)) { if (getLangOpts().ObjC && NextToken().isObjCAtKeyword(tok::objc_defs)) Diag(Tok, diag::err_at_defs_cxx); diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index 
04c2f1d380bc4..b7bc11964e968 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -21,6 +21,7 @@ #include "clang/Sema/DeclSpec.h" #include "clang/Sema/ParsedTemplate.h" #include "clang/Sema/Scope.h" +#include "clang/Sema/SemaCUDA.h" #include "clang/Sema/SemaCodeCompletion.h" #include "llvm/Support/Path.h" #include "llvm/Support/TimeProfiler.h" @@ -1133,6 +1134,13 @@ bool Parser::isStartOfFunctionDefinition(const ParsingDeclarator &Declarator) { Parser::DeclGroupPtrTy Parser::ParseDeclOrFunctionDefInternal( ParsedAttributes &Attrs, ParsedAttributes &DeclSpecAttrs, ParsingDeclSpec &DS, AccessSpecifier AS) { + SemaCUDA::CUDATargetContextRAII CTCRAII(Actions.CUDA(), + SemaCUDA::CTCK_Declaration); + if (Actions.getLangOpts().CUDA) { + Actions.CUDA().CurCUDATargetCtx.tryRegisterTargetAttrs(Attrs); + Actions.CUDA().CurCUDATargetCtx.tryRegisterTargetAttrs(DeclSpecAttrs); + } + // Because we assume that the DeclSpec has not yet been initialised, we simply // overwrite the source range and attribute the provided leading declspec // attributes. diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index ec37c0df56c67..e4c7c1c2c57d8 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -18,6 +18,7 @@ #include "clang/Basic/TargetInfo.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/Lookup.h" +#include "clang/Sema/ParsedAttr.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/Sema.h" #include "clang/Sema/SemaDiagnostic.h" @@ -68,13 +69,28 @@ ExprResult SemaCUDA::ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc, /*IsExecConfig=*/true); } -CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) { +namespace { + +// This iterator adaptor enables sharing a IdentifyTarget implementation for +// ParsedAttributesView and for vectors of AttributeCommonInfo::Kind. 
+struct AttrKindIterator
+    : llvm::iterator_adaptor_base<
+          AttrKindIterator, ParsedAttributesView::const_iterator,
+          std::random_access_iterator_tag, clang::AttributeCommonInfo::Kind> {
+  AttrKindIterator() : iterator_adaptor_base(nullptr) {}
+  AttrKindIterator(ParsedAttributesView::const_iterator I)
+      : iterator_adaptor_base(I) {}
+  clang::AttributeCommonInfo::Kind operator*() const { return I->getKind(); }
+};
+
+template <typename AKIterRange>
+CUDAFunctionTarget IdentifyTargetImpl(const AKIterRange &AttrKinds) {
   bool HasHostAttr = false;
   bool HasDeviceAttr = false;
   bool HasGlobalAttr = false;
   bool HasInvalidTargetAttr = false;
-  for (const ParsedAttr &AL : Attrs) {
-    switch (AL.getKind()) {
+  for (const auto &AK : AttrKinds) {
+    switch (AK) {
     case ParsedAttr::AT_CUDAGlobal:
       HasGlobalAttr = true;
       break;
@@ -107,6 +123,18 @@ CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) {
   return CUDAFunctionTarget::Host;
 }

+} // namespace
+
+CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) {
+  return IdentifyTargetImpl(make_range(AttrKindIterator(Attrs.begin()),
+                                       AttrKindIterator(Attrs.end())));
+}
+
+CUDAFunctionTarget SemaCUDA::IdentifyTarget(
+    const SmallVectorImpl<AttributeCommonInfo::Kind> &AttrKinds) {
+  return IdentifyTargetImpl(AttrKinds);
+}
+
 template <typename A>
 static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
   return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
@@ -115,20 +143,60 @@ static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
   });
 }

+CUDAFunctionTarget SemaCUDA::CUDATargetContext::getTarget() {
+  TargetQueried = true;
+  return Target;
+}
+
+void SemaCUDA::CUDATargetContext::tryRegisterTargetAttrs(
+    const ParsedAttributesView &Attrs) {
+  if (Kind != CTCK_Declaration)
+    return;
+  for (const auto &A : Attrs) {
+    auto AK = A.getKind();
+    switch (AK) {
+    case ParsedAttr::AT_CUDAGlobal:
+    case ParsedAttr::AT_CUDAHost:
+    case ParsedAttr::AT_CUDADevice:
+    case ParsedAttr::AT_CUDAInvalidTarget:
+      break;
+    default:
+      continue;
+    }
+    AttrKinds.push_back(AK);
+    CUDAFunctionTarget NewTarget = S->IdentifyTarget(AttrKinds);
+    if (TargetQueried && (NewTarget != Target))
+      S->Diag(A.getLoc(), diag::warn_target_specfier_ignored);
+    Target = NewTarget;
+  }
+}
+
 SemaCUDA::CUDATargetContextRAII::CUDATargetContextRAII(
     SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K, Decl *D)
     : S(S_) {
   SavedCtx = S.CurCUDATargetCtx;
-  assert(K == SemaCUDA::CTCK_InitGlobalVar);
-  auto *VD = dyn_cast_or_null<VarDecl>(D);
-  if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
-    auto Target = CUDAFunctionTarget::Host;
-    if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
-         !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
-        hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true) ||
-        hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true))
-      Target = CUDAFunctionTarget::Device;
-    S.CurCUDATargetCtx = {Target, K, VD};
+
+  switch (K) {
+  case SemaCUDA::CTCK_InitGlobalVar: {
+    auto *VD = dyn_cast_or_null<VarDecl>(D);
+    if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
+      auto Target = CUDAFunctionTarget::Host;
+      if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
+           !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
+          hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true) ||
+          hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true))
+        Target = CUDAFunctionTarget::Device;
+      S.CurCUDATargetCtx = CUDATargetContext(&S, K, Target);
+    }
+    break;
+  }
+  case SemaCUDA::CTCK_Declaration:
+    // The target is updated once relevant attributes are parsed. Initialize
+    // with the target used if no attributes are present: Host.
+    S.CurCUDATargetCtx = CUDATargetContext(&S, K, CUDAFunctionTarget::Host);
+    break;
+  default:
+    llvm_unreachable("unexpected context kind");
   }
 }

@@ -137,7 +205,7 @@ CUDAFunctionTarget SemaCUDA::IdentifyTarget(const FunctionDecl *D,
                                             bool IgnoreImplicitHDAttr) {
   // Code that lives outside a function gets the target from CurCUDATargetCtx.
   if (D == nullptr)
-    return CurCUDATargetCtx.Target;
+    return CurCUDATargetCtx.getTarget();

   if (D->hasAttr<CUDAInvalidTargetAttr>())
     return CUDAFunctionTarget::InvalidTarget;
@@ -232,7 +300,7 @@ SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
   // trivial ctor/dtor without device attr to be used. Non-trivial ctor/dtor
   // will be diagnosed by checkAllowedInitializer.
   if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar &&
-      CurCUDATargetCtx.Target == CUDAFunctionTarget::Device &&
+      CurCUDATargetCtx.getTarget() == CUDAFunctionTarget::Device &&
      (isa<CXXConstructorDecl>(Callee) || isa<CXXDestructorDecl>(Callee)))
     return CFP_HostDevice;

@@ -297,8 +365,16 @@ SemaCUDA::IdentifyPreference(const FunctionDecl *Caller,
       (CallerTarget == CUDAFunctionTarget::Device &&
        CalleeTarget == CUDAFunctionTarget::Host) ||
       (CallerTarget == CUDAFunctionTarget::Global &&
-       CalleeTarget == CUDAFunctionTarget::Host))
+       CalleeTarget == CUDAFunctionTarget::Host)) {
+    // In declaration contexts outside of function bodies and variable
+    // initializers, tolerate mismatched function targets as long as they are
+    // not codegened.
+    if (CurCUDATargetCtx.Kind == CTCK_Declaration &&
+        !this->SemaRef.getCurFunctionDecl(/*AllowLambda=*/true))
+      return CFP_WrongSide;
+
     return CFP_Never;
+  }

   llvm_unreachable("All cases should've been handled by now.");
 }
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index 52f640eb96b73..e3703c2c735fe 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -10747,7 +10747,7 @@ OverloadCandidateSet::BestViableFunction(Sema &S, SourceLocation Loc,
         llvm::any_of(Candidates, [&](OverloadCandidate *Cand) {
           // Check viable function only.
return Cand->Viable && Cand->Function && - S.CUDA().IdentifyPreference(Caller, Cand->Function) == + S.CUDA().IdentifyPreference(Caller, Cand->Function) >= SemaCUDA::CFP_SameSide; }); if (ContainsSameSideCandidate) { diff --git a/clang/test/SemaCUDA/target-overloads-availability-warnings.cu b/clang/test/SemaCUDA/target-overloads-availability-warnings.cu new file mode 100644 index 0000000000000..93498c6fe22e1 --- /dev/null +++ b/clang/test/SemaCUDA/target-overloads-availability-warnings.cu @@ -0,0 +1,199 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify=expected,onhost %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify=expected,ondevice %s + +template struct my_enable_if {}; + +template struct my_enable_if { + typedef T type; +}; + +__attribute__((host, device)) void use(int x); + +// For 'OverloadFunHostDepr', the host overload is deprecated, the device overload is not. +__attribute__((device)) constexpr int OverloadFunHostDepr(void) { return 1; } +__attribute__((host, deprecated("Host variant"))) constexpr int OverloadFunHostDepr(void) { return 1; } // expected-note 0+ {{has been explicitly marked deprecated here}} + + +// For 'OverloadFunDeviceDepr', the device overload is deprecated, the host overload is not. +__attribute__((device, deprecated("Device variant"))) constexpr int OverloadFunDeviceDepr(void) { return 1; } // expected-note 0+ {{has been explicitly marked deprecated here}} +__attribute__((host)) constexpr int OverloadFunDeviceDepr(void) { return 1; } + + +// For 'TemplateOverloadFun', the host overload is deprecated, the device overload is not. +template +__attribute__((device)) constexpr T TemplateOverloadFun(void) { return 1; } + +template +__attribute__((host, deprecated("Host variant"))) constexpr T TemplateOverloadFun(void) { return 1; } // expected-note 0+ {{has been explicitly marked deprecated here}} + + +// There is only a device overload, and it is deprecated. 
+__attribute__((device, deprecated)) constexpr int // expected-note 0+ {{has been explicitly marked deprecated here}} +DeviceOnlyFunDeprecated(void) { return 1; } + +// There is only a host overload, and it is deprecated. +__attribute__((host, deprecated)) constexpr int // expected-note 0+ {{has been explicitly marked deprecated here}} +HostOnlyFunDeprecated(void) { return 1; } + +class FunSelector { +public: + // This should use the non-deprecated device overload. + template __attribute__((device)) + auto devicefun(void) -> typename my_enable_if<(X == OverloadFunHostDepr()), int>::type { + return 1; + } + + // This should use the non-deprecated device overload. + template __attribute__((device)) + auto devicefun(void) -> typename my_enable_if<(X != OverloadFunHostDepr()), int>::type { + return 0; + } + + // This should use the deprecated device overload. + template __attribute__((device)) + auto devicefun_wrong(void) -> typename my_enable_if<(X == OverloadFunDeviceDepr()), int>::type { // expected-warning {{'OverloadFunDeviceDepr' is deprecated: Device variant}} + return 1; + } + + // This should use the deprecated device overload. + template __attribute__((device)) + auto devicefun_wrong(void) -> typename my_enable_if<(X != OverloadFunDeviceDepr()), int>::type { // expected-warning {{'OverloadFunDeviceDepr' is deprecated: Device variant}} + return 0; + } + + // This should use the non-deprecated host overload. + template __attribute__((host)) + auto hostfun(void) -> typename my_enable_if<(X == OverloadFunDeviceDepr()), int>::type { + return 1; + } + + // This should use the non-deprecated host overload. + template __attribute__((host)) + auto hostfun(void) -> typename my_enable_if<(X != OverloadFunDeviceDepr()), int>::type { + return 0; + } + + // This should use the deprecated host overload. 
+ template __attribute__((host)) + auto hostfun_wrong(void) -> typename my_enable_if<(X == OverloadFunHostDepr()), int>::type { // expected-warning {{'OverloadFunHostDepr' is deprecated: Host variant}} + return 1; + } + + // This should use the deprecated host overload. + template __attribute__((host)) + auto hostfun_wrong(void) -> typename my_enable_if<(X != OverloadFunHostDepr()), int>::type { // expected-warning {{'OverloadFunHostDepr' is deprecated: Host variant}} + return 0; + } +}; + + +// These should not be diagnosed since the device overload of +// OverloadFunHostDepr is not deprecated: +__attribute__((device)) my_enable_if<(OverloadFunHostDepr() > 0), int>::type +DeviceUserOverloadFunHostDepr1(void) { return 2; } + +__attribute__((device)) my_enable_if<(OverloadFunHostDepr() > 0), int>::type constexpr +DeviceUserOverloadFunHostDeprConstexpr(void) { return 2; } + + +// Analogously for OverloadFunDeviceDepr: +__attribute__((host)) my_enable_if<(OverloadFunDeviceDepr() > 0), int>::type +DeviceUserOverloadFunDeviceDepr1(void) { return 2; } + +my_enable_if<(OverloadFunDeviceDepr() > 0), int>::type __attribute__((host)) +DeviceUserOverloadFunDeviceDepr2(void) { return 2; } + +__attribute__((host)) my_enable_if<(OverloadFunDeviceDepr() > 0), int>::type constexpr +DeviceUserOverloadFunDeviceDeprConstexpr(void) { return 2; } + + +// Actual uses of the deprecated overloads should be diagnosed: +__attribute__((host, device)) my_enable_if<(OverloadFunHostDepr() > 0), int>::type // onhost-warning {{'OverloadFunHostDepr' is deprecated: Host variant}} +HostDeviceUserOverloadFunHostDepr(void) { return 3; } + +__attribute__((host)) my_enable_if<(OverloadFunHostDepr() > 0), int>::type constexpr // expected-warning {{'OverloadFunHostDepr' is deprecated: Host variant}} +HostUserOverloadFunHostDeprConstexpr(void) { return 3; } + +__attribute__((device)) my_enable_if<(OverloadFunDeviceDepr() > 0), int>::type constexpr // expected-warning {{'OverloadFunDeviceDepr' is 
deprecated: Device variant}} +HostUserOverloadFunDeviceDeprConstexpr(void) { return 3; } + + +// Making the offending decl a template shouldn't change anything: +__attribute__((host)) my_enable_if<(TemplateOverloadFun() > 0), int>::type // expected-warning {{'TemplateOverloadFun' is deprecated: Host variant}} +HostUserTemplateOverloadFun(void) { return 3; } + +__attribute__((device)) my_enable_if<(TemplateOverloadFun() > 0), int>::type +DeviceUserTemplateOverloadFun(void) { return 3; } + + +__attribute__((device, deprecated)) constexpr int DeviceVarConstDepr = 1; // expected-note 0+ {{has been explicitly marked deprecated here}} + +// Diagnostics for uses in function bodies should work as expected: +__attribute__((host)) void HostUser(void) { + use(DeviceVarConstDepr); // expected-warning {{'DeviceVarConstDepr' is deprecated}} + use(HostOnlyFunDeprecated()); // expected-warning {{'HostOnlyFunDeprecated' is deprecated}} + use(OverloadFunHostDepr()); // expected-warning {{'OverloadFunHostDepr' is deprecated: Host variant}} + use(TemplateOverloadFun()); // expected-warning {{'TemplateOverloadFun' is deprecated: Host variant}} + + use(OverloadFunDeviceDepr()); +} + +__attribute__((device)) void DeviceUser(void) { + use(DeviceVarConstDepr); // expected-warning {{'DeviceVarConstDepr' is deprecated}} + use(DeviceOnlyFunDeprecated()); // expected-warning {{'DeviceOnlyFunDeprecated' is deprecated}} + use(OverloadFunDeviceDepr()); // expected-warning {{'OverloadFunDeviceDepr' is deprecated: Device variant}} + + use(OverloadFunHostDepr()); + use(TemplateOverloadFun()); +} + + +// Template functions outside of classes: + +// This should use the non-deprecated device overload. +template __attribute__((device)) +auto devicefun(void) -> typename my_enable_if<(X == OverloadFunHostDepr()), int>::type { + return 1; +} + +// This should use the non-deprecated device overload. 
+template __attribute__((device)) +auto devicefun(void) -> typename my_enable_if<(X != OverloadFunHostDepr()), int>::type { + return 0; +} + +// This should use the deprecated device overload. +template __attribute__((device)) +auto devicefun_wrong(void) -> typename my_enable_if<(X == OverloadFunDeviceDepr()), int>::type { // expected-warning {{'OverloadFunDeviceDepr' is deprecated: Device variant}} + return 1; +} + +// This should use the deprecated device overload. +template __attribute__((device)) +auto devicefun_wrong(void) -> typename my_enable_if<(X != OverloadFunDeviceDepr()), int>::type { // expected-warning {{'OverloadFunDeviceDepr' is deprecated: Device variant}} + return 0; +} + +// This should use the non-deprecated host overload. +template __attribute__((host)) +auto hostfun(void) -> typename my_enable_if<(X == OverloadFunDeviceDepr()), int>::type { + return 1; +} + +// This should use the non-deprecated host overload. +template __attribute__((host)) +auto hostfun(void) -> typename my_enable_if<(X != OverloadFunDeviceDepr()), int>::type { + return 0; +} + +// This should use the deprecated host overload. +template __attribute__((host)) +auto hostfun_wrong(void) -> typename my_enable_if<(X == OverloadFunHostDepr()), int>::type { // expected-warning {{'OverloadFunHostDepr' is deprecated: Host variant}} + return 1; +} + +// This should use the deprecated host overload. 
+template __attribute__((host)) +auto hostfun_wrong(void) -> typename my_enable_if<(X != OverloadFunHostDepr()), int>::type { // expected-warning {{'OverloadFunHostDepr' is deprecated: Host variant}} + return 0; +} diff --git a/clang/test/SemaCUDA/target-overloads-in-function-prototypes.cu b/clang/test/SemaCUDA/target-overloads-in-function-prototypes.cu new file mode 100644 index 0000000000000..0d8f9aa8676b5 --- /dev/null +++ b/clang/test/SemaCUDA/target-overloads-in-function-prototypes.cu @@ -0,0 +1,703 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify=expected,onhost %s +// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fsyntax-only -fcuda-is-device -verify=expected,ondevice %s + + +// Tests to ensure that functions with host and device overloads in that are +// called outside of function bodies and variable initializers, e.g., in +// template arguments are resolved with respect to the declaration to which they +// belong. + +// Opaque types used for tests: +struct DeviceTy {}; +struct HostTy {}; +struct HostDeviceTy {}; +struct TemplateTy {}; + +struct TrueTy { static const bool value = true; }; +struct FalseTy { static const bool value = false; }; + +// Select one of two types based on a boolean condition. +template struct select_type {}; +template struct select_type { typedef T type; }; +template struct select_type { typedef F type; }; + +template struct check : public select_type { }; + +// Check if two types are the same. +template struct is_same : public FalseTy { }; +template struct is_same : public TrueTy { }; + +// A static assertion that fails at compile time if the expression E does not +// have type T. +#define ASSERT_HAS_TYPE(E, T) static_assert(is_same::value); + + +// is_on_device() is true when called in a device context and false if called in a host context. 
+__attribute__((host)) constexpr bool is_on_device(void) { return false; } +__attribute__((device)) constexpr bool is_on_device(void) { return true; } + + +// this type depends on whether it occurs in host or device code +#define targetdep_t select_type::type + +// Defines and typedefs with different values in host and device compilation. +#ifdef __CUDA_ARCH__ +#define CurrentTarget DEVICE +typedef DeviceTy CurrentTargetTy; +typedef DeviceTy TemplateIfHostTy; +#else +#define CurrentTarget HOST +typedef HostTy CurrentTargetTy; +typedef TemplateTy TemplateIfHostTy; +#endif + + + +// targetdep_t in function declarations should depend on the target of the +// declared function. +__attribute__((device)) targetdep_t decl_ret_early_device(void); +ASSERT_HAS_TYPE(decl_ret_early_device(), DeviceTy) + +__attribute__((host)) targetdep_t decl_ret_early_host(void); +ASSERT_HAS_TYPE(decl_ret_early_host(), HostTy) + +__attribute__((host,device)) targetdep_t decl_ret_early_host_device(void); +ASSERT_HAS_TYPE(decl_ret_early_host_device(), CurrentTargetTy) + +// If the function target is specified too late and can therefore not be +// considered for overload resolution in targetdep_t, warn. +targetdep_t __attribute__((device)) decl_ret_late_device(void); // expected-warning {{target attribute has been ignored for overload resolution}} +ASSERT_HAS_TYPE(decl_ret_late_device(), HostTy) + +// No warning necessary if the ignored attribute doesn't change the result. +targetdep_t __attribute__((host)) decl_ret_late_host(void); +ASSERT_HAS_TYPE(decl_ret_late_host(), HostTy) + +targetdep_t __attribute__((host,device)) decl_ret_late_host_device(void); // expected-warning {{target attribute has been ignored for overload resolution}} +ASSERT_HAS_TYPE(decl_ret_late_host_device(), HostTy) + +// An odd way of writing this, but it's possible. 
+__attribute__((device)) targetdep_t __attribute__((host)) decl_ret_early_device_late_host(void); // expected-warning {{target attribute has been ignored for overload resolution}} +ASSERT_HAS_TYPE(decl_ret_early_device_late_host(), DeviceTy) + + +// The same for function definitions and parameter types: +__attribute__((device)) targetdep_t ret_early_device(targetdep_t x) { + ASSERT_HAS_TYPE(ret_early_device({}), DeviceTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; +} + +__attribute__((host)) targetdep_t ret_early_host(targetdep_t x) { + ASSERT_HAS_TYPE(ret_early_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; +} + +__attribute__((host, device)) targetdep_t ret_early_hostdevice(targetdep_t x) { + ASSERT_HAS_TYPE(ret_early_hostdevice({}), CurrentTargetTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + +// The parameter is still after the attribute, so it needs no warning. +targetdep_t __attribute__((device)) // expected-warning {{target attribute has been ignored for overload resolution}} +ret_late_device(targetdep_t x) { + ASSERT_HAS_TYPE(ret_late_device({}), HostTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; +} + +targetdep_t __attribute__((host, device)) // expected-warning {{target attribute has been ignored for overload resolution}} +ret_late_hostdevice(targetdep_t x) { + ASSERT_HAS_TYPE(ret_late_hostdevice({}), HostTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + +targetdep_t __attribute__((host)) ret_late_host(targetdep_t x) { + ASSERT_HAS_TYPE(ret_late_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; +} + +__attribute__((device)) targetdep_t __attribute__((host)) // expected-warning {{target attribute has been ignored for overload resolution}} +ret_early_device_late_host(targetdep_t x) { + ASSERT_HAS_TYPE(ret_early_device_late_host({}), DeviceTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + +// The attribute is even later, so we can't choose the expected overload. 
+targetdep_t ret_verylate_device(targetdep_t x) __attribute__((device)) { // expected-warning {{target attribute has been ignored for overload resolution}} + ASSERT_HAS_TYPE(ret_verylate_device({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; +} + +// It's possible to get two different wrong types: +targetdep_t __attribute__((device)) // expected-warning {{target attribute has been ignored for overload resolution}} +ret_late_device_verylate_host(targetdep_t x) __attribute__((host)) { // expected-warning {{target attribute has been ignored for overload resolution}} + ASSERT_HAS_TYPE(ret_late_device_verylate_host({}), HostTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; +} + + +// Increasingly unusual ways to specify a return type: + +// The attribute is specified much earlier than the overload happens, works as +// expected. +__attribute__((device)) auto autoret_early_device(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(autoret_early_device({}), DeviceTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; +} + +// The attribute is specified much earlier than the overload happens, works as +// expected. +__attribute__((host)) auto autoret_early_host(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(autoret_early_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; +} + +// The attribute is specified much earlier than the overload happens, works as +// expected. +__attribute__((host,device)) auto autoret_early_hostdevice(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(autoret_early_hostdevice({}), CurrentTargetTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + + +// The attribute is still specified earlier than the overload happens, works as +// expected. +auto __attribute__((device)) autoret_late_device(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(autoret_late_device({}), DeviceTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; +} + +// The attribute is still specified earlier than the overload happens, works as +// expected. 
+auto __attribute__((host)) autoret_late_host(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(autoret_late_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; +} + +// The attribute is still specified earlier than the overload happens, works as +// expected. +auto __attribute__((host,device)) autoret_late_hostdevice(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(autoret_late_hostdevice({}), CurrentTargetTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + + +// There should be no problem if the return type is inferred from an expression in the body: +auto __attribute__((device)) fullauto_device(targetdep_t x) { + ASSERT_HAS_TYPE(x, DeviceTy) + return (targetdep_t)(x); +} +ASSERT_HAS_TYPE(fullauto_device({}), DeviceTy) + +auto __attribute__((host)) fullauto_host(targetdep_t x) { + ASSERT_HAS_TYPE(x, HostTy) + return (targetdep_t)(x); +} +ASSERT_HAS_TYPE(fullauto_host({}), HostTy) + +// The return type is as expected, but the argument type precedes the attribute, +// so we don't get the right type for it. 
+auto fullauto_verylate_device(targetdep_t x) __attribute__((device)) { // expected-warning {{target attribute has been ignored for overload resolution}} + ASSERT_HAS_TYPE(x, HostTy) + return targetdep_t(); +} +ASSERT_HAS_TYPE(fullauto_verylate_device({}), DeviceTy) + +auto fullauto_verylate_host(targetdep_t x) __attribute__((host)) { + ASSERT_HAS_TYPE(x, HostTy) + return targetdep_t(); +} +ASSERT_HAS_TYPE(fullauto_verylate_host({}), HostTy) + + +// MS __declspec syntax: +__declspec(__device__) targetdep_t ms_ret_early_device(targetdep_t x) { + ASSERT_HAS_TYPE(ms_ret_early_device({}), DeviceTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; +} + +__declspec(__host__) targetdep_t ms_ret_early_host(targetdep_t x) { + ASSERT_HAS_TYPE(ms_ret_early_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; +} + +__declspec(__host__) __declspec(__device__) targetdep_t ms_ret_early_hostdevice(targetdep_t x) { + ASSERT_HAS_TYPE(ms_ret_early_hostdevice({}), CurrentTargetTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + +targetdep_t __declspec(__device__) ms_ret_late_device(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}} + ASSERT_HAS_TYPE(ms_ret_late_device({}), HostTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; +} + +targetdep_t __declspec(__host__) ms_ret_late_host(targetdep_t x) { + ASSERT_HAS_TYPE(ms_ret_late_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; +} + +targetdep_t __declspec(__host__) __declspec(__device__) ms_ret_late_hostdevice(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}} + ASSERT_HAS_TYPE(ms_ret_late_hostdevice({}), HostTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + +__declspec(__device__) targetdep_t __declspec(__host__) ms_ret_early_device_late_host(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}} + ASSERT_HAS_TYPE(ms_ret_early_device_late_host({}), DeviceTy) + 
ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + +__declspec(__device__) auto ms_autoret_early_device(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(ms_autoret_early_device({}), DeviceTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; +} + +__declspec(__host__) auto ms_autoret_early_host(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(ms_autoret_early_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; +} + +__declspec(__host__) __declspec(__device__) auto ms_autoret_early_hostdevice(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(ms_autoret_early_hostdevice({}), CurrentTargetTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + + +auto __declspec(__device__) ms_autoret_late_device(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(ms_autoret_late_device({}), DeviceTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; +} + +auto __declspec(__host__) ms_autoret_late_host(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(ms_autoret_late_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; +} + +auto __declspec(__host__) __declspec(__device__) ms_autoret_late_hostdevice(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(ms_autoret_late_hostdevice({}), CurrentTargetTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + + +// Class/Struct member functions: + +struct MethodTests { + __attribute__((device)) targetdep_t ret_early_device(targetdep_t x) { + ASSERT_HAS_TYPE(ret_early_device({}), DeviceTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; + } + + __attribute__((host)) targetdep_t ret_early_host(targetdep_t x) { + ASSERT_HAS_TYPE(ret_early_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; + } + + __attribute__((host,device)) targetdep_t ret_early_hostdevice(targetdep_t x) { + ASSERT_HAS_TYPE(ret_early_hostdevice({}), CurrentTargetTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; + } + + __attribute__((device)) auto autoret_early_device(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(autoret_early_device({}), DeviceTy) + 
ASSERT_HAS_TYPE(x, DeviceTy) + return {}; + } + __attribute__((host)) auto autoret_early_host(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(autoret_early_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; + } + + __attribute__((host,device)) auto autoret_early_hostdevice(targetdep_t x) -> targetdep_t { + ASSERT_HAS_TYPE(autoret_early_hostdevice({}), CurrentTargetTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; + } + + + // Overloaded call happens in return type, attribute is after that. + targetdep_t __attribute__((device)) ret_late_device(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}} + ASSERT_HAS_TYPE(ret_late_device({}), HostTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; + } + + targetdep_t __attribute__((host)) ret_late_host(targetdep_t x) { + ASSERT_HAS_TYPE(ret_late_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; + } + + targetdep_t __attribute__((host,device)) ret_late_hostdevice(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}} + ASSERT_HAS_TYPE(ret_late_hostdevice({}), HostTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; + } + + + // Member declarations (tested in the 'tests' function further below): + __attribute__((device)) targetdep_t decl_ret_early_device(void); + __attribute__((host)) targetdep_t decl_ret_early_host(void); + __attribute__((host,device)) targetdep_t decl_ret_early_hostdevice(void); + targetdep_t __attribute__((device)) decl_ret_late_device(void); // expected-warning {{target attribute has been ignored for overload resolution}} + targetdep_t __attribute__((host)) decl_ret_late_host(void); + targetdep_t __attribute__((host,device)) decl_ret_late_hostdevice(void); // expected-warning {{target attribute has been ignored for overload resolution}} + + // for out of line definitions: + __attribute__((device)) targetdep_t ool_ret_early_device(targetdep_t x); + __attribute__((host)) targetdep_t 
ool_ret_early_host(targetdep_t x); + __attribute__((host,device)) targetdep_t ool_ret_early_hostdevice(targetdep_t x); + targetdep_t __attribute__((device)) ool_ret_late_device(targetdep_t x); // expected-warning {{target attribute has been ignored for overload resolution}} + targetdep_t __attribute__((host)) ool_ret_late_host(targetdep_t x); + targetdep_t __attribute__((host,device)) ool_ret_late_hostdevice(targetdep_t x); // expected-warning {{target attribute has been ignored for overload resolution}} + +}; + +__attribute__((device)) targetdep_t MethodTests::ool_ret_early_device(targetdep_t x) { + ASSERT_HAS_TYPE(ool_ret_early_device({}), DeviceTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; +} + +__attribute__((host)) targetdep_t MethodTests::ool_ret_early_host(targetdep_t x) { + ASSERT_HAS_TYPE(ool_ret_early_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; +} + +__attribute__((host,device)) targetdep_t MethodTests::ool_ret_early_hostdevice(targetdep_t x) { + ASSERT_HAS_TYPE(ool_ret_early_hostdevice({}), CurrentTargetTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + +targetdep_t __attribute__((device)) MethodTests::ool_ret_late_device(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}} + ASSERT_HAS_TYPE(ool_ret_late_device({}), HostTy) + ASSERT_HAS_TYPE(x, DeviceTy) + return {}; +} + +targetdep_t __attribute__((host)) MethodTests::ool_ret_late_host(targetdep_t x) { + ASSERT_HAS_TYPE(ool_ret_late_host({}), HostTy) + ASSERT_HAS_TYPE(x, HostTy) + return {}; +} + +targetdep_t __attribute__((host,device)) MethodTests::ool_ret_late_hostdevice(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}} + ASSERT_HAS_TYPE(ool_ret_late_hostdevice({}), HostTy) + ASSERT_HAS_TYPE(x, CurrentTargetTy) + return {}; +} + + +// members of templated structs should also work. 
+template <int N>
+struct TemplateMethodTests {
+  __attribute__((device)) targetdep_t ret_early_device(targetdep_t x) {
+    ASSERT_HAS_TYPE(ret_early_device({}), DeviceTy)
+    ASSERT_HAS_TYPE(x, DeviceTy)
+    return {};
+  }
+
+  __attribute__((host)) targetdep_t ret_early_host(targetdep_t x) {
+    ASSERT_HAS_TYPE(ret_early_host({}), HostTy)
+    ASSERT_HAS_TYPE(x, HostTy)
+    return {};
+  }
+
+  __attribute__((host,device)) targetdep_t ret_early_hostdevice(targetdep_t x) {
+    ASSERT_HAS_TYPE(ret_early_hostdevice({}), CurrentTargetTy)
+    ASSERT_HAS_TYPE(x, CurrentTargetTy)
+    return {};
+  }
+
+  __attribute__((device)) auto autoret_early_device(targetdep_t x) -> targetdep_t {
+    ASSERT_HAS_TYPE(autoret_early_device({}), DeviceTy)
+    ASSERT_HAS_TYPE(x, DeviceTy)
+    return {};
+  }
+
+  __attribute__((host)) auto autoret_early_host(targetdep_t x) -> targetdep_t {
+    ASSERT_HAS_TYPE(autoret_early_host({}), HostTy)
+    ASSERT_HAS_TYPE(x, HostTy)
+    return {};
+  }
+
+  __attribute__((host,device)) auto autoret_early_hostdevice(targetdep_t x) -> targetdep_t {
+    ASSERT_HAS_TYPE(autoret_early_hostdevice({}), CurrentTargetTy)
+    ASSERT_HAS_TYPE(x, CurrentTargetTy)
+    return {};
+  }
+
+  targetdep_t __attribute__((device)) ret_late_device(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}}
+    ASSERT_HAS_TYPE(ret_late_device({}), HostTy)
+    ASSERT_HAS_TYPE(x, DeviceTy)
+    return {};
+  }
+
+  targetdep_t __attribute__((host)) ret_late_host(targetdep_t x) {
+    ASSERT_HAS_TYPE(ret_late_host({}), HostTy)
+    ASSERT_HAS_TYPE(x, HostTy)
+    return {};
+  }
+
+  targetdep_t __attribute__((host,device)) ret_late_hostdevice(targetdep_t x) { // expected-warning {{target attribute has been ignored for overload resolution}}
+    ASSERT_HAS_TYPE(ret_late_hostdevice({}), HostTy)
+    ASSERT_HAS_TYPE(x, CurrentTargetTy)
+    return {};
+  }
+
+
+  __attribute__((device)) targetdep_t decl_ret_early_device(void);
+  __attribute__((host)) targetdep_t decl_ret_early_host(void);
+  __attribute__((host,device)) 
targetdep_t decl_ret_early_hostdevice(void); + + targetdep_t __attribute__((device)) decl_ret_late_device(void); // expected-warning {{target attribute has been ignored for overload resolution}} + targetdep_t __attribute__((host)) decl_ret_late_host(void); + targetdep_t __attribute__((host,device)) decl_ret_late_hostdevice(void); // expected-warning {{target attribute has been ignored for overload resolution}} +}; + +void tests(void) { + MethodTests mt; + + ASSERT_HAS_TYPE(mt.ret_early_device({}), DeviceTy) + ASSERT_HAS_TYPE(mt.ret_early_host({}), HostTy) + ASSERT_HAS_TYPE(mt.ret_early_hostdevice({}), CurrentTargetTy) + + ASSERT_HAS_TYPE(mt.autoret_early_device({}), DeviceTy) + ASSERT_HAS_TYPE(mt.autoret_early_host({}), HostTy) + ASSERT_HAS_TYPE(mt.autoret_early_hostdevice({}), CurrentTargetTy) + + // The target attribute is too late to be considered: + ASSERT_HAS_TYPE(mt.ret_late_device({}), HostTy) + ASSERT_HAS_TYPE(mt.ret_late_host({}), HostTy) + ASSERT_HAS_TYPE(mt.ret_late_hostdevice({}), HostTy) + + ASSERT_HAS_TYPE(mt.decl_ret_early_device(), DeviceTy) + ASSERT_HAS_TYPE(mt.decl_ret_early_host(), HostTy) + ASSERT_HAS_TYPE(mt.decl_ret_early_hostdevice(), CurrentTargetTy) + + // The target attribute is too late to be considered: + ASSERT_HAS_TYPE(mt.decl_ret_late_device(), HostTy) + ASSERT_HAS_TYPE(mt.decl_ret_late_host(), HostTy) + ASSERT_HAS_TYPE(mt.decl_ret_late_hostdevice(), HostTy) + + TemplateMethodTests<42> tmt; + ASSERT_HAS_TYPE(tmt.ret_early_device({}), DeviceTy) + ASSERT_HAS_TYPE(tmt.ret_early_host({}), HostTy) + ASSERT_HAS_TYPE(tmt.ret_early_hostdevice({}), CurrentTargetTy) + + ASSERT_HAS_TYPE(tmt.autoret_early_device({}), DeviceTy) + ASSERT_HAS_TYPE(tmt.autoret_early_host({}), HostTy) + ASSERT_HAS_TYPE(tmt.autoret_early_hostdevice({}), CurrentTargetTy) + + ASSERT_HAS_TYPE(tmt.ret_late_device({}), HostTy) + ASSERT_HAS_TYPE(tmt.ret_late_host({}), HostTy) + ASSERT_HAS_TYPE(tmt.ret_late_hostdevice({}), HostTy) + + 
ASSERT_HAS_TYPE(tmt.decl_ret_early_device(), DeviceTy)
+  ASSERT_HAS_TYPE(tmt.decl_ret_early_host(), HostTy)
+  ASSERT_HAS_TYPE(tmt.decl_ret_early_hostdevice(), CurrentTargetTy)
+
+  ASSERT_HAS_TYPE(tmt.decl_ret_late_device(), HostTy)
+  ASSERT_HAS_TYPE(tmt.decl_ret_late_host(), HostTy)
+  ASSERT_HAS_TYPE(tmt.decl_ret_late_hostdevice(), HostTy)
+}
+
+
+// global variables:
+__attribute__((device)) targetdep_t var_early_device = {};
+ASSERT_HAS_TYPE(var_early_device, DeviceTy)
+
+targetdep_t var_early_host = {};
+ASSERT_HAS_TYPE(var_early_host, HostTy)
+
+targetdep_t __attribute__((device)) var_late_device = {}; // expected-warning {{target attribute has been ignored for overload resolution}}
+ASSERT_HAS_TYPE(var_late_device, HostTy)
+
+
+// Tests for the overload candidate ordering compared to templates:
+
+enum Candidate {
+  TEMPLATE,
+  HOST,
+  DEVICE,
+  HOSTDEVICE,
+};
+
+// (1.) If the overloaded functions are constexpr
+
+// (1.a) Prefer fitting overloads.
+template <class T> constexpr Candidate ce_template_vs_H_D_functions(T arg) { return TEMPLATE; }
+__attribute__((device)) constexpr Candidate ce_template_vs_H_D_functions(float arg) { return DEVICE; }
+__attribute__((host)) constexpr Candidate ce_template_vs_H_D_functions(float arg) { return HOST; }
+
+__attribute__((device)) check::type
+test_ce_template_vs_H_D_functions_for_device() {
+  return TrueTy();
+}
+
+__attribute__((host)) check::type
+test_ce_template_vs_H_D_functions_for_host() {
+  return TrueTy();
+}
+
+__attribute__((host,device)) check::type
+test_ce_template_vs_H_D_functions_for_hd() {
+  return TrueTy();
+}
+
+
+// (1.b) Always prefer an HD candidate over a template candidate. 
+template <class T> constexpr Candidate ce_template_vs_HD_function(T arg) { return TEMPLATE; }
+__attribute__((host, device)) constexpr Candidate ce_template_vs_HD_function(float arg) { return HOSTDEVICE; }
+
+__attribute__((device)) check::type
+test_ce_template_vs_HD_function_for_device() {
+  return TrueTy();
+}
+
+__attribute__((host)) check::type
+test_ce_template_vs_HD_function_for_host() {
+  return TrueTy();
+}
+
+__attribute__((host,device)) check::type
+test_ce_template_vs_HD_function_for_hd() {
+  return TrueTy();
+}
+
+
+// (1.c) Even wrong-sided calls are okay if the called function is constexpr, so
+// prefer the device overload over the template.
+template <class T> constexpr Candidate ce_template_vs_D_function(T arg) { return TEMPLATE; }
+__attribute__((device)) constexpr Candidate ce_template_vs_D_function(float arg) { return DEVICE; }
+
+__attribute__((host)) check::type
+test_ce_template_vs_D_function_for_host() {
+  return TrueTy();
+}
+
+__attribute__((device)) check::type
+test_ce_template_vs_D_function_for_device() {
+  return TrueTy();
+}
+
+__attribute__((host,device)) check::type
+test_ce_template_vs_D_function_for_hd() {
+  return TrueTy();
+}
+
+
+// (2.) If the overloaded functions are NOT constexpr
+
+// (2.a) Prefer fitting overloads.
+template <class T> TemplateTy template_vs_H_D_functions(T arg) { return {}; }
+__attribute__((device)) DeviceTy template_vs_H_D_functions(float arg) { return {}; }
+__attribute__((host)) HostTy template_vs_H_D_functions(float arg) { return {}; }
+
+__attribute__((device)) check::value>::type
+test_template_vs_H_D_functions_for_device() {
+  return TrueTy{};
+}
+
+__attribute__((host)) check::value>::type
+test_template_vs_H_D_functions_for_host() {
+  return TrueTy{};
+}
+
+__attribute__((host,device)) check::value>::type
+test_template_vs_H_D_functions_for_hd() {
+  return TrueTy{};
+}
+
+// (2.b) Always prefer an HD candidate over a template candidate. 
+template <class T> TemplateTy template_vs_HD_function(T arg) { return {}; }
+__attribute__((host,device)) HostDeviceTy template_vs_HD_function(float arg) { return {}; }
+
+__attribute__((device)) check::value>::type
+test_template_vs_HD_function_for_device() {
+  return TrueTy{};
+}
+
+__attribute__((host)) check::value>::type
+test_template_vs_HD_function_for_host() {
+  return TrueTy{};
+}
+
+__attribute__((host,device)) check::value>::type
+test_template_vs_HD_function_for_hd() {
+  return TrueTy{};
+}
+
+
+// (2.c) For non-constexpr functions, prefer a sameside or native template
+// function over a wrongside non-template function:
+template <class T> TemplateTy template_vs_D_function(T arg) { return {}; }
+__attribute__((device)) DeviceTy template_vs_D_function(float arg) { return {}; }
+
+__attribute__((host,device)) check::value>::type
+test_template_vs_D_function_for_hd() {
+  return TrueTy{};
+}
+
+__attribute__((device)) check::value>::type
+test_template_vs_D_function_for_device() {
+  return TrueTy{};
+}
+
+__attribute__((host)) check::value>::type
+test_template_vs_D_function_for_host() {
+  return TrueTy{};
+}
+
+
+// If only a wrongside function is available, it is selected.
+__attribute__((device)) DeviceTy only_D_function(float arg) { return {}; }
+
+__attribute__((host)) check::value>::type
+test_only_D_function_for_host() {
+  return TrueTy{};
+}
+
+// Default arguments for template parameters occur before the target attribute,
+// so we can't identify the "right" overload for them.
+template <class T = targetdep_t>
+__attribute__((device)) // expected-warning {{target attribute has been ignored for overload resolution}}
+T use_in_template_default_arg(void) {
+  return HostTy{};
+}
+
+__attribute__((device))
+void test_use_in_template(void) {
+  use_in_template_default_arg<>();
+}