Skip to content

Commit 33d853e

Browse files
committed
[Clang][HIP] Warn when __AMDGCN_WAVEFRONT_SIZE is used in host code without relying on target-dependent overload resolution
The __AMDGCN_WAVEFRONT_SIZE and __AMDGCN_WAVEFRONT_SIZE__ macros in HIP can only provide meaningful values during device compilation. They are currently usable in host code, but only contain the default value of 64, independent of the target device(s). This patch checks for numeric literals in clearly identifiable host code if they are the result of expanding the wavefront-size macros and issues a diagnostic if that's the case. A alternative PR, llvm#91478, relied on constexpr functions with host and device overloads (where the host overload is marked as deprecated) to diagnose uses of these macros in host code. A problem with this approach are uses of the macros outside of function bodies, e.g., in template arguments of return types, or default template arguments of functions. In these cases, calls to functions with target overloads are resolved to the host variant during host compilation and to the device variant during device compilation - independently of the target of the function they belong to. Therefore, using the wavefront size macros in such cases leads to diagnostics during host compilation with llvm#91478, even if they are only associated to a device function. PR llvm#93546 is a proposal to suppress these spurious diagnostics. PR llvm#103031 is a proposal to change the behavior of target-dependent overload resolution outside of function bodies to use the target attributes that occur before the overloaded call to select the overload candidate. In contrast to llvm#91478, this PR will not diagnose uses of the wavefront-size macros outside of function bodies or initializers of global host variables. Implements SWDEV-449015.
1 parent e0bd8d3 commit 33d853e

File tree

5 files changed

+157
-0
lines changed

5 files changed

+157
-0
lines changed

clang/include/clang/Basic/DiagnosticSemaKinds.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9109,6 +9109,8 @@ def warn_offload_incompatible_redeclare : Warning<
91099109
"new declaration is %select{__device__|__global__|__host__|__host__ __device__}0 function, "
91109110
"old declaration is %select{__device__|__global__|__host__|__host__ __device__}1 function">,
91119111
InGroup<DiagGroup<"nvcc-compat">>, DefaultIgnore;
9112+
def warn_ref_device_macro_on_host : Warning<
9113+
"device-specific macro %0 is not available in a %select{__device__|__global__|__host__|__host__ __device__}1 context">, InGroup<DiagGroup<"hip-wavefrontsize">>;
91129114

91139115
def err_cuda_device_builtin_surftex_cls_template : Error<
91149116
"illegal device builtin %select{surface|texture}0 reference "

clang/include/clang/Sema/SemaCUDA.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,10 @@ class SemaCUDA : public SemaBase {
263263
// for __constant__ and __device__ variables.
264264
void checkAllowedInitializer(VarDecl *VD);
265265

266+
/// Check if the token is part of a macro that is used outside of its allowed
267+
/// compilation mode.
268+
void checkTargetMacroUse(const Token &Tok);
269+
266270
/// Check whether NewFD is a valid overload for CUDA. Emits
267271
/// diagnostics and invalidates NewFD if not.
268272
void checkTargetOverload(FunctionDecl *NewFD, const LookupResult &Previous);

clang/lib/Sema/SemaCUDA.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -703,6 +703,45 @@ void SemaCUDA::checkAllowedInitializer(VarDecl *VD) {
703703
}
704704
}
705705

706+
void SemaCUDA::checkTargetMacroUse(const Token &Tok) {
707+
assert(SemaRef.LangOpts.HIP);
708+
709+
// Currently, we check only for the AMDGCN_WAVEFRONT_SIZE macros, which should
710+
// only be used in device compilation.
711+
if (SemaRef.LangOpts.CUDAIsDevice)
712+
return;
713+
714+
auto *FD = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
715+
// If we are not in a FunctionDecl and we have no other meaningful way of
716+
// determining the compilation mode, avoid potentially spurious warnings.
717+
if (!FD && SemaRef.CUDA().CurCUDATargetCtx.Kind == SemaCUDA::CTCK_Unknown)
718+
return;
719+
720+
auto Target = SemaRef.CUDA().IdentifyTarget(FD);
721+
if (Target != CUDAFunctionTarget::HostDevice &&
722+
Target != CUDAFunctionTarget::Host)
723+
return;
724+
725+
const auto &Loc = Tok.getLocation();
726+
if (!Loc.isMacroID())
727+
return;
728+
729+
// Get the location of the innermost macro that contributed the token.
730+
const auto &SM = SemaRef.getSourceManager();
731+
const auto &IMCLoc = SM.getImmediateMacroCallerLoc(Loc);
732+
const auto &SpellingLoc = SM.getSpellingLoc(IMCLoc);
733+
734+
SmallString<16> buffer;
735+
auto MacroName = SemaRef.getPreprocessor().getSpelling(SpellingLoc, buffer);
736+
if (MacroName == "__AMDGCN_WAVEFRONT_SIZE" ||
737+
MacroName == "__AMDGCN_WAVEFRONT_SIZE__") {
738+
// Only report the actual use of the macro, not its builtin definition.
739+
auto UseLoc = SM.getExpansionLoc(Tok.getLocation());
740+
SemaRef.Diag(UseLoc, diag::warn_ref_device_macro_on_host)
741+
<< MacroName << llvm::to_underlying(SemaRef.CUDA().CurrentTarget());
742+
}
743+
}
744+
706745
void SemaCUDA::RecordImplicitHostDeviceFuncUsedByDevice(
707746
const FunctionDecl *Callee) {
708747
FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);

clang/lib/Sema/SemaExpr.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4079,6 +4079,9 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
40794079
ResultVal = ResultVal.trunc(Width);
40804080
}
40814081
Res = IntegerLiteral::Create(Context, ResultVal, Ty, Tok.getLocation());
4082+
4083+
if (SemaRef.LangOpts.HIP)
4084+
SemaRef.CUDA().checkTargetMacroUse(Tok);
40824085
}
40834086

40844087
// If this is an imaginary literal, create the ImaginaryLiteral wrapper.
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
// REQUIRES: amdgpu-registered-target
2+
// RUN: %clang -xhip --offload-arch=gfx1030 --offload-host-only -pedantic -nogpuinc -nogpulib -nobuiltininc -fsyntax-only -Xclang -verify=onhost %s
3+
// RUN: %clang -xhip --offload-arch=gfx1030 --offload-device-only -pedantic -nogpuinc -nogpulib -nobuiltininc -fsyntax-only -Xclang -verify=ondevice %s
4+
5+
// ondevice-no-diagnostics
6+
7+
#include <type_traits>
8+
9+
#define WRAPPED __AMDGCN_WAVEFRONT_SIZE__
10+
11+
#define DOUBLE_WRAPPED (WRAPPED)
12+
13+
__attribute__((host, device)) void use(int, const char*);
14+
15+
template<int N> __attribute__((host, device)) int templatify(int x) {
16+
return x + N;
17+
}
18+
19+
// no warning expected
20+
#if defined(__HIP_DEVICE_COMPILE__) && (__AMDGCN_WAVEFRONT_SIZE__ == 64) && (__AMDGCN_WAVEFRONT_SIZE == 64)
21+
int foo(void);
22+
#endif
23+
24+
// no warning expected
25+
__attribute__((device)) int device_var = __AMDGCN_WAVEFRONT_SIZE__;
26+
27+
__attribute__((device))
28+
void device_fun() {
29+
// no warnings expected
30+
use(__AMDGCN_WAVEFRONT_SIZE, "device function");
31+
use(__AMDGCN_WAVEFRONT_SIZE__, "device function");
32+
use(WRAPPED, "device function");
33+
use(DOUBLE_WRAPPED, "device function");
34+
use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "device function");
35+
}
36+
37+
__attribute__((global))
38+
void global_fun() {
39+
// no warnings expected
40+
use(__AMDGCN_WAVEFRONT_SIZE, "global function");
41+
use(__AMDGCN_WAVEFRONT_SIZE__, "global function");
42+
use(WRAPPED, "global function");
43+
use(DOUBLE_WRAPPED, "global function");
44+
use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "global function");
45+
}
46+
47+
// warning expected
48+
int host_var = __AMDGCN_WAVEFRONT_SIZE__; // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ context}}
49+
int host_var_alt = __AMDGCN_WAVEFRONT_SIZE; // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE is not available in a __host__ context}}
50+
int host_var_wrapped = WRAPPED; // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ context}}
51+
int host_var_double_wrapped = DOUBLE_WRAPPED; // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ context}}
52+
53+
__attribute__((host))
54+
void host_fun() {
55+
// warnings expected
56+
use(__AMDGCN_WAVEFRONT_SIZE, "host function"); // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE is not available in a __host__ context}}
57+
use(__AMDGCN_WAVEFRONT_SIZE__, "host function"); // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ context}}
58+
use(WRAPPED, "host function"); // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ context}}
59+
use(DOUBLE_WRAPPED, "host function"); // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ context}}
60+
use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "host function"); // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ context}}
61+
}
62+
63+
__attribute((host, device))
64+
void host_device_fun() {
65+
// warnings expected
66+
use(__AMDGCN_WAVEFRONT_SIZE__, "host device function"); // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ __device__ context}}
67+
use(WRAPPED, "host device function"); // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ __device__ context}}
68+
use(DOUBLE_WRAPPED, "host device function"); // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ __device__ context}}
69+
use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "host device function"); // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ __device__ context}}
70+
}
71+
72+
// Variations of this construct are used in rocPRIM and should compile without diagnostics.
73+
template <unsigned int OuterWarpSize = __AMDGCN_WAVEFRONT_SIZE>
74+
class FunSelector {
75+
public:
76+
template<unsigned int FunWarpSize = OuterWarpSize>
77+
__attribute__((device))
78+
auto fun(void)
79+
-> typename std::enable_if<(FunWarpSize <= __AMDGCN_WAVEFRONT_SIZE), void>::type
80+
{
81+
use(1, "yay!");
82+
}
83+
84+
template<unsigned int FunWarpSize = OuterWarpSize>
85+
__attribute__((device))
86+
auto fun(void)
87+
-> typename std::enable_if<(FunWarpSize > __AMDGCN_WAVEFRONT_SIZE), void>::type
88+
{
89+
use(0, "nay!");
90+
}
91+
};
92+
93+
__attribute__((device))
94+
void device_fun_selector_user() {
95+
FunSelector<> f;
96+
f.fun<>();
97+
f.fun<1>();
98+
f.fun<1000>();
99+
100+
std::enable_if<(1 <= __AMDGCN_WAVEFRONT_SIZE), int>::type x = 42;
101+
}
102+
103+
__attribute__((device)) std::enable_if<(1 <= __AMDGCN_WAVEFRONT_SIZE), int>::type DeviceFunTemplateRet(void) {
104+
return 42;
105+
}
106+
107+
__attribute__((device)) int DeviceFunTemplateArg(std::enable_if<(1 <= __AMDGCN_WAVEFRONT_SIZE), int>::type x) {
108+
return x;
109+
}

0 commit comments

Comments
 (0)