[Clang][HIP] Warn when __AMDGCN_WAVEFRONT_SIZE is used in host code without relying on target-dependent overload resolution

ritter-x2a · ritter-x2a · commit 33d853eaa124 · 2024-09-23T08:54:07.000-04:00
The __AMDGCN_WAVEFRONT_SIZE and __AMDGCN_WAVEFRONT_SIZE__ macros in HIP can only provide meaningful values during device compilation. They are currently usable in host code, but only contain the default value of 64, independent of the target device(s). This patch checks for numeric literals in clearly identifiable host code if they are the result of expanding the wavefront-size macros and issues a diagnostic if that's the case. A alternative PR, llvm#91478, relied on constexpr functions with host and device overloads (where the host overload is marked as deprecated) to diagnose uses of these macros in host code. A problem with this approach are uses of the macros outside of function bodies, e.g., in template arguments of return types, or default template arguments of functions. In these cases, calls to functions with target overloads are resolved to the host variant during host compilation and to the device variant during device compilation - independently of the target of the function they belong to. Therefore, using the wavefront size macros in such cases leads to diagnostics during host compilation with llvm#91478, even if they are only associated to a device function. PR llvm#93546 is a proposal to suppress these spurious diagnostics. PR llvm#103031 is a proposal to change the behavior of target-dependent overload resolution outside of function bodies to use the target attributes that occur before the overloaded call to select the overload candidate. In contrast to llvm#91478, this PR will not diagnose uses of the wavefront-size macros outside of function bodies or initializers of global host variables. Implements SWDEV-449015.
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9109,6 +9109,8 @@ def warn_offload_incompatible_redeclare : Warning<
   "new declaration is %select{__device__|__global__|__host__|__host__ __device__}0 function, "
   "old declaration is %select{__device__|__global__|__host__|__host__ __device__}1 function">,
   InGroup<DiagGroup<"nvcc-compat">>, DefaultIgnore;
+def warn_ref_device_macro_on_host : Warning<
+  "device-specific macro %0 is not available in a %select{__device__|__global__|__host__|__host__ __device__}1 context">, InGroup<DiagGroup<"hip-wavefrontsize">>;
 
 def err_cuda_device_builtin_surftex_cls_template : Error<
     "illegal device builtin %select{surface|texture}0 reference "
diff --git a/clang/include/clang/Sema/SemaCUDA.h b/clang/include/clang/Sema/SemaCUDA.h
@@ -263,6 +263,10 @@ class SemaCUDA : public SemaBase {
   // for __constant__ and __device__ variables.
   void checkAllowedInitializer(VarDecl *VD);
 
+  /// Check if the token is part of a macro that is used outside of its allowed
+  /// compilation mode.
+  void checkTargetMacroUse(const Token &Tok);
+
   /// Check whether NewFD is a valid overload for CUDA. Emits
   /// diagnostics and invalidates NewFD if not.
   void checkTargetOverload(FunctionDecl *NewFD, const LookupResult &Previous);
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
@@ -703,6 +703,45 @@ void SemaCUDA::checkAllowedInitializer(VarDecl *VD) {
   }
 }
 
+void SemaCUDA::checkTargetMacroUse(const Token &Tok) {
+  assert(SemaRef.LangOpts.HIP);
+
+  // Currently, we check only for the AMDGCN_WAVEFRONT_SIZE macros, which should
+  // only be used in device compilation.
+  if (SemaRef.LangOpts.CUDAIsDevice)
+    return;
+
+  auto *FD = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
+  // If we are not in a FunctionDecl and we have no other meaningful way of
+  // determining the compilation mode, avoid potentially spurious warnings.
+  if (!FD && SemaRef.CUDA().CurCUDATargetCtx.Kind == SemaCUDA::CTCK_Unknown)
+    return;
+
+  auto Target = SemaRef.CUDA().IdentifyTarget(FD);
+  if (Target != CUDAFunctionTarget::HostDevice &&
+      Target != CUDAFunctionTarget::Host)
+    return;
+
+  const auto &Loc = Tok.getLocation();
+  if (!Loc.isMacroID())
+    return;
+
+  // Get the location of the innermost macro that contributed the token.
+  const auto &SM = SemaRef.getSourceManager();
+  const auto &IMCLoc = SM.getImmediateMacroCallerLoc(Loc);
+  const auto &SpellingLoc = SM.getSpellingLoc(IMCLoc);
+
+  SmallString<16> buffer;
+  auto MacroName = SemaRef.getPreprocessor().getSpelling(SpellingLoc, buffer);
+  if (MacroName == "__AMDGCN_WAVEFRONT_SIZE" ||
+      MacroName == "__AMDGCN_WAVEFRONT_SIZE__") {
+    // Only report the actual use of the macro, not its builtin definition.
+    auto UseLoc = SM.getExpansionLoc(Tok.getLocation());
+    SemaRef.Diag(UseLoc, diag::warn_ref_device_macro_on_host)
+        << MacroName << llvm::to_underlying(SemaRef.CUDA().CurrentTarget());
+  }
+}
+
 void SemaCUDA::RecordImplicitHostDeviceFuncUsedByDevice(
     const FunctionDecl *Callee) {
   FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
@@ -4079,6 +4079,9 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
         ResultVal = ResultVal.trunc(Width);
     }
     Res = IntegerLiteral::Create(Context, ResultVal, Ty, Tok.getLocation());
+
+    if (SemaRef.LangOpts.HIP)
+      SemaRef.CUDA().checkTargetMacroUse(Tok);
   }
 
   // If this is an imaginary literal, create the ImaginaryLiteral wrapper.
diff --git a/clang/test/Driver/hip-wavefront-size-host-diagnostics.hip b/clang/test/Driver/hip-wavefront-size-host-diagnostics.hip
@@ -0,0 +1,109 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang -xhip --offload-arch=gfx1030 --offload-host-only -pedantic -nogpuinc -nogpulib -nobuiltininc -fsyntax-only -Xclang -verify=onhost %s
+// RUN: %clang -xhip --offload-arch=gfx1030 --offload-device-only -pedantic -nogpuinc -nogpulib -nobuiltininc -fsyntax-only -Xclang -verify=ondevice %s
+
+// ondevice-no-diagnostics
+
+#include <type_traits>
+
+#define WRAPPED __AMDGCN_WAVEFRONT_SIZE__
+
+#define DOUBLE_WRAPPED (WRAPPED)
+
+__attribute__((host, device)) void use(int, const char*);
+
+template<int N> __attribute__((host, device)) int templatify(int x) {
+    return x + N;
+}
+
+// no warning expected
+#if defined(__HIP_DEVICE_COMPILE__) && (__AMDGCN_WAVEFRONT_SIZE__ == 64) && (__AMDGCN_WAVEFRONT_SIZE == 64)
+int foo(void);
+#endif
+
+// no warning expected
+__attribute__((device)) int device_var = __AMDGCN_WAVEFRONT_SIZE__;
+
+__attribute__((device))
+void device_fun() {
+    // no warnings expected
+    use(__AMDGCN_WAVEFRONT_SIZE, "device function");
+    use(__AMDGCN_WAVEFRONT_SIZE__, "device function");
+    use(WRAPPED, "device function");
+    use(DOUBLE_WRAPPED, "device function");
+    use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "device function");
+}
+
+__attribute__((global))
+void global_fun() {
+    // no warnings expected
+    use(__AMDGCN_WAVEFRONT_SIZE, "global function");
+    use(__AMDGCN_WAVEFRONT_SIZE__, "global function");
+    use(WRAPPED, "global function");
+    use(DOUBLE_WRAPPED, "global function");
+    use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "global function");
+}
+
+// warning expected
+int host_var = __AMDGCN_WAVEFRONT_SIZE__;  // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ context}}
+int host_var_alt = __AMDGCN_WAVEFRONT_SIZE;  // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE is not available in a __host__ context}}
+int host_var_wrapped = WRAPPED;  // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ context}}
+int host_var_double_wrapped = DOUBLE_WRAPPED;  // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ context}}
+
+__attribute__((host))
+void host_fun() {
+    // warnings expected
+    use(__AMDGCN_WAVEFRONT_SIZE, "host function");  // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE is not available in a __host__ context}}
+    use(__AMDGCN_WAVEFRONT_SIZE__, "host function");  // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ context}}
+    use(WRAPPED, "host function");  // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ context}}
+    use(DOUBLE_WRAPPED, "host function");  // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ context}}
+    use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "host function");  // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ context}}
+}
+
+__attribute((host, device))
+void host_device_fun() {
+    // warnings expected
+    use(__AMDGCN_WAVEFRONT_SIZE__, "host device function");  // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ __device__ context}}
+    use(WRAPPED, "host device function");  // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ __device__ context}}
+    use(DOUBLE_WRAPPED, "host device function");  // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ __device__ context}}
+    use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "host device function");  // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ __device__ context}}
+}
+
+// Variations of this construct are used in rocPRIM and should compile without diagnostics.
+template <unsigned int OuterWarpSize = __AMDGCN_WAVEFRONT_SIZE>
+class FunSelector {
+public:
+    template<unsigned int FunWarpSize = OuterWarpSize>
+    __attribute__((device))
+    auto fun(void)
+        -> typename std::enable_if<(FunWarpSize <= __AMDGCN_WAVEFRONT_SIZE), void>::type
+    {
+        use(1, "yay!");
+    }
+
+    template<unsigned int FunWarpSize = OuterWarpSize>
+    __attribute__((device))
+    auto fun(void)
+        -> typename std::enable_if<(FunWarpSize > __AMDGCN_WAVEFRONT_SIZE), void>::type
+    {
+        use(0, "nay!");
+    }
+};
+
+__attribute__((device))
+void device_fun_selector_user() {
+    FunSelector<> f;
+    f.fun<>();
+    f.fun<1>();
+    f.fun<1000>();
+
+    std::enable_if<(1 <= __AMDGCN_WAVEFRONT_SIZE), int>::type x = 42;
+}
+
+__attribute__((device)) std::enable_if<(1 <= __AMDGCN_WAVEFRONT_SIZE), int>::type DeviceFunTemplateRet(void) {
+    return 42;
+}
+
+__attribute__((device)) int DeviceFunTemplateArg(std::enable_if<(1 <= __AMDGCN_WAVEFRONT_SIZE), int>::type x) {
+    return x;
+}

Original file line number	Diff line number	Diff line change
`@@ -4079,6 +4079,9 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {`
`4079`	`4079`	`ResultVal = ResultVal.trunc(Width);`
`4080`	`4080`	`}`
`4081`	`4081`	`Res = IntegerLiteral::Create(Context, ResultVal, Ty, Tok.getLocation());`
	`4082`	`+`
	`4083`	`+ if (SemaRef.LangOpts.HIP)`
	`4084`	`+ SemaRef.CUDA().checkTargetMacroUse(Tok);`
`4082`	`4085`	`}`
`4083`	`4086`
`4084`	`4087`	`// If this is an imaginary literal, create the ImaginaryLiteral wrapper.`