diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index e4e04bff8b512..557d2803021f6 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9109,6 +9109,8 @@ def warn_offload_incompatible_redeclare : Warning<
   "new declaration is %select{__device__|__global__|__host__|__host__ __device__}0 function, "
   "old declaration is %select{__device__|__global__|__host__|__host__ __device__}1 function">,
   InGroup<...>, DefaultIgnore;
+def warn_ref_device_macro_on_host : Warning<
+  "device-specific macro %0 is not available in a %select{__device__|__global__|__host__|__host__ __device__}1 context">, InGroup<...>;
 def err_cuda_device_builtin_surftex_cls_template : Error<
   "illegal device builtin %select{surface|texture}0 reference "
diff --git a/clang/include/clang/Sema/SemaCUDA.h b/clang/include/clang/Sema/SemaCUDA.h
index 71f05e88fb539..80b8dc24664b6 100644
--- a/clang/include/clang/Sema/SemaCUDA.h
+++ b/clang/include/clang/Sema/SemaCUDA.h
@@ -263,6 +263,10 @@ class SemaCUDA : public SemaBase {
   // for __constant__ and __device__ variables.
   void checkAllowedInitializer(VarDecl *VD);
 
+  /// Check if the token is part of a macro that is used outside of its allowed
+  /// compilation mode.
+  void checkTargetMacroUse(const Token &Tok);
+
   /// Check whether NewFD is a valid overload for CUDA. Emits
   /// diagnostics and invalidates NewFD if not.
   void checkTargetOverload(FunctionDecl *NewFD, const LookupResult &Previous);
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index fbb3de4b3e416..b09319bbd894d 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -703,6 +703,45 @@ void SemaCUDA::checkAllowedInitializer(VarDecl *VD) {
   }
 }
 
+void SemaCUDA::checkTargetMacroUse(const Token &Tok) {
+  assert(SemaRef.LangOpts.HIP);
+
+  // Currently, we check only for the AMDGCN_WAVEFRONT_SIZE macros, which should
+  // only be used in device compilation.
+  if (SemaRef.LangOpts.CUDAIsDevice)
+    return;
+
+  auto *FD = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
+  // If we are not in a FunctionDecl and we have no other meaningful way of
+  // determining the compilation mode, avoid potentially spurious warnings.
+  if (!FD && SemaRef.CUDA().CurCUDATargetCtx.Kind == SemaCUDA::CTCK_Unknown)
+    return;
+
+  auto Target = SemaRef.CUDA().IdentifyTarget(FD);
+  if (Target != CUDAFunctionTarget::HostDevice &&
+      Target != CUDAFunctionTarget::Host)
+    return;
+
+  const auto &Loc = Tok.getLocation();
+  if (!Loc.isMacroID())
+    return;
+
+  // Get the location of the innermost macro that contributed the token.
+  const auto &SM = SemaRef.getSourceManager();
+  const auto &IMCLoc = SM.getImmediateMacroCallerLoc(Loc);
+  const auto &SpellingLoc = SM.getSpellingLoc(IMCLoc);
+
+  SmallString<16> buffer;
+  auto MacroName = SemaRef.getPreprocessor().getSpelling(SpellingLoc, buffer);
+  if (MacroName == "__AMDGCN_WAVEFRONT_SIZE" ||
+      MacroName == "__AMDGCN_WAVEFRONT_SIZE__") {
+    // Only report the actual use of the macro, not its builtin definition.
+    auto UseLoc = SM.getExpansionLoc(Tok.getLocation());
+    SemaRef.Diag(UseLoc, diag::warn_ref_device_macro_on_host)
+        << MacroName << llvm::to_underlying(SemaRef.CUDA().CurrentTarget());
+  }
+}
+
 void SemaCUDA::RecordImplicitHostDeviceFuncUsedByDevice(
     const FunctionDecl *Callee) {
   FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true);
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 66df9c969256a..4c7178fb8f520 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -4079,6 +4079,9 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
       ResultVal = ResultVal.trunc(Width);
     }
     Res = IntegerLiteral::Create(Context, ResultVal, Ty, Tok.getLocation());
+
+    if (getLangOpts().HIP)
+      CUDA().checkTargetMacroUse(Tok);
   }
 
   // If this is an imaginary literal, create the ImaginaryLiteral wrapper.
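Reviewer note (not part of the patch): how the macro is identified through wrapper macros. The literal token reaches Sema through a stack of expansions; for

  #define WRAPPED __AMDGCN_WAVEFRONT_SIZE__
  int x = WRAPPED;

Tok.getLocation() points into the expansion of __AMDGCN_WAVEFRONT_SIZE__ itself, so it is a macro ID. getImmediateMacroCallerLoc() steps one level up the expansion stack to where that innermost macro was named (here, inside the definition of WRAPPED), and getSpellingLoc() resolves that to the characters as written, so getSpelling() returns "__AMDGCN_WAVEFRONT_SIZE__" and the name check matches. getExpansionLoc() then points the diagnostic at the use site rather than at the <built-in> definition, which is why the WRAPPED and DOUBLE_WRAPPED cases in the test below report the inner macro's name.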
diff --git a/clang/test/Driver/hip-wavefront-size-host-diagnostics.hip b/clang/test/Driver/hip-wavefront-size-host-diagnostics.hip
new file mode 100644
index 0000000000000..3bde9730ccb0d
--- /dev/null
+++ b/clang/test/Driver/hip-wavefront-size-host-diagnostics.hip
@@ -0,0 +1,109 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang -xhip --offload-arch=gfx1030 --offload-host-only -pedantic -nogpuinc -nogpulib -nobuiltininc -fsyntax-only -Xclang -verify=onhost %s
+// RUN: %clang -xhip --offload-arch=gfx1030 --offload-device-only -pedantic -nogpuinc -nogpulib -nobuiltininc -fsyntax-only -Xclang -verify=ondevice %s
+
+// ondevice-no-diagnostics
+
+#include <type_traits>
+
+#define WRAPPED __AMDGCN_WAVEFRONT_SIZE__
+
+#define DOUBLE_WRAPPED (WRAPPED)
+
+__attribute__((host, device)) void use(int, const char*);
+
+template <int N> __attribute__((host, device)) int templatify(int x) {
+  return x + N;
+}
+
+// no warning expected
+#if defined(__HIP_DEVICE_COMPILE__) && (__AMDGCN_WAVEFRONT_SIZE__ == 64) && (__AMDGCN_WAVEFRONT_SIZE == 64)
+int foo(void);
+#endif
+
+// no warning expected
+__attribute__((device)) int device_var = __AMDGCN_WAVEFRONT_SIZE__;
+
+__attribute__((device))
+void device_fun() {
+  // no warnings expected
+  use(__AMDGCN_WAVEFRONT_SIZE, "device function");
+  use(__AMDGCN_WAVEFRONT_SIZE__, "device function");
+  use(WRAPPED, "device function");
+  use(DOUBLE_WRAPPED, "device function");
+  use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "device function");
+}
+
+__attribute__((global))
+void global_fun() {
+  // no warnings expected
+  use(__AMDGCN_WAVEFRONT_SIZE, "global function");
+  use(__AMDGCN_WAVEFRONT_SIZE__, "global function");
+  use(WRAPPED, "global function");
+  use(DOUBLE_WRAPPED, "global function");
+  use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "global function");
+}
+
+// warnings expected
+int host_var = __AMDGCN_WAVEFRONT_SIZE__; // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ context}}
+int host_var_alt = __AMDGCN_WAVEFRONT_SIZE; // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE is not available in a __host__ context}}
+int host_var_wrapped = WRAPPED; // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ context}}
+int host_var_double_wrapped = DOUBLE_WRAPPED; // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ context}}
+
+__attribute__((host))
+void host_fun() {
+  // warnings expected
+  use(__AMDGCN_WAVEFRONT_SIZE, "host function"); // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE is not available in a __host__ context}}
+  use(__AMDGCN_WAVEFRONT_SIZE__, "host function"); // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ context}}
+  use(WRAPPED, "host function"); // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ context}}
+  use(DOUBLE_WRAPPED, "host function"); // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ context}}
+  use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "host function"); // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ context}}
+}
+
+__attribute__((host, device))
+void host_device_fun() {
+  // warnings expected
+  use(__AMDGCN_WAVEFRONT_SIZE__, "host device function"); // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ __device__ context}}
+  use(WRAPPED, "host device function"); // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ __device__ context}}
+  use(DOUBLE_WRAPPED, "host device function"); // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ __device__ context}}
+  use(templatify<__AMDGCN_WAVEFRONT_SIZE__>(42), "host device function"); // onhost-warning {{device-specific macro __AMDGCN_WAVEFRONT_SIZE__ is not available in a __host__ __device__ context}}
+}
+
+// Variations of this construct are used in rocPRIM and should compile without diagnostics.
+template <unsigned int OuterWarpSize = __AMDGCN_WAVEFRONT_SIZE>
+class FunSelector {
+public:
+  template <unsigned int FunWarpSize = OuterWarpSize>
+  __attribute__((device))
+  auto fun(void)
+      -> typename std::enable_if<(FunWarpSize <= __AMDGCN_WAVEFRONT_SIZE), void>::type
+  {
+    use(1, "yay!");
+  }
+
+  template <unsigned int FunWarpSize = OuterWarpSize>
+  __attribute__((device))
+  auto fun(void)
+      -> typename std::enable_if<(FunWarpSize > __AMDGCN_WAVEFRONT_SIZE), void>::type
+  {
+    use(0, "nay!");
+  }
+};
+
+__attribute__((device))
+void device_fun_selector_user() {
+  FunSelector<> f;
+  f.fun<>();
+  f.fun<1>();
+  f.fun<1000>();
+
+  std::enable_if<(1 <= __AMDGCN_WAVEFRONT_SIZE), int>::type x = 42;
+}
+
+__attribute__((device)) std::enable_if<(1 <= __AMDGCN_WAVEFRONT_SIZE), int>::type DeviceFunTemplateRet(void) {
+  return 42;
+}
+
+__attribute__((device)) int DeviceFunTemplateArg(std::enable_if<(1 <= __AMDGCN_WAVEFRONT_SIZE), int>::type x) {
+  return x;
+}
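To illustrate the intended workflow (not part of the patch): code that bakes __AMDGCN_WAVEFRONT_SIZE__ into host logic now warns in host compilation, since the host-side value of the macro need not match the GPU the program eventually runs on. Below is a minimal host-safe sketch using the standard HIP runtime query (hipGetDeviceProperties and hipDeviceProp_t::warpSize); the helper name wavefrontSizeOf is invented for this example.

#include <hip/hip_runtime.h>
#include <cstdio>

// With this patch, host compilation warns on the next line: the macro
// describes a device compilation target, not the GPU present at run time.
int host_tile = 2 * __AMDGCN_WAVEFRONT_SIZE__;

// Host-safe alternative: ask the HIP runtime for the device's wavefront size.
int wavefrontSizeOf(int device) {
  hipDeviceProp_t props;
  if (hipGetDeviceProperties(&props, device) != hipSuccess)
    return -1; // error convention invented for this sketch
  return props.warpSize;
}

int main() {
  std::printf("wavefront size of device 0: %d\n", wavefrontSizeOf(0));
  return 0;
}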