llvm · rnk · Feb 3, 2025 · Nov 6, 2024 · Nov 6, 2024 · Jan 30, 2025
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
@@ -146,8 +146,13 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], Features = "avx" in
 // current formulation is based on what was easiest to recognize from the
 // pre-TableGen version.
 
-let Features = "mmx", Attributes = [NoThrow, Const] in {
-  def _mm_prefetch : X86NoPrefixBuiltin<"void(char const *, int)">;
+let Features = "mmx", Header = "immintrin.h", Attributes = [NoThrow, Const] in {
+  def _mm_prefetch : X86LibBuiltin<"void(char const *, int)">;
+}
+
+let Features = "mmx", Header = "intrin.h", Attributes = [NoThrow, Const] in {
+  def _m_prefetch : X86LibBuiltin<"void(void *)">; // prefetch for read
+  def _m_prefetchw : X86LibBuiltin<"void(void volatile const *)">; // prefetch for write
 }
 
 let Features = "sse", Attributes = [NoThrow] in {

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -15254,6 +15254,17 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
     return Builder.CreateCall(F, {Address, RW, Locality, Data});
   }
+  case X86::BI_m_prefetch:
+  case X86::BI_m_prefetchw: {
+    Value *Address = Ops[0];
+    // Both builtins share this lowering; only the 'w' variant requests write.
+    Value *RW =
+        ConstantInt::get(Int32Ty, BuiltinID == X86::BI_m_prefetchw ? 1 : 0);
+    Value *Locality = ConstantInt::get(Int32Ty, 0x3); // _MM_HINT_T0
+    Value *Data = ConstantInt::get(Int32Ty, 1);
+    Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType());
+    return Builder.CreateCall(F, {Address, RW, Locality, Data});
+  }
   case X86::BI_mm_clflush: {
     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
                               Ops[0]);

diff --git a/clang/lib/Headers/prfchwintrin.h b/clang/lib/Headers/prfchwintrin.h
@@ -14,6 +14,10 @@
 #ifndef __PRFCHWINTRIN_H
 #define __PRFCHWINTRIN_H
 
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
 /// Loads a memory sequence containing the specified memory address into
 ///    all data cache levels.
 ///
@@ -26,11 +30,7 @@
 ///
 /// \param __P
 ///    A pointer specifying the memory address to be prefetched.
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
-_m_prefetch(void *__P)
-{
-  __builtin_prefetch (__P, 0, 3 /* _MM_HINT_T0 */);
-}
+void _m_prefetch(void *__P);
 
 /// Loads a memory sequence containing the specified memory address into
 ///    the L1 data cache and sets the cache-coherency state to modified.
@@ -48,13 +48,10 @@ _m_prefetch(void *__P)
 ///
 /// \param __P
 ///    A pointer specifying the memory address to be prefetched.
-static __inline__ void __attribute__((__always_inline__, __nodebug__))
-_m_prefetchw(volatile const void *__P)
-{
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wcast-qual"
-  __builtin_prefetch ((const void*)__P, 1, 3 /* _MM_HINT_T0 */);
-#pragma clang diagnostic pop
-}
+void _m_prefetchw(volatile const void *__P);
+
+#if defined(__cplusplus)
+} // extern "C"
+#endif
 
 #endif /* __PRFCHWINTRIN_H */