[PowerPC] Add initial support for AMO load builtins #168746

maryammo · 2025-11-19T18:16:40Z

This commit adds two Clang builtins for PowerPC AMO load operations:

__builtin_amo_lwat for 32-bit unsigned operations
__builtin_amo_ldat for 64-bit unsigned operations

Also adds an amo.h header that maps GCC's AMO functions to these Clang builtins for compatibility.

This commit adds two Clang builtins for PowerPC AMO load operations: - __builtin_amo_lwat for 32-bit unsigned operations - __builtin_amo_ldat for 64-bit unsigned operations Also adds an amo.h header that maps GCC's AMO functions to these Clang builtins for compatibility.

llvmbot · 2025-11-19T18:17:04Z

@llvm/pr-subscribers-backend-x86
@llvm/pr-subscribers-backend-powerpc

@llvm/pr-subscribers-clang

Author: Maryam Moghadas (maryammo)

Changes

This commit adds two Clang builtins for PowerPC AMO load operations:

__builtin_amo_lwat for 32-bit unsigned operations
__builtin_amo_ldat for 64-bit unsigned operations
Also adds an amo.h header that maps GCC's AMO functions to these Clang builtins for compatibility.

Patch is 24.41 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168746.diff

14 Files Affected:

(modified) clang/include/clang/Basic/BuiltinsPPC.def (+4)
(modified) clang/lib/Headers/CMakeLists.txt (+1)
(added) clang/lib/Headers/amo.h (+97)
(modified) clang/lib/Sema/SemaPPC.cpp (+15)
(added) clang/test/CodeGen/PowerPC/builtins-amo-err.c (+18)
(added) clang/test/CodeGen/PowerPC/builtins-ppc-amo.c (+58)
(added) clang/test/CodeGen/PowerPC/ppc-amo-header.c (+91)
(modified) llvm/include/llvm/IR/IntrinsicsPowerPC.td (+12)
(modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+64-2)
(modified) llvm/lib/Target/PowerPC/PPCISelLowering.h (+1)
(modified) llvm/lib/Target/PowerPC/PPCInstr64Bit.td (+10-3)
(modified) llvm/lib/Target/PowerPC/PPCInstrInfo.td (+11-4)
(modified) llvm/lib/Target/PowerPC/PPCRegisterInfo.td (+1-1)
(added) llvm/test/CodeGen/PowerPC/amo-enable.ll (+51)

diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index cf8bdd2a429df..216b5fdb69ff7 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -1001,6 +1001,10 @@ TARGET_BUILTIN(__builtin_darn_32, "i", "", "isa-v30-instructions")
 TARGET_BUILTIN(__builtin_unpack_vector_int128, "ULLiV1LLLii", "", "vsx")
 TARGET_BUILTIN(__builtin_pack_vector_int128, "V1LLLiULLiULLi", "", "vsx")
 
+// AMO builtins
+TARGET_BUILTIN(__builtin_amo_lwat, "UiUi*UiIi", "", "isa-v30-instructions")
+TARGET_BUILTIN(__builtin_amo_ldat, "ULiULi*ULiIi", "", "isa-v30-instructions")
+
 // Set the floating point rounding mode
 BUILTIN(__builtin_setrnd, "di", "")
 
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 33fff7645df65..038859a513eb8 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -119,6 +119,7 @@ set(opencl_files
 
 set(ppc_files
   altivec.h
+  amo.h
   )
 
 set(ppc_htm_files
diff --git a/clang/lib/Headers/amo.h b/clang/lib/Headers/amo.h
new file mode 100644
index 0000000000000..fda2984b97626
--- /dev/null
+++ b/clang/lib/Headers/amo.h
@@ -0,0 +1,97 @@
+/*===---- amo.h - PowerPC Atomic Memory Operations ------------------------===*\
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+\*===----------------------------------------------------------------------===*/
+
+/* This header provides compatibility wrappers for GCC's AMO functions.
+ * The functions here call Clang's underlying AMO builtins.
+ */
+
+#ifndef _AMO_H
+#define _AMO_H
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* AMO Load Operation Codes (FC values) */
+enum {
+  _AMO_LD_ADD = 0x00,  /* Fetch and Add */
+  _AMO_LD_XOR = 0x01,  /* Fetch and XOR */
+  _AMO_LD_IOR = 0x02,  /* Fetch and OR */
+  _AMO_LD_AND = 0x03,  /* Fetch and AND */
+  _AMO_LD_UMAX = 0x04, /* Fetch and Maximum Unsigned */
+  _AMO_LD_SMAX = 0x05, /* Fetch and Maximum Signed */
+  _AMO_LD_UMIN = 0x06, /* Fetch and Minimum Unsigned */
+  _AMO_LD_SMIN = 0x07, /* Fetch and Minimum Signed */
+  _AMO_LD_SWAP = 0x08  /* Swap */
+};
+
+/* 32-bit unsigned AMO load operations */
+static inline uint32_t amo_lwat_add(uint32_t *ptr, uint32_t val) {
+  return __builtin_amo_lwat(ptr, val, _AMO_LD_ADD);
+}
+
+static inline uint32_t amo_lwat_xor(uint32_t *ptr, uint32_t val) {
+  return __builtin_amo_lwat(ptr, val, _AMO_LD_XOR);
+}
+
+static inline uint32_t amo_lwat_ior(uint32_t *ptr, uint32_t val) {
+  return __builtin_amo_lwat(ptr, val, _AMO_LD_IOR);
+}
+
+static inline uint32_t amo_lwat_and(uint32_t *ptr, uint32_t val) {
+  return __builtin_amo_lwat(ptr, val, _AMO_LD_AND);
+}
+
+static inline uint32_t amo_lwat_umax(uint32_t *ptr, uint32_t val) {
+  return __builtin_amo_lwat(ptr, val, _AMO_LD_UMAX);
+}
+
+static inline uint32_t amo_lwat_umin(uint32_t *ptr, uint32_t val) {
+  return __builtin_amo_lwat(ptr, val, _AMO_LD_UMIN);
+}
+
+static inline uint32_t amo_lwat_swap(uint32_t *ptr, uint32_t val) {
+  return __builtin_amo_lwat(ptr, val, _AMO_LD_SWAP);
+}
+
+/* 64-bit unsigned AMO load operations */
+static inline uint64_t amo_ldat_add(uint64_t *ptr, uint64_t val) {
+  return __builtin_amo_ldat(ptr, val, _AMO_LD_ADD);
+}
+
+static inline uint64_t amo_ldat_xor(uint64_t *ptr, uint64_t val) {
+  return __builtin_amo_ldat(ptr, val, _AMO_LD_XOR);
+}
+
+static inline uint64_t amo_ldat_ior(uint64_t *ptr, uint64_t val) {
+  return __builtin_amo_ldat(ptr, val, _AMO_LD_IOR);
+}
+
+static inline uint64_t amo_ldat_and(uint64_t *ptr, uint64_t val) {
+  return __builtin_amo_ldat(ptr, val, _AMO_LD_AND);
+}
+
+static inline uint64_t amo_ldat_umax(uint64_t *ptr, uint64_t val) {
+  return __builtin_amo_ldat(ptr, val, _AMO_LD_UMAX);
+}
+
+static inline uint64_t amo_ldat_umin(uint64_t *ptr, uint64_t val) {
+  return __builtin_amo_ldat(ptr, val, _AMO_LD_UMIN);
+}
+
+static inline uint64_t amo_ldat_swap(uint64_t *ptr, uint64_t val) {
+  return __builtin_amo_ldat(ptr, val, _AMO_LD_SWAP);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _AMO_H */
diff --git a/clang/lib/Sema/SemaPPC.cpp b/clang/lib/Sema/SemaPPC.cpp
index bfa458d207b46..3d6615b6b5395 100644
--- a/clang/lib/Sema/SemaPPC.cpp
+++ b/clang/lib/Sema/SemaPPC.cpp
@@ -87,6 +87,8 @@ static bool isPPC_64Builtin(unsigned BuiltinID) {
   case PPC::BI__builtin_ppc_fetch_and_andlp:
   case PPC::BI__builtin_ppc_fetch_and_orlp:
   case PPC::BI__builtin_ppc_fetch_and_swaplp:
+  case PPC::BI__builtin_amo_lwat:
+  case PPC::BI__builtin_amo_ldat:
     return true;
   }
   return false;
@@ -253,6 +255,19 @@ bool SemaPPC::CheckPPCBuiltinFunctionCall(const TargetInfo &TI,
   case PPC::BI__builtin_##Name:                                                \
     return BuiltinPPCMMACall(TheCall, BuiltinID, Types);
 #include "clang/Basic/BuiltinsPPC.def"
+  case PPC::BI__builtin_amo_lwat:
+  case PPC::BI__builtin_amo_ldat: {
+    llvm::APSInt Result;
+    if (SemaRef.BuiltinConstantArg(TheCall, 2, Result))
+      return true;
+    unsigned Val = Result.getZExtValue();
+    static constexpr unsigned ValidFC[] = {0, 1, 2, 3, 4, 6, 8};
+    if (llvm::is_contained(ValidFC, Val))
+      return false;
+    Expr *Arg = TheCall->getArg(2);
+    return SemaRef.Diag(Arg->getBeginLoc(), diag::err_argument_invalid_range)
+           << toString(Result, 10) << "0-4, 6" << "8" << Arg->getSourceRange();
+  }
   }
   llvm_unreachable("must return from switch");
 }
diff --git a/clang/test/CodeGen/PowerPC/builtins-amo-err.c b/clang/test/CodeGen/PowerPC/builtins-amo-err.c
new file mode 100644
index 0000000000000..cdc14ef7f7e04
--- /dev/null
+++ b/clang/test/CodeGen/PowerPC/builtins-amo-err.c
@@ -0,0 +1,18 @@
+// RUN: not %clang_cc1 -triple powerpc-ibm-aix -target-cpu pwr9 \
+// RUN:   -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=AIX32-ERROR
+// RUN: not %clang_cc1 -triple powerpc64-ibm-aix -target-cpu pwr9 \
+// RUN:   -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=FC-ERROR
+
+void test_amo() {
+  unsigned int *ptr1, value1;
+  // AIX32-ERROR: error: this builtin is only available on 64-bit targets
+  __builtin_amo_lwat(ptr1, value1, 0);
+  // FC-ERROR: argument value 9 is outside the valid range [0-4, 6, 8]
+  __builtin_amo_lwat(ptr1, value1, 9);
+
+  unsigned long int *ptr2, value2;
+  // AIX32-ERROR: error: this builtin is only available on 64-bit targets
+  __builtin_amo_ldat(ptr2, value2, 3);
+  // FC-ERROR: error: argument value 26 is outside the valid range [0-4, 6, 8]
+  __builtin_amo_ldat(ptr2, value2, 26);
+}
diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-amo.c b/clang/test/CodeGen/PowerPC/builtins-ppc-amo.c
new file mode 100644
index 0000000000000..2975b99786869
--- /dev/null
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-amo.c
@@ -0,0 +1,58 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -O3 -triple powerpc64le-unknown-unknown -target-cpu pwr9 \
+// RUN:  -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -O3 -triple powerpc64-ibm-aix -target-cpu pwr9 \
+// RUN: -emit-llvm %s -o - | FileCheck %s --check-prefix=AIX
+
+// CHECK-LABEL: define dso_local void @test_unsigned_lwat(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], i32 noundef zeroext [[VALUE:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ppc.amo.lwat(ptr [[PTR]], i32 [[VALUE]], i32 0)
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[RESP]], align 4, !tbaa [[INT_TBAA2:![0-9]+]]
+// CHECK-NEXT:    ret void
+//
+// AIX-LABEL: define void @test_unsigned_lwat(
+// AIX-SAME: ptr noundef [[PTR:%.*]], i32 noundef zeroext [[VALUE:%.*]], ptr noundef writeonly captures(none) initializes((0, 4)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// AIX-NEXT:  [[ENTRY:.*:]]
+// AIX-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.ppc.amo.lwat(ptr [[PTR]], i32 [[VALUE]], i32 0)
+// AIX-NEXT:    store i32 [[TMP0]], ptr [[RESP]], align 4, !tbaa [[INT_TBAA2:![0-9]+]]
+// AIX-NEXT:    ret void
+//
+void test_unsigned_lwat(unsigned int *ptr, unsigned int value, unsigned int * resp) {
+  unsigned int res = __builtin_amo_lwat(ptr, value, 0);
+  *resp = res;
+}
+
+// CHECK-LABEL: define dso_local void @test_unsigned_ldat(
+// CHECK-SAME: ptr noundef [[PTR:%.*]], i64 noundef [[VALUE:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ppc.amo.ldat(ptr [[PTR]], i64 [[VALUE]], i32 3)
+// CHECK-NEXT:    store i64 [[TMP0]], ptr [[RESP]], align 8, !tbaa [[LONG_TBAA6:![0-9]+]]
+// CHECK-NEXT:    ret void
+//
+// AIX-LABEL: define void @test_unsigned_ldat(
+// AIX-SAME: ptr noundef [[PTR:%.*]], i64 noundef [[VALUE:%.*]], ptr noundef writeonly captures(none) initializes((0, 8)) [[RESP:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// AIX-NEXT:  [[ENTRY:.*:]]
+// AIX-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.ppc.amo.ldat(ptr [[PTR]], i64 [[VALUE]], i32 3)
+// AIX-NEXT:    store i64 [[TMP0]], ptr [[RESP]], align 8, !tbaa [[LONG_TBAA6:![0-9]+]]
+// AIX-NEXT:    ret void
+//
+void test_unsigned_ldat(unsigned long int *ptr, unsigned long int value, unsigned long int * resp) {
+  unsigned long int res = __builtin_amo_ldat(ptr, value, 3);
+  *resp = res;
+}
+//.
+// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[LONG_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// CHECK: [[META7]] = !{!"long", [[META4]], i64 0}
+//.
+// AIX: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// AIX: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0}
+// AIX: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// AIX: [[META5]] = !{!"Simple C/C++ TBAA"}
+// AIX: [[LONG_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0}
+// AIX: [[META7]] = !{!"long", [[META4]], i64 0}
+//.
diff --git a/clang/test/CodeGen/PowerPC/ppc-amo-header.c b/clang/test/CodeGen/PowerPC/ppc-amo-header.c
new file mode 100644
index 0000000000000..f544cdef1e7d0
--- /dev/null
+++ b/clang/test/CodeGen/PowerPC/ppc-amo-header.c
@@ -0,0 +1,91 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -target-cpu pwr9 \
+// RUN:   -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple powerpc64-ibm-aix -target-cpu pwr9 \
+// RUN:   -emit-llvm %s -o - | FileCheck %s
+
+#include <amo.h>
+
+uint32_t test_lwat_add(uint32_t *ptr, uint32_t val) {
+  // CHECK-LABEL: @test_lwat_add
+  // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 0)
+  return amo_lwat_add(ptr, val);
+}
+
+uint32_t test_lwat_xor(uint32_t *ptr, uint32_t val) {
+  // CHECK-LABEL: @test_lwat_xor
+  // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 1)
+  return amo_lwat_xor(ptr, val);
+}
+
+uint32_t test_lwat_ior(uint32_t *ptr, uint32_t val) {
+  // CHECK-LABEL: @test_lwat_ior
+  // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 2)
+  return amo_lwat_ior(ptr, val);
+}
+
+uint32_t test_lwat_and(uint32_t *ptr, uint32_t val) {
+  // CHECK-LABEL: @test_lwat_and
+  // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 3)
+  return amo_lwat_and(ptr, val);
+}
+
+uint32_t test_lwat_umax(uint32_t *ptr, uint32_t val) {
+  // CHECK-LABEL: @test_lwat_umax
+  // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 4)
+  return amo_lwat_umax(ptr, val);
+}
+
+uint32_t test_lwat_umin(uint32_t *ptr, uint32_t val) {
+  // CHECK-LABEL: @test_lwat_umin
+  // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 6)
+  return amo_lwat_umin(ptr, val);
+}
+
+uint32_t test_lwat_swap(uint32_t *ptr, uint32_t val) {
+  // CHECK-LABEL: @test_lwat_swap
+  // CHECK: call i32 @llvm.ppc.amo.lwat(ptr %{{.*}}, i32 %{{.*}}, i32 8)
+  return amo_lwat_swap(ptr, val);
+}
+
+uint64_t test_ldat_add(uint64_t *ptr, uint64_t val) {
+  // CHECK-LABEL: @test_ldat_add
+  // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 0)
+  return amo_ldat_add(ptr, val);
+}
+
+uint64_t test_ldat_xor(uint64_t *ptr, uint64_t val) {
+  // CHECK-LABEL: @test_ldat_xor
+  // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 1)
+  return amo_ldat_xor(ptr, val);
+}
+
+uint64_t test_ldat_ior(uint64_t *ptr, uint64_t val) {
+  // CHECK-LABEL: @test_ldat_ior
+  // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 2)
+  return amo_ldat_ior(ptr, val);
+}
+
+uint64_t test_ldat_and(uint64_t *ptr, uint64_t val) {
+  // CHECK-LABEL: @test_ldat_and
+  // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 3)
+  return amo_ldat_and(ptr, val);
+}
+
+uint64_t test_ldat_umax(uint64_t *ptr, uint64_t val) {
+  // CHECK-LABEL: @test_ldat_umax
+  // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 4)
+  return amo_ldat_umax(ptr, val);
+}
+
+uint64_t test_ldat_umin(uint64_t *ptr, uint64_t val) {
+  // CHECK-LABEL: @test_ldat_umin
+  // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 6)
+  return amo_ldat_umin(ptr, val);
+}
+
+uint64_t test_ldat_swap(uint64_t *ptr, uint64_t val) {
+  // CHECK-LABEL: @test_ldat_swap
+  // CHECK: call i64 @llvm.ppc.amo.ldat(ptr %{{.*}}, i64 %{{.*}}, i32 8)
+  return amo_ldat_swap(ptr, val);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 3907e864bed1e..446113c4670dd 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -2139,3 +2139,15 @@ let TargetPrefix = "ppc" in {
     Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty],
               [IntrArgMemOnly, IntrWriteMem, NoCapture<ArgIndex<2>>]>;
 }
+
+// AMO intrisics
+let TargetPrefix = "ppc" in {
+ def int_ppc_amo_lwat : ClangBuiltin<"__builtin_amo_lwat">,
+    DefaultAttrsIntrinsic<[llvm_i32_ty],[llvm_ptr_ty,
+                           llvm_i32_ty, llvm_i32_ty],
+                          [IntrArgMemOnly, ImmArg<ArgIndex<2>>]>;
+ def int_ppc_amo_ldat : ClangBuiltin<"__builtin_amo_ldat">,
+    DefaultAttrsIntrinsic<[llvm_i64_ty],[llvm_ptr_ty,
+                           llvm_i64_ty, llvm_i32_ty],
+                          [IntrArgMemOnly, ImmArg<ArgIndex<2>>]>;
+}
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index dd233e236e17f..ec5e61b724d87 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -677,6 +677,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
 
   // To handle counter-based loop conditions.
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
+  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
 
   setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
   setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
@@ -11633,6 +11634,29 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   return Flags;
 }
 
+SDValue PPCTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  unsigned IntrinsicID = Op.getConstantOperandVal(1);
+
+  SDLoc dl(Op);
+  switch (IntrinsicID) {
+  case Intrinsic::ppc_amo_lwat:
+  case Intrinsic::ppc_amo_ldat:
+    SDValue Ptr = Op.getOperand(2);
+    SDValue Val1 = Op.getOperand(3);
+    SDValue FC = Op.getOperand(4);
+    SDValue Ops[] = {Ptr, Val1, FC};
+    bool IsLwat = IntrinsicID == Intrinsic::ppc_amo_lwat;
+    unsigned Opcode = IsLwat ? PPC::LWAT_PSEUDO : PPC::LDAT_PSEUDO;
+    MachineSDNode *MNode = DAG.getMachineNode(
+        Opcode, dl, {IsLwat ? MVT::i32 : MVT::i64, MVT::Other}, Ops);
+    SDValue Result = SDValue(MNode, 0);
+    SDValue OutChain = SDValue(MNode, 1);
+    return DAG.getMergeValues({Result, OutChain}, dl);
+  }
+  return SDValue();
+}
+
 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                                SelectionDAG &DAG) const {
   // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
@@ -12803,8 +12827,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return LowerFP_ROUND(Op, DAG);
   case ISD::ROTL:               return LowerROTL(Op, DAG);
 
-  // For counter-based loop handling.
-  case ISD::INTRINSIC_W_CHAIN:  return SDValue();
+  // For counter-based loop handling, and amo load.
+  case ISD::INTRINSIC_W_CHAIN:
+    return LowerINTRINSIC_W_CHAIN(Op, DAG);
 
   case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
 
@@ -14715,6 +14740,43 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
         .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
         .addImm(0)
         .addReg(Ptr);
+  } else if (MI.getOpcode() == PPC::LWAT_PSEUDO ||
+             MI.getOpcode() == PPC::LDAT_PSEUDO) {
+    DebugLoc DL = MI.getDebugLoc();
+    Register DstReg = MI.getOperand(0).getReg();
+    Register PtrReg = MI.getOperand(1).getReg();
+    Register ValReg = MI.getOperand(2).getReg();
+    unsigned FC = MI.getOperand(3).getImm();
+    bool IsLwat = MI.getOpcode() == PPC::LWAT_PSEUDO;
+    Register Val64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
+    if (IsLwat)
+      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::SUBREG_TO_REG), Val64)
+          .addImm(0)
+          .addReg(ValReg)
+          .addImm(PPC::sub_32);
+    else
+      Val64 = ValReg;
+    Register Pair = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
+    BuildMI(*BB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Pair);
+    Register PairWithVal = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
+    BuildMI(*BB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), PairWithVal)
+        .addReg(Pair)
+        .addReg(Val64)
+        .addImm(PPC::sub_gp8_x1);
+    Register PairResult = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
+    BuildMI(*BB, MI, DL, TII->get(IsLwat ? PPC::LWAT : PPC::LDAT), PairResult)
+        .addReg(PairWithVal)
+        .addReg(PtrReg)
+        .addImm(FC);
+    Register Result64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
+    BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), Result64)
+        .addReg(PairResult, 0, PPC::sub_gp8_x0);
+    if (IsLwat)
+      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
+          .addReg(Result64, 0, PPC::sub_32);
+    else
+      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
+          .addReg(Result64);
   } else {
     llvm_unreachable("Unexpected instr type to insert");
   }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index d967018982734..839f797e2dfed 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1312,6 +1312,7 @@ namespace llvm {
                        EVT VT, SDValue V1, SDValue V2) const;
     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index fdca5ebc854ba..462535601e05e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -327,12 +327,19 @@ def LQARXL : XForm_1<31, 276, (outs g8prc:$RST), (ins (memrr $RA, $RB):$addr),
                      "lqarx $RST, $addr, 1", IIC_LdStLQARX, []>,
                      isPPC64, isRecordForm;
 
-let hasExtraDefRegAllocReq = 1 in
-def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8rc:$RST), (ins g8rc:$RA, u5imm:$RB),
+let hasExtraDefRegAllocReq = 1, mayStore = 1 in
+def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8prc:$RST), (ins g8prc:$RSTi, ptr_rc_nor0:$RA, u5imm:$RB),
                          "ldat $RST, $RA, $RB", IIC...
[truncated]

github-actions · 2025-11-19T19:15:44Z

🐧 Linux x64 Test Results

193538 tests passed
6238 tests skipped

✅ The build succeeded and all tests passed.

RolandF77 · 2025-11-26T19:59:38Z

clang/lib/Headers/amo.h

+ *
+\*===----------------------------------------------------------------------===*/
+
+/* This header provides compatibility wrappers for GCC's AMO functions.


I think you can just say provides compatibility with GCC AMO functions.

RolandF77 · 2025-11-26T23:07:40Z

llvm/lib/Target/PowerPC/PPCInstrInfo.td

-let hasExtraSrcRegAllocReq = 1 in
-def LWAT : X_RD5_RS5_IM5<31, 582, (outs gprc:$RST), (ins gprc:$RA, u5imm:$RB),
+let hasExtraSrcRegAllocReq = 1, mayLoad = 1, mayStore = 1 in
+def LWAT : X_RD5_RS5_IM5<31, 582, (outs g8prc:$RST), (ins g8prc:$RSTi, ptr_rc_nor0:$RA, u5imm:$RB),


Can we have a PPCISD node for this so there is type checking?

I added patterns for intrinsics to get matched to pseudo opcodes, so TableGen checks the types.

This instruction is now changed to take g8prc but technically this is also valid for gprc. Maybe a codegen only version should be generated for this in the 64bit Instr td file?

RolandF77 · 2025-11-26T23:10:36Z

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

+    Register Pair = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
+    BuildMI(*BB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Pair);
+    Register PairWithVal = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
+    BuildMI(*BB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), PairWithVal)


We are encouraged to use REG_SEQUENCE for this type of thing.

RolandF77

LGTM

llvm-ci · 2025-12-03T23:47:37Z

LLVM Buildbot has detected a new failure on builder mlir-nvidia running on mlir-nvidia while building clang,llvm at step 7 "test-build-check-mlir-build-only-check-mlir".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/138/builds/22657

Here is the relevant piece of the build log for the reference

Step 7 (test-build-check-mlir-build-only-check-mlir) failure: test (failure)
******************** TEST 'MLIR :: Integration/GPU/CUDA/async.mlir' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 1
/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-kernel-outlining  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -pass-pipeline='builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)'  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-async-region -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary="format=fatbin"  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -async-to-async-runtime -async-runtime-ref-counting  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -convert-async-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -convert-cf-to-llvm -reconcile-unrealized-casts  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-runner    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_cuda_runtime.so    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_async_runtime.so    --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_runner_utils.so    --entry-point-result=void -O0  | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/FileCheck /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-kernel-outlining
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt '-pass-pipeline=builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm),nvvm-attach-target)'
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -gpu-async-region -gpu-to-llvm -reconcile-unrealized-casts -gpu-module-to-binary=format=fatbin
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -async-to-async-runtime -async-runtime-ref-counting
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-opt -convert-async-to-llvm -convert-func-to-llvm -convert-arith-to-llvm -convert-cf-to-llvm -reconcile-unrealized-casts
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/mlir-runner --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_cuda_runtime.so --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_async_runtime.so --shared-libs=/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib/libmlir_runner_utils.so --entry-point-result=void -O0
# .---command stderr------------
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuStreamWaitEvent(stream, event, 0)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventSynchronize(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# | 'cuEventDestroy(event)' failed with 'CUDA_ERROR_CONTEXT_IS_DESTROYED'
# `-----------------------------
# executed command: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/bin/FileCheck /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# .---command stderr------------
# | /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir:68:12: error: CHECK: expected string not found in input
# |  // CHECK: [84, 84]
# |            ^
# | <stdin>:1:1: note: scanning from here
# | Unranked Memref base@ = 0x5be194ac4d10 rank = 1 offset = 0 sizes = [2] strides = [1] data = 
# | ^
# | <stdin>:2:1: note: possible intended match here
# | [42, 42]
# | ^
# | 
# | Input file: <stdin>
# | Check file: /vol/worker/mlir-nvidia/mlir-nvidia/llvm.src/mlir/test/Integration/GPU/CUDA/async.mlir
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |             1: Unranked Memref base@ = 0x5be194ac4d10 rank = 1 offset = 0 sizes = [2] strides = [1] data =  
# | check:68'0     X~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ error: no match found
# |             2: [42, 42] 
# | check:68'0     ~~~~~~~~~
# | check:68'1     ?         possible intended match
...

maryammo · 2025-12-04T00:07:51Z

https://lab.llvm.org/buildbot/#/builders/138/builds/22657

The failure is not related to this PR, also the next build is fine.

This commit adds two Clang builtins for PowerPC AMO load operations: __builtin_amo_lwat for 32-bit unsigned operations __builtin_amo_ldat for 64-bit unsigned operations Also adds an amo.h header that maps GCC's AMO functions to these Clang builtins for compatibility.

lei137 · 2025-12-04T22:20:40Z

clang/lib/Sema/SemaPPC.cpp

+  case PPC::BI__builtin_amo_lwat:
+  case PPC::BI__builtin_amo_ldat: {
+    llvm::APSInt Result;
+    if (SemaRef.BuiltinConstantArg(TheCall, 2, Result))
+      return true;
+    unsigned Val = Result.getZExtValue();
+    static constexpr unsigned ValidFC[] = {0, 1, 2, 3, 4, 6, 8};
+    if (llvm::is_contained(ValidFC, Val))
+      return false;
+    Expr *Arg = TheCall->getArg(2);
+    return SemaRef.Diag(Arg->getBeginLoc(), diag::err_argument_invalid_range)
+           << toString(Result, 10) << "0-4, 6" << "8" << Arg->getSourceRange();
+  }


Can we place this above the custom mma builtin handling? Would be good to keep this together with the other special sema check handling.

lei137 · 2025-12-04T22:56:27Z

llvm/lib/Target/PowerPC/PPCInstrInfo.td

-let hasExtraSrcRegAllocReq = 1 in
-def LWAT : X_RD5_RS5_IM5<31, 582, (outs gprc:$RST), (ins gprc:$RA, u5imm:$RB),
+let hasExtraSrcRegAllocReq = 1, mayLoad = 1, mayStore = 1 in
+def LWAT : X_RD5_RS5_IM5<31, 582, (outs g8prc:$RST), (ins g8prc:$RSTi, ptr_rc_nor0:$RA, u5imm:$RB),


This instruction is now changed to take g8prc but technically this is also valid for gprc. Maybe a codegen only version should be generated for this in the 64bit Instr td file?

lei137 · 2025-12-04T23:00:27Z

llvm/lib/Target/PowerPC/PPCInstrInfo.td

+           Requires<[IsISA3_0]>,
+           RegConstraint<"$RSTi = $RST">;
+
+def LWAT_PSEUDO : PPCCustomInserterPseudo<


Would be good if some documentation can be provided as to why this pseudo is needed?

maryammo added 6 commits November 12, 2025 23:57

Merge branch 'main' into amo1

9fab82a

Fixing amo-enable.ll

4b51574

Fix valid FC values for unsigned AMO load builtins

9cb29f5

Exclude invalid FC values for unsigned AMO load builtins

84167f8

Apply clang-format

95d0a17

maryammo requested review from RolandF77 and lei137 November 19, 2025 18:16

maryammo added clang Clang issues not falling into any other category backend:PowerPC labels Nov 19, 2025

llvmbot added backend:X86 clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics llvm:ir labels Nov 19, 2025

maryammo mentioned this pull request Nov 19, 2025

[PowerPC] Add initial support for AMO load builtins #167790

Closed

RolandF77 reviewed Nov 26, 2025

View reviewed changes

Address review comments

29edda1

RolandF77 approved these changes Dec 3, 2025

View reviewed changes

maryammo merged commit f650330 into main Dec 3, 2025
10 checks passed

maryammo deleted the users/maryammo/amo1 branch December 3, 2025 22:47

lei137 reviewed Dec 4, 2025

View reviewed changes

[PowerPC] Add initial support for AMO load builtins #168746

[PowerPC] Add initial support for AMO load builtins #168746

Uh oh!

Conversation

maryammo commented Nov 19, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Nov 19, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

github-actions bot commented Nov 19, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

🐧 Linux x64 Test Results

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

RolandF77 left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvm-ci commented Dec 3, 2025

Uh oh!

maryammo commented Dec 4, 2025

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

6 participants

maryammo commented Nov 19, 2025 •

edited

Loading

llvmbot commented Nov 19, 2025 •

edited

Loading

github-actions bot commented Nov 19, 2025 •

edited

Loading