Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions clang/include/clang/CIR/Dialect/IR/CIROps.td
Original file line number Diff line number Diff line change
Expand Up @@ -4985,6 +4985,11 @@ def CIR_AtomicFetchKind : CIR_I32EnumAttr<
I32EnumAttrCase<"Min", 7, "min">
]>;

// Synchronization scope for atomic operations. `system` orders against all
// threads (used for __atomic_thread_fence), while `single_thread` restricts
// ordering to the current thread (used for __atomic_signal_fence).
def CIR_SyncScopeKind : CIR_I32EnumAttr<"SyncScopeKind", "sync scope kind", [
I32EnumAttrCase<"SingleThread", 0, "single_thread">,
I32EnumAttrCase<"System", 1, "system">
]>;

def CIR_AtomicFetchOp : CIR_Op<"atomic.fetch", [
AllTypesMatch<["result", "val"]>,
TypesMatchWith<"type of 'val' must match the pointee type of 'ptr'",
Expand Down Expand Up @@ -5208,6 +5213,36 @@ def CIR_AtomicClearOp : CIR_Op<"atomic.clear"> {
}];
}

// NOTE(review): sibling atomic ops use an `Op` suffix (CIR_AtomicClearOp,
// CIR_AtomicFetchOp); consider renaming to CIR_AtomicFenceOp / cir::AtomicFenceOp
// for consistency (would also require updating the C++ uses in CodeGen/Lowering).
def CIR_AtomicFence : CIR_Op<"atomic.fence"> {
let summary = "Atomic thread fence";
let description = [{
C/C++ Atomic thread fence synchronization primitive. Implements the builtin
`__atomic_thread_fence` which enforces memory ordering constraints across
threads within the specified synchronization scope.

This handles all variations including:
- `__atomic_thread_fence`
- `__atomic_signal_fence`
- `__c11_atomic_thread_fence`
- `__c11_atomic_signal_fence`

Example:
```mlir
cir.atomic.fence syncscope(system) seq_cst
cir.atomic.fence syncscope(single_thread) seq_cst
```
}];

// `syncscope` is optional; when absent, lowering emits an LLVM fence with no
// explicit scope (see getLLVMSyncScope, which returns nullopt in that case).
let arguments = (ins
Arg<CIR_MemOrder, "memory order">:$ordering,
OptionalAttr<CIR_SyncScopeKind>:$syncscope
);

let assemblyFormat = [{
(`syncscope` `(` $syncscope^ `)`)? $ordering attr-dict
}];
}

//===----------------------------------------------------------------------===//
// BlockAddressOp
//===----------------------------------------------------------------------===//
Expand Down
32 changes: 32 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,33 @@ static RValue emitBuiltinBitOp(CIRGenFunction &cgf, const CallExpr *e,
return RValue::get(result);
}

/// Emit a cir.atomic.fence for one of the atomic fence builtins
/// (__atomic_thread_fence / __atomic_signal_fence and their __c11 variants).
/// The builtin's first argument supplies the memory order and \p syncScope
/// selects the synchronization scope. Fences produce no SSA result, so the
/// returned mlir::Value is always null.
static mlir::Value makeAtomicFenceValue(CIRGenFunction &cgf,
                                        const CallExpr *expr,
                                        cir::SyncScopeKind syncScope) {
  CIRGenBuilderTy &builder = cgf.getBuilder();
  mlir::Value orderValue = cgf.emitScalarExpr(expr->getArg(0));

  // Only compile-time-constant orderings are supported at the moment.
  auto orderConst = orderValue.getDefiningOp<cir::ConstantOp>();
  if (!orderConst) {
    // TODO(cir): Emit code to switch on `orderValue`,
    // and creating the fence op for valid values.
    cgf.cgm.errorNYI("Variable atomic fence ordering");
    return {};
  }

  auto orderAttr = orderConst.getValueAttr<cir::IntAttr>();
  assert(orderAttr && "Expected integer constant for ordering");
  auto memOrder = static_cast<cir::MemOrder>(orderAttr.getUInt());

  cir::AtomicFence::create(
      builder, cgf.getLoc(expr->getSourceRange()), memOrder,
      cir::SyncScopeKindAttr::get(&cgf.getMLIRContext(), syncScope));

  return {};
}

namespace {
struct WidthAndSignedness {
unsigned width;
Expand Down Expand Up @@ -928,8 +955,13 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
case Builtin::BI__atomic_is_lock_free:
case Builtin::BI__atomic_test_and_set:
case Builtin::BI__atomic_clear:
return errorBuiltinNYI(*this, e, builtinID);
case Builtin::BI__atomic_thread_fence:
return RValue::get(
makeAtomicFenceValue(*this, e, cir::SyncScopeKind::System));
case Builtin::BI__atomic_signal_fence:
return RValue::get(
makeAtomicFenceValue(*this, e, cir::SyncScopeKind::SingleThread));
case Builtin::BI__c11_atomic_thread_fence:
case Builtin::BI__c11_atomic_signal_fence:
case Builtin::BI__scoped_atomic_thread_fence:
Expand Down
22 changes: 22 additions & 0 deletions clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,15 @@ getLLVMMemOrder(std::optional<cir::MemOrder> memorder) {
llvm_unreachable("unknown memory order");
}

/// Translate an optional CIR sync scope to the string LLVM's fence uses:
/// "singlethread" for SingleThread, "" (the default, system-wide scope) for
/// System, and no value at all when the attribute was absent.
static std::optional<llvm::StringRef>
getLLVMSyncScope(std::optional<cir::SyncScopeKind> syncScope) {
  if (!syncScope.has_value())
    return std::nullopt;
  if (*syncScope == cir::SyncScopeKind::SingleThread)
    return "singlethread";
  return "";
}

mlir::LogicalResult CIRToLLVMAtomicCmpXchgOpLowering::matchAndRewrite(
cir::AtomicCmpXchgOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
Expand Down Expand Up @@ -834,6 +843,19 @@ mlir::LogicalResult CIRToLLVMAtomicClearOpLowering::matchAndRewrite(
return mlir::success();
}

// Lower cir.atomic.fence to llvm.fence, translating the memory order and the
// optional synchronization scope to their LLVM dialect equivalents.
mlir::LogicalResult CIRToLLVMAtomicFenceLowering::matchAndRewrite(
    cir::AtomicFence op, OpAdaptor adaptor,
    mlir::ConversionPatternRewriter &rewriter) const {
  auto fenceOp = mlir::LLVM::FenceOp::create(
      rewriter, op.getLoc(), getLLVMMemOrder(adaptor.getOrdering()));
  fenceOp.setSyncscope(getLLVMSyncScope(adaptor.getSyncscope()));
  rewriter.replaceOp(op, fenceOp);
  return mlir::success();
}

static mlir::LLVM::AtomicBinOp
getLLVMAtomicBinOp(cir::AtomicFetchKind k, bool isInt, bool isSignedInt) {
switch (k) {
Expand Down
181 changes: 181 additions & 0 deletions clang/test/CIR/CodeGen/atomic-thread-fence.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -emit-llvm %s -o %t.ll
// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s

// Aggregate used by the tests below to combine fences with ordinary stores
// and atomic loads of its members.
struct Data {
int value;
void *ptr;
};

typedef struct Data *DataPtr;

void applyThreadFence() {
// A bare thread fence: system sync scope in CIR, unscoped fence in LLVM IR.
__atomic_thread_fence(__ATOMIC_SEQ_CST);
// CIR-LABEL: @applyThreadFence
// CIR: cir.atomic.fence syncscope(system) seq_cst
// CIR: cir.return

// LLVM-LABEL: @applyThreadFence
// LLVM: fence seq_cst
// LLVM: ret void

// OGCG-LABEL: @applyThreadFence
// OGCG: fence seq_cst
// OGCG: ret void
}

void applySignalFence() {
// A bare signal fence: single_thread scope in CIR, "singlethread" in LLVM IR.
__atomic_signal_fence(__ATOMIC_SEQ_CST);
// CIR-LABEL: @applySignalFence
// CIR: cir.atomic.fence syncscope(single_thread) seq_cst
// CIR: cir.return

// LLVM-LABEL: @applySignalFence
// LLVM: fence syncscope("singlethread") seq_cst
// LLVM: ret void

// OGCG-LABEL: @applySignalFence
// OGCG: fence syncscope("singlethread") seq_cst
// OGCG: ret void
}

void modifyWithThreadFence(DataPtr d) {
// Fence followed by an ordinary store; checks the fence stays ordered before
// the store in the emitted IR.
__atomic_thread_fence(__ATOMIC_SEQ_CST);
d->value = 42;
// CIR-LABEL: @modifyWithThreadFence
// CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!rec_Data>, !cir.ptr<!cir.ptr<!rec_Data>>, ["d", init] {alignment = 8 : i64}
// CIR: cir.atomic.fence syncscope(system) seq_cst
// CIR: %[[VAL_42:.*]] = cir.const #cir.int<42> : !s32i
// CIR: %[[LOAD_DATA:.*]] = cir.load{{.*}} %[[DATA]] : !cir.ptr<!cir.ptr<!rec_Data>>, !cir.ptr<!rec_Data>
// CIR: %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][0] {name = "value"} : !cir.ptr<!rec_Data> -> !cir.ptr<!s32i>
// CIR: cir.store{{.*}} %[[VAL_42]], %[[DATA_VALUE]] : !s32i, !cir.ptr<!s32i>
// CIR: cir.return

// LLVM-LABEL: @modifyWithThreadFence
// LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
// LLVM: fence seq_cst
// LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
// LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
// LLVM: store i32 42, ptr %[[DATA_VALUE]], align 8
// LLVM: ret void

// OGCG-LABEL: @modifyWithThreadFence
// OGCG: %[[DATA:.*]] = alloca ptr, align 8
// OGCG: fence seq_cst
// OGCG: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
// OGCG: %[[DATA_VALUE:.*]] = getelementptr inbounds nuw %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
// OGCG: store i32 42, ptr %[[DATA_VALUE]], align 8
// OGCG: ret void
}

void modifyWithSignalFence(DataPtr d) {
// Signal fence followed by an ordinary store. The FileCheck variable is
// named after the constant it captures (24); the previous name VAL_42 was a
// copy-paste leftover from modifyWithThreadFence and was misleading.
__atomic_signal_fence(__ATOMIC_SEQ_CST);
d->value = 24;
// CIR-LABEL: @modifyWithSignalFence
// CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!rec_Data>, !cir.ptr<!cir.ptr<!rec_Data>>, ["d", init] {alignment = 8 : i64}
// CIR: cir.atomic.fence syncscope(single_thread) seq_cst
// CIR: %[[VAL_24:.*]] = cir.const #cir.int<24> : !s32i
// CIR: %[[LOAD_DATA:.*]] = cir.load{{.*}} %[[DATA]] : !cir.ptr<!cir.ptr<!rec_Data>>, !cir.ptr<!rec_Data>
// CIR: %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][0] {name = "value"} : !cir.ptr<!rec_Data> -> !cir.ptr<!s32i>
// CIR: cir.store{{.*}} %[[VAL_24]], %[[DATA_VALUE]] : !s32i, !cir.ptr<!s32i>
// CIR: cir.return

// LLVM-LABEL: @modifyWithSignalFence
// LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
// LLVM: fence syncscope("singlethread") seq_cst
// LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
// LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
// LLVM: store i32 24, ptr %[[DATA_VALUE]], align 8
// LLVM: ret void

// OGCG-LABEL: @modifyWithSignalFence
// OGCG: %[[DATA:.*]] = alloca ptr, align 8
// OGCG: fence syncscope("singlethread") seq_cst
// OGCG: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
// OGCG: %[[DATA_VALUE:.*]] = getelementptr inbounds nuw %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
// OGCG: store i32 24, ptr %[[DATA_VALUE]], align 8
// OGCG: ret void
}

void loadWithThreadFence(DataPtr d) {
// Fence followed by an atomic load; checks the fence precedes the
// load-atomic / temp-store sequence emitted for __atomic_load_n.
__atomic_thread_fence(__ATOMIC_SEQ_CST);
__atomic_load_n(&d->ptr, __ATOMIC_SEQ_CST);
// CIR-LABEL: @loadWithThreadFence
// CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!rec_Data>, !cir.ptr<!cir.ptr<!rec_Data>>, ["d", init] {alignment = 8 : i64}
// CIR: %[[ATOMIC_TEMP:.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["atomic-temp"] {alignment = 8 : i64}
// CIR: cir.atomic.fence syncscope(system) seq_cst
// CIR: %[[LOAD_DATA:.*]] = cir.load{{.*}} %[[DATA]] : !cir.ptr<!cir.ptr<!rec_Data>>, !cir.ptr<!rec_Data>
// CIR: %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][1] {name = "ptr"} : !cir.ptr<!rec_Data> -> !cir.ptr<!cir.ptr<!void>>
// CIR: %[[CASTED_DATA_VALUE:.*]] = cir.cast bitcast %[[DATA_VALUE]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!u64i>
// CIR: %[[CASTED_ATOMIC_TEMP:.*]] = cir.cast bitcast %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!u64i>
// CIR: %[[ATOMIC_LOAD:.*]] = cir.load{{.*}} atomic(seq_cst) %[[CASTED_DATA_VALUE]] : !cir.ptr<!u64i>, !u64i
// CIR: cir.store{{.*}} %[[ATOMIC_LOAD]], %[[CASTED_ATOMIC_TEMP]] : !u64i, !cir.ptr<!u64i>
// CIR: %[[DOUBLE_CASTED_ATOMIC_TEMP:.*]] = cir.cast bitcast %[[CASTED_ATOMIC_TEMP]] : !cir.ptr<!u64i> -> !cir.ptr<!cir.ptr<!void>>
// CIR: %[[ATOMIC_LOAD_PTR:.*]] = cir.load{{.*}} %[[DOUBLE_CASTED_ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
// CIR: cir.return

// LLVM-LABEL: @loadWithThreadFence
// LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
// LLVM: %[[DATA_TEMP:.*]] = alloca ptr, i64 1, align 8
// LLVM: fence seq_cst
// LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
// LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
// LLVM: %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
// LLVM: store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
// LLVM: %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
// LLVM: ret void

// OGCG-LABEL: @loadWithThreadFence
// OGCG: %[[DATA:.*]] = alloca ptr, align 8
// OGCG: %[[DATA_TEMP:.*]] = alloca ptr, align 8
// OGCG: fence seq_cst
// OGCG: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
// OGCG: %[[DATA_VALUE:.*]] = getelementptr inbounds nuw %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
// OGCG: %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
// OGCG: store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
// OGCG: %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
// OGCG: ret void
}

void loadWithSignalFence(DataPtr d) {
// Signal fence followed by an atomic load. Fix: the LLVM/OGCG lines below
// previously wrote %[[DATA_TEMP_LOAD]] (a FileCheck *use*) without defining
// the variable in this function, so they silently matched against the value
// captured in loadWithThreadFence's checks (FileCheck variables are global
// to the run). They must define the variable with %[[DATA_TEMP_LOAD:.*]],
// as the sibling function does.
__atomic_signal_fence(__ATOMIC_SEQ_CST);
__atomic_load_n(&d->ptr, __ATOMIC_SEQ_CST);
// CIR-LABEL: @loadWithSignalFence
// CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!rec_Data>, !cir.ptr<!cir.ptr<!rec_Data>>, ["d", init] {alignment = 8 : i64}
// CIR: %[[ATOMIC_TEMP:.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["atomic-temp"] {alignment = 8 : i64}
// CIR: cir.atomic.fence syncscope(single_thread) seq_cst
// CIR: %[[LOAD_DATA:.*]] = cir.load{{.*}} %[[DATA]] : !cir.ptr<!cir.ptr<!rec_Data>>, !cir.ptr<!rec_Data>
// CIR: %[[DATA_PTR:.*]] = cir.get_member %[[LOAD_DATA]][1] {name = "ptr"} : !cir.ptr<!rec_Data> -> !cir.ptr<!cir.ptr<!void>>
// CIR: %[[CASTED_DATA_PTR:.*]] = cir.cast bitcast %[[DATA_PTR]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!u64i>
// CIR: %[[CASTED_ATOMIC_TEMP:.*]] = cir.cast bitcast %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!u64i>
// CIR: %[[ATOMIC_LOAD:.*]] = cir.load{{.*}} atomic(seq_cst) %[[CASTED_DATA_PTR]] : !cir.ptr<!u64i>, !u64i
// CIR: cir.store{{.*}} %[[ATOMIC_LOAD]], %[[CASTED_ATOMIC_TEMP]] : !u64i, !cir.ptr<!u64i>
// CIR: %[[DOUBLE_CASTED_ATOMIC_TEMP:.*]] = cir.cast bitcast %[[CASTED_ATOMIC_TEMP]] : !cir.ptr<!u64i> -> !cir.ptr<!cir.ptr<!void>>
// CIR: %[[LOAD_ATOMIC_TEMP:.*]] = cir.load{{.*}} %[[DOUBLE_CASTED_ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
// CIR: cir.return

// LLVM-LABEL: @loadWithSignalFence
// LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
// LLVM: %[[DATA_TEMP:.*]] = alloca ptr, i64 1, align 8
// LLVM: fence syncscope("singlethread") seq_cst
// LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
// LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
// LLVM: %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
// LLVM: store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
// LLVM: %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
// LLVM: ret void

// OGCG-LABEL: @loadWithSignalFence
// OGCG: %[[DATA:.*]] = alloca ptr, align 8
// OGCG: %[[DATA_TEMP:.*]] = alloca ptr, align 8
// OGCG: fence syncscope("singlethread") seq_cst
// OGCG: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
// OGCG: %[[DATA_VALUE:.*]] = getelementptr inbounds nuw %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
// OGCG: %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
// OGCG: store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
// OGCG: %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
// OGCG: ret void
}
Loading