Skip to content

Commit a3d9b5c

Browse files
HendrikHuebner and Honey Goyal
authored and committed
[CIR] Add AtomicFenceOp and signal/thread fence builtins and required helpers (llvm#168346)
This PR adds the AtomicFenceOp and signal/thread fence builtins.
1 parent 45b5a2e commit a3d9b5c

File tree

4 files changed

+270
-0
lines changed

4 files changed

+270
-0
lines changed

clang/include/clang/CIR/Dialect/IR/CIROps.td

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5188,6 +5188,11 @@ def CIR_AtomicFetchKind : CIR_I32EnumAttr<
51885188
I32EnumAttrCase<"Min", 7, "min">
51895189
]>;
51905190

5191+
// Synchronization scope of an atomic operation: the whole system (all
// threads, the default for `__atomic_thread_fence`) or only the current
// thread (used for `__atomic_signal_fence`).
def CIR_SyncScopeKind : CIR_I32EnumAttr<"SyncScopeKind", "sync scope kind", [
  I32EnumAttrCase<"SingleThread", 0, "single_thread">,
  I32EnumAttrCase<"System", 1, "system">
]>;
5195+
51915196
def CIR_AtomicFetchOp : CIR_Op<"atomic.fetch", [
51925197
AllTypesMatch<["result", "val"]>,
51935198
TypesMatchWith<"type of 'val' must match the pointee type of 'ptr'",
@@ -5411,6 +5416,36 @@ def CIR_AtomicClearOp : CIR_Op<"atomic.clear"> {
54115416
}];
54125417
}
54135418

5419+
// NOTE(review): sibling atomic ops use an "Op" suffix on the record name
// (CIR_AtomicFetchOp, CIR_AtomicClearOp); renaming this to CIR_AtomicFenceOp
// would also rename the generated C++ class, so all C++ uses would need
// updating — confirm before changing.
def CIR_AtomicFence : CIR_Op<"atomic.fence"> {
  let summary = "Atomic thread fence";
  let description = [{
    C/C++ Atomic thread fence synchronization primitive. Implements the builtin
    `__atomic_thread_fence` which enforces memory ordering constraints across
    threads within the specified synchronization scope.

    This handles all variations including:
    - `__atomic_thread_fence`
    - `__atomic_signal_fence`
    - `__c11_atomic_thread_fence`
    - `__c11_atomic_signal_fence`

    Example:
    ```mlir
    cir.atomic.fence syncscope(system) seq_cst
    cir.atomic.fence syncscope(single_thread) seq_cst
    ```
  }];

  // `ordering` is the C11 memory order; `syncscope` is optional — when
  // absent the fence applies to the default (system) scope.
  let arguments = (ins
    Arg<CIR_MemOrder, "memory order">:$ordering,
    OptionalAttr<CIR_SyncScopeKind>:$syncscope
  );

  let assemblyFormat = [{
    (`syncscope` `(` $syncscope^ `)`)? $ordering attr-dict
  }];
}
5448+
54145449
//===----------------------------------------------------------------------===//
54155450
// BlockAddressOp
54165451
//===----------------------------------------------------------------------===//

clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,33 @@ static RValue emitBuiltinBitOp(CIRGenFunction &cgf, const CallExpr *e,
6060
return RValue::get(result);
6161
}
6262

63+
/// Emits a cir.atomic.fence for an `__atomic_thread_fence` /
/// `__atomic_signal_fence` style builtin call.
///
/// \param expr      the builtin call; arg 0 is the memory-order operand.
/// \param syncScope System for thread fences, SingleThread for signal fences.
/// \returns a null Value — fence builtins produce no result.
static mlir::Value makeAtomicFenceValue(CIRGenFunction &cgf,
                                        const CallExpr *expr,
                                        cir::SyncScopeKind syncScope) {
  CIRGenBuilderTy &builder = cgf.getBuilder();
  mlir::Value orderingVal = cgf.emitScalarExpr(expr->getArg(0));

  auto constOrdering = orderingVal.getDefiningOp<cir::ConstantOp>();

  if (!constOrdering) {
    // TODO(cir): Emit code to switch on `orderingVal`,
    // and creating the fence op for valid values.
    cgf.cgm.errorNYI("Variable atomic fence ordering");
    return {};
  }

  auto constOrderingAttr = constOrdering.getValueAttr<cir::IntAttr>();
  assert(constOrderingAttr && "Expected integer constant for ordering");

  uint64_t rawOrdering = constOrderingAttr.getUInt();
  // An out-of-range memory order is undefined behavior at the source level;
  // classic codegen silently emits no fence for an invalid constant order,
  // so mirror that instead of static_casting to an invalid MemOrder value.
  if (rawOrdering > static_cast<uint64_t>(cir::MemOrder::SequentiallyConsistent))
    return {};

  auto ordering = static_cast<cir::MemOrder>(rawOrdering);

  cir::AtomicFence::create(
      builder, cgf.getLoc(expr->getSourceRange()), ordering,
      cir::SyncScopeKindAttr::get(&cgf.getMLIRContext(), syncScope));

  return {};
}
89+
6390
namespace {
6491
struct WidthAndSignedness {
6592
unsigned width;
@@ -982,8 +1009,13 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
9821009
case Builtin::BI__atomic_is_lock_free:
9831010
case Builtin::BI__atomic_test_and_set:
9841011
case Builtin::BI__atomic_clear:
1012+
return errorBuiltinNYI(*this, e, builtinID);
9851013
case Builtin::BI__atomic_thread_fence:
1014+
return RValue::get(
1015+
makeAtomicFenceValue(*this, e, cir::SyncScopeKind::System));
9861016
case Builtin::BI__atomic_signal_fence:
1017+
return RValue::get(
1018+
makeAtomicFenceValue(*this, e, cir::SyncScopeKind::SingleThread));
9871019
case Builtin::BI__c11_atomic_thread_fence:
9881020
case Builtin::BI__c11_atomic_signal_fence:
9891021
case Builtin::BI__scoped_atomic_thread_fence:

clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -775,6 +775,15 @@ getLLVMMemOrder(std::optional<cir::MemOrder> memorder) {
775775
llvm_unreachable("unknown memory order");
776776
}
777777

778+
static std::optional<llvm::StringRef>
779+
getLLVMSyncScope(std::optional<cir::SyncScopeKind> syncScope) {
780+
if (syncScope.has_value())
781+
return syncScope.value() == cir::SyncScopeKind::SingleThread
782+
? "singlethread"
783+
: "";
784+
return std::nullopt;
785+
}
786+
778787
mlir::LogicalResult CIRToLLVMAtomicCmpXchgOpLowering::matchAndRewrite(
779788
cir::AtomicCmpXchgOp op, OpAdaptor adaptor,
780789
mlir::ConversionPatternRewriter &rewriter) const {
@@ -851,6 +860,19 @@ mlir::LogicalResult CIRToLLVMAtomicClearOpLowering::matchAndRewrite(
851860
return mlir::success();
852861
}
853862

863+
mlir::LogicalResult CIRToLLVMAtomicFenceLowering::matchAndRewrite(
    cir::AtomicFence op, OpAdaptor adaptor,
    mlir::ConversionPatternRewriter &rewriter) const {
  // Map the CIR memory order onto the LLVM dialect ordering.
  const mlir::LLVM::AtomicOrdering llvmOrder =
      getLLVMMemOrder(adaptor.getOrdering());

  // Build the llvm.fence and attach the sync scope, if any was present on
  // the CIR op (absent means the default, system-wide scope).
  auto fenceOp = mlir::LLVM::FenceOp::create(rewriter, op.getLoc(), llvmOrder);
  fenceOp.setSyncscope(getLLVMSyncScope(adaptor.getSyncscope()));

  rewriter.replaceOp(op, fenceOp);
  return mlir::success();
}
875+
854876
static mlir::LLVM::AtomicBinOp
855877
getLLVMAtomicBinOp(cir::AtomicFetchKind k, bool isInt, bool isSignedInt) {
856878
switch (k) {
Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
2+
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
3+
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
4+
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
5+
// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -emit-llvm %s -o %t.ll
6+
// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
7+
8+
// Aggregate used below to combine fences with ordinary stores and atomic
// loads of its members.
struct Data {
  int value;
  void *ptr;
};

typedef struct Data *DataPtr;
14+
15+
// A bare __atomic_thread_fence lowers to a system-scoped cir.atomic.fence,
// which becomes an unscoped (default-scope) LLVM `fence`.
void applyThreadFence() {
  __atomic_thread_fence(__ATOMIC_SEQ_CST);
  // CIR-LABEL: @applyThreadFence
  // CIR: cir.atomic.fence syncscope(system) seq_cst
  // CIR: cir.return

  // LLVM-LABEL: @applyThreadFence
  // LLVM: fence seq_cst
  // LLVM: ret void

  // OGCG-LABEL: @applyThreadFence
  // OGCG: fence seq_cst
  // OGCG: ret void
}
29+
30+
// A bare __atomic_signal_fence lowers to a single_thread-scoped
// cir.atomic.fence, which becomes an LLVM fence with the "singlethread"
// syncscope.
void applySignalFence() {
  __atomic_signal_fence(__ATOMIC_SEQ_CST);
  // CIR-LABEL: @applySignalFence
  // CIR: cir.atomic.fence syncscope(single_thread) seq_cst
  // CIR: cir.return

  // LLVM-LABEL: @applySignalFence
  // LLVM: fence syncscope("singlethread") seq_cst
  // LLVM: ret void

  // OGCG-LABEL: @applySignalFence
  // OGCG: fence syncscope("singlethread") seq_cst
  // OGCG: ret void
}
44+
45+
// Thread fence followed by a plain member store: the fence must be emitted
// before the store in all three pipelines.
void modifyWithThreadFence(DataPtr d) {
  __atomic_thread_fence(__ATOMIC_SEQ_CST);
  d->value = 42;
  // CIR-LABEL: @modifyWithThreadFence
  // CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!rec_Data>, !cir.ptr<!cir.ptr<!rec_Data>>, ["d", init] {alignment = 8 : i64}
  // CIR: cir.atomic.fence syncscope(system) seq_cst
  // CIR: %[[VAL_42:.*]] = cir.const #cir.int<42> : !s32i
  // CIR: %[[LOAD_DATA:.*]] = cir.load{{.*}} %[[DATA]] : !cir.ptr<!cir.ptr<!rec_Data>>, !cir.ptr<!rec_Data>
  // CIR: %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][0] {name = "value"} : !cir.ptr<!rec_Data> -> !cir.ptr<!s32i>
  // CIR: cir.store{{.*}} %[[VAL_42]], %[[DATA_VALUE]] : !s32i, !cir.ptr<!s32i>
  // CIR: cir.return

  // LLVM-LABEL: @modifyWithThreadFence
  // LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
  // LLVM: fence seq_cst
  // LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
  // LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
  // LLVM: store i32 42, ptr %[[DATA_VALUE]], align 8
  // LLVM: ret void

  // OGCG-LABEL: @modifyWithThreadFence
  // OGCG: %[[DATA:.*]] = alloca ptr, align 8
  // OGCG: fence seq_cst
  // OGCG: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
  // OGCG: %[[DATA_VALUE:.*]] = getelementptr inbounds nuw %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
  // OGCG: store i32 42, ptr %[[DATA_VALUE]], align 8
  // OGCG: ret void
}
73+
74+
// Signal fence followed by a plain member store. The FileCheck capture for
// the stored constant was previously named VAL_42 although it binds 24;
// renamed to VAL_24 (definition and use) to match the value it captures.
void modifyWithSignalFence(DataPtr d) {
  __atomic_signal_fence(__ATOMIC_SEQ_CST);
  d->value = 24;
  // CIR-LABEL: @modifyWithSignalFence
  // CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!rec_Data>, !cir.ptr<!cir.ptr<!rec_Data>>, ["d", init] {alignment = 8 : i64}
  // CIR: cir.atomic.fence syncscope(single_thread) seq_cst
  // CIR: %[[VAL_24:.*]] = cir.const #cir.int<24> : !s32i
  // CIR: %[[LOAD_DATA:.*]] = cir.load{{.*}} %[[DATA]] : !cir.ptr<!cir.ptr<!rec_Data>>, !cir.ptr<!rec_Data>
  // CIR: %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][0] {name = "value"} : !cir.ptr<!rec_Data> -> !cir.ptr<!s32i>
  // CIR: cir.store{{.*}} %[[VAL_24]], %[[DATA_VALUE]] : !s32i, !cir.ptr<!s32i>
  // CIR: cir.return

  // LLVM-LABEL: @modifyWithSignalFence
  // LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
  // LLVM: fence syncscope("singlethread") seq_cst
  // LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
  // LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
  // LLVM: store i32 24, ptr %[[DATA_VALUE]], align 8
  // LLVM: ret void

  // OGCG-LABEL: @modifyWithSignalFence
  // OGCG: %[[DATA:.*]] = alloca ptr, align 8
  // OGCG: fence syncscope("singlethread") seq_cst
  // OGCG: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
  // OGCG: %[[DATA_VALUE:.*]] = getelementptr inbounds nuw %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
  // OGCG: store i32 24, ptr %[[DATA_VALUE]], align 8
  // OGCG: ret void
}
102+
103+
// Thread fence followed by an atomic load of a pointer member: the fence
// must precede the atomic load in all three pipelines.
void loadWithThreadFence(DataPtr d) {
  __atomic_thread_fence(__ATOMIC_SEQ_CST);
  __atomic_load_n(&d->ptr, __ATOMIC_SEQ_CST);
  // CIR-LABEL: @loadWithThreadFence
  // CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!rec_Data>, !cir.ptr<!cir.ptr<!rec_Data>>, ["d", init] {alignment = 8 : i64}
  // CIR: %[[ATOMIC_TEMP:.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["atomic-temp"] {alignment = 8 : i64}
  // CIR: cir.atomic.fence syncscope(system) seq_cst
  // CIR: %[[LOAD_DATA:.*]] = cir.load{{.*}} %[[DATA]] : !cir.ptr<!cir.ptr<!rec_Data>>, !cir.ptr<!rec_Data>
  // CIR: %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][1] {name = "ptr"} : !cir.ptr<!rec_Data> -> !cir.ptr<!cir.ptr<!void>>
  // CIR: %[[CASTED_DATA_VALUE:.*]] = cir.cast bitcast %[[DATA_VALUE]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!u64i>
  // CIR: %[[CASTED_ATOMIC_TEMP:.*]] = cir.cast bitcast %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!u64i>
  // CIR: %[[ATOMIC_LOAD:.*]] = cir.load{{.*}} atomic(seq_cst) %[[CASTED_DATA_VALUE]] : !cir.ptr<!u64i>, !u64i
  // CIR: cir.store{{.*}} %[[ATOMIC_LOAD]], %[[CASTED_ATOMIC_TEMP]] : !u64i, !cir.ptr<!u64i>
  // CIR: %[[DOUBLE_CASTED_ATOMIC_TEMP:.*]] = cir.cast bitcast %[[CASTED_ATOMIC_TEMP]] : !cir.ptr<!u64i> -> !cir.ptr<!cir.ptr<!void>>
  // CIR: %[[ATOMIC_LOAD_PTR:.*]] = cir.load{{.*}} %[[DOUBLE_CASTED_ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
  // CIR: cir.return

  // LLVM-LABEL: @loadWithThreadFence
  // LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
  // LLVM: %[[DATA_TEMP:.*]] = alloca ptr, i64 1, align 8
  // LLVM: fence seq_cst
  // LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
  // LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
  // LLVM: %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
  // LLVM: store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
  // LLVM: %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
  // LLVM: ret void

  // OGCG-LABEL: @loadWithThreadFence
  // OGCG: %[[DATA:.*]] = alloca ptr, align 8
  // OGCG: %[[DATA_TEMP:.*]] = alloca ptr, align 8
  // OGCG: fence seq_cst
  // OGCG: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
  // OGCG: %[[DATA_VALUE:.*]] = getelementptr inbounds nuw %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
  // OGCG: %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
  // OGCG: store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
  // OGCG: %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
  // OGCG: ret void
}
142+
143+
// Signal fence followed by an atomic load of a pointer member. Fix: the
// final LLVM/OGCG lines used %[[DATA_TEMP_LOAD]] — a USE of a FileCheck
// variable that was never defined, which FileCheck rejects as an undefined
// variable. They must define it with %[[DATA_TEMP_LOAD:.*]], as the
// loadWithThreadFence test above already does.
void loadWithSignalFence(DataPtr d) {
  __atomic_signal_fence(__ATOMIC_SEQ_CST);
  __atomic_load_n(&d->ptr, __ATOMIC_SEQ_CST);
  // CIR-LABEL: @loadWithSignalFence
  // CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!rec_Data>, !cir.ptr<!cir.ptr<!rec_Data>>, ["d", init] {alignment = 8 : i64}
  // CIR: %[[ATOMIC_TEMP:.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["atomic-temp"] {alignment = 8 : i64}
  // CIR: cir.atomic.fence syncscope(single_thread) seq_cst
  // CIR: %[[LOAD_DATA:.*]] = cir.load{{.*}} %[[DATA]] : !cir.ptr<!cir.ptr<!rec_Data>>, !cir.ptr<!rec_Data>
  // CIR: %[[DATA_PTR:.*]] = cir.get_member %[[LOAD_DATA]][1] {name = "ptr"} : !cir.ptr<!rec_Data> -> !cir.ptr<!cir.ptr<!void>>
  // CIR: %[[CASTED_DATA_PTR:.*]] = cir.cast bitcast %[[DATA_PTR]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!u64i>
  // CIR: %[[CASTED_ATOMIC_TEMP:.*]] = cir.cast bitcast %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!u64i>
  // CIR: %[[ATOMIC_LOAD:.*]] = cir.load{{.*}} atomic(seq_cst) %[[CASTED_DATA_PTR]] : !cir.ptr<!u64i>, !u64i
  // CIR: cir.store{{.*}} %[[ATOMIC_LOAD]], %[[CASTED_ATOMIC_TEMP]] : !u64i, !cir.ptr<!u64i>
  // CIR: %[[DOUBLE_CASTED_ATOMIC_TEMP:.*]] = cir.cast bitcast %[[CASTED_ATOMIC_TEMP]] : !cir.ptr<!u64i> -> !cir.ptr<!cir.ptr<!void>>
  // CIR: %[[LOAD_ATOMIC_TEMP:.*]] = cir.load{{.*}} %[[DOUBLE_CASTED_ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
  // CIR: cir.return

  // LLVM-LABEL: @loadWithSignalFence
  // LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
  // LLVM: %[[DATA_TEMP:.*]] = alloca ptr, i64 1, align 8
  // LLVM: fence syncscope("singlethread") seq_cst
  // LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
  // LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
  // LLVM: %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
  // LLVM: store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
  // LLVM: %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
  // LLVM: ret void

  // OGCG-LABEL: @loadWithSignalFence
  // OGCG: %[[DATA:.*]] = alloca ptr, align 8
  // OGCG: %[[DATA_TEMP:.*]] = alloca ptr, align 8
  // OGCG: fence syncscope("singlethread") seq_cst
  // OGCG: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
  // OGCG: %[[DATA_VALUE:.*]] = getelementptr inbounds nuw %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
  // OGCG: %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
  // OGCG: store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
  // OGCG: %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
  // OGCG: ret void
}

0 commit comments

Comments
 (0)