Commit 24e3f1a

[CIR] Add atomic fence op
1 parent 76f1949 commit 24e3f1a

4 files changed: +266 −0 lines changed

clang/include/clang/CIR/Dialect/IR/CIROps.td

Lines changed: 35 additions & 0 deletions
@@ -5122,6 +5122,41 @@ def CIR_AtomicClearOp : CIR_Op<"atomic.clear"> {
   }];
 }
 
+def CIR_SyncScopeKind : CIR_I32EnumAttr<"SyncScopeKind", "sync scope kind", [
+  I32EnumAttrCase<"SingleThread", 0, "single_thread">,
+  I32EnumAttrCase<"System", 1, "system">
+]>;
+
+def CIR_AtomicFence : CIR_Op<"atomic.fence"> {
+  let summary = "Atomic thread fence";
+  let description = [{
+    C/C++ Atomic thread fence synchronization primitive. Implements the builtin
+    `__atomic_thread_fence` which enforces memory ordering constraints across
+    threads within the specified synchronization scope.
+
+    This handles all variations including:
+    - `__atomic_thread_fence`
+    - `__atomic_signal_fence`
+    - `__c11_atomic_thread_fence`
+    - `__c11_atomic_signal_fence`
+
+    Example:
+    ```mlir
+    cir.atomic.fence syncscope(system) seq_cst
+    cir.atomic.fence syncscope(single_thread) seq_cst
+    ```
+  }];
+
+  let arguments = (ins
+    Arg<CIR_MemOrder, "memory order">:$ordering,
+    OptionalAttr<CIR_SyncScopeKind>:$syncscope
+  );
+
+  let assemblyFormat = [{
+    (`syncscope` `(` $syncscope^ `)`)? $ordering attr-dict
+  }];
+}
+
 //===----------------------------------------------------------------------===//
 // BlockAddressOp
 //===----------------------------------------------------------------------===//
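For orientation, here is a minimal C-level sketch (not part of the diff; the function name is made up) of how the fence builtins are expected to surface as the new op, following the assembly format above and the CodeGen mapping added below:

```c
// Illustrative only: expected CIR for the two builtins, shown as comments,
// assuming the syncscope mapping introduced in CIRGenBuiltin.cpp below.
void fences(void) {
  __atomic_thread_fence(__ATOMIC_SEQ_CST);
  // cir.atomic.fence syncscope(system) seq_cst
  __atomic_signal_fence(__ATOMIC_SEQ_CST);
  // cir.atomic.fence syncscope(single_thread) seq_cst
}
```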

clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp

Lines changed: 28 additions & 0 deletions
@@ -58,6 +58,28 @@ static RValue emitBuiltinBitOp(CIRGenFunction &cgf, const CallExpr *e,
   return RValue::get(result);
 }
 
+static mlir::Value makeAtomicFenceValue(CIRGenFunction &cgf,
+                                        const CallExpr *expr,
+                                        cir::SyncScopeKind syncScope) {
+  auto &builder = cgf.getBuilder();
+  mlir::Value orderingVal = cgf.emitScalarExpr(expr->getArg(0));
+
+  auto constOrdering = orderingVal.getDefiningOp<cir::ConstantOp>();
+  if (!constOrdering)
+    llvm_unreachable("NYI: variable ordering not supported");
+
+  if (auto constOrderingAttr = constOrdering.getValueAttr<cir::IntAttr>()) {
+    cir::MemOrder ordering =
+        static_cast<cir::MemOrder>(constOrderingAttr.getUInt());
+
+    cir::AtomicFence::create(
+        builder, cgf.getLoc(expr->getSourceRange()), ordering,
+        cir::SyncScopeKindAttr::get(&cgf.getMLIRContext(), syncScope));
+  }
+
+  return {};
+}
+
 RValue CIRGenFunction::emitRotate(const CallExpr *e, bool isRotateLeft) {
   mlir::Value input = emitScalarExpr(e->getArg(0));
   mlir::Value amount = emitScalarExpr(e->getArg(1));

@@ -612,6 +634,12 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID,
             builder.createIsFPClass(loc, v, cir::FPClassTest(test)),
             convertType(e->getType())));
   }
+  case Builtin::BI__atomic_thread_fence:
+    return RValue::get(
+        makeAtomicFenceValue(*this, e, cir::SyncScopeKind::System));
+  case Builtin::BI__atomic_signal_fence:
+    return RValue::get(
+        makeAtomicFenceValue(*this, e, cir::SyncScopeKind::SingleThread));
   }
 
   // If this is an alias for a lib function (e.g. __builtin_sin), emit
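One behavior worth noting in `makeAtomicFenceValue`: it only handles memory orders that fold to a `cir::ConstantOp`; a non-constant ordering takes the `llvm_unreachable("NYI: variable ordering not supported")` path instead of emitting a fence. A small sketch (hypothetical function name) of the case that is not yet covered:

```c
// Illustrative only: a runtime-variable memory order is not folded to a
// cir.const, so with this patch it hits the NYI path rather than producing
// a cir.atomic.fence.
void fenceWithRuntimeOrder(int order) {
  __atomic_thread_fence(order); // ordering is not a compile-time constant
}
```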

clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp

Lines changed: 22 additions & 0 deletions
@@ -750,6 +750,15 @@ getLLVMMemOrder(std::optional<cir::MemOrder> memorder) {
   llvm_unreachable("unknown memory order");
 }
 
+static std::optional<llvm::StringRef>
+getLLVMSyncScope(std::optional<cir::SyncScopeKind> syncScope) {
+  if (syncScope.has_value())
+    return syncScope.value() == cir::SyncScopeKind::SingleThread
+               ? "singlethread"
+               : "";
+  return std::nullopt;
+}
+
 mlir::LogicalResult CIRToLLVMAtomicCmpXchgOpLowering::matchAndRewrite(
     cir::AtomicCmpXchgOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {

@@ -826,6 +835,19 @@ mlir::LogicalResult CIRToLLVMAtomicClearOpLowering::matchAndRewrite(
   return mlir::success();
 }
 
+mlir::LogicalResult CIRToLLVMAtomicFenceLowering::matchAndRewrite(
+    cir::AtomicFence op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  mlir::LLVM::AtomicOrdering llvmOrder = getLLVMMemOrder(adaptor.getOrdering());
+
+  auto fence = mlir::LLVM::FenceOp::create(rewriter, op.getLoc(), llvmOrder);
+  fence.setSyncscope(getLLVMSyncScope(adaptor.getSyncscope()));
+
+  rewriter.replaceOp(op, fence);
+
+  return mlir::success();
+}
+
 static mlir::LLVM::AtomicBinOp
 getLLVMAtomicBinOp(cir::AtomicFetchKind k, bool isInt, bool isSignedInt) {
   switch (k) {
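Putting the lowering together, a short sketch (hypothetical function name) of the end-to-end result; the LLVM IR in the comments mirrors the FileCheck lines in the new test below, where the sync scope selects between a plain fence and a `syncscope("singlethread")` fence:

```c
// Illustrative only: expected LLVM IR after CIR lowering, per the test checks.
void expectedFences(void) {
  __atomic_thread_fence(__ATOMIC_SEQ_CST);  // fence seq_cst
  __atomic_signal_fence(__ATOMIC_SEQ_CST);  // fence syncscope("singlethread") seq_cst
}
```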
Lines changed: 181 additions & 0 deletions
@@ -0,0 +1,181 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+struct Data {
+  int value;
+  void *ptr;
+};
+
+typedef struct Data *DataPtr;
+
+void applyThreadFence() {
+  __atomic_thread_fence(__ATOMIC_SEQ_CST);
+  // CIR-LABEL: @applyThreadFence
+  // CIR: cir.atomic.fence syncscope(system) seq_cst
+  // CIR: cir.return
+
+  // LLVM-LABEL: @applyThreadFence
+  // LLVM: fence seq_cst
+  // LLVM: ret void
+
+  // OGCG-LABEL: @applyThreadFence
+  // OGCG: fence seq_cst
+  // OGCG: ret void
+}
+
+void applySignalFence() {
+  __atomic_signal_fence(__ATOMIC_SEQ_CST);
+  // CIR-LABEL: @applySignalFence
+  // CIR: cir.atomic.fence syncscope(single_thread) seq_cst
+  // CIR: cir.return
+
+  // LLVM-LABEL: @applySignalFence
+  // LLVM: fence syncscope("singlethread") seq_cst
+  // LLVM: ret void
+
+  // OGCG-LABEL: @applySignalFence
+  // OGCG: fence syncscope("singlethread") seq_cst
+  // OGCG: ret void
+}
+
+void modifyWithThreadFence(DataPtr d) {
+  __atomic_thread_fence(__ATOMIC_SEQ_CST);
+  d->value = 42;
+  // CIR-LABEL: @modifyWithThreadFence
+  // CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!rec_Data>, !cir.ptr<!cir.ptr<!rec_Data>>, ["d", init] {alignment = 8 : i64}
+  // CIR: cir.atomic.fence syncscope(system) seq_cst
+  // CIR: %[[VAL_42:.*]] = cir.const #cir.int<42> : !s32i
+  // CIR: %[[LOAD_DATA:.*]] = cir.load{{.*}} %[[DATA]] : !cir.ptr<!cir.ptr<!rec_Data>>, !cir.ptr<!rec_Data>
+  // CIR: %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][0] {name = "value"} : !cir.ptr<!rec_Data> -> !cir.ptr<!s32i>
+  // CIR: cir.store{{.*}} %[[VAL_42]], %[[DATA_VALUE]] : !s32i, !cir.ptr<!s32i>
+  // CIR: cir.return
+
+  // LLVM-LABEL: @modifyWithThreadFence
+  // LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
+  // LLVM: fence seq_cst
+  // LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
+  // LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
+  // LLVM: store i32 42, ptr %[[DATA_VALUE]], align 8
+  // LLVM: ret void
+
+  // OGCG-LABEL: @modifyWithThreadFence
+  // OGCG: %[[DATA:.*]] = alloca ptr, align 8
+  // OGCG: fence seq_cst
+  // OGCG: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
+  // OGCG: %[[DATA_VALUE:.*]] = getelementptr inbounds nuw %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
+  // OGCG: store i32 42, ptr %[[DATA_VALUE]], align 8
+  // OGCG: ret void
+}
+
+void modifyWithSignalFence(DataPtr d) {
+  __atomic_signal_fence(__ATOMIC_SEQ_CST);
+  d->value = 24;
+  // CIR-LABEL: @modifyWithSignalFence
+  // CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!rec_Data>, !cir.ptr<!cir.ptr<!rec_Data>>, ["d", init] {alignment = 8 : i64}
+  // CIR: cir.atomic.fence syncscope(single_thread) seq_cst
+  // CIR: %[[VAL_24:.*]] = cir.const #cir.int<24> : !s32i
+  // CIR: %[[LOAD_DATA:.*]] = cir.load{{.*}} %[[DATA]] : !cir.ptr<!cir.ptr<!rec_Data>>, !cir.ptr<!rec_Data>
+  // CIR: %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][0] {name = "value"} : !cir.ptr<!rec_Data> -> !cir.ptr<!s32i>
+  // CIR: cir.store{{.*}} %[[VAL_24]], %[[DATA_VALUE]] : !s32i, !cir.ptr<!s32i>
+  // CIR: cir.return
+
+  // LLVM-LABEL: @modifyWithSignalFence
+  // LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
+  // LLVM: fence syncscope("singlethread") seq_cst
+  // LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
+  // LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
+  // LLVM: store i32 24, ptr %[[DATA_VALUE]], align 8
+  // LLVM: ret void
+
+  // OGCG-LABEL: @modifyWithSignalFence
+  // OGCG: %[[DATA:.*]] = alloca ptr, align 8
+  // OGCG: fence syncscope("singlethread") seq_cst
+  // OGCG: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
+  // OGCG: %[[DATA_VALUE:.*]] = getelementptr inbounds nuw %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
+  // OGCG: store i32 24, ptr %[[DATA_VALUE]], align 8
+  // OGCG: ret void
+}
+
+void loadWithThreadFence(DataPtr d) {
+  __atomic_thread_fence(__ATOMIC_SEQ_CST);
+  __atomic_load_n(&d->ptr, __ATOMIC_SEQ_CST);
+  // CIR-LABEL: @loadWithThreadFence
+  // CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!rec_Data>, !cir.ptr<!cir.ptr<!rec_Data>>, ["d", init] {alignment = 8 : i64}
+  // CIR: %[[ATOMIC_TEMP:.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["atomic-temp"] {alignment = 8 : i64}
+  // CIR: cir.atomic.fence syncscope(system) seq_cst
+  // CIR: %[[LOAD_DATA:.*]] = cir.load{{.*}} %[[DATA]] : !cir.ptr<!cir.ptr<!rec_Data>>, !cir.ptr<!rec_Data>
+  // CIR: %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][1] {name = "ptr"} : !cir.ptr<!rec_Data> -> !cir.ptr<!cir.ptr<!void>>
+  // CIR: %[[CASTED_DATA_VALUE:.*]] = cir.cast bitcast %[[DATA_VALUE]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!u64i>
+  // CIR: %[[CASTED_ATOMIC_TEMP:.*]] = cir.cast bitcast %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!u64i>
+  // CIR: %[[ATOMIC_LOAD:.*]] = cir.load{{.*}} atomic(seq_cst) %[[CASTED_DATA_VALUE]] : !cir.ptr<!u64i>, !u64i
+  // CIR: cir.store{{.*}} %[[ATOMIC_LOAD]], %[[CASTED_ATOMIC_TEMP]] : !u64i, !cir.ptr<!u64i>
+  // CIR: %[[DOUBLE_CASTED_ATOMIC_TEMP:.*]] = cir.cast bitcast %[[CASTED_ATOMIC_TEMP]] : !cir.ptr<!u64i> -> !cir.ptr<!cir.ptr<!void>>
+  // CIR: %[[ATOMIC_LOAD_PTR:.*]] = cir.load{{.*}} %[[DOUBLE_CASTED_ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+  // CIR: cir.return
+
+  // LLVM-LABEL: @loadWithThreadFence
+  // LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
+  // LLVM: %[[DATA_TEMP:.*]] = alloca ptr, i64 1, align 8
+  // LLVM: fence seq_cst
+  // LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
+  // LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
+  // LLVM: %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
+  // LLVM: store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
+  // LLVM: %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
+  // LLVM: ret void
+
+  // OGCG-LABEL: @loadWithThreadFence
+  // OGCG: %[[DATA:.*]] = alloca ptr, align 8
+  // OGCG: %[[DATA_TEMP:.*]] = alloca ptr, align 8
+  // OGCG: fence seq_cst
+  // OGCG: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
+  // OGCG: %[[DATA_VALUE:.*]] = getelementptr inbounds nuw %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
+  // OGCG: %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
+  // OGCG: store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
+  // OGCG: %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
+  // OGCG: ret void
+}
+
+void loadWithSignalFence(DataPtr d) {
+  __atomic_signal_fence(__ATOMIC_SEQ_CST);
+  __atomic_load_n(&d->ptr, __ATOMIC_SEQ_CST);
+  // CIR-LABEL: @loadWithSignalFence
+  // CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!rec_Data>, !cir.ptr<!cir.ptr<!rec_Data>>, ["d", init] {alignment = 8 : i64}
+  // CIR: %[[ATOMIC_TEMP:.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["atomic-temp"] {alignment = 8 : i64}
+  // CIR: cir.atomic.fence syncscope(single_thread) seq_cst
+  // CIR: %[[LOAD_DATA:.*]] = cir.load{{.*}} %[[DATA]] : !cir.ptr<!cir.ptr<!rec_Data>>, !cir.ptr<!rec_Data>
+  // CIR: %[[DATA_PTR:.*]] = cir.get_member %[[LOAD_DATA]][1] {name = "ptr"} : !cir.ptr<!rec_Data> -> !cir.ptr<!cir.ptr<!void>>
+  // CIR: %[[CASTED_DATA_PTR:.*]] = cir.cast bitcast %[[DATA_PTR]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!u64i>
+  // CIR: %[[CASTED_ATOMIC_TEMP:.*]] = cir.cast bitcast %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>> -> !cir.ptr<!u64i>
+  // CIR: %[[ATOMIC_LOAD:.*]] = cir.load{{.*}} atomic(seq_cst) %[[CASTED_DATA_PTR]] : !cir.ptr<!u64i>, !u64i
+  // CIR: cir.store{{.*}} %[[ATOMIC_LOAD]], %[[CASTED_ATOMIC_TEMP]] : !u64i, !cir.ptr<!u64i>
+  // CIR: %[[DOUBLE_CASTED_ATOMIC_TEMP:.*]] = cir.cast bitcast %[[CASTED_ATOMIC_TEMP]] : !cir.ptr<!u64i> -> !cir.ptr<!cir.ptr<!void>>
+  // CIR: %[[LOAD_ATOMIC_TEMP:.*]] = cir.load{{.*}} %[[DOUBLE_CASTED_ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+  // CIR: cir.return
+
+  // LLVM-LABEL: @loadWithSignalFence
+  // LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
+  // LLVM: %[[DATA_TEMP:.*]] = alloca ptr, i64 1, align 8
+  // LLVM: fence syncscope("singlethread") seq_cst
+  // LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
+  // LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
+  // LLVM: %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
+  // LLVM: store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
+  // LLVM: %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
+  // LLVM: ret void
+
+  // OGCG-LABEL: @loadWithSignalFence
+  // OGCG: %[[DATA:.*]] = alloca ptr, align 8
+  // OGCG: %[[DATA_TEMP:.*]] = alloca ptr, align 8
+  // OGCG: fence syncscope("singlethread") seq_cst
+  // OGCG: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
+  // OGCG: %[[DATA_VALUE:.*]] = getelementptr inbounds nuw %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
+  // OGCG: %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
+  // OGCG: store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
+  // OGCG: %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
+  // OGCG: ret void
+}
