Commit ca16548 (1 parent: 33c4d7f)

Add a swift::atomic<T> which uses a better ABI than MSVC's 128-bit std::atomic.

3 files changed, 155 insertions(+), 0 deletions(-)


cmake/modules/AddSwift.cmake

Lines changed: 12 additions & 0 deletions
@@ -248,6 +248,18 @@ function(_add_host_variant_c_compile_flags target)
     endif()
   endif()
 
+  # The concurrency library uses double-word atomics. MSVC's std::atomic
+  # uses a spin lock for this, so to get reasonable behavior we have to
+  # implement it ourselves using _InterlockedCompareExchange128.
+  # clang-cl requires us to enable the `cx16` feature to use this intrinsic.
+  if(SWIFT_HOST_VARIANT_SDK STREQUAL WINDOWS)
+    if(SWIFT_HOST_VARIANT_ARCH STREQUAL x86_64)
+      if(CMAKE_C_COMPILER_ID MATCHES Clang)
+        target_compile_options(${target} PRIVATE -mcx16)
+      endif()
+    endif()
+  endif()
+
   if(LLVM_ENABLE_ASSERTIONS)
     target_compile_options(${target} PRIVATE -UNDEBUG)
   else()
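The -mcx16 option matters because clang-cl only accepts _InterlockedCompareExchange128 (and lowers it to lock cmpxchg16b) when the cx16 target feature is enabled, whereas MSVC takes the intrinsic unconditionally, hence the CMAKE_C_COMPILER_ID check above. As a rough illustration only, not code from this commit, the kind of double-word compare-and-swap the flag unlocks looks like the following; the cas128 helper and its parameter layout are hypothetical:

#include <intrin.h>

// Hypothetical sketch: install `desired` into the 16-byte `dest` only if it
// still equals `expected`. Returns true on success; on failure `expected`
// is overwritten with the value observed in `dest`.
inline bool cas128(__int64 volatile dest[2],
                   __int64 expected[2],
                   const __int64 desired[2]) {
  // The intrinsic takes the high and low halves of the new value separately
  // and reuses `expected` as both comparand and out-parameter.
  return _InterlockedCompareExchange128(dest,
                                        /*ExchangeHigh=*/desired[1],
                                        /*ExchangeLow=*/desired[0],
                                        expected) != 0;
}

On x86_64, clang-cl rejects such a call unless the cx16 feature is enabled, which is exactly the case the block above guards against.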

include/swift/Runtime/Atomic.h

Lines changed: 137 additions & 0 deletions
@@ -17,6 +17,10 @@
 #ifndef SWIFT_RUNTIME_ATOMIC_H
 #define SWIFT_RUNTIME_ATOMIC_H
 
+#include "swift/Runtime/Config.h"
+#include <assert.h>
+#include <atomic>
+
 // FIXME: Workaround for rdar://problem/18889711. 'Consume' does not require
 // a barrier on ARM64, but LLVM doesn't know that. Although 'relaxed'
 // is formally UB by C++11 language rules, we should be OK because neither
@@ -28,4 +32,137 @@
 # define SWIFT_MEMORY_ORDER_CONSUME (std::memory_order_consume)
 #endif
 
+#if defined(_M_ARM) || defined(__arm__) || defined(__aarch64__)
+#define SWIFT_HAS_MSVC_ARM_ATOMICS 1
+#else
+#define SWIFT_HAS_MSVC_ARM_ATOMICS 0
+#endif
+
+namespace swift {
+namespace impl {
+
+/// The default implementation for swift::atomic<T>, which just wraps
+/// std::atomic with minor differences.
+///
+/// TODO: should we make this use non-atomic operations when the runtime
+/// is single-threaded?
+template <class Value, size_t Size = sizeof(Value)>
+class alignas(Size) atomic_impl {
+  std::atomic<Value> value;
+public:
+  constexpr atomic_impl(Value value) : value(value) {}
+
+  /// Force clients to always pass an order.
+  Value load(std::memory_order order) {
+    return value.load(order);
+  }
+
+  /// Force clients to always pass an order.
+  bool compare_exchange_weak(Value &oldValue, Value newValue,
+                             std::memory_order successOrder,
+                             std::memory_order failureOrder) {
+    return value.compare_exchange_weak(oldValue, newValue, successOrder,
+                                       failureOrder);
+  }
+};
+
+#if defined(_WIN64)
+#include <intrin.h>
+
+/// MSVC's std::atomic uses an inline spin lock for 16-byte atomics,
+/// which is not only unnecessarily inefficient but also doubles the size
+/// of the atomic object. We don't care about supporting ancient
+/// AMD processors that lack cmpxchg16b, so we just use the intrinsic.
+template <class Value>
+class alignas(2 * sizeof(void*)) atomic_impl<Value, 2 * sizeof(void*)> {
+  // MSVC is not strict about aliasing, so we can get away with this.
+  union {
+    volatile Value atomicValue;
+    volatile __int64 atomicArray[2];
+  };
+public:
+  constexpr atomic_impl(Value initialValue) : atomicValue(initialValue) {}
+
+  atomic_impl(const atomic_impl &) = delete;
+  atomic_impl &operator=(const atomic_impl &) = delete;
+
+  Value load(std::memory_order order) {
+    assert(order == std::memory_order_relaxed ||
+           order == std::memory_order_acquire ||
+           order == std::memory_order_consume);
+    // Aligned SSE loads are atomic on every known processor, but
+    // the only 16-byte access that's architecturally guaranteed to be
+    // atomic is lock cmpxchg16b, so we do that with identical comparison
+    // and new values purely for the side-effect of updating the old value.
+    __int64 resultArray[2] = {};
+#if SWIFT_HAS_MSVC_ARM_ATOMICS
+    if (order != std::memory_order_acquire) {
+      (void) _InterlockedCompareExchange128_nf(atomicArray, 0, 0, resultArray);
+    } else {
+#endif
+    (void) _InterlockedCompareExchange128(atomicArray, 0, 0, resultArray);
+#if SWIFT_HAS_MSVC_ARM_ATOMICS
+    }
+#endif
+    return reinterpret_cast<Value &>(resultArray);
+  }
+
+  bool compare_exchange_weak(Value &oldValue, Value newValue,
+                             std::memory_order successOrder,
+                             std::memory_order failureOrder) {
+    assert(failureOrder == std::memory_order_relaxed ||
+           failureOrder == std::memory_order_acquire ||
+           failureOrder == std::memory_order_consume);
+    assert(successOrder == std::memory_order_relaxed ||
+           successOrder == std::memory_order_release);
+    __int64 newValueArray[2];
+    memcpy(newValueArray, &newValue, sizeof(Value));
+#if SWIFT_HAS_MSVC_ARM_ATOMICS
+    if (successOrder == std::memory_order_relaxed &&
+        failureOrder != std::memory_order_acquire) {
+      return _InterlockedCompareExchange128_nf(atomicArray,
+                                               newValueArray[0],
+                                               newValueArray[1],
+                                               reinterpret_cast<__int64*>(&oldValue));
+    } else if (successOrder == std::memory_order_relaxed) {
+      return _InterlockedCompareExchange128_acq(atomicArray,
+                                                newValueArray[0],
+                                                newValueArray[1],
+                                                reinterpret_cast<__int64*>(&oldValue));
+    } else if (failureOrder != std::memory_order_acquire) {
+      return _InterlockedCompareExchange128_rel(atomicArray,
+                                                newValueArray[0],
+                                                newValueArray[1],
+                                                reinterpret_cast<__int64*>(&oldValue));
+    } else {
+#endif
+    return _InterlockedCompareExchange128(atomicArray,
+                                          newValueArray[0],
+                                          newValueArray[1],
+                                          reinterpret_cast<__int64*>(&oldValue));
+#if SWIFT_HAS_MSVC_ARM_ATOMICS
+    }
+#endif
+  }
+};
+
+#endif
+
+} // end namespace swift::impl
+
+/// A simple wrapper for std::atomic that provides the most important
+/// interfaces and fixes the API bug where all of the orderings default
+/// to sequentially-consistent.
+///
+/// It also sometimes uses a different implementation in cases where
+/// std::atomic has made unfortunate choices; our uses of this broadly
+/// don't have the ABI-compatibility issues that std::atomic faces.
+template <class T>
+class atomic : public impl::atomic_impl<T> {
+public:
+  atomic(T value) : impl::atomic_impl<T>(value) {}
+};
+
+} // end namespace swift
+
 #endif
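To make the intended use of the new header concrete, here is a small sketch, not taken from the commit, of a caller updating a 16-byte value through swift::atomic; the JobStatus type, its fields, and the setFlags helper are illustrative only. Both operations must name their memory order explicitly, and on _WIN64 the specialized implementation keeps the atomic object the same size as the value it wraps.

#include "swift/Runtime/Atomic.h"

#include <atomic>
#include <cstdint>

// Hypothetical 16-byte status word of the sort the concurrency runtime
// updates atomically.
struct alignas(2 * sizeof(void *)) JobStatus {
  void *firstJob;
  std::uintptr_t flags;
};

swift::atomic<JobStatus> gStatus{JobStatus{nullptr, 0}};

#if defined(_WIN64)
// The double-word specialization stores only the value (in a union with an
// __int64[2]), so no spin lock or extra word is added to the layout.
static_assert(sizeof(swift::atomic<JobStatus>) == sizeof(JobStatus),
              "atomic wrapper should not grow the object");
#endif

void setFlags(std::uintptr_t newFlags) {
  JobStatus oldStatus = gStatus.load(std::memory_order_relaxed);
  JobStatus newStatus = oldStatus;
  newStatus.flags = newFlags;
  // compare_exchange_weak can fail spuriously or because another thread won
  // the race; either way oldStatus now holds the observed value and the
  // update is retried.
  while (!gStatus.compare_exchange_weak(oldStatus, newStatus,
                                        /*successOrder=*/std::memory_order_release,
                                        /*failureOrder=*/std::memory_order_relaxed)) {
    newStatus = oldStatus;
    newStatus.flags = newFlags;
  }
}

Requiring explicit orders at every call site is what closes the std::atomic API gap described in the header's own comment, where every ordering otherwise defaults to sequentially-consistent.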

stdlib/public/Concurrency/CMakeLists.txt

Lines changed: 6 additions & 0 deletions
@@ -10,6 +10,12 @@
 #
 #===----------------------------------------------------------------------===#
 
+set(swift_concurrency_objc_sources
+  SwiftNativeNSObject.mm)
+
+set(LLVM_OPTIONAL_SOURCES
+  ${swift_concurrency_objc_sources})
+
 add_swift_target_library(swift_Concurrency ${SWIFT_STDLIB_LIBRARY_BUILD_TYPES} IS_STDLIB
   Actor.swift
   PartialAsyncTask.swift
