17 | 17 | #ifndef SWIFT_RUNTIME_ATOMIC_H
18 | 18 | #define SWIFT_RUNTIME_ATOMIC_H
19 | 19 |
| 20 | +#include "swift/Runtime/Config.h" |
| 21 | +#include <assert.h> |
| 22 | +#include <atomic> |
| 23 | + |
20 | 24 | // FIXME: Workaround for rdar://problem/18889711. 'Consume' does not require
21 | 25 | // a barrier on ARM64, but LLVM doesn't know that. Although 'relaxed'
22 | 26 | // is formally UB by C++11 language rules, we should be OK because neither
28 | 32 | # define SWIFT_MEMORY_ORDER_CONSUME (std::memory_order_consume)
29 | 33 | #endif
30 | 34 |
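As an aside on how SWIFT_MEMORY_ORDER_CONSUME is meant to be used: below is a minimal sketch of the usual publish/consume pattern, assuming this header is included. The `Node` type, `head` variable, and function names are illustrative and not part of this patch; the point is that ARM builds get a plain relaxed load while other targets keep the formal consume ordering.

```cpp
#include <atomic>
// Assumes this header is included so SWIFT_MEMORY_ORDER_CONSUME is defined.

struct Node { int payload; };
static std::atomic<Node *> head{nullptr};

// Writer: publish a fully initialized node with release ordering.
void publish(Node *n) {
  head.store(n, std::memory_order_release);
}

// Reader: the load of n->payload carries an address dependency on the load
// of head, which is what lets ARM64 builds get away with a relaxed load.
int read() {
  Node *n = head.load(SWIFT_MEMORY_ORDER_CONSUME);
  return n ? n->payload : 0;
}
```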
| 35 | +#if defined(_M_ARM) || defined(__arm__) || defined(__aarch64__) |
| 36 | +#define SWIFT_HAS_MSVC_ARM_ATOMICS 1 |
| 37 | +#else |
| 38 | +#define SWIFT_HAS_MSVC_ARM_ATOMICS 0 |
| 39 | +#endif |
| 40 | + |
| 41 | +namespace swift { |
| 42 | +namespace impl { |
| 43 | + |
| 44 | +/// The default implementation for swift::atomic<T>, which just wraps |
| 45 | +/// std::atomic with minor differences. |
| 46 | +/// |
| 47 | +/// TODO: should we make this use non-atomic operations when the runtime |
| 48 | +/// is single-threaded? |
| 49 | +template <class Value, size_t Size = sizeof(Value)> |
| 50 | +class alignas(Size) atomic_impl { |
| 51 | + std::atomic<Value> value; |
| 52 | +public: |
| 53 | + constexpr atomic_impl(Value value) : value(value) {} |
| 54 | + |
| 55 | + /// Force clients to always pass an order. |
| 56 | + Value load(std::memory_order order) { |
| 57 | + return value.load(order); |
| 58 | + } |
| 59 | + |
| 60 | + /// Force clients to always pass an order. |
| 61 | + bool compare_exchange_weak(Value &oldValue, Value newValue, |
| 62 | + std::memory_order successOrder, |
| 63 | + std::memory_order failureOrder) { |
| 64 | + return value.compare_exchange_weak(oldValue, newValue, successOrder, |
| 65 | + failureOrder); |
| 66 | + } |
| 67 | +}; |
| 68 | + |
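A note on the "force clients to always pass an order" methods above: std::atomic defaults every ordering argument to memory_order_seq_cst, so an omitted order compiles silently, whereas this wrapper has no defaulted parameters and an order-less call fails to compile. A minimal sketch of the difference, assuming this header is included and using the swift::atomic<T> wrapper declared further down; the counter parameters are illustrative:

```cpp
#include <atomic>
#include <cstdint>

void orders(std::atomic<uint32_t> &stdCounter,
            swift::atomic<uint32_t> &swiftCounter) {
  (void) stdCounter.load();                            // OK, silently seq_cst
  (void) swiftCounter.load(std::memory_order_relaxed); // order must be spelled out
  // (void) swiftCounter.load();                       // error: no matching overload
}
```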
| 69 | +#if defined(_WIN64) |
| 70 | +#include <intrin.h> |
| 71 | + |
| 72 | +/// MSVC's std::atomic uses an inline spin lock for 16-byte atomics, |
| 73 | +/// which is not only unnecessarily inefficient but also doubles the size |
| 74 | +/// of the atomic object. We don't care about supporting ancient |
| 75 | +/// AMD processors that lack cmpxchg16b, so we just use the intrinsic. |
| 76 | +template <class Value> |
| 77 | +class alignas(2 * sizeof(void*)) atomic_impl<Value, 2 * sizeof(void*)> { |
| 78 | + // MSVC is not strict about aliasing, so we can get away with this. |
| 79 | + union { |
| 80 | + volatile Value atomicValue; |
| 81 | + volatile __int64 atomicArray[2]; |
| 82 | + }; |
| 83 | +public: |
| 84 | + constexpr atomic_impl(Value initialValue) : atomicValue(initialValue) {} |
| 85 | + |
| 86 | + atomic_impl(const atomic_impl &) = delete; |
| 87 | + atomic_impl &operator=(const atomic_impl &) = delete; |
| 88 | + |
| 89 | + Value load(std::memory_order order) { |
| 90 | + assert(order == std::memory_order_relaxed || |
| 91 | + order == std::memory_order_acquire || |
| 92 | + order == std::memory_order_consume); |
| 93 | + // Aligned SSE loads are atomic on every known processor, but |
| 94 | + // the only 16-byte access that's architecturally guaranteed to be |
| 95 | + // atomic is lock cmpxchg16b, so we do that with identical comparison |
| 96 | + // and new values purely for the side-effect of updating the old value. |
| 97 | + __int64 resultArray[2] = {}; |
| 98 | +#if SWIFT_HAS_MSVC_ARM_ATOMICS |
| 99 | + if (order != std::memory_order_acquire) { |
| 100 | + (void) _InterlockedCompareExchange128_nf(atomicArray, 0, 0, resultArray); |
| 101 | + } else { |
| 102 | +#endif |
| 103 | + (void) _InterlockedCompareExchange128(atomicArray, 0, 0, resultArray); |
| 104 | +#if SWIFT_HAS_MSVC_ARM_ATOMICS |
| 105 | + } |
| 106 | +#endif |
| 107 | + return reinterpret_cast<Value &>(resultArray); |
| 108 | + } |
| 109 | + |
| 110 | + bool compare_exchange_weak(Value &oldValue, Value newValue, |
| 111 | + std::memory_order successOrder, |
| 112 | + std::memory_order failureOrder) { |
| 113 | + assert(failureOrder == std::memory_order_relaxed || |
| 114 | + failureOrder == std::memory_order_acquire || |
| 115 | + failureOrder == std::memory_order_consume); |
| 116 | + assert(successOrder == std::memory_order_relaxed || |
| 117 | + successOrder == std::memory_order_release); |
| 118 | + __int64 newValueArray[2]; |
| 119 | + memcpy(newValueArray, &newValue, sizeof(Value)); |
| 120 | +#if SWIFT_HAS_MSVC_ARM_ATOMICS |
| 121 | + if (successOrder == std::memory_order_relaxed && |
| 122 | + failureOrder != std::memory_order_acquire) { |
| 123 | + return _InterlockedCompareExchange128_nf(atomicArray, |
| 124 | + newValueArray[0], |
| 125 | + newValueArray[1], |
| 126 | + reinterpret_cast<__int64*>(&oldValue)); |
| 127 | + } else if (successOrder == std::memory_order_relaxed) { |
| 128 | + return _InterlockedCompareExchange128_acq(atomicArray, |
| 129 | + newValueArray[0], |
| 130 | + newValueArray[1], |
| 131 | + reinterpret_cast<__int64*>(&oldValue)); |
| 132 | + } else if (failureOrder != std::memory_order_acquire) { |
| 133 | + return _InterlockedCompareExchange128_rel(atomicArray, |
| 134 | + newValueArray[0], |
| 135 | + newValueArray[1], |
| 136 | + reinterpret_cast<__int64*>(&oldValue)); |
| 137 | + } else { |
| 138 | +#endif |
| 139 | + return _InterlockedCompareExchange128(atomicArray, |
| 140 | + newValueArray[0], |
| 141 | + newValueArray[1], |
| 142 | + reinterpret_cast<__int64*>(&oldValue)); |
| 143 | +#if SWIFT_HAS_MSVC_ARM_ATOMICS |
| 144 | + } |
| 145 | +#endif |
| 146 | + } |
| 147 | +}; |
| 148 | + |
| 149 | +#endif |
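To make the 2 * sizeof(void*) specialization above concrete: on _WIN64 it is selected for any 16-byte, 16-byte-aligned value, for example two words that must be swapped together. A minimal sketch follows, assuming this header is included; the SideTableRef type and tryUpdate function are illustrative and not part of this patch.

```cpp
#include <atomic>
#include <cstdint>

// Illustrative 16-byte payload: two words that must change atomically together.
struct SideTableRef {
  void *object;
  uintptr_t bits;
};
static_assert(sizeof(SideTableRef) == 2 * sizeof(void *),
              "a 16-byte payload selects the cmpxchg16b-based specialization on _WIN64");

bool tryUpdate(swift::atomic<SideTableRef> &slot, void *obj, uintptr_t newBits) {
  SideTableRef expected = slot.load(std::memory_order_relaxed);
  SideTableRef desired = {obj, newBits};
  // On _WIN64 this compiles to one lock cmpxchg16b: no spin lock and no
  // doubling of the object size; elsewhere it falls back to std::atomic.
  return slot.compare_exchange_weak(expected, desired,
                                    std::memory_order_release,
                                    std::memory_order_relaxed);
}
```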
| 150 | + |
| 151 | +} // end namespace swift::impl |
| 152 | + |
| 153 | +/// A simple wrapper for std::atomic that provides the most important |
| 154 | +/// interfaces and fixes the API bug where all of the orderings default |
| 155 | +/// to sequentially-consistent. |
| 156 | +/// |
| 157 | +/// It also sometimes uses a different implementation in cases where |
| 158 | +/// std::atomic has made unfortunate choices; our uses of this broadly |
| 159 | +/// don't have the ABI-compatibility issues that std::atomic faces. |
| 160 | +template <class T> |
| 161 | +class atomic : public impl::atomic_impl<T> { |
| 162 | +public: |
| 163 | + atomic(T value) : impl::atomic_impl<T>(value) {} |
| 164 | +}; |
| 165 | + |
| 166 | +} // end namespace swift |
| 167 | + |
31 | 168 | #endif
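Finally, a short usage sketch of the public swift::atomic<T>: a retry loop around compare_exchange_weak with every ordering spelled out. It assumes this header is included; the refCount name is illustrative rather than taken from the runtime.

```cpp
#include <atomic>
#include <cstdint>

uint64_t increment(swift::atomic<uint64_t> &refCount) {
  uint64_t old = refCount.load(std::memory_order_relaxed);
  // On failure, compare_exchange_weak reloads the current value into `old`,
  // so the loop retries with fresh data (and tolerates spurious failures).
  while (!refCount.compare_exchange_weak(old, old + 1,
                                         std::memory_order_relaxed,
                                         std::memory_order_relaxed)) {
  }
  return old + 1;
}
```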