Skip to content

Commit c346d94

Browse files
committed
Fix the build and implementation of the 16-byte atomics for MSVC.
Credit for the cmake fix here goes to Saleem Abdulrasool. The substantive fix is embarrassing; I didn't pay close attention to the intrinsic's argument order and just assumed that the first argument for the replacement value was the low half (the part you'd find at index 0 if it were an array), but in fact it's the high half (the part you'd find at index 1). I also change the code to be much more reinterpret_casty, which isolates the type-punning mostly "within" the intrinsic, and which seems to match how other code uses it.
1 parent f569be2 commit c346d94

File tree

3 files changed

+40
-39
lines changed

3 files changed

+40
-39
lines changed

cmake/modules/AddSwift.cmake

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -248,18 +248,6 @@ function(_add_host_variant_c_compile_flags target)
248248
endif()
249249
endif()
250250

251-
# The concurrency library uses double-word atomics. MSVC's std::atomic
252-
# uses a spin lock for this, so to get reasonable behavior we have to
253-
# implement it ourselves using _InterlockedCompareExchange128.
254-
# clang-cl requires us to enable the `cx16` feature to use this intrinsic.
255-
if(SWIFT_HOST_VARIANT_SDK STREQUAL WINDOWS)
256-
if(SWIFT_HOST_VARIANT_ARCH STREQUAL x86_64)
257-
if(CMAKE_C_COMPILER_ID MATCHES Clang)
258-
target_compile_options(${target} PRIVATE -mcx16)
259-
endif()
260-
endif()
261-
endif()
262-
263251
if(LLVM_ENABLE_ASSERTIONS)
264252
target_compile_options(${target} PRIVATE -UNDEBUG)
265253
else()

include/swift/Runtime/Atomic.h

Lines changed: 28 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,7 @@ class alignas(Size) atomic_impl {
6666
}
6767
};
6868

69-
// FIXME: get this to build reliably
70-
#if 0 && defined(_WIN64)
69+
#if defined(_WIN64)
7170
#include <intrin.h>
7271

7372
/// MSVC's std::atomic uses an inline spin lock for 16-byte atomics,
@@ -76,11 +75,7 @@ class alignas(Size) atomic_impl {
7675
/// AMD processors that lack cmpxchg16b, so we just use the intrinsic.
7776
template <class Value>
7877
class alignas(2 * sizeof(void*)) atomic_impl<Value, 2 * sizeof(void*)> {
79-
// MSVC is not strict about aliasing, so we can get away with this.
80-
union {
81-
volatile Value atomicValue;
82-
volatile __int64 atomicArray[2];
83-
};
78+
volatile Value atomicValue;
8479
public:
8580
constexpr atomic_impl(Value initialValue) : atomicValue(initialValue) {}
8681

@@ -98,10 +93,14 @@ class alignas(2 * sizeof(void*)) atomic_impl<Value, 2 * sizeof(void*)> {
9893
__int64 resultArray[2] = {};
9994
#if SWIFT_HAS_MSVC_ARM_ATOMICS
10095
if (order != std::memory_order_acquire) {
101-
(void) _InterlockedCompareExchange128_nf(atomicArray, 0, 0, resultArray);
96+
(void) _InterlockedCompareExchange128_nf(
97+
reinterpret_cast<volatile __int64*>(&atomicValue),
98+
0, 0, resultArray);
10299
} else {
103100
#endif
104-
(void) _InterlockedCompareExchange128(atomicArray, 0, 0, resultArray);
101+
(void) _InterlockedCompareExchange128(
102+
reinterpret_cast<volatile __int64*>(&atomicValue),
103+
0, 0, resultArray);
105104
#if SWIFT_HAS_MSVC_ARM_ATOMICS
106105
}
107106
#endif
@@ -116,31 +115,33 @@ class alignas(2 * sizeof(void*)) atomic_impl<Value, 2 * sizeof(void*)> {
116115
failureOrder == std::memory_order_consume);
117116
assert(successOrder == std::memory_order_relaxed ||
118117
successOrder == std::memory_order_release);
119-
__int64 newValueArray[2];
120-
memcpy(newValueArray, &newValue, sizeof(Value));
121118
#if SWIFT_HAS_MSVC_ARM_ATOMICS
122119
if (successOrder == std::memory_order_relaxed &&
123120
failureOrder != std::memory_order_acquire) {
124-
return _InterlockedCompareExchange128_nf(atomicArray,
125-
newValueArray[0],
126-
newValueArray[1],
127-
reinterpret_cast<__int64*>(&oldValue));
121+
return _InterlockedCompareExchange128_nf(
122+
reinterpret_cast<volatile __int64*>(&atomicValue),
123+
reinterpret_cast<const __int64*>(&newValue)[1],
124+
reinterpret_cast<const __int64*>(&newValue)[0],
125+
reinterpret_cast<__int64*>(&oldValue));
128126
} else if (successOrder == std::memory_order_relaxed) {
129-
return _InterlockedCompareExchange128_acq(atomicArray,
130-
newValueArray[0],
131-
newValueArray[1],
132-
reinterpret_cast<__int64*>(&oldValue));
127+
return _InterlockedCompareExchange128_acq(
128+
reinterpret_cast<volatile __int64*>(&atomicValue),
129+
reinterpret_cast<const __int64*>(&newValue)[1],
130+
reinterpret_cast<const __int64*>(&newValue)[0],
131+
reinterpret_cast<__int64*>(&oldValue));
133132
} else if (failureOrder != std::memory_order_acquire) {
134-
return _InterlockedCompareExchange128_rel(atomicArray,
135-
newValueArray[0],
136-
newValueArray[1],
137-
reinterpret_cast<__int64*>(&oldValue));
133+
return _InterlockedCompareExchange128_rel(
134+
reinterpret_cast<volatile __int64*>(&atomicValue),
135+
reinterpret_cast<const __int64*>(&newValue)[1],
136+
reinterpret_cast<const __int64*>(&newValue)[0],
137+
reinterpret_cast<__int64*>(&oldValue));
138138
} else {
139139
#endif
140-
return _InterlockedCompareExchange128(atomicArray,
141-
newValueArray[0],
142-
newValueArray[1],
143-
reinterpret_cast<__int64*>(&oldValue));
140+
return _InterlockedCompareExchange128(
141+
reinterpret_cast<volatile __int64*>(&atomicValue),
142+
reinterpret_cast<const __int64*>(&newValue)[1],
143+
reinterpret_cast<const __int64*>(&newValue)[0],
144+
reinterpret_cast<__int64*>(&oldValue));
144145
#if SWIFT_HAS_MSVC_ARM_ATOMICS
145146
}
146147
#endif

stdlib/cmake/modules/AddSwiftStdlib.cmake

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,18 @@ function(_add_target_variant_c_compile_flags)
261261
endif()
262262
endif()
263263

264+
# The concurrency library uses double-word atomics. MSVC's std::atomic
265+
# uses a spin lock for this, so to get reasonable behavior we have to
266+
# implement it ourselves using _InterlockedCompareExchange128.
267+
# clang-cl requires us to enable the `cx16` feature to use this intrinsic.
268+
if(SWIFT_HOST_VARIANT_ARCH STREQUAL x86_64)
269+
if(SWIFT_COMPILER_IS_MSVC_LIKE)
270+
list(APPEND result /clang:-mcx16)
271+
else()
272+
list(APPEND result -mcx16)
273+
endif()
274+
endif()
275+
264276
if(${CFLAGS_SDK} STREQUAL ANDROID)
265277
if(${CFLAGS_ARCH} STREQUAL x86_64)
266278
# NOTE(compnerd) Android NDK 21 or lower will generate library calls to

0 commit comments

Comments (0)