Skip to content

Commit 5c5dbd1

Browse files
committed
update
1 parent 0dc5bf9 commit 5c5dbd1

File tree

3 files changed

+232
-10
lines changed

3 files changed

+232
-10
lines changed

clang/lib/CodeGen/CGAtomic.cpp

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -412,17 +412,48 @@ static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak,
412412

413413
CGF.Builder.SetInsertPoint(StoreExpectedBB);
414414
// Update the memory at Expected with Old's value.
415+
llvm::Type *ExpectedType = ExpectedResult.getElementType();
416+
const llvm::DataLayout &DL = CGF.CGM.getDataLayout();
417+
uint64_t ExpectedSizeInBytes = DL.getTypeStoreSize(ExpectedType);
415418

416-
llvm::Type *ExpectedType = ExpectedResult.getElementType();
417-
uint64_t OriginalSizeInBits = CGF.CGM.getDataLayout().getTypeSizeInBits(ExpectedType);
418-
if (OriginalSizeInBits / 8 == Size) {
419-
auto *I = CGF.Builder.CreateStore(Old, ExpectedResult);
420-
CGF.addInstToCurrentSourceAtom(I, Old);
421-
} else {
422-
// How to just store N bytes to ExpectedResult ?
423-
auto *I = CGF.Builder.CreateStore(Old, ExpectedResult);
424-
CGF.addInstToCurrentSourceAtom(I, Old);
425-
}
419+
if (ExpectedSizeInBytes == Size) {
420+
// Sizes match: store directly
421+
auto I* = CGF.Builder.CreateStore(Old, ExpectedResult);
422+
CGF.addInstToCurrentSourceAtom(I, Old);
423+
} else {
424+
// store only the first ExpectedSizeInBytes bytes of Old
425+
llvm::Type *OldType = Old->getType();
426+
427+
llvm::Align SrcAlignLLVM = DL.getABITypeAlign(OldType);
428+
llvm::Align DstAlignLLVM = DL.getABITypeAlign(ExpectedType);
429+
430+
clang::CharUnits SrcAlign = clang::CharUnits::fromQuantity(SrcAlignLLVM.value());
431+
clang::CharUnits DstAlign = clang::CharUnits::fromQuantity(DstAlignLLVM.value());
432+
433+
// Allocate temporary storage for Old value
434+
llvm::AllocaInst *Alloca = CGF.CreateTempAlloca(OldType, "old.tmp");
435+
436+
// Wrap into clang::CodeGen::Address with proper type and alignment
437+
Address OldStorage(Alloca, OldType, SrcAlign);
438+
439+
// Store Old into this temporary
440+
CGF.Builder.CreateStore(Old, OldStorage);
441+
442+
// Bitcast pointers to i8*
443+
llvm::Type *I8PtrTy = llvm::PointerType::getUnqual(CGF.getLLVMContext());
444+
445+
llvm::Value *SrcPtr = CGF.Builder.CreateBitCast(OldStorage.getBasePointer(), I8PtrTy);
446+
llvm::Value *DstPtr = CGF.Builder.CreateBitCast(ExpectedResult.getBasePointer(), I8PtrTy);
447+
448+
// Perform memcpy for first ExpectedSizeInBytes bytes
449+
CGF.Builder.CreateMemCpy(
450+
DstPtr, DstAlignLLVM,
451+
SrcPtr, SrcAlignLLVM,
452+
llvm::ConstantInt::get(CGF.IntPtrTy, ExpectedSizeInBytes),
453+
/*isVolatile=*/false);
454+
}
455+
456+
426457
// Finally, branch to the exit point.
427458
CGF.Builder.CreateBr(ContinueBB);
428459

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
2+
// RUN: %clang_cc1 -std=c++20 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
3+
4+
5+
// Payload type holding exactly Size chars. The tests below instantiate it with
// 3, 4 and 6 to cover both power-of-two and non-power-of-two atomic sizes.
template <unsigned Size>
6+
struct S {
7+
char data[Size];
8+
};
9+
10+
// CHECK-LABEL: define dso_local noundef zeroext i1 @_Z21test_compare_exchangePU7_Atomic1SILj3EEPS0_S0_(
11+
// CHECK-SAME: ptr noundef [[A:%.*]], ptr noundef [[EXPECTED:%.*]], i24 [[DESIRED_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
12+
// CHECK-NEXT: [[ENTRY:.*:]]
13+
// CHECK-NEXT: [[DESIRED:%.*]] = alloca [[STRUCT_S:%.*]], align 1
14+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
15+
// CHECK-NEXT: [[EXPECTED_ADDR:%.*]] = alloca ptr, align 8
16+
// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca [[STRUCT_S]], align 1
17+
// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { [[STRUCT_S]], [1 x i8] }, align 4
18+
// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca { [[STRUCT_S]], [1 x i8] }, align 4
19+
// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1
20+
// CHECK-NEXT: [[OLD_TMP:%.*]] = alloca i32, align 4
21+
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[DESIRED]], i32 0, i32 0
22+
// CHECK-NEXT: store i24 [[DESIRED_COERCE]], ptr [[COERCE_DIVE]], align 1
23+
// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
24+
// CHECK-NEXT: store ptr [[EXPECTED]], ptr [[EXPECTED_ADDR]], align 8
25+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
26+
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[EXPECTED_ADDR]], align 8
27+
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DOTATOMICTMP]], ptr align 1 [[DESIRED]], i64 3, i1 false)
28+
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ATOMIC_TEMP]], ptr align 1 [[TMP1]], i64 3, i1 false)
29+
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ATOMIC_TEMP1]], ptr align 1 [[DOTATOMICTMP]], i64 3, i1 false)
30+
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
31+
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP1]], align 4
32+
// CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP2]], i32 [[TMP3]] monotonic monotonic, align 4
33+
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
34+
// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
35+
// CHECK-NEXT: br i1 [[TMP6]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]]
36+
// CHECK: [[CMPXCHG_STORE_EXPECTED]]:
37+
// CHECK-NEXT: store i32 [[TMP5]], ptr [[OLD_TMP]], align 4
38+
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[TMP1]], ptr align 4 [[OLD_TMP]], i64 3, i1 false)
39+
// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE]]
40+
// CHECK: [[CMPXCHG_CONTINUE]]:
41+
// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP6]] to i8
42+
// CHECK-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1
43+
// CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1
44+
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP7]] to i1
45+
// CHECK-NEXT: ret i1 [[LOADEDV]]
46+
//
47+
// 3-byte payload. Per the assertions above, the exchange is emitted as an i32
// cmpxchg, and on failure the old value goes through a temporary so that only
// 3 bytes are copied back into *expected.
bool test_compare_exchange(_Atomic(S<3>)* a, S<3>* expected, S<3> desired) {
48+
// Both memory-order arguments are 0; the assertions above show them as monotonic.
return __c11_atomic_compare_exchange_strong(a, expected, desired, 0, 0);
49+
}
50+
51+
52+
// CHECK-LABEL: define dso_local noundef zeroext i1 @_Z21test_compare_exchangePU7_Atomic1SILj4EEPS0_S0_(
53+
// CHECK-SAME: ptr noundef [[A:%.*]], ptr noundef [[EXPECTED:%.*]], i32 [[DESIRED_COERCE:%.*]]) #[[ATTR0]] {
54+
// CHECK-NEXT: [[ENTRY:.*:]]
55+
// CHECK-NEXT: [[DESIRED:%.*]] = alloca [[STRUCT_S_0:%.*]], align 1
56+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
57+
// CHECK-NEXT: [[EXPECTED_ADDR:%.*]] = alloca ptr, align 8
58+
// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca [[STRUCT_S_0]], align 1
59+
// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1
60+
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_S_0]], ptr [[DESIRED]], i32 0, i32 0
61+
// CHECK-NEXT: store i32 [[DESIRED_COERCE]], ptr [[COERCE_DIVE]], align 1
62+
// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
63+
// CHECK-NEXT: store ptr [[EXPECTED]], ptr [[EXPECTED_ADDR]], align 8
64+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
65+
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[EXPECTED_ADDR]], align 8
66+
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DOTATOMICTMP]], ptr align 1 [[DESIRED]], i64 4, i1 false)
67+
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1
68+
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTATOMICTMP]], align 1
69+
// CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP2]], i32 [[TMP3]] monotonic monotonic, align 4
70+
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
71+
// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
72+
// CHECK-NEXT: br i1 [[TMP6]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]]
73+
// CHECK: [[CMPXCHG_STORE_EXPECTED]]:
74+
// CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP1]], align 1
75+
// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE]]
76+
// CHECK: [[CMPXCHG_CONTINUE]]:
77+
// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP6]] to i8
78+
// CHECK-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1
79+
// CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1
80+
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP7]] to i1
81+
// CHECK-NEXT: ret i1 [[LOADEDV]]
82+
//
83+
// 4-byte payload. Per the assertions above, the old value is written back to
// *expected with a direct i32 store and no temporary.
bool test_compare_exchange(_Atomic(S<4>)* a, S<4>* expected, S<4> desired) {
84+
// Both memory-order arguments are 0; the assertions above show them as monotonic.
return __c11_atomic_compare_exchange_strong(a, expected, desired, 0, 0);
85+
}
86+
87+
// CHECK-LABEL: define dso_local noundef zeroext i1 @_Z21test_compare_exchangePU7_Atomic1SILj6EEPS0_S0_(
88+
// CHECK-SAME: ptr noundef [[A:%.*]], ptr noundef [[EXPECTED:%.*]], i48 [[DESIRED_COERCE:%.*]]) #[[ATTR0]] {
89+
// CHECK-NEXT: [[ENTRY:.*:]]
90+
// CHECK-NEXT: [[DESIRED:%.*]] = alloca [[STRUCT_S_1:%.*]], align 1
91+
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
92+
// CHECK-NEXT: [[EXPECTED_ADDR:%.*]] = alloca ptr, align 8
93+
// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca [[STRUCT_S_1]], align 1
94+
// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { [[STRUCT_S_1]], [2 x i8] }, align 8
95+
// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca { [[STRUCT_S_1]], [2 x i8] }, align 8
96+
// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1
97+
// CHECK-NEXT: [[OLD_TMP:%.*]] = alloca i64, align 8
98+
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_S_1]], ptr [[DESIRED]], i32 0, i32 0
99+
// CHECK-NEXT: store i48 [[DESIRED_COERCE]], ptr [[COERCE_DIVE]], align 1
100+
// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
101+
// CHECK-NEXT: store ptr [[EXPECTED]], ptr [[EXPECTED_ADDR]], align 8
102+
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
103+
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[EXPECTED_ADDR]], align 8
104+
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DOTATOMICTMP]], ptr align 1 [[DESIRED]], i64 6, i1 false)
105+
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_TEMP]], ptr align 1 [[TMP1]], i64 6, i1 false)
106+
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[ATOMIC_TEMP1]], ptr align 1 [[DOTATOMICTMP]], i64 6, i1 false)
107+
// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[ATOMIC_TEMP]], align 8
108+
// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[ATOMIC_TEMP1]], align 8
109+
// CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[TMP0]], i64 [[TMP2]], i64 [[TMP3]] monotonic monotonic, align 8
110+
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i64, i1 } [[TMP4]], 0
111+
// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i64, i1 } [[TMP4]], 1
112+
// CHECK-NEXT: br i1 [[TMP6]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]]
113+
// CHECK: [[CMPXCHG_STORE_EXPECTED]]:
114+
// CHECK-NEXT: store i64 [[TMP5]], ptr [[OLD_TMP]], align 8
115+
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[TMP1]], ptr align 8 [[OLD_TMP]], i64 6, i1 false)
116+
// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE]]
117+
// CHECK: [[CMPXCHG_CONTINUE]]:
118+
// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP6]] to i8
119+
// CHECK-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1
120+
// CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1
121+
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP7]] to i1
122+
// CHECK-NEXT: ret i1 [[LOADEDV]]
123+
//
124+
// 6-byte payload. Per the assertions above, the exchange is emitted as an i64
// cmpxchg, and on failure the old value goes through a temporary so that only
// 6 bytes are copied back into *expected.
bool test_compare_exchange(_Atomic(S<6>)* a, S<6>* expected, S<6> desired) {
125+
// Both memory-order arguments are 0; the assertions above show them as monotonic.
return __c11_atomic_compare_exchange_strong(a, expected, desired, 0, 0);
126+
}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// https://github.com/llvm/llvm-project/issues/30023
10+
// compare_exchange does not work with types whose size is not a power of 2
11+
12+
#include <atomic>
13+
#include <cstring>
14+
#include <cassert>
15+
16+
// Payload of exactly Size chars, used as the value type of std::atomic in this
// test. Every byte holds the same value so that a partially written object is
// detected by operator==.
template <int Size>
struct S {
  char data[Size];

  // Fills every byte of data with v.
  explicit S(char v = 0) noexcept { std::memset(&data[0], v, sizeof(data)); }

  // only used in the test to check the results. Not used in atomic operations.
  friend bool operator==(const S& lhs, const S& rhs) noexcept {
    return std::memcmp(&lhs.data[0], &rhs.data[0], sizeof(lhs.data)) == 0;
  }
  friend bool operator!=(const S& lhs, const S& rhs) noexcept { return !(lhs == rhs); }
};
28+
29+
// Pairs the S<Size> value handed to compare_exchange_strong with a sentinel
// bool declared right after it.
template <int Size>
30+
struct Expected {
31+
S<Size> s;
32+
bool b = true; // used to validate that s's operation won't overwrite the memory next to it
33+
};
34+
35+
// Exercises compare_exchange_strong on std::atomic<S<Size>>: first an exchange
// that must fail and rewrite expected.s, then one that must succeed. After each
// step the sentinel expected.b is checked to still be true.
template <int Size>
36+
void test() {
37+
using T = S<Size>;
38+
std::atomic<T> a(T(0));
39+
Expected<Size> expected{T(17)};
40+
41+
assert(a.load() != expected.s);
42+
assert(expected.b);
43+
44+
// a holds T(0) while expected.s is T(17), so this exchange must fail.
auto r1 = a.compare_exchange_strong(expected.s, T(18), std::memory_order_relaxed);
45+
46+
assert(!r1);
47+
assert(expected.s == T(0)); // expected.s is modified by compare_exchange_strong
48+
assert(expected.b);
49+
assert(a.load() == T(0));
50+
51+
// expected.s now matches the value in a, so this exchange must succeed and store T(18).
auto r2 = a.compare_exchange_strong(expected.s, T(18), std::memory_order_relaxed);
52+
assert(r2);
53+
assert(a.load() == T(18));
54+
assert(expected.s == T(0));
55+
assert(expected.b);
56+
}
57+
58+
// Runs the test for payload sizes 1 through 6, which covers power-of-two sizes
// (1, 2, 4) and non-power-of-two sizes (3, 5, 6).
// NOTE(review): libc++ tests conventionally declare `int main(int, char**)` and
// end with an explicit `return 0;` — confirm whether this file should follow suit.
int main() {
59+
test<1>();
60+
test<2>();
61+
test<3>();
62+
test<4>();
63+
test<5>();
64+
test<6>();
65+
}

0 commit comments

Comments
 (0)