[clang] [libc++] fix _Atomic c11 compare exchange does not update expected results #78707

Conversation
✅ With the latest revision this PR passed the C/C++ code formatter.
@llvm/pr-subscribers-clang-codegen (the same patch summary was also posted for @llvm/pr-subscribers-libcxx and @llvm/pr-subscribers-clang)

Author: Hui (huixie90)

Changes: fixes #30023

Patch is 24.34 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/78707.diff

3 Files Affected:
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index 51f0799a792fd..9eee834a520e0 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -376,6 +376,7 @@ bool AtomicInfo::emitMemSetZeroIfNecessary() const {
static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak,
Address Dest, Address Ptr,
Address Val1, Address Val2,
+ Address ExpectedResult,
uint64_t Size,
llvm::AtomicOrdering SuccessOrder,
llvm::AtomicOrdering FailureOrder,
@@ -411,7 +412,48 @@ static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak,
CGF.Builder.SetInsertPoint(StoreExpectedBB);
// Update the memory at Expected with Old's value.
- CGF.Builder.CreateStore(Old, Val1);
+  llvm::Type *ExpectedType = ExpectedResult.getElementType();
+  const llvm::DataLayout &DL = CGF.CGM.getDataLayout();
+  uint64_t ExpectedSizeInBytes = DL.getTypeStoreSize(ExpectedType);
+
+  if (ExpectedSizeInBytes == Size) {
+    // Sizes match: store directly
+    CGF.Builder.CreateStore(Old, ExpectedResult);
+  } else {
+    // Store only the first ExpectedSizeInBytes bytes of Old.
+    llvm::Type *OldType = Old->getType();
+
+    llvm::Align SrcAlignLLVM = DL.getABITypeAlign(OldType);
+    llvm::Align DstAlignLLVM = DL.getABITypeAlign(ExpectedType);
+
+    clang::CharUnits SrcAlign = clang::CharUnits::fromQuantity(SrcAlignLLVM.value());
+    clang::CharUnits DstAlign = clang::CharUnits::fromQuantity(DstAlignLLVM.value());
+
+    // Allocate temporary storage for the Old value.
+    llvm::AllocaInst *Alloca = CGF.CreateTempAlloca(OldType, "old.tmp");
+
+    // Wrap into clang::CodeGen::Address with proper type and alignment.
+    Address OldStorage(Alloca, OldType, SrcAlign);
+
+    // Store Old into this temporary.
+    CGF.Builder.CreateStore(Old, OldStorage);
+
+    // Bitcast pointers to i8*.
+    llvm::Type *I8PtrTy = llvm::PointerType::getUnqual(CGF.getLLVMContext());
+
+    llvm::Value *SrcPtr = CGF.Builder.CreateBitCast(OldStorage.getBasePointer(), I8PtrTy);
+    llvm::Value *DstPtr = CGF.Builder.CreateBitCast(ExpectedResult.getBasePointer(), I8PtrTy);
+
+    // Perform memcpy for the first ExpectedSizeInBytes bytes.
+    CGF.Builder.CreateMemCpy(DstPtr, DstAlignLLVM, SrcPtr, SrcAlignLLVM,
+                             llvm::ConstantInt::get(CGF.IntPtrTy, ExpectedSizeInBytes),
+                             /*isVolatile=*/false);
+  }
+
// Finally, branch to the exit point.
CGF.Builder.CreateBr(ContinueBB);
@@ -426,6 +468,7 @@ static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak,
static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
bool IsWeak, Address Dest, Address Ptr,
Address Val1, Address Val2,
+ Address ExpectedResult,
llvm::Value *FailureOrderVal,
uint64_t Size,
llvm::AtomicOrdering SuccessOrder,
@@ -456,7 +499,7 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
// success argument". This condition has been lifted and the only
// precondition is 31.7.2.18. Effectively treat this as a DR and skip
// language version checks.
- emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
+ emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, ExpectedResult, Size, SuccessOrder,
FailureOrder, Scope);
return;
}
@@ -481,17 +524,17 @@ static void emitAtomicCmpXchgFailureSet(CodeGenFunction &CGF, AtomicExpr *E,
// Emit all the different atomics
CGF.Builder.SetInsertPoint(MonotonicBB);
- emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2,
+ emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, ExpectedResult,
Size, SuccessOrder, llvm::AtomicOrdering::Monotonic, Scope);
CGF.Builder.CreateBr(ContBB);
CGF.Builder.SetInsertPoint(AcquireBB);
- emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
+ emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, ExpectedResult, Size, SuccessOrder,
llvm::AtomicOrdering::Acquire, Scope);
CGF.Builder.CreateBr(ContBB);
CGF.Builder.SetInsertPoint(SeqCstBB);
- emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, Size, SuccessOrder,
+ emitAtomicCmpXchg(CGF, E, IsWeak, Dest, Ptr, Val1, Val2, ExpectedResult, Size, SuccessOrder,
llvm::AtomicOrdering::SequentiallyConsistent, Scope);
CGF.Builder.CreateBr(ContBB);
@@ -524,6 +567,7 @@ static llvm::Value *EmitPostAtomicMinMax(CGBuilderTy &Builder,
static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
Address Ptr, Address Val1, Address Val2,
+ Address ExpectedResult,
llvm::Value *IsWeak, llvm::Value *FailureOrder,
uint64_t Size, llvm::AtomicOrdering Order,
llvm::SyncScope::ID Scope) {
@@ -540,13 +584,13 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
- FailureOrder, Size, Order, Scope);
+ ExpectedResult, FailureOrder, Size, Order, Scope);
return;
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
- FailureOrder, Size, Order, Scope);
+ ExpectedResult, FailureOrder, Size, Order, Scope);
return;
case AtomicExpr::AO__atomic_compare_exchange:
case AtomicExpr::AO__atomic_compare_exchange_n:
@@ -554,7 +598,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__scoped_atomic_compare_exchange_n: {
if (llvm::ConstantInt *IsWeakC = dyn_cast<llvm::ConstantInt>(IsWeak)) {
emitAtomicCmpXchgFailureSet(CGF, E, IsWeakC->getZExtValue(), Dest, Ptr,
- Val1, Val2, FailureOrder, Size, Order, Scope);
+ Val1, Val2, ExpectedResult, FailureOrder, Size, Order, Scope);
} else {
// Create all the relevant BB's
llvm::BasicBlock *StrongBB =
@@ -568,12 +612,12 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
CGF.Builder.SetInsertPoint(StrongBB);
emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
- FailureOrder, Size, Order, Scope);
+ ExpectedResult, FailureOrder, Size, Order, Scope);
CGF.Builder.CreateBr(ContBB);
CGF.Builder.SetInsertPoint(WeakBB);
emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
- FailureOrder, Size, Order, Scope);
+ ExpectedResult, FailureOrder, Size, Order, Scope);
CGF.Builder.CreateBr(ContBB);
CGF.Builder.SetInsertPoint(ContBB);
@@ -777,6 +821,7 @@ EmitValToTemp(CodeGenFunction &CGF, Expr *E) {
static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
Address Ptr, Address Val1, Address Val2,
+ Address OriginalVal1,
llvm::Value *IsWeak, llvm::Value *FailureOrder,
uint64_t Size, llvm::AtomicOrdering Order,
llvm::Value *Scope) {
@@ -796,7 +841,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
Order, CGF.getLLVMContext());
else
SS = llvm::SyncScope::System;
- EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
+ EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak, FailureOrder, Size,
Order, SS);
return;
}
@@ -806,7 +851,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
auto SCID = CGF.getTargetHooks().getLLVMSyncScopeID(
CGF.CGM.getLangOpts(), ScopeModel->map(SC->getZExtValue()),
Order, CGF.CGM.getLLVMContext());
- EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
+ EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak, FailureOrder, Size,
Order, SCID);
return;
}
@@ -832,7 +877,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
SI->addCase(Builder.getInt32(S), B);
Builder.SetInsertPoint(B);
- EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
+ EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak, FailureOrder, Size,
Order,
CGF.getTargetHooks().getLLVMSyncScopeID(CGF.CGM.getLangOpts(),
ScopeModel->map(S),
@@ -1036,6 +1081,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
LValue AtomicVal = MakeAddrLValue(Ptr, AtomicTy);
AtomicInfo Atomics(*this, AtomicVal);
+ Address OriginalVal1 = Val1;
if (ShouldCastToIntPtrTy) {
Ptr = Atomics.castToAtomicIntPointer(Ptr);
if (Val1.isValid())
@@ -1279,30 +1325,30 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
if (llvm::isValidAtomicOrderingCABI(ord))
switch ((llvm::AtomicOrderingCABI)ord) {
case llvm::AtomicOrderingCABI::relaxed:
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak, OrderFail, Size,
llvm::AtomicOrdering::Monotonic, Scope);
break;
case llvm::AtomicOrderingCABI::consume:
case llvm::AtomicOrderingCABI::acquire:
if (IsStore)
break; // Avoid crashing on code with undefined behavior
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak, OrderFail, Size,
llvm::AtomicOrdering::Acquire, Scope);
break;
case llvm::AtomicOrderingCABI::release:
if (IsLoad)
break; // Avoid crashing on code with undefined behavior
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak, OrderFail, Size,
llvm::AtomicOrdering::Release, Scope);
break;
case llvm::AtomicOrderingCABI::acq_rel:
if (IsLoad || IsStore)
break; // Avoid crashing on code with undefined behavior
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak, OrderFail, Size,
llvm::AtomicOrdering::AcquireRelease, Scope);
break;
case llvm::AtomicOrderingCABI::seq_cst:
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak, OrderFail, Size,
llvm::AtomicOrdering::SequentiallyConsistent, Scope);
break;
}
@@ -1338,12 +1384,12 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
// Emit all the different atomics
Builder.SetInsertPoint(MonotonicBB);
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak, OrderFail, Size,
llvm::AtomicOrdering::Monotonic, Scope);
Builder.CreateBr(ContBB);
if (!IsStore) {
Builder.SetInsertPoint(AcquireBB);
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak, OrderFail, Size,
llvm::AtomicOrdering::Acquire, Scope);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::consume),
@@ -1353,7 +1399,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
}
if (!IsLoad) {
Builder.SetInsertPoint(ReleaseBB);
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak, OrderFail, Size,
llvm::AtomicOrdering::Release, Scope);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::release),
@@ -1361,14 +1407,14 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
}
if (!IsLoad && !IsStore) {
Builder.SetInsertPoint(AcqRelBB);
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak, OrderFail, Size,
llvm::AtomicOrdering::AcquireRelease, Scope);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::acq_rel),
AcqRelBB);
}
Builder.SetInsertPoint(SeqCstBB);
- EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, IsWeak, OrderFail, Size,
+ EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, OriginalVal1, IsWeak, OrderFail, Size,
llvm::AtomicOrdering::SequentiallyConsistent, Scope);
Builder.CreateBr(ContBB);
SI->addCase(Builder.getInt32((int)llvm::AtomicOrderingCABI::seq_cst),
diff --git a/clang/test/CodeGenCXX/builtin-atomic-compare_exchange.cpp b/clang/test/CodeGenCXX/builtin-atomic-compare_exchange.cpp
new file mode 100644
index 0000000000000..4e2830e7a66cd
--- /dev/null
+++ b/clang/test/CodeGenCXX/builtin-atomic-compare_exchange.cpp
@@ -0,0 +1,126 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -std=c++20 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+
+template <unsigned Size>
+struct S {
+ char data[Size];
+};
+
+// CHECK-LABEL: define dso_local noundef zeroext i1 @_Z21test_compare_exchangePU7_Atomic1SILj3EEPS0_S0_(
+// CHECK-SAME: ptr noundef [[A:%.*]], ptr noundef [[EXPECTED:%.*]], i24 [[DESIRED_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[DESIRED:%.*]] = alloca [[STRUCT_S:%.*]], align 1
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[EXPECTED_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca [[STRUCT_S]], align 1
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { [[STRUCT_S]], [1 x i8] }, align 4
+// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca { [[STRUCT_S]], [1 x i8] }, align 4
+// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[OLD_TMP:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[DESIRED]], i32 0, i32 0
+// CHECK-NEXT: store i24 [[DESIRED_COERCE]], ptr [[COERCE_DIVE]], align 1
+// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// CHECK-NEXT: store ptr [[EXPECTED]], ptr [[EXPECTED_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[EXPECTED_ADDR]], align 8
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DOTATOMICTMP]], ptr align 1 [[DESIRED]], i64 3, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ATOMIC_TEMP]], ptr align 1 [[TMP1]], i64 3, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ATOMIC_TEMP1]], ptr align 1 [[DOTATOMICTMP]], i64 3, i1 false)
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP1]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP2]], i32 [[TMP3]] monotonic monotonic, align 4
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+// CHECK-NEXT: br i1 [[TMP6]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]]
+// CHECK: [[CMPXCHG_STORE_EXPECTED]]:
+// CHECK-NEXT: store i32 [[TMP5]], ptr [[OLD_TMP]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[TMP1]], ptr align 4 [[OLD_TMP]], i64 3, i1 false)
+// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE]]
+// CHECK: [[CMPXCHG_CONTINUE]]:
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP6]] to i8
+// CHECK-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1
+// CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP7]] to i1
+// CHECK-NEXT: ret i1 [[LOADEDV]]
+//
+bool test_compare_exchange(_Atomic(S<3>)* a, S<3>* expected, S<3> desired) {
+ return __c11_atomic_compare_exchange_strong(a, expected, desired, 0, 0);
+}
+
+
+// CHECK-LABEL: define dso_local noundef zeroext i1 @_Z21test_compare_exchangePU7_Atomic1SILj4EEPS0_S0_(
+// CHECK-SAME: ptr noundef [[A:%.*]], ptr noundef [[EXPECTED:%.*]], i32 [[DESIRED_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[DESIRED:%.*]] = alloca [[STRUCT_S_0:%.*]], align 1
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[EXPECTED_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca [[STRUCT_S_0]], align 1
+// CHECK-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_S_0]], ptr [[DESIRED]], i32 0, i32 0
+// CHECK-NEXT: store i32 [[DESIRED_COERCE]], ptr [[COERCE_DIVE]], align 1
+// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// CHECK-NEXT: store ptr [[EXPECTED]], ptr [[EXPECTED_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[EXPECTED_ADDR]], align 8
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DOTATOMICTMP]], ptr align 1 [[DESIRED]], i64 4, i1 false)
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTATOMICTMP]], align 1
+// CHECK-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP2]], i32 [[TMP3]] monotonic monotonic, align 4
+// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
+// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1
+// CHECK-NEXT: br i1 [[TMP6]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]]
+// CHECK: [[CMPXCHG_STORE_EXPECTED]]:
+// CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP1]], align 1
+// CHECK-NEXT: br label %[[CMPXCHG_CONTINUE]]
+// CHECK: [[CMPXCHG_CONTINUE]]:
+// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP6]] to i8
+// CHECK-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1
+// CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1
+// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP7]] to i1
+// CHECK-NEXT: ret i1 [[LOADEDV]]
+//
+bool test_compare_exchange(_Atomic(S<4>)* a, S<4>* expected, S<4> desired) {
+ return __c11_atomic_compare_exchange_strong(a, expected, desired, 0, 0);
+}
+
+// CHECK-LABEL: define dso_local noundef zeroext i1 @_Z21test_compare_exchangePU7_Atomic1SILj6EEPS0_S0_(
+// CHECK-SAME: ptr noundef [[A:%.*]], ptr noundef [[EXPECTED:%.*]], i48 [[DESIRED_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[DESIRED:%.*]] = alloca [[STRUCT_S_1:%.*]], align 1
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[EXPECTED_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[DOTATOMICTMP:%.*]] = alloca [[STRUCT_S_1]], align 1
+// CHECK-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { [[STRUCT_S_1]], [2 x i8] }, align 8
+// CHECK-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca { [[STRUCT_S_1]], [2 x i8...
[truncated]
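To make the store-back logic in the hunk above easier to follow outside the codegen context, here is a minimal runtime-semantics sketch, not the compiler's actual code: for a 3-byte type such as S<3>, the cmpxchg operates on a padded 4-byte integer, and only the first sizeof(S<3>) bytes of the old value may be copied back into the user's "expected" object. The names below (S3, store_back_expected) are illustrative, not from the patch.

```cpp
#include <cstdint>
#include <cstring>

struct S3 { char data[3]; }; // non-power-of-2 size; widened to 4 bytes for cmpxchg

// Hypothetical helper mirroring the IR emitted on the failure path
// (compare the OLD_TMP store + 3-byte memcpy in the CHECK lines above).
void store_back_expected(S3 *expected, std::uint32_t old_padded) {
  // The widened old value is first spilled to a temporary...
  std::uint32_t tmp = old_padded;
  // ...and only the first sizeof(S3) == 3 bytes are copied into *expected,
  // so the padding byte of the temporary never leaks into the user's object.
  std::memcpy(expected, &tmp, sizeof(S3));
}
```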
This looks quite reasonable to me, and it seems we're fixing a pretty bad bug. However, someone else with more Clang/Codegen experience needs to look at this. Thanks for working on this!
CC @philnik777 in case you want to take a look as well
ping @efriedma-quic: does the current status address your concerns?
I think the current state addresses my concerns, yes. I'd like to make one final pass over it, though; please git-clang-format and rebase the patch.
Thanks, I rebased and clang-formatted the changed lines. The CI fails on the new test I added in libc++ because, for the majority of targets, libc++ is not tested against the just-built clang, so those configurations fail as expected. I will check with @ldionne how to skip this test for previous clang versions.
fixes #30023
The issue: for the compare-exchange builtins, if the type's size is not a power of 2, codegen creates a temporary whose size is a power of 2 and emits the compare-exchange operation on that. The result of the operation has two components: (1) a boolean indicating whether the exchange happened, and (2) the old value.
On failure, we are supposed to write the old value into the user's "expected" object. However, when the type's size is not a power of 2, what was actually written to was the temporary that had been created, so the user's "expected" value was never updated.
The fix is to pass the "expected" address all the way down so the old value is written to the correct address.
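As a usage illustration, here is a sketch of the observable guarantee the fix restores; it assumes the S<3> template from the test above, and try_update is a hypothetical caller, not part of the patch.

```cpp
// Sketch: a caller relying on the C11 guarantee that a failed strong
// compare-exchange writes the atomic's current value back into *expected.
bool try_update(_Atomic(S<3>) *a, S<3> *expected, S<3> desired) {
  bool ok = __c11_atomic_compare_exchange_strong(a, expected, desired,
                                                 __ATOMIC_SEQ_CST,
                                                 __ATOMIC_SEQ_CST);
  // Before this patch, for non-power-of-2 sizes a failed exchange left
  // *expected stale (the old value went to an internal temporary); with
  // the fix, *expected holds the value observed in *a, so the caller can
  // recompute `desired` and retry.
  return ok;
}
```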