Skip to content

Commit d2239fb

Browse files
authored
[clang][CodeGen] Fix sub-optimal clang CodeGen for __atomic_test_and_set (#160098)
Clang CodeGen for `__atomic_test_and_set` would emit a `store` instruction that stores an `i1` value: ```cpp bool f(void *ptr) { return __atomic_test_and_set(ptr, __ATOMIC_RELAXED); } ``` ```llvm %1 = atomicrmw xchg ptr %0, i8 1 monotonic, align 1 %tobool = icmp ne i8 %1, 0 store i1 %tobool, ptr %atomic-temp, align 1 ``` which could lead to suboptimal binary code, for example on x86_64: ```asm f: mov al, 1 xchg byte ptr [rdi], al test al, al setne al setne byte ptr [rsp - 1] ret ``` The last `setne` instruction is obviously redundant. This patch fixes this issue by first zero-extending `%tobool` to an `i8` before the store. This effectively eliminates the last `setne` instruction in the binary code sequence. The `test` and `setne` on `al` is kept still, though. ----- I'm quite conservative about the codegen in this patch. Vanilla gcc actually emits simpler code for `__atomic_test_and_set`: ```cpp bool f(void *ptr) { return __atomic_test_and_set(ptr, __ATOMIC_RELAXED); } ``` ```asm f: mov eax, 1 xchg al, BYTE PTR [rdi] ret ``` It seems like gcc assumes `ptr` would always point to a valid `bool` value as required by the ABI. I'm not sure if we should also make this assumption. Related to #121943 .
1 parent 2a72522 commit d2239fb

File tree

3 files changed

+125
-107
lines changed

3 files changed

+125
-107
lines changed

clang/lib/CodeGen/CGAtomic.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -734,7 +734,8 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
734734
CGF.emitAtomicRMWInst(llvm::AtomicRMWInst::Xchg, Ptr,
735735
CGF.Builder.getInt8(1), Order, Scope, E);
736736
RMWI->setVolatile(E->isVolatile());
737-
llvm::Value *Result = CGF.Builder.CreateIsNotNull(RMWI, "tobool");
737+
llvm::Value *Result = CGF.EmitToMemory(
738+
CGF.Builder.CreateIsNotNull(RMWI, "tobool"), E->getType());
738739
auto *I = CGF.Builder.CreateStore(Result, Dest);
739740
CGF.addInstToCurrentSourceAtom(I, Result);
740741
return;

clang/test/CodeGen/atomic-test-and-set.c

Lines changed: 30 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,8 @@ void clear_dynamic(char *ptr, int order) {
8181
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
8282
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 monotonic, align 1
8383
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
84-
// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
84+
// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
85+
// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
8586
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
8687
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
8788
// CHECK-NEXT: ret void
@@ -99,7 +100,8 @@ void test_and_set_relaxed(char *ptr) {
99100
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
100101
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 acquire, align 1
101102
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
102-
// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
103+
// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
104+
// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
103105
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
104106
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
105107
// CHECK-NEXT: ret void
@@ -117,7 +119,8 @@ void test_and_set_consume(char *ptr) {
117119
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
118120
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 acquire, align 1
119121
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
120-
// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
122+
// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
123+
// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
121124
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
122125
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
123126
// CHECK-NEXT: ret void
@@ -135,7 +138,8 @@ void test_and_set_acquire(char *ptr) {
135138
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
136139
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 release, align 1
137140
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
138-
// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
141+
// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
142+
// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
139143
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
140144
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
141145
// CHECK-NEXT: ret void
@@ -153,7 +157,8 @@ void test_and_set_release(char *ptr) {
153157
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
154158
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 acq_rel, align 1
155159
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
156-
// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
160+
// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
161+
// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
157162
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
158163
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
159164
// CHECK-NEXT: ret void
@@ -171,7 +176,8 @@ void test_and_set_acq_rel(char *ptr) {
171176
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
172177
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 seq_cst, align 1
173178
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
174-
// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
179+
// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
180+
// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
175181
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
176182
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
177183
// CHECK-NEXT: ret void
@@ -200,27 +206,32 @@ void test_and_set_seq_cst(char *ptr) {
200206
// CHECK: [[MONOTONIC]]:
201207
// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 monotonic, align 1
202208
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP2]], 0
203-
// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
209+
// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
210+
// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
204211
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE:.*]]
205212
// CHECK: [[ACQUIRE]]:
206213
// CHECK-NEXT: [[TMP3:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 acquire, align 1
207214
// CHECK-NEXT: [[TOBOOL1:%.*]] = icmp ne i8 [[TMP3]], 0
208-
// CHECK-NEXT: store i1 [[TOBOOL1]], ptr [[ATOMIC_TEMP]], align 1
215+
// CHECK-NEXT: [[TOBOOL_ZEXT1:%.*]] = zext i1 [[TOBOOL1]] to i8
216+
// CHECK-NEXT: store i8 [[TOBOOL_ZEXT1]], ptr [[ATOMIC_TEMP]], align 1
209217
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
210218
// CHECK: [[RELEASE]]:
211219
// CHECK-NEXT: [[TMP4:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 release, align 1
212220
// CHECK-NEXT: [[TOBOOL2:%.*]] = icmp ne i8 [[TMP4]], 0
213-
// CHECK-NEXT: store i1 [[TOBOOL2]], ptr [[ATOMIC_TEMP]], align 1
221+
// CHECK-NEXT: [[TOBOOL_ZEXT2:%.*]] = zext i1 [[TOBOOL2]] to i8
222+
// CHECK-NEXT: store i8 [[TOBOOL_ZEXT2]], ptr [[ATOMIC_TEMP]], align 1
214223
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
215224
// CHECK: [[ACQREL]]:
216225
// CHECK-NEXT: [[TMP5:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 acq_rel, align 1
217226
// CHECK-NEXT: [[TOBOOL3:%.*]] = icmp ne i8 [[TMP5]], 0
218-
// CHECK-NEXT: store i1 [[TOBOOL3]], ptr [[ATOMIC_TEMP]], align 1
227+
// CHECK-NEXT: [[TOBOOL_ZEXT3:%.*]] = zext i1 [[TOBOOL3]] to i8
228+
// CHECK-NEXT: store i8 [[TOBOOL_ZEXT3]], ptr [[ATOMIC_TEMP]], align 1
219229
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
220230
// CHECK: [[SEQCST]]:
221231
// CHECK-NEXT: [[TMP6:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 seq_cst, align 1
222232
// CHECK-NEXT: [[TOBOOL4:%.*]] = icmp ne i8 [[TMP6]], 0
223-
// CHECK-NEXT: store i1 [[TOBOOL4]], ptr [[ATOMIC_TEMP]], align 1
233+
// CHECK-NEXT: [[TOBOOL_ZEXT4:%.*]] = zext i1 [[TOBOOL4]] to i8
234+
// CHECK-NEXT: store i8 [[TOBOOL_ZEXT4]], ptr [[ATOMIC_TEMP]], align 1
224235
// CHECK-NEXT: br label %[[ATOMIC_CONTINUE]]
225236
// CHECK: [[ATOMIC_CONTINUE]]:
226237
// CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
@@ -239,7 +250,8 @@ void test_and_set_dynamic(char *ptr, int order) {
239250
// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x i32], ptr [[X]], i64 0, i64 0
240251
// CHECK-NEXT: [[TMP0:%.*]] = atomicrmw volatile xchg ptr [[ARRAYDECAY]], i8 1 seq_cst, align 4
241252
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP0]], 0
242-
// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
253+
// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
254+
// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
243255
// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
244256
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP1]] to i1
245257
// CHECK-NEXT: ret void
@@ -301,7 +313,8 @@ void clear_incomplete(struct incomplete *ptr) {
301313
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
302314
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 monotonic, align 4
303315
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
304-
// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
316+
// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
317+
// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
305318
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
306319
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
307320
// CHECK-NEXT: ret void
@@ -318,7 +331,8 @@ void test_and_set_int(int *ptr) {
318331
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
319332
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 monotonic, align 1
320333
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
321-
// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
334+
// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
335+
// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
322336
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
323337
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
324338
// CHECK-NEXT: ret void
@@ -335,7 +349,8 @@ void test_and_set_void(void *ptr) {
335349
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
336350
// CHECK-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 1 monotonic, align 1
337351
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP1]], 0
338-
// CHECK-NEXT: store i1 [[TOBOOL]], ptr [[ATOMIC_TEMP]], align 1
352+
// CHECK-NEXT: [[TOBOOL_ZEXT:%.*]] = zext i1 [[TOBOOL]] to i8
353+
// CHECK-NEXT: store i8 [[TOBOOL_ZEXT]], ptr [[ATOMIC_TEMP]], align 1
339354
// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1
340355
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
341356
// CHECK-NEXT: ret void

0 commit comments

Comments
 (0)