Skip to content

Commit 6135504

Browse files
committed
[ARM][DAGISel] Use correct ABI for atomic functions
The AArch32 PCS passes small integer arguments in registers by zero- or sign-extending them in the caller, but we were previously generating calls to the __atomic and __sync functions that left other values in the high bits of those arguments. This matters in practice for the atomic min/max functions, whose signed versions expect the value to have been correctly sign-extended. Fixes #61880.
1 parent 1bae5be commit 6135504

File tree

4 files changed

+51
-17
lines changed

4 files changed

+51
-17
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 30 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4386,23 +4386,46 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
43864386
AtomicOrdering Order = cast<AtomicSDNode>(Node)->getMergedOrdering();
43874387
RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, Order, VT);
43884388
EVT RetVT = Node->getValueType(0);
4389+
SDValue ChainIn = Node->getOperand(0);
4390+
SDValue Pointer = Node->getOperand(1);
4391+
SDLoc dl(Node);
43894392
SmallVector<SDValue, 4> Ops;
4393+
4394+
// Zero/sign extend small operands if required by the target's ABI.
4395+
SmallVector<SDValue, 4> ExtendedOps;
4396+
for (auto Op = Node->op_begin() + 2, E = Node->op_end(); Op != E; ++Op) {
4397+
if (TLI.shouldExtendTypeInLibCall(VT)) {
4398+
bool IsSigned =
4399+
Opc == ISD::ATOMIC_LOAD_MIN || Opc == ISD::ATOMIC_LOAD_MAX;
4400+
if (TLI.shouldSignExtendTypeInLibCall(
4401+
EVT(VT).getTypeForEVT(*DAG.getContext()), IsSigned))
4402+
ExtendedOps.push_back(DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
4403+
Op->getValueType(), *Op,
4404+
DAG.getValueType(VT)));
4405+
else
4406+
ExtendedOps.push_back(DAG.getZeroExtendInReg(*Op, dl, VT));
4407+
4408+
} else {
4409+
ExtendedOps.push_back(*Op);
4410+
}
4411+
}
4412+
43904413
if (TLI.getLibcallName(LC)) {
43914414
// If outline atomic available, prepare its arguments and expand.
4392-
Ops.append(Node->op_begin() + 2, Node->op_end());
4393-
Ops.push_back(Node->getOperand(1));
4415+
Ops.append(ExtendedOps.begin(), ExtendedOps.end());
4416+
Ops.push_back(Pointer);
43944417

43954418
} else {
43964419
LC = RTLIB::getSYNC(Opc, VT);
43974420
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
43984421
"Unexpected atomic op or value type!");
43994422
// Arguments for expansion to sync libcall
4400-
Ops.append(Node->op_begin() + 1, Node->op_end());
4423+
Ops.push_back(Pointer);
4424+
Ops.append(ExtendedOps.begin(), ExtendedOps.end());
44014425
}
4402-
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
4403-
Ops, CallOptions,
4404-
SDLoc(Node),
4405-
Node->getOperand(0));
4426+
4427+
std::pair<SDValue, SDValue> Tmp =
4428+
TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, dl, ChainIn);
44064429
Results.push_back(Tmp.first);
44074430
Results.push_back(Tmp.second);
44084431
break;

llvm/test/CodeGen/ARM/atomic-cmpxchg.ll

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ define zeroext i1 @test_cmpxchg_res_i8(ptr %addr, i8 %desired, i8 zeroext %new)
1212
; CHECK-ARM-LABEL: test_cmpxchg_res_i8:
1313
; CHECK-ARM: .save {r4, lr}
1414
; CHECK-ARM-NEXT: push {r4, lr}
15-
; CHECK-ARM-NEXT: mov r4, r1
15+
; CHECK-ARM-NEXT: and r4, r1, #255
16+
; CHECK-ARM-NEXT: mov r1, r4
1617
; CHECK-ARM-NEXT: bl __sync_val_compare_and_swap_1
17-
; CHECK-ARM-NEXT: and r1, r4, #255
18-
; CHECK-ARM-NEXT: sub r0, r0, r1
18+
; CHECK-ARM-NEXT: sub r0, r0, r4
1919
; CHECK-ARM-NEXT: rsbs r1, r0, #0
2020
; CHECK-ARM-NEXT: adc r0, r0, r1
2121
; CHECK-ARM-NEXT: pop {r4, lr}
@@ -25,10 +25,11 @@ define zeroext i1 @test_cmpxchg_res_i8(ptr %addr, i8 %desired, i8 zeroext %new)
2525
; CHECK-THUMB: .save {r4, lr}
2626
; CHECK-THUMB-NEXT: push {r4, lr}
2727
; CHECK-THUMB-NEXT: movs r4, r1
28-
; CHECK-THUMB-NEXT: bl __sync_val_compare_and_swap_1
2928
; CHECK-THUMB-NEXT: movs r1, #255
30-
; CHECK-THUMB-NEXT: ands r1, r4
31-
; CHECK-THUMB-NEXT: subs r1, r0, r1
29+
; CHECK-THUMB-NEXT: ands r4, r1
30+
; CHECK-THUMB-NEXT: movs r1, r4
31+
; CHECK-THUMB-NEXT: bl __sync_val_compare_and_swap_1
32+
; CHECK-THUMB-NEXT: subs r1, r0, r4
3233
; CHECK-THUMB-NEXT: rsbs r0, r1, #0
3334
; CHECK-THUMB-NEXT: adcs r0, r1
3435
; CHECK-THUMB-NEXT: pop {r4}
@@ -52,10 +53,10 @@ define zeroext i1 @test_cmpxchg_res_i8(ptr %addr, i8 %desired, i8 zeroext %new)
5253
; CHECK-THUMBV6-LABEL: test_cmpxchg_res_i8:
5354
; CHECK-THUMBV6: .save {r4, lr}
5455
; CHECK-THUMBV6-NEXT: push {r4, lr}
55-
; CHECK-THUMBV6-NEXT: mov r4, r1
56+
; CHECK-THUMBV6-NEXT: uxtb r4, r1
57+
; CHECK-THUMBV6-NEXT: mov r1, r4
5658
; CHECK-THUMBV6-NEXT: bl __sync_val_compare_and_swap_1
57-
; CHECK-THUMBV6-NEXT: uxtb r1, r4
58-
; CHECK-THUMBV6-NEXT: subs r1, r0, r1
59+
; CHECK-THUMBV6-NEXT: subs r1, r0, r4
5960
; CHECK-THUMBV6-NEXT: rsbs r0, r1, #0
6061
; CHECK-THUMBV6-NEXT: adcs r0, r1
6162
; CHECK-THUMBV6-NEXT: pop {r4, pc}

llvm/test/CodeGen/ARM/atomic-load-store.ll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ define void @test4(ptr %ptr1, ptr %ptr2) {
203203
; THUMBONE-NEXT: movs r1, #0
204204
; THUMBONE-NEXT: mov r2, r1
205205
; THUMBONE-NEXT: bl __sync_val_compare_and_swap_1
206-
; THUMBONE-NEXT: mov r1, r0
206+
; THUMBONE-NEXT: uxtb r1, r0
207207
; THUMBONE-NEXT: mov r0, r4
208208
; THUMBONE-NEXT: bl __sync_lock_test_and_set_1
209209
; THUMBONE-NEXT: pop {r4, pc}
@@ -692,6 +692,7 @@ define void @store_atomic_f16__seq_cst(ptr %ptr, half %val1) {
692692
; THUMBONE-LABEL: store_atomic_f16__seq_cst:
693693
; THUMBONE: @ %bb.0:
694694
; THUMBONE-NEXT: push {r7, lr}
695+
; THUMBONE-NEXT: uxth r1, r1
695696
; THUMBONE-NEXT: bl __sync_lock_test_and_set_2
696697
; THUMBONE-NEXT: pop {r7, pc}
697698
;
@@ -756,6 +757,7 @@ define void @store_atomic_bf16__seq_cst(ptr %ptr, bfloat %val1) {
756757
; THUMBONE-LABEL: store_atomic_bf16__seq_cst:
757758
; THUMBONE: @ %bb.0:
758759
; THUMBONE-NEXT: push {r7, lr}
760+
; THUMBONE-NEXT: uxth r1, r1
759761
; THUMBONE-NEXT: bl __sync_lock_test_and_set_2
760762
; THUMBONE-NEXT: pop {r7, pc}
761763
;

llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,7 @@ define i8 @trunc_rmw8(ptr %p, i32 %val) {
396396
; ATOMIC32-NEXT: .save {r7, lr}
397397
; ATOMIC32-NEXT: push {r7, lr}
398398
; ATOMIC32-NEXT: dmb sy
399+
; ATOMIC32-NEXT: uxtb r1, r1
399400
; ATOMIC32-NEXT: bl __sync_fetch_and_add_1
400401
; ATOMIC32-NEXT: dmb sy
401402
; ATOMIC32-NEXT: pop {r7, pc}
@@ -447,6 +448,7 @@ define i8 @trunc_rmw8_signed(ptr %p, i32 %val) {
447448
; ATOMIC32-NEXT: .save {r7, lr}
448449
; ATOMIC32-NEXT: push {r7, lr}
449450
; ATOMIC32-NEXT: dmb sy
451+
; ATOMIC32-NEXT: sxtb r1, r1
450452
; ATOMIC32-NEXT: bl __sync_fetch_and_max_1
451453
; ATOMIC32-NEXT: dmb sy
452454
; ATOMIC32-NEXT: pop {r7, pc}
@@ -478,6 +480,8 @@ define i8 @trunc_cmpxchg8(ptr %p, i32 %cmp, i32 %new) {
478480
; ATOMIC32-NEXT: .save {r7, lr}
479481
; ATOMIC32-NEXT: push {r7, lr}
480482
; ATOMIC32-NEXT: dmb sy
483+
; ATOMIC32-NEXT: uxtb r1, r1
484+
; ATOMIC32-NEXT: uxtb r2, r2
481485
; ATOMIC32-NEXT: bl __sync_val_compare_and_swap_1
482486
; ATOMIC32-NEXT: dmb sy
483487
; ATOMIC32-NEXT: pop {r7, pc}
@@ -524,6 +528,7 @@ define i16 @trunc_rmw16(ptr %p, i32 %val) {
524528
; ATOMIC32-NEXT: .save {r7, lr}
525529
; ATOMIC32-NEXT: push {r7, lr}
526530
; ATOMIC32-NEXT: dmb sy
531+
; ATOMIC32-NEXT: uxth r1, r1
527532
; ATOMIC32-NEXT: bl __sync_fetch_and_add_2
528533
; ATOMIC32-NEXT: dmb sy
529534
; ATOMIC32-NEXT: pop {r7, pc}
@@ -575,6 +580,7 @@ define i16 @trunc_rmw16_signed(ptr %p, i32 %val) {
575580
; ATOMIC32-NEXT: .save {r7, lr}
576581
; ATOMIC32-NEXT: push {r7, lr}
577582
; ATOMIC32-NEXT: dmb sy
583+
; ATOMIC32-NEXT: sxth r1, r1
578584
; ATOMIC32-NEXT: bl __sync_fetch_and_max_2
579585
; ATOMIC32-NEXT: dmb sy
580586
; ATOMIC32-NEXT: pop {r7, pc}
@@ -606,6 +612,8 @@ define i16 @trunc_cmpxchg16(ptr %p, i32 %cmp, i32 %new) {
606612
; ATOMIC32-NEXT: .save {r7, lr}
607613
; ATOMIC32-NEXT: push {r7, lr}
608614
; ATOMIC32-NEXT: dmb sy
615+
; ATOMIC32-NEXT: uxth r1, r1
616+
; ATOMIC32-NEXT: uxth r2, r2
609617
; ATOMIC32-NEXT: bl __sync_val_compare_and_swap_2
610618
; ATOMIC32-NEXT: dmb sy
611619
; ATOMIC32-NEXT: pop {r7, pc}

0 commit comments

Comments
 (0)