Skip to content

Commit 11e796d

Browse files
authored
[NVPTX] Fixup some issues introduced by 128-bit atomics (#155921)
1 parent 3ea7956 commit 11e796d

File tree

3 files changed

+32
-10
lines changed

3 files changed

+32
-10
lines changed

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2325,13 +2325,15 @@ void NVPTXDAGToDAGISel::selectAtomicSwap128(SDNode *N) {
23252325
MemSDNode *AN = cast<MemSDNode>(N);
23262326
SDLoc dl(N);
23272327

2328+
const SDValue Chain = N->getOperand(0);
23282329
const auto [Base, Offset] = selectADDR(N->getOperand(1), CurDAG);
23292330
SmallVector<SDValue, 5> Ops{Base, Offset};
23302331
Ops.append(N->op_begin() + 2, N->op_end());
23312332
Ops.append({
23322333
getI32Imm(getMemOrder(AN), dl),
23332334
getI32Imm(getAtomicScope(AN), dl),
23342335
getI32Imm(getAddrSpace(AN), dl),
2336+
Chain,
23352337
});
23362338

23372339
assert(N->getOpcode() == NVPTXISD::ATOMIC_CMP_SWAP_B128 ||

llvm/test/CodeGen/NVPTX/atomics-b128.ll

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -458,6 +458,7 @@ define i128 @test_cmpxchg_monotonic_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
458458
; CHECK-EMPTY:
459459
; CHECK-NEXT: // %bb.0:
460460
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_monotonic_seq_cst_param_0];
461+
; CHECK-NEXT: fence.sc.sys;
461462
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_monotonic_seq_cst_param_1];
462463
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_monotonic_seq_cst_param_2];
463464
; CHECK-NEXT: {
@@ -524,6 +525,7 @@ define i128 @test_cmpxchg_acquire_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
524525
; CHECK-EMPTY:
525526
; CHECK-NEXT: // %bb.0:
526527
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acquire_seq_cst_param_0];
528+
; CHECK-NEXT: fence.sc.sys;
527529
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acquire_seq_cst_param_1];
528530
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acquire_seq_cst_param_2];
529531
; CHECK-NEXT: {
@@ -590,6 +592,7 @@ define i128 @test_cmpxchg_release_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
590592
; CHECK-EMPTY:
591593
; CHECK-NEXT: // %bb.0:
592594
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_release_seq_cst_param_0];
595+
; CHECK-NEXT: fence.sc.sys;
593596
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_release_seq_cst_param_1];
594597
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_release_seq_cst_param_2];
595598
; CHECK-NEXT: {
@@ -656,6 +659,7 @@ define i128 @test_cmpxchg_acq_rel_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
656659
; CHECK-EMPTY:
657660
; CHECK-NEXT: // %bb.0:
658661
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_acq_rel_seq_cst_param_0];
662+
; CHECK-NEXT: fence.sc.sys;
659663
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_acq_rel_seq_cst_param_1];
660664
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_acq_rel_seq_cst_param_2];
661665
; CHECK-NEXT: {
@@ -678,6 +682,7 @@ define i128 @test_cmpxchg_seq_cst_monotonic(ptr %addr, i128 %cmp, i128 %new) {
678682
; CHECK-EMPTY:
679683
; CHECK-NEXT: // %bb.0:
680684
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_monotonic_param_0];
685+
; CHECK-NEXT: fence.sc.sys;
681686
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_monotonic_param_1];
682687
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_monotonic_param_2];
683688
; CHECK-NEXT: {
@@ -700,6 +705,7 @@ define i128 @test_cmpxchg_seq_cst_acquire(ptr %addr, i128 %cmp, i128 %new) {
700705
; CHECK-EMPTY:
701706
; CHECK-NEXT: // %bb.0:
702707
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_acquire_param_0];
708+
; CHECK-NEXT: fence.sc.sys;
703709
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_acquire_param_1];
704710
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_acquire_param_2];
705711
; CHECK-NEXT: {
@@ -722,6 +728,7 @@ define i128 @test_cmpxchg_seq_cst_seq_cst(ptr %addr, i128 %cmp, i128 %new) {
722728
; CHECK-EMPTY:
723729
; CHECK-NEXT: // %bb.0:
724730
; CHECK-NEXT: ld.param.b64 %rd1, [test_cmpxchg_seq_cst_seq_cst_param_0];
731+
; CHECK-NEXT: fence.sc.sys;
725732
; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_cmpxchg_seq_cst_seq_cst_param_1];
726733
; CHECK-NEXT: ld.param.v2.b64 {%rd4, %rd5}, [test_cmpxchg_seq_cst_seq_cst_param_2];
727734
; CHECK-NEXT: {
@@ -1001,3 +1008,26 @@ define i128 @test_atomicrmw_umax(ptr %ptr, i128 %val) {
10011008
%ret = atomicrmw umax ptr %ptr, i128 %val monotonic
10021009
ret i128 %ret
10031010
}
1011+
1012+
1013+
@si128 = internal addrspace(3) global i128 0, align 16
1014+
1015+
define void @test_atomicrmw_xchg_const() {
1016+
; CHECK-LABEL: test_atomicrmw_xchg_const(
1017+
; CHECK: {
1018+
; CHECK-NEXT: .reg .b64 %rd<5>;
1019+
; CHECK-NEXT: // demoted variable
1020+
; CHECK-NEXT: .shared .align 16 .b8 si128[16];
1021+
; CHECK-NEXT: // %bb.0:
1022+
; CHECK-NEXT: mov.b64 %rd1, 0;
1023+
; CHECK-NEXT: mov.b64 %rd2, 23;
1024+
; CHECK-NEXT: {
1025+
; CHECK-NEXT: .reg .b128 amt, dst;
1026+
; CHECK-NEXT: mov.b128 amt, {%rd2, %rd1};
1027+
; CHECK-NEXT: atom.seq_cst.sys.shared.exch.b128 dst, [si128], amt;
1028+
; CHECK-NEXT: mov.b128 {%rd3, %rd4}, dst;
1029+
; CHECK-NEXT: }
1030+
; CHECK-NEXT: ret;
1031+
%res = atomicrmw xchg ptr addrspace(3) @si128, i128 23 seq_cst
1032+
ret void
1033+
}

llvm/test/CodeGen/NVPTX/load-store-atomic.err.ll

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -2,16 +2,6 @@
22

33
; CHECK: error: unsupported atomic store
44
; CHECK: error: unsupported atomic load
5-
; CHECK: error: unsupported atomic store
6-
; CHECK: error: unsupported atomic load
7-
8-
;; TODO: we could actually support this but we don't currently support b128
9-
;; load lowering.
10-
define void @test_i128_generic_atomic(ptr %a, ptr %b) {
11-
%a.load = load atomic i128, ptr %a seq_cst, align 16
12-
store atomic i128 %a.load, ptr %b seq_cst, align 16
13-
ret void
14-
}
155

166
define void @test_i256_global_atomic(ptr addrspace(1) %a, ptr addrspace(1) %b) {
177
%a.load = load atomic i256, ptr addrspace(1) %a seq_cst, align 32

0 commit comments

Comments
 (0)