Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -912,7 +912,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// actions
computeRegisterProperties(STI.getRegisterInfo());

setMinCmpXchgSizeInBits(STI.hasAtomCas16() ? 16 : 32);
// PTX support for 16-bit CAS is emulated, so only use cmpxchg of 32 bits or
// wider.
setMinCmpXchgSizeInBits(STI.getMinCmpXchgSizeInBits());
setMaxAtomicSizeInBitsSupported(64);
setMaxDivRemBitWidthSupported(64);
}
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
// set of equivalent memory operations with a scalar data-type, executed in
// an unspecified order on the elements in the vector.
unsigned getMaxRequiredAlignment() const { return 8; }
// Get the smallest cmpxchg word size, in bits, that the hardware supports.
// Always returns 32; the value does not depend on any subtarget state.
unsigned getMinCmpXchgSizeInBits() const { return 32; }

// Returns the value of the PTXVersion member.
unsigned getPTXVersion() const { return PTXVersion; }

Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/NVPTX/atomics-sm70.ll
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,10 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: @%p4 bra $L__BB0_7;
; CHECKPTX62-NEXT: // %bb.8: // %atomicrmw.end
; CHECKPTX62-NEXT: ret;
%r1 = atomicrmw fadd ptr %dp0, half %val seq_cst
%r2 = atomicrmw fadd ptr %dp0, half 1.0 seq_cst
%r3 = atomicrmw fadd ptr addrspace(1) %dp1, half %val seq_cst
%r4 = atomicrmw fadd ptr addrspace(3) %dp3, half %val seq_cst
%r1 = atomicrmw fadd ptr %dp0, half %val monotonic
%r2 = atomicrmw fadd ptr %dp0, half 1.0 monotonic
%r3 = atomicrmw fadd ptr addrspace(1) %dp1, half %val monotonic
%r4 = atomicrmw fadd ptr addrspace(3) %dp3, half %val monotonic
ret void
}

Expand Down
128 changes: 84 additions & 44 deletions llvm/test/CodeGen/NVPTX/atomics-sm90.ll
Original file line number Diff line number Diff line change
Expand Up @@ -46,65 +46,105 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-LABEL: test(
; CHECKPTX71: {
; CHECKPTX71-NEXT: .reg .pred %p<5>;
; CHECKPTX71-NEXT: .reg .b16 %rs<34>;
; CHECKPTX71-NEXT: .reg .b32 %r<4>;
; CHECKPTX71-NEXT: .reg .b16 %rs<18>;
; CHECKPTX71-NEXT: .reg .b32 %r<58>;
; CHECKPTX71-NEXT: .reg .f32 %f<12>;
; CHECKPTX71-EMPTY:
; CHECKPTX71-NEXT: // %bb.0:
; CHECKPTX71-NEXT: ld.param.b16 %rs13, [test_param_3];
; CHECKPTX71-NEXT: ld.param.u32 %r3, [test_param_2];
; CHECKPTX71-NEXT: ld.param.u32 %r2, [test_param_1];
; CHECKPTX71-NEXT: ld.param.u32 %r1, [test_param_0];
; CHECKPTX71-NEXT: ld.b16 %rs30, [%r1];
; CHECKPTX71-NEXT: cvt.f32.bf16 %f1, %rs13;
; CHECKPTX71-NEXT: $L__BB0_1: // %atomicrmw.start14
; CHECKPTX71-NEXT: ld.param.b16 %rs1, [test_param_3];
; CHECKPTX71-NEXT: ld.param.u32 %r23, [test_param_2];
; CHECKPTX71-NEXT: ld.param.u32 %r22, [test_param_1];
; CHECKPTX71-NEXT: ld.param.u32 %r24, [test_param_0];
; CHECKPTX71-NEXT: and.b32 %r1, %r24, -4;
; CHECKPTX71-NEXT: and.b32 %r25, %r24, 3;
; CHECKPTX71-NEXT: shl.b32 %r2, %r25, 3;
; CHECKPTX71-NEXT: mov.b32 %r26, 65535;
; CHECKPTX71-NEXT: shl.b32 %r27, %r26, %r2;
; CHECKPTX71-NEXT: not.b32 %r3, %r27;
; CHECKPTX71-NEXT: ld.u32 %r54, [%r1];
; CHECKPTX71-NEXT: cvt.f32.bf16 %f2, %rs1;
; CHECKPTX71-NEXT: $L__BB0_1: // %atomicrmw.start45
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
; CHECKPTX71-NEXT: cvt.f32.bf16 %f2, %rs30;
; CHECKPTX71-NEXT: add.rn.f32 %f3, %f2, %f1;
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs14, %f3;
; CHECKPTX71-NEXT: atom.cas.b16 %rs17, [%r1], %rs30, %rs14;
; CHECKPTX71-NEXT: setp.ne.s16 %p1, %rs17, %rs30;
; CHECKPTX71-NEXT: mov.u16 %rs30, %rs17;
; CHECKPTX71-NEXT: shr.u32 %r28, %r54, %r2;
; CHECKPTX71-NEXT: cvt.u16.u32 %rs2, %r28;
; CHECKPTX71-NEXT: cvt.f32.bf16 %f1, %rs2;
; CHECKPTX71-NEXT: add.rn.f32 %f3, %f1, %f2;
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs4, %f3;
; CHECKPTX71-NEXT: cvt.u32.u16 %r29, %rs4;
; CHECKPTX71-NEXT: shl.b32 %r30, %r29, %r2;
; CHECKPTX71-NEXT: and.b32 %r31, %r54, %r3;
; CHECKPTX71-NEXT: or.b32 %r32, %r31, %r30;
; CHECKPTX71-NEXT: atom.cas.b32 %r6, [%r1], %r54, %r32;
; CHECKPTX71-NEXT: setp.ne.s32 %p1, %r6, %r54;
; CHECKPTX71-NEXT: mov.u32 %r54, %r6;
; CHECKPTX71-NEXT: @%p1 bra $L__BB0_1;
; CHECKPTX71-NEXT: // %bb.2: // %atomicrmw.end13
; CHECKPTX71-NEXT: ld.b16 %rs31, [%r1];
; CHECKPTX71-NEXT: $L__BB0_3: // %atomicrmw.start8
; CHECKPTX71-NEXT: // %bb.2: // %atomicrmw.end44
; CHECKPTX71-NEXT: ld.u32 %r55, [%r1];
; CHECKPTX71-NEXT: $L__BB0_3: // %atomicrmw.start27
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
; CHECKPTX71-NEXT: cvt.f32.bf16 %f4, %rs31;
; CHECKPTX71-NEXT: shr.u32 %r33, %r55, %r2;
; CHECKPTX71-NEXT: cvt.u16.u32 %rs6, %r33;
; CHECKPTX71-NEXT: cvt.f32.bf16 %f4, %rs6;
; CHECKPTX71-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs18, %f5;
; CHECKPTX71-NEXT: atom.cas.b16 %rs21, [%r1], %rs31, %rs18;
; CHECKPTX71-NEXT: setp.ne.s16 %p2, %rs21, %rs31;
; CHECKPTX71-NEXT: mov.u16 %rs31, %rs21;
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs8, %f5;
; CHECKPTX71-NEXT: cvt.u32.u16 %r34, %rs8;
; CHECKPTX71-NEXT: shl.b32 %r35, %r34, %r2;
; CHECKPTX71-NEXT: and.b32 %r36, %r55, %r3;
; CHECKPTX71-NEXT: or.b32 %r37, %r36, %r35;
; CHECKPTX71-NEXT: atom.cas.b32 %r9, [%r1], %r55, %r37;
; CHECKPTX71-NEXT: setp.ne.s32 %p2, %r9, %r55;
; CHECKPTX71-NEXT: mov.u32 %r55, %r9;
; CHECKPTX71-NEXT: @%p2 bra $L__BB0_3;
; CHECKPTX71-NEXT: // %bb.4: // %atomicrmw.end7
; CHECKPTX71-NEXT: ld.global.b16 %rs32, [%r2];
; CHECKPTX71-NEXT: $L__BB0_5: // %atomicrmw.start2
; CHECKPTX71-NEXT: // %bb.4: // %atomicrmw.end26
; CHECKPTX71-NEXT: and.b32 %r10, %r22, -4;
; CHECKPTX71-NEXT: shl.b32 %r38, %r22, 3;
; CHECKPTX71-NEXT: and.b32 %r11, %r38, 24;
; CHECKPTX71-NEXT: shl.b32 %r40, %r26, %r11;
; CHECKPTX71-NEXT: not.b32 %r12, %r40;
; CHECKPTX71-NEXT: ld.global.u32 %r56, [%r10];
; CHECKPTX71-NEXT: $L__BB0_5: // %atomicrmw.start9
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
; CHECKPTX71-NEXT: cvt.f32.bf16 %f7, %rs32;
; CHECKPTX71-NEXT: add.rn.f32 %f8, %f7, %f1;
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs22, %f8;
; CHECKPTX71-NEXT: atom.global.cas.b16 %rs25, [%r2], %rs32, %rs22;
; CHECKPTX71-NEXT: setp.ne.s16 %p3, %rs25, %rs32;
; CHECKPTX71-NEXT: mov.u16 %rs32, %rs25;
; CHECKPTX71-NEXT: shr.u32 %r41, %r56, %r11;
; CHECKPTX71-NEXT: cvt.u16.u32 %rs10, %r41;
; CHECKPTX71-NEXT: cvt.f32.bf16 %f6, %rs10;
; CHECKPTX71-NEXT: add.rn.f32 %f8, %f6, %f2;
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs12, %f8;
; CHECKPTX71-NEXT: cvt.u32.u16 %r42, %rs12;
; CHECKPTX71-NEXT: shl.b32 %r43, %r42, %r11;
; CHECKPTX71-NEXT: and.b32 %r44, %r56, %r12;
; CHECKPTX71-NEXT: or.b32 %r45, %r44, %r43;
; CHECKPTX71-NEXT: atom.global.cas.b32 %r15, [%r10], %r56, %r45;
; CHECKPTX71-NEXT: setp.ne.s32 %p3, %r15, %r56;
; CHECKPTX71-NEXT: mov.u32 %r56, %r15;
; CHECKPTX71-NEXT: @%p3 bra $L__BB0_5;
; CHECKPTX71-NEXT: // %bb.6: // %atomicrmw.end1
; CHECKPTX71-NEXT: ld.shared.b16 %rs33, [%r3];
; CHECKPTX71-NEXT: // %bb.6: // %atomicrmw.end8
; CHECKPTX71-NEXT: and.b32 %r16, %r23, -4;
; CHECKPTX71-NEXT: shl.b32 %r46, %r23, 3;
; CHECKPTX71-NEXT: and.b32 %r17, %r46, 24;
; CHECKPTX71-NEXT: shl.b32 %r48, %r26, %r17;
; CHECKPTX71-NEXT: not.b32 %r18, %r48;
; CHECKPTX71-NEXT: ld.shared.u32 %r57, [%r16];
; CHECKPTX71-NEXT: $L__BB0_7: // %atomicrmw.start
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
; CHECKPTX71-NEXT: cvt.f32.bf16 %f10, %rs33;
; CHECKPTX71-NEXT: add.rn.f32 %f11, %f10, %f1;
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs26, %f11;
; CHECKPTX71-NEXT: atom.shared.cas.b16 %rs29, [%r3], %rs33, %rs26;
; CHECKPTX71-NEXT: setp.ne.s16 %p4, %rs29, %rs33;
; CHECKPTX71-NEXT: mov.u16 %rs33, %rs29;
; CHECKPTX71-NEXT: shr.u32 %r49, %r57, %r17;
; CHECKPTX71-NEXT: cvt.u16.u32 %rs14, %r49;
; CHECKPTX71-NEXT: cvt.f32.bf16 %f9, %rs14;
; CHECKPTX71-NEXT: add.rn.f32 %f11, %f9, %f2;
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs16, %f11;
; CHECKPTX71-NEXT: cvt.u32.u16 %r50, %rs16;
; CHECKPTX71-NEXT: shl.b32 %r51, %r50, %r17;
; CHECKPTX71-NEXT: and.b32 %r52, %r57, %r18;
; CHECKPTX71-NEXT: or.b32 %r53, %r52, %r51;
; CHECKPTX71-NEXT: atom.shared.cas.b32 %r21, [%r16], %r57, %r53;
; CHECKPTX71-NEXT: setp.ne.s32 %p4, %r21, %r57;
; CHECKPTX71-NEXT: mov.u32 %r57, %r21;
; CHECKPTX71-NEXT: @%p4 bra $L__BB0_7;
; CHECKPTX71-NEXT: // %bb.8: // %atomicrmw.end
; CHECKPTX71-NEXT: ret;
%r1 = atomicrmw fadd ptr %dp0, bfloat %val seq_cst
%r2 = atomicrmw fadd ptr %dp0, bfloat 1.0 seq_cst
%r3 = atomicrmw fadd ptr addrspace(1) %dp1, bfloat %val seq_cst
%r4 = atomicrmw fadd ptr addrspace(3) %dp3, bfloat %val seq_cst
%r1 = atomicrmw fadd ptr %dp0, bfloat %val monotonic
%r2 = atomicrmw fadd ptr %dp0, bfloat 1.0 monotonic
%r3 = atomicrmw fadd ptr addrspace(1) %dp1, bfloat %val monotonic
%r4 = atomicrmw fadd ptr addrspace(3) %dp3, bfloat %val monotonic
ret void
}

Expand Down
102 changes: 64 additions & 38 deletions llvm/test/CodeGen/NVPTX/cmpxchg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -53,43 +53,44 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM70-LABEL: relaxed_sys_i8(
; SM70: {
; SM70-NEXT: .reg .pred %p<3>;
; SM70-NEXT: .reg .b16 %rs<17>;
; SM70-NEXT: .reg .b32 %r<3>;
; SM70-NEXT: .reg .b64 %rd<5>;
; SM70-NEXT: .reg .b16 %rs<2>;
; SM70-NEXT: .reg .b32 %r<21>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u8 %rs9, [relaxed_sys_i8_param_2];
; SM70-NEXT: ld.param.u8 %rs1, [relaxed_sys_i8_param_2];
; SM70-NEXT: ld.param.u64 %rd2, [relaxed_sys_i8_param_0];
; SM70-NEXT: and.b64 %rd1, %rd2, -2;
; SM70-NEXT: ld.param.u8 %rs10, [relaxed_sys_i8_param_1];
; SM70-NEXT: and.b64 %rd3, %rd2, 1;
; SM70-NEXT: shl.b64 %rd4, %rd3, 3;
; SM70-NEXT: cvt.u32.u64 %r1, %rd4;
; SM70-NEXT: mov.u16 %rs11, 255;
; SM70-NEXT: shl.b16 %rs12, %rs11, %r1;
; SM70-NEXT: not.b16 %rs2, %rs12;
; SM70-NEXT: shl.b16 %rs3, %rs9, %r1;
; SM70-NEXT: shl.b16 %rs4, %rs10, %r1;
; SM70-NEXT: ld.u16 %rs13, [%rd1];
; SM70-NEXT: and.b16 %rs16, %rs13, %rs2;
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
; SM70-NEXT: cvt.u32.u64 %r9, %rd2;
; SM70-NEXT: and.b32 %r10, %r9, 3;
; SM70-NEXT: shl.b32 %r1, %r10, 3;
; SM70-NEXT: mov.b32 %r11, 255;
; SM70-NEXT: shl.b32 %r12, %r11, %r1;
; SM70-NEXT: not.b32 %r2, %r12;
; SM70-NEXT: cvt.u32.u16 %r13, %rs1;
; SM70-NEXT: and.b32 %r14, %r13, 255;
; SM70-NEXT: shl.b32 %r3, %r14, %r1;
; SM70-NEXT: ld.param.u8 %r15, [relaxed_sys_i8_param_1];
; SM70-NEXT: shl.b32 %r4, %r15, %r1;
; SM70-NEXT: ld.u32 %r16, [%rd1];
; SM70-NEXT: and.b32 %r20, %r16, %r2;
; SM70-NEXT: $L__BB0_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
; SM70-NEXT: or.b16 %rs14, %rs16, %rs3;
; SM70-NEXT: or.b16 %rs15, %rs16, %rs4;
; SM70-NEXT: atom.cas.b16 %rs7, [%rd1], %rs15, %rs14;
; SM70-NEXT: setp.eq.s16 %p1, %rs7, %rs15;
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17;
; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB0_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB0_1 Depth=1
; SM70-NEXT: and.b16 %rs8, %rs7, %rs2;
; SM70-NEXT: setp.ne.s16 %p2, %rs16, %rs8;
; SM70-NEXT: mov.u16 %rs16, %rs8;
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
; SM70-NEXT: mov.u32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB0_1;
; SM70-NEXT: $L__BB0_3: // %partword.cmpxchg.end
; SM70-NEXT: cvt.u32.u16 %r2, %rs9;
; SM70-NEXT: st.param.b32 [func_retval0], %r2;
; SM70-NEXT: st.param.b32 [func_retval0], %r13;
; SM70-NEXT: ret;
%pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new seq_cst seq_cst
%pairold = cmpxchg ptr %addr, i8 %cmp, i8 %new monotonic monotonic
ret i8 %new
}

Expand Down Expand Up @@ -137,19 +138,44 @@ define i16 @relaxed_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
;
; SM70-LABEL: relaxed_sys_i16(
; SM70: {
; SM70-NEXT: .reg .b16 %rs<4>;
; SM70-NEXT: .reg .b32 %r<2>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-NEXT: .reg .pred %p<3>;
; SM70-NEXT: .reg .b16 %rs<2>;
; SM70-NEXT: .reg .b32 %r<20>;
; SM70-NEXT: .reg .b64 %rd<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u64 %rd1, [relaxed_sys_i16_param_0];
; SM70-NEXT: ld.param.u16 %rs1, [relaxed_sys_i16_param_1];
; SM70-NEXT: ld.param.u16 %rs2, [relaxed_sys_i16_param_2];
; SM70-NEXT: atom.cas.b16 %rs3, [%rd1], %rs1, %rs2;
; SM70-NEXT: cvt.u32.u16 %r1, %rs2;
; SM70-NEXT: st.param.b32 [func_retval0], %r1;
; SM70-NEXT: ld.param.u16 %rs1, [relaxed_sys_i16_param_2];
; SM70-NEXT: ld.param.u64 %rd2, [relaxed_sys_i16_param_0];
; SM70-NEXT: and.b64 %rd1, %rd2, -4;
; SM70-NEXT: ld.param.u16 %r9, [relaxed_sys_i16_param_1];
; SM70-NEXT: cvt.u32.u64 %r10, %rd2;
; SM70-NEXT: and.b32 %r11, %r10, 3;
; SM70-NEXT: shl.b32 %r1, %r11, 3;
; SM70-NEXT: mov.b32 %r12, 65535;
; SM70-NEXT: shl.b32 %r13, %r12, %r1;
; SM70-NEXT: not.b32 %r2, %r13;
; SM70-NEXT: cvt.u32.u16 %r14, %rs1;
; SM70-NEXT: shl.b32 %r3, %r14, %r1;
; SM70-NEXT: shl.b32 %r4, %r9, %r1;
; SM70-NEXT: ld.u32 %r15, [%rd1];
; SM70-NEXT: and.b32 %r19, %r15, %r2;
; SM70-NEXT: $L__BB1_1: // %partword.cmpxchg.loop
; SM70-NEXT: // =>This Inner Loop Header: Depth=1
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16;
; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB1_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB1_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
; SM70-NEXT: mov.u32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB1_1;
; SM70-NEXT: $L__BB1_3: // %partword.cmpxchg.end
; SM70-NEXT: st.param.b32 [func_retval0], %r14;
; SM70-NEXT: ret;
%pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new seq_cst seq_cst
%pairold = cmpxchg ptr %addr, i16 %cmp, i16 %new monotonic monotonic
ret i16 %new
}

Expand Down Expand Up @@ -180,7 +206,7 @@ define i32 @relaxed_sys_i32(ptr %addr, i32 %cmp, i32 %new) {
; SM70-NEXT: atom.cas.b32 %r3, [%rd1], %r1, %r2;
; SM70-NEXT: st.param.b32 [func_retval0], %r2;
; SM70-NEXT: ret;
%pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new seq_cst seq_cst
%pairold = cmpxchg ptr %addr, i32 %cmp, i32 %new monotonic monotonic
ret i32 %new
}

Expand Down Expand Up @@ -209,7 +235,7 @@ define i64 @relaxed_sys_i64(ptr %addr, i64 %cmp, i64 %new) {
; SM70-NEXT: atom.cas.b64 %rd4, [%rd1], %rd2, %rd3;
; SM70-NEXT: st.param.b64 [func_retval0], %rd3;
; SM70-NEXT: ret;
%pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new seq_cst seq_cst
%pairold = cmpxchg ptr %addr, i64 %cmp, i64 %new monotonic monotonic
ret i64 %new
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
Expand Down
Loading