diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index 0461ed4712221..bdff4d0dbbf1f 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -994,7 +994,7 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { Opc = TM.is64Bit() ? NVPTX::cvta_to_local_64 : NVPTX::cvta_to_local; break; case ADDRESS_SPACE_PARAM: - Opc = TM.is64Bit() ? NVPTX::IMOV64rr : NVPTX::IMOV32rr; + Opc = TM.is64Bit() ? NVPTX::IMOV64r : NVPTX::IMOV32r; break; } @@ -2151,10 +2151,10 @@ bool NVPTXDAGToDAGISel::tryBF16ArithToFMA(SDNode *N) { auto API = APF.bitcastToAPInt(); API = API.concat(API); auto Const = CurDAG->getTargetConstant(API, DL, MVT::i32); - return SDValue(CurDAG->getMachineNode(NVPTX::IMOV32ri, DL, VT, Const), 0); + return SDValue(CurDAG->getMachineNode(NVPTX::IMOV32i, DL, VT, Const), 0); } auto Const = CurDAG->getTargetConstantFP(APF, DL, VT); - return SDValue(CurDAG->getMachineNode(NVPTX::BFMOV16ri, DL, VT, Const), 0); + return SDValue(CurDAG->getMachineNode(NVPTX::BFMOV16i, DL, VT, Const), 0); }; switch (N->getOpcode()) { diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 18b513039ecea..0551954444e57 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -40,22 +40,22 @@ void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned Op; if (DestRC == &NVPTX::Int1RegsRegClass) { - Op = NVPTX::IMOV1rr; + Op = NVPTX::IMOV1r; } else if (DestRC == &NVPTX::Int16RegsRegClass) { - Op = NVPTX::IMOV16rr; + Op = NVPTX::MOV16r; } else if (DestRC == &NVPTX::Int32RegsRegClass) { - Op = (SrcRC == &NVPTX::Int32RegsRegClass ? NVPTX::IMOV32rr + Op = (SrcRC == &NVPTX::Int32RegsRegClass ? NVPTX::IMOV32r : NVPTX::BITCONVERT_32_F2I); } else if (DestRC == &NVPTX::Int64RegsRegClass) { - Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64rr + Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64r : NVPTX::BITCONVERT_64_F2I); } else if (DestRC == &NVPTX::Int128RegsRegClass) { - Op = NVPTX::IMOV128rr; + Op = NVPTX::IMOV128r; } else if (DestRC == &NVPTX::Float32RegsRegClass) { - Op = (SrcRC == &NVPTX::Float32RegsRegClass ? NVPTX::FMOV32rr + Op = (SrcRC == &NVPTX::Float32RegsRegClass ? NVPTX::FMOV32r : NVPTX::BITCONVERT_32_I2F); } else if (DestRC == &NVPTX::Float64RegsRegClass) { - Op = (SrcRC == &NVPTX::Float64RegsRegClass ? NVPTX::FMOV64rr + Op = (SrcRC == &NVPTX::Float64RegsRegClass ? NVPTX::FMOV64r : NVPTX::BITCONVERT_64_I2F); } else { llvm_unreachable("Bad register copy"); diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index f94d7099f1b0e..6eabd26667a84 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1945,68 +1945,53 @@ def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>; // Load a memory address into a u32 or u64 register. def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins ADDR_base:$a), - "mov.u32 \t$dst, $a;", + "mov.b32 \t$dst, $a;", [(set i32:$dst, (Wrapper tglobaladdr:$a))]>; def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins ADDR_base:$a), - "mov.u64 \t$dst, $a;", + "mov.b64 \t$dst, $a;", [(set i64:$dst, (Wrapper tglobaladdr:$a))]>; // Get pointer to local stack. let hasSideEffects = false in { def MOV_DEPOT_ADDR : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num), - "mov.u32 \t$d, __local_depot$num;", []>; + "mov.b32 \t$d, __local_depot$num;", []>; def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num), - "mov.u64 \t$d, __local_depot$num;", []>; + "mov.b64 \t$d, __local_depot$num;", []>; } // copyPhysreg is hard-coded in NVPTXInstrInfo.cpp -let hasSideEffects=0, isAsCheapAsAMove=1 in { - def IMOV1rr : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss), - "mov.pred \t$dst, $sss;", []>; - def IMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss), - "mov.u16 \t$dst, $sss;", []>; - def IMOV32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss), - "mov.u32 \t$dst, $sss;", []>; - def IMOV64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss), - "mov.u64 \t$dst, $sss;", []>; - def IMOV128rr : NVPTXInst<(outs Int128Regs:$dst), (ins Int128Regs:$sss), - "mov.b128 \t$dst, $sss;", []>; - - def FMOV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), - "mov.f32 \t$dst, $src;", []>; - def FMOV64rr : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src), - "mov.f64 \t$dst, $src;", []>; - - def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src), - "mov.pred \t$dst, $src;", - [(set i1:$dst, imm:$src)]>; - def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src), - "mov.b16 \t$dst, $src;", - [(set i16:$dst, imm:$src)]>; - def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src), - "mov.b32 \t$dst, $src;", - [(set i32:$dst, imm:$src)]>; - def IMOV64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src), - "mov.b64 \t$dst, $src;", - [(set i64:$dst, imm:$src)]>; - - def FMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins f16imm:$src), - "mov.b16 \t$dst, $src;", - [(set f16:$dst, fpimm:$src)]>; - def BFMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins bf16imm:$src), - "mov.b16 \t$dst, $src;", - [(set bf16:$dst, fpimm:$src)]>; - def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src), - "mov.f32 \t$dst, $src;", - [(set f32:$dst, fpimm:$src)]>; - def FMOV64ri : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src), - "mov.f64 \t$dst, $src;", - [(set f64:$dst, fpimm:$src)]>; -} - -def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>; -def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64ri texternalsym:$dst)>; +let hasSideEffects = false, isAsCheapAsAMove = true in { + // Class for register-to-register moves + class MOVr : + NVPTXInst<(outs RC:$dst), (ins RC:$src), + "mov." # OpStr # " \t$dst, $src;", []>; + + // Class for immediate-to-register moves + class MOVi : + NVPTXInst<(outs RC:$dst), (ins IMMType:$src), + "mov." # OpStr # " \t$dst, $src;", + [(set VT:$dst, ImmNode:$src)]>; +} + +def IMOV1r : MOVr; +def IMOV1i : MOVi; +def MOV16r : MOVr; +def IMOV16i : MOVi; +def IMOV32r : MOVr; +def IMOV32i : MOVi; +def IMOV64r : MOVr; +def IMOV64i : MOVi; +def IMOV128r : MOVr; +def FMOV16i : MOVi; +def BFMOV16i : MOVi; +def FMOV32r : MOVr; +def FMOV32i : MOVi; +def FMOV64r : MOVr; +def FMOV64i : MOVi; + +def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32i texternalsym:$dst)>; +def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64i texternalsym:$dst)>; //---- Copy Frame Index ---- def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins ADDR:$addr), @@ -2717,8 +2702,8 @@ def ProxyRegI1 : ProxyRegInst<"pred", i1, Int1Regs>; def ProxyRegI16 : ProxyRegInst<"b16", i16, Int16Regs>; def ProxyRegI32 : ProxyRegInst<"b32", i32, Int32Regs>; def ProxyRegI64 : ProxyRegInst<"b64", i64, Int64Regs>; -def ProxyRegF32 : ProxyRegInst<"f32", f32, Float32Regs>; -def ProxyRegF64 : ProxyRegInst<"f64", f64, Float64Regs>; +def ProxyRegF32 : ProxyRegInst<"b32", f32, Float32Regs>; +def ProxyRegF64 : ProxyRegInst<"b64", f64, Float64Regs>; foreach vt = [f16, bf16] in { def: Pat<(vt (ProxyReg vt:$src)), (ProxyRegI16 $src)>; diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll index b180928af82a4..b14295020bc0e 100644 --- a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll +++ b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll @@ -72,7 +72,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half % ; CHECKPTX62-NEXT: or.b32 %r32, %r31, %r30; ; CHECKPTX62-NEXT: atom.cas.b32 %r6, [%r1], %r54, %r32; ; CHECKPTX62-NEXT: setp.ne.s32 %p1, %r6, %r54; -; CHECKPTX62-NEXT: mov.u32 %r54, %r6; +; CHECKPTX62-NEXT: mov.b32 %r54, %r6; ; CHECKPTX62-NEXT: @%p1 bra $L__BB0_1; ; CHECKPTX62-NEXT: // %bb.2: // %atomicrmw.end44 ; CHECKPTX62-NEXT: ld.u32 %r55, [%r1]; @@ -88,7 +88,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half % ; CHECKPTX62-NEXT: or.b32 %r37, %r36, %r35; ; CHECKPTX62-NEXT: atom.cas.b32 %r9, [%r1], %r55, %r37; ; CHECKPTX62-NEXT: setp.ne.s32 %p2, %r9, %r55; -; CHECKPTX62-NEXT: mov.u32 %r55, %r9; +; CHECKPTX62-NEXT: mov.b32 %r55, %r9; ; CHECKPTX62-NEXT: @%p2 bra $L__BB0_3; ; CHECKPTX62-NEXT: // %bb.4: // %atomicrmw.end26 ; CHECKPTX62-NEXT: and.b32 %r10, %r22, -4; @@ -109,7 +109,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half % ; CHECKPTX62-NEXT: or.b32 %r45, %r44, %r43; ; CHECKPTX62-NEXT: atom.global.cas.b32 %r15, [%r10], %r56, %r45; ; CHECKPTX62-NEXT: setp.ne.s32 %p3, %r15, %r56; -; CHECKPTX62-NEXT: mov.u32 %r56, %r15; +; CHECKPTX62-NEXT: mov.b32 %r56, %r15; ; CHECKPTX62-NEXT: @%p3 bra $L__BB0_5; ; CHECKPTX62-NEXT: // %bb.6: // %atomicrmw.end8 ; CHECKPTX62-NEXT: and.b32 %r16, %r23, -4; @@ -130,7 +130,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half % ; CHECKPTX62-NEXT: or.b32 %r53, %r52, %r51; ; CHECKPTX62-NEXT: atom.shared.cas.b32 %r21, [%r16], %r57, %r53; ; CHECKPTX62-NEXT: setp.ne.s32 %p4, %r21, %r57; -; CHECKPTX62-NEXT: mov.u32 %r57, %r21; +; CHECKPTX62-NEXT: mov.b32 %r57, %r21; ; CHECKPTX62-NEXT: @%p4 bra $L__BB0_7; ; CHECKPTX62-NEXT: // %bb.8: // %atomicrmw.end ; CHECKPTX62-NEXT: ret; diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll index 9027bd6a14780..f27e574724ce4 100644 --- a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll +++ b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll @@ -73,7 +73,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat ; CHECKPTX71-NEXT: or.b32 %r32, %r31, %r30; ; CHECKPTX71-NEXT: atom.relaxed.cas.b32 %r6, [%r1], %r54, %r32; ; CHECKPTX71-NEXT: setp.ne.s32 %p1, %r6, %r54; -; CHECKPTX71-NEXT: mov.u32 %r54, %r6; +; CHECKPTX71-NEXT: mov.b32 %r54, %r6; ; CHECKPTX71-NEXT: @%p1 bra $L__BB0_1; ; CHECKPTX71-NEXT: // %bb.2: // %atomicrmw.end44 ; CHECKPTX71-NEXT: ld.u32 %r55, [%r1]; @@ -89,7 +89,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat ; CHECKPTX71-NEXT: or.b32 %r37, %r36, %r35; ; CHECKPTX71-NEXT: atom.relaxed.cas.b32 %r9, [%r1], %r55, %r37; ; CHECKPTX71-NEXT: setp.ne.s32 %p2, %r9, %r55; -; CHECKPTX71-NEXT: mov.u32 %r55, %r9; +; CHECKPTX71-NEXT: mov.b32 %r55, %r9; ; CHECKPTX71-NEXT: @%p2 bra $L__BB0_3; ; CHECKPTX71-NEXT: // %bb.4: // %atomicrmw.end26 ; CHECKPTX71-NEXT: and.b32 %r10, %r22, -4; @@ -111,7 +111,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat ; CHECKPTX71-NEXT: or.b32 %r45, %r44, %r43; ; CHECKPTX71-NEXT: atom.relaxed.global.cas.b32 %r15, [%r10], %r56, %r45; ; CHECKPTX71-NEXT: setp.ne.s32 %p3, %r15, %r56; -; CHECKPTX71-NEXT: mov.u32 %r56, %r15; +; CHECKPTX71-NEXT: mov.b32 %r56, %r15; ; CHECKPTX71-NEXT: @%p3 bra $L__BB0_5; ; CHECKPTX71-NEXT: // %bb.6: // %atomicrmw.end8 ; CHECKPTX71-NEXT: and.b32 %r16, %r23, -4; @@ -133,7 +133,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat ; CHECKPTX71-NEXT: or.b32 %r53, %r52, %r51; ; CHECKPTX71-NEXT: atom.relaxed.shared.cas.b32 %r21, [%r16], %r57, %r53; ; CHECKPTX71-NEXT: setp.ne.s32 %p4, %r21, %r57; -; CHECKPTX71-NEXT: mov.u32 %r57, %r21; +; CHECKPTX71-NEXT: mov.b32 %r57, %r21; ; CHECKPTX71-NEXT: @%p4 bra $L__BB0_7; ; CHECKPTX71-NEXT: // %bb.8: // %atomicrmw.end ; CHECKPTX71-NEXT: ret; diff --git a/llvm/test/CodeGen/NVPTX/atomics.ll b/llvm/test/CodeGen/NVPTX/atomics.ll index 2f58d279f82c3..e1fbb53891902 100644 --- a/llvm/test/CodeGen/NVPTX/atomics.ll +++ b/llvm/test/CodeGen/NVPTX/atomics.ll @@ -429,7 +429,7 @@ define half @atomicrmw_add_f16_generic(ptr %addr, half %val) { ; CHECK-NEXT: membar.sys; ; CHECK-NEXT: atom.cas.b32 %r5, [%rd1], %r16, %r14; ; CHECK-NEXT: setp.ne.s32 %p1, %r5, %r16; -; CHECK-NEXT: mov.u32 %r16, %r5; +; CHECK-NEXT: mov.b32 %r16, %r5; ; CHECK-NEXT: @%p1 bra $L__BB22_1; ; CHECK-NEXT: // %bb.2: // %atomicrmw.end ; CHECK-NEXT: shr.u32 %r15, %r5, %r1; diff --git a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll index 1c9d271902fd3..9474b01f95ee8 100644 --- a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll +++ b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll @@ -21,7 +21,7 @@ entry: %buf = alloca [16 x i8], align 4 ; CHECK: .local .align 4 .b8 __local_depot0[16] -; CHECK: mov.u64 %SPL +; CHECK: mov.b64 %SPL ; CHECK: ld.param.u64 %rd[[A_REG:[0-9]+]], [kernel_func_param_0] ; CHECK: cvta.to.global.u64 %rd[[A1_REG:[0-9]+]], %rd[[A_REG]] diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll index ea308c2a7673b..442da4debea8f 100644 --- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll +++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll @@ -38,7 +38,7 @@ define i8 @monotonic_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB0_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB0_1; ; SM60-NEXT: $L__BB0_3: // %partword.cmpxchg.end ; SM60-NEXT: st.param.b32 [func_retval0], %r13; @@ -83,7 +83,7 @@ define i8 @monotonic_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %ne ; SM60-NEXT: // in Loop: Header=BB1_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB1_1; ; SM60-NEXT: $L__BB1_3: // %partword.cmpxchg.end ; SM60-NEXT: st.param.b32 [func_retval0], %r13; @@ -128,7 +128,7 @@ define i8 @monotonic_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %ne ; SM60-NEXT: // in Loop: Header=BB2_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB2_1; ; SM60-NEXT: $L__BB2_3: // %partword.cmpxchg.end ; SM60-NEXT: st.param.b32 [func_retval0], %r13; @@ -173,7 +173,7 @@ define i8 @monotonic_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB3_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB3_1; ; SM60-NEXT: $L__BB3_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -219,7 +219,7 @@ define i8 @monotonic_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: // in Loop: Header=BB4_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB4_1; ; SM60-NEXT: $L__BB4_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -265,7 +265,7 @@ define i8 @monotonic_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: // in Loop: Header=BB5_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB5_1; ; SM60-NEXT: $L__BB5_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -312,7 +312,7 @@ define i8 @monotonic_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB6_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB6_1; ; SM60-NEXT: $L__BB6_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -359,7 +359,7 @@ define i8 @monotonic_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: // in Loop: Header=BB7_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB7_1; ; SM60-NEXT: $L__BB7_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -406,7 +406,7 @@ define i8 @monotonic_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: // in Loop: Header=BB8_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB8_1; ; SM60-NEXT: $L__BB8_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -452,7 +452,7 @@ define i8 @acquire_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB9_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB9_1; ; SM60-NEXT: $L__BB9_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -498,7 +498,7 @@ define i8 @acquire_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: // in Loop: Header=BB10_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB10_1; ; SM60-NEXT: $L__BB10_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -544,7 +544,7 @@ define i8 @acquire_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: // in Loop: Header=BB11_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB11_1; ; SM60-NEXT: $L__BB11_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -590,7 +590,7 @@ define i8 @acquire_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB12_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB12_1; ; SM60-NEXT: $L__BB12_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -636,7 +636,7 @@ define i8 @acquire_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB13_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB13_1; ; SM60-NEXT: $L__BB13_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -682,7 +682,7 @@ define i8 @acquire_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB14_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB14_1; ; SM60-NEXT: $L__BB14_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -729,7 +729,7 @@ define i8 @acquire_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB15_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB15_1; ; SM60-NEXT: $L__BB15_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -776,7 +776,7 @@ define i8 @acquire_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB16_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB16_1; ; SM60-NEXT: $L__BB16_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -823,7 +823,7 @@ define i8 @acquire_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB17_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB17_1; ; SM60-NEXT: $L__BB17_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -870,7 +870,7 @@ define i8 @release_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB18_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB18_1; ; SM60-NEXT: $L__BB18_3: // %partword.cmpxchg.end ; SM60-NEXT: st.param.b32 [func_retval0], %r13; @@ -916,7 +916,7 @@ define i8 @release_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: // in Loop: Header=BB19_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB19_1; ; SM60-NEXT: $L__BB19_3: // %partword.cmpxchg.end ; SM60-NEXT: st.param.b32 [func_retval0], %r13; @@ -962,7 +962,7 @@ define i8 @release_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: // in Loop: Header=BB20_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB20_1; ; SM60-NEXT: $L__BB20_3: // %partword.cmpxchg.end ; SM60-NEXT: st.param.b32 [func_retval0], %r13; @@ -1008,7 +1008,7 @@ define i8 @release_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB21_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB21_1; ; SM60-NEXT: $L__BB21_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -1055,7 +1055,7 @@ define i8 @release_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB22_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB22_1; ; SM60-NEXT: $L__BB22_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -1102,7 +1102,7 @@ define i8 @release_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB23_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB23_1; ; SM60-NEXT: $L__BB23_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -1149,7 +1149,7 @@ define i8 @release_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB24_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB24_1; ; SM60-NEXT: $L__BB24_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -1196,7 +1196,7 @@ define i8 @release_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB25_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB25_1; ; SM60-NEXT: $L__BB25_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -1243,7 +1243,7 @@ define i8 @release_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB26_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB26_1; ; SM60-NEXT: $L__BB26_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -1290,7 +1290,7 @@ define i8 @acq_rel_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB27_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB27_1; ; SM60-NEXT: $L__BB27_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -1337,7 +1337,7 @@ define i8 @acq_rel_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: // in Loop: Header=BB28_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB28_1; ; SM60-NEXT: $L__BB28_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -1384,7 +1384,7 @@ define i8 @acq_rel_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: // in Loop: Header=BB29_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB29_1; ; SM60-NEXT: $L__BB29_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -1431,7 +1431,7 @@ define i8 @acq_rel_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB30_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB30_1; ; SM60-NEXT: $L__BB30_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -1478,7 +1478,7 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB31_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB31_1; ; SM60-NEXT: $L__BB31_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -1525,7 +1525,7 @@ define i8 @acq_rel_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB32_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB32_1; ; SM60-NEXT: $L__BB32_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -1572,7 +1572,7 @@ define i8 @acq_rel_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB33_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB33_1; ; SM60-NEXT: $L__BB33_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -1619,7 +1619,7 @@ define i8 @acq_rel_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB34_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB34_1; ; SM60-NEXT: $L__BB34_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -1666,7 +1666,7 @@ define i8 @acq_rel_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB35_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB35_1; ; SM60-NEXT: $L__BB35_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -1713,7 +1713,7 @@ define i8 @seq_cst_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB36_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB36_1; ; SM60-NEXT: $L__BB36_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -1760,7 +1760,7 @@ define i8 @seq_cst_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: // in Loop: Header=BB37_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB37_1; ; SM60-NEXT: $L__BB37_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -1807,7 +1807,7 @@ define i8 @seq_cst_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM60-NEXT: // in Loop: Header=BB38_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB38_1; ; SM60-NEXT: $L__BB38_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -1854,7 +1854,7 @@ define i8 @seq_cst_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB39_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB39_1; ; SM60-NEXT: $L__BB39_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -1901,7 +1901,7 @@ define i8 @seq_cst_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB40_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB40_1; ; SM60-NEXT: $L__BB40_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -1948,7 +1948,7 @@ define i8 @seq_cst_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB41_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB41_1; ; SM60-NEXT: $L__BB41_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -1995,7 +1995,7 @@ define i8 @seq_cst_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB42_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB42_1; ; SM60-NEXT: $L__BB42_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -2042,7 +2042,7 @@ define i8 @seq_cst_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB43_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB43_1; ; SM60-NEXT: $L__BB43_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -2089,7 +2089,7 @@ define i8 @seq_cst_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM60-NEXT: // in Loop: Header=BB44_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM60-NEXT: mov.u32 %r20, %r8; +; SM60-NEXT: mov.b32 %r20, %r8; ; SM60-NEXT: @%p2 bra $L__BB44_1; ; SM60-NEXT: $L__BB44_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -2134,7 +2134,7 @@ define i16 @monotonic_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: // in Loop: Header=BB45_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB45_1; ; SM60-NEXT: $L__BB45_3: // %partword.cmpxchg.end ; SM60-NEXT: st.param.b32 [func_retval0], %r14; @@ -2178,7 +2178,7 @@ define i16 @monotonic_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 ; SM60-NEXT: // in Loop: Header=BB46_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB46_1; ; SM60-NEXT: $L__BB46_3: // %partword.cmpxchg.end ; SM60-NEXT: st.param.b32 [func_retval0], %r14; @@ -2222,7 +2222,7 @@ define i16 @monotonic_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 ; SM60-NEXT: // in Loop: Header=BB47_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB47_1; ; SM60-NEXT: $L__BB47_3: // %partword.cmpxchg.end ; SM60-NEXT: st.param.b32 [func_retval0], %r14; @@ -2266,7 +2266,7 @@ define i16 @monotonic_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: // in Loop: Header=BB48_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB48_1; ; SM60-NEXT: $L__BB48_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -2311,7 +2311,7 @@ define i16 @monotonic_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM60-NEXT: // in Loop: Header=BB49_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB49_1; ; SM60-NEXT: $L__BB49_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -2356,7 +2356,7 @@ define i16 @monotonic_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM60-NEXT: // in Loop: Header=BB50_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB50_1; ; SM60-NEXT: $L__BB50_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -2402,7 +2402,7 @@ define i16 @monotonic_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: // in Loop: Header=BB51_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB51_1; ; SM60-NEXT: $L__BB51_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -2448,7 +2448,7 @@ define i16 @monotonic_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM60-NEXT: // in Loop: Header=BB52_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB52_1; ; SM60-NEXT: $L__BB52_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -2494,7 +2494,7 @@ define i16 @monotonic_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM60-NEXT: // in Loop: Header=BB53_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB53_1; ; SM60-NEXT: $L__BB53_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -2539,7 +2539,7 @@ define i16 @acquire_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: // in Loop: Header=BB54_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB54_1; ; SM60-NEXT: $L__BB54_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -2584,7 +2584,7 @@ define i16 @acquire_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM60-NEXT: // in Loop: Header=BB55_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB55_1; ; SM60-NEXT: $L__BB55_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -2629,7 +2629,7 @@ define i16 @acquire_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM60-NEXT: // in Loop: Header=BB56_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB56_1; ; SM60-NEXT: $L__BB56_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -2674,7 +2674,7 @@ define i16 @acquire_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: // in Loop: Header=BB57_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB57_1; ; SM60-NEXT: $L__BB57_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -2719,7 +2719,7 @@ define i16 @acquire_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: // in Loop: Header=BB58_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB58_1; ; SM60-NEXT: $L__BB58_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -2764,7 +2764,7 @@ define i16 @acquire_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: // in Loop: Header=BB59_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB59_1; ; SM60-NEXT: $L__BB59_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -2810,7 +2810,7 @@ define i16 @acquire_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: // in Loop: Header=BB60_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB60_1; ; SM60-NEXT: $L__BB60_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -2856,7 +2856,7 @@ define i16 @acquire_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: // in Loop: Header=BB61_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB61_1; ; SM60-NEXT: $L__BB61_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -2902,7 +2902,7 @@ define i16 @acquire_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: // in Loop: Header=BB62_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB62_1; ; SM60-NEXT: $L__BB62_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -2948,7 +2948,7 @@ define i16 @release_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: // in Loop: Header=BB63_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB63_1; ; SM60-NEXT: $L__BB63_3: // %partword.cmpxchg.end ; SM60-NEXT: st.param.b32 [func_retval0], %r14; @@ -2993,7 +2993,7 @@ define i16 @release_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM60-NEXT: // in Loop: Header=BB64_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB64_1; ; SM60-NEXT: $L__BB64_3: // %partword.cmpxchg.end ; SM60-NEXT: st.param.b32 [func_retval0], %r14; @@ -3038,7 +3038,7 @@ define i16 @release_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM60-NEXT: // in Loop: Header=BB65_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB65_1; ; SM60-NEXT: $L__BB65_3: // %partword.cmpxchg.end ; SM60-NEXT: st.param.b32 [func_retval0], %r14; @@ -3083,7 +3083,7 @@ define i16 @release_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: // in Loop: Header=BB66_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB66_1; ; SM60-NEXT: $L__BB66_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -3129,7 +3129,7 @@ define i16 @release_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: // in Loop: Header=BB67_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB67_1; ; SM60-NEXT: $L__BB67_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -3175,7 +3175,7 @@ define i16 @release_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: // in Loop: Header=BB68_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB68_1; ; SM60-NEXT: $L__BB68_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -3221,7 +3221,7 @@ define i16 @release_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: // in Loop: Header=BB69_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB69_1; ; SM60-NEXT: $L__BB69_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -3267,7 +3267,7 @@ define i16 @release_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: // in Loop: Header=BB70_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB70_1; ; SM60-NEXT: $L__BB70_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -3313,7 +3313,7 @@ define i16 @release_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: // in Loop: Header=BB71_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB71_1; ; SM60-NEXT: $L__BB71_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -3359,7 +3359,7 @@ define i16 @acq_rel_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: // in Loop: Header=BB72_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB72_1; ; SM60-NEXT: $L__BB72_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -3405,7 +3405,7 @@ define i16 @acq_rel_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM60-NEXT: // in Loop: Header=BB73_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB73_1; ; SM60-NEXT: $L__BB73_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -3451,7 +3451,7 @@ define i16 @acq_rel_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM60-NEXT: // in Loop: Header=BB74_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB74_1; ; SM60-NEXT: $L__BB74_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -3497,7 +3497,7 @@ define i16 @acq_rel_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: // in Loop: Header=BB75_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB75_1; ; SM60-NEXT: $L__BB75_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -3543,7 +3543,7 @@ define i16 @acq_rel_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: // in Loop: Header=BB76_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB76_1; ; SM60-NEXT: $L__BB76_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -3589,7 +3589,7 @@ define i16 @acq_rel_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: // in Loop: Header=BB77_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB77_1; ; SM60-NEXT: $L__BB77_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -3635,7 +3635,7 @@ define i16 @acq_rel_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: // in Loop: Header=BB78_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB78_1; ; SM60-NEXT: $L__BB78_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -3681,7 +3681,7 @@ define i16 @acq_rel_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: // in Loop: Header=BB79_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB79_1; ; SM60-NEXT: $L__BB79_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -3727,7 +3727,7 @@ define i16 @acq_rel_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: // in Loop: Header=BB80_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB80_1; ; SM60-NEXT: $L__BB80_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -3773,7 +3773,7 @@ define i16 @seq_cst_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: // in Loop: Header=BB81_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB81_1; ; SM60-NEXT: $L__BB81_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -3819,7 +3819,7 @@ define i16 @seq_cst_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM60-NEXT: // in Loop: Header=BB82_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB82_1; ; SM60-NEXT: $L__BB82_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -3865,7 +3865,7 @@ define i16 @seq_cst_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM60-NEXT: // in Loop: Header=BB83_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB83_1; ; SM60-NEXT: $L__BB83_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -3911,7 +3911,7 @@ define i16 @seq_cst_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: // in Loop: Header=BB84_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB84_1; ; SM60-NEXT: $L__BB84_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -3957,7 +3957,7 @@ define i16 @seq_cst_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: // in Loop: Header=BB85_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB85_1; ; SM60-NEXT: $L__BB85_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -4003,7 +4003,7 @@ define i16 @seq_cst_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: // in Loop: Header=BB86_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB86_1; ; SM60-NEXT: $L__BB86_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -4049,7 +4049,7 @@ define i16 @seq_cst_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM60-NEXT: // in Loop: Header=BB87_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB87_1; ; SM60-NEXT: $L__BB87_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -4095,7 +4095,7 @@ define i16 @seq_cst_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: // in Loop: Header=BB88_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB88_1; ; SM60-NEXT: $L__BB88_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; @@ -4141,7 +4141,7 @@ define i16 @seq_cst_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM60-NEXT: // in Loop: Header=BB89_1 Depth=1 ; SM60-NEXT: and.b32 %r8, %r7, %r2; ; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM60-NEXT: mov.u32 %r19, %r8; +; SM60-NEXT: mov.b32 %r19, %r8; ; SM60-NEXT: @%p2 bra $L__BB89_1; ; SM60-NEXT: $L__BB89_3: // %partword.cmpxchg.end ; SM60-NEXT: membar.sys; diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll index 4360ea36e863a..df8c49aaaa42c 100644 --- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll +++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll @@ -38,7 +38,7 @@ define i8 @monotonic_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB0_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB0_1; ; SM70-NEXT: $L__BB0_3: // %partword.cmpxchg.end ; SM70-NEXT: st.param.b32 [func_retval0], %r13; @@ -83,7 +83,7 @@ define i8 @monotonic_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %ne ; SM70-NEXT: // in Loop: Header=BB1_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB1_1; ; SM70-NEXT: $L__BB1_3: // %partword.cmpxchg.end ; SM70-NEXT: st.param.b32 [func_retval0], %r13; @@ -128,7 +128,7 @@ define i8 @monotonic_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %ne ; SM70-NEXT: // in Loop: Header=BB2_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB2_1; ; SM70-NEXT: $L__BB2_3: // %partword.cmpxchg.end ; SM70-NEXT: st.param.b32 [func_retval0], %r13; @@ -173,7 +173,7 @@ define i8 @monotonic_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB3_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB3_1; ; SM70-NEXT: $L__BB3_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -219,7 +219,7 @@ define i8 @monotonic_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: // in Loop: Header=BB4_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB4_1; ; SM70-NEXT: $L__BB4_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -265,7 +265,7 @@ define i8 @monotonic_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: // in Loop: Header=BB5_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB5_1; ; SM70-NEXT: $L__BB5_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -312,7 +312,7 @@ define i8 @monotonic_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB6_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB6_1; ; SM70-NEXT: $L__BB6_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -359,7 +359,7 @@ define i8 @monotonic_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: // in Loop: Header=BB7_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB7_1; ; SM70-NEXT: $L__BB7_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -406,7 +406,7 @@ define i8 @monotonic_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: // in Loop: Header=BB8_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB8_1; ; SM70-NEXT: $L__BB8_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -452,7 +452,7 @@ define i8 @acquire_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB9_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB9_1; ; SM70-NEXT: $L__BB9_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -498,7 +498,7 @@ define i8 @acquire_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: // in Loop: Header=BB10_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB10_1; ; SM70-NEXT: $L__BB10_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -544,7 +544,7 @@ define i8 @acquire_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: // in Loop: Header=BB11_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB11_1; ; SM70-NEXT: $L__BB11_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -590,7 +590,7 @@ define i8 @acquire_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB12_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB12_1; ; SM70-NEXT: $L__BB12_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -636,7 +636,7 @@ define i8 @acquire_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB13_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB13_1; ; SM70-NEXT: $L__BB13_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -682,7 +682,7 @@ define i8 @acquire_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB14_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB14_1; ; SM70-NEXT: $L__BB14_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -729,7 +729,7 @@ define i8 @acquire_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB15_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB15_1; ; SM70-NEXT: $L__BB15_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -776,7 +776,7 @@ define i8 @acquire_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB16_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB16_1; ; SM70-NEXT: $L__BB16_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -823,7 +823,7 @@ define i8 @acquire_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB17_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB17_1; ; SM70-NEXT: $L__BB17_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -870,7 +870,7 @@ define i8 @release_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB18_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB18_1; ; SM70-NEXT: $L__BB18_3: // %partword.cmpxchg.end ; SM70-NEXT: st.param.b32 [func_retval0], %r13; @@ -916,7 +916,7 @@ define i8 @release_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: // in Loop: Header=BB19_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB19_1; ; SM70-NEXT: $L__BB19_3: // %partword.cmpxchg.end ; SM70-NEXT: st.param.b32 [func_retval0], %r13; @@ -962,7 +962,7 @@ define i8 @release_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: // in Loop: Header=BB20_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB20_1; ; SM70-NEXT: $L__BB20_3: // %partword.cmpxchg.end ; SM70-NEXT: st.param.b32 [func_retval0], %r13; @@ -1008,7 +1008,7 @@ define i8 @release_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB21_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB21_1; ; SM70-NEXT: $L__BB21_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -1055,7 +1055,7 @@ define i8 @release_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB22_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB22_1; ; SM70-NEXT: $L__BB22_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -1102,7 +1102,7 @@ define i8 @release_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB23_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB23_1; ; SM70-NEXT: $L__BB23_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -1149,7 +1149,7 @@ define i8 @release_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB24_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB24_1; ; SM70-NEXT: $L__BB24_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -1196,7 +1196,7 @@ define i8 @release_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB25_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB25_1; ; SM70-NEXT: $L__BB25_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -1243,7 +1243,7 @@ define i8 @release_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB26_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB26_1; ; SM70-NEXT: $L__BB26_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -1290,7 +1290,7 @@ define i8 @acq_rel_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB27_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB27_1; ; SM70-NEXT: $L__BB27_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -1337,7 +1337,7 @@ define i8 @acq_rel_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: // in Loop: Header=BB28_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB28_1; ; SM70-NEXT: $L__BB28_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -1384,7 +1384,7 @@ define i8 @acq_rel_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: // in Loop: Header=BB29_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB29_1; ; SM70-NEXT: $L__BB29_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -1431,7 +1431,7 @@ define i8 @acq_rel_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB30_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB30_1; ; SM70-NEXT: $L__BB30_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -1478,7 +1478,7 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB31_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB31_1; ; SM70-NEXT: $L__BB31_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -1525,7 +1525,7 @@ define i8 @acq_rel_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB32_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB32_1; ; SM70-NEXT: $L__BB32_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -1572,7 +1572,7 @@ define i8 @acq_rel_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB33_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB33_1; ; SM70-NEXT: $L__BB33_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -1619,7 +1619,7 @@ define i8 @acq_rel_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB34_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB34_1; ; SM70-NEXT: $L__BB34_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -1666,7 +1666,7 @@ define i8 @acq_rel_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB35_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB35_1; ; SM70-NEXT: $L__BB35_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -1713,7 +1713,7 @@ define i8 @seq_cst_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB36_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB36_1; ; SM70-NEXT: $L__BB36_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -1760,7 +1760,7 @@ define i8 @seq_cst_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: // in Loop: Header=BB37_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB37_1; ; SM70-NEXT: $L__BB37_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -1807,7 +1807,7 @@ define i8 @seq_cst_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM70-NEXT: // in Loop: Header=BB38_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB38_1; ; SM70-NEXT: $L__BB38_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -1854,7 +1854,7 @@ define i8 @seq_cst_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB39_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB39_1; ; SM70-NEXT: $L__BB39_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -1901,7 +1901,7 @@ define i8 @seq_cst_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB40_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB40_1; ; SM70-NEXT: $L__BB40_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -1948,7 +1948,7 @@ define i8 @seq_cst_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB41_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB41_1; ; SM70-NEXT: $L__BB41_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -1995,7 +1995,7 @@ define i8 @seq_cst_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB42_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB42_1; ; SM70-NEXT: $L__BB42_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -2042,7 +2042,7 @@ define i8 @seq_cst_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB43_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB43_1; ; SM70-NEXT: $L__BB43_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -2089,7 +2089,7 @@ define i8 @seq_cst_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB44_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB44_1; ; SM70-NEXT: $L__BB44_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -2134,7 +2134,7 @@ define i16 @monotonic_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: // in Loop: Header=BB45_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB45_1; ; SM70-NEXT: $L__BB45_3: // %partword.cmpxchg.end ; SM70-NEXT: st.param.b32 [func_retval0], %r14; @@ -2178,7 +2178,7 @@ define i16 @monotonic_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 ; SM70-NEXT: // in Loop: Header=BB46_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB46_1; ; SM70-NEXT: $L__BB46_3: // %partword.cmpxchg.end ; SM70-NEXT: st.param.b32 [func_retval0], %r14; @@ -2222,7 +2222,7 @@ define i16 @monotonic_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 ; SM70-NEXT: // in Loop: Header=BB47_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB47_1; ; SM70-NEXT: $L__BB47_3: // %partword.cmpxchg.end ; SM70-NEXT: st.param.b32 [func_retval0], %r14; @@ -2266,7 +2266,7 @@ define i16 @monotonic_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: // in Loop: Header=BB48_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB48_1; ; SM70-NEXT: $L__BB48_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -2311,7 +2311,7 @@ define i16 @monotonic_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM70-NEXT: // in Loop: Header=BB49_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB49_1; ; SM70-NEXT: $L__BB49_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -2356,7 +2356,7 @@ define i16 @monotonic_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM70-NEXT: // in Loop: Header=BB50_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB50_1; ; SM70-NEXT: $L__BB50_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -2402,7 +2402,7 @@ define i16 @monotonic_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: // in Loop: Header=BB51_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB51_1; ; SM70-NEXT: $L__BB51_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -2448,7 +2448,7 @@ define i16 @monotonic_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM70-NEXT: // in Loop: Header=BB52_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB52_1; ; SM70-NEXT: $L__BB52_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -2494,7 +2494,7 @@ define i16 @monotonic_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM70-NEXT: // in Loop: Header=BB53_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB53_1; ; SM70-NEXT: $L__BB53_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -2539,7 +2539,7 @@ define i16 @acquire_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: // in Loop: Header=BB54_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB54_1; ; SM70-NEXT: $L__BB54_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -2584,7 +2584,7 @@ define i16 @acquire_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM70-NEXT: // in Loop: Header=BB55_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB55_1; ; SM70-NEXT: $L__BB55_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -2629,7 +2629,7 @@ define i16 @acquire_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM70-NEXT: // in Loop: Header=BB56_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB56_1; ; SM70-NEXT: $L__BB56_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -2674,7 +2674,7 @@ define i16 @acquire_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: // in Loop: Header=BB57_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB57_1; ; SM70-NEXT: $L__BB57_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -2719,7 +2719,7 @@ define i16 @acquire_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: // in Loop: Header=BB58_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB58_1; ; SM70-NEXT: $L__BB58_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -2764,7 +2764,7 @@ define i16 @acquire_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: // in Loop: Header=BB59_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB59_1; ; SM70-NEXT: $L__BB59_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -2810,7 +2810,7 @@ define i16 @acquire_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: // in Loop: Header=BB60_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB60_1; ; SM70-NEXT: $L__BB60_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -2856,7 +2856,7 @@ define i16 @acquire_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: // in Loop: Header=BB61_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB61_1; ; SM70-NEXT: $L__BB61_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -2902,7 +2902,7 @@ define i16 @acquire_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: // in Loop: Header=BB62_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB62_1; ; SM70-NEXT: $L__BB62_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -2948,7 +2948,7 @@ define i16 @release_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: // in Loop: Header=BB63_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB63_1; ; SM70-NEXT: $L__BB63_3: // %partword.cmpxchg.end ; SM70-NEXT: st.param.b32 [func_retval0], %r14; @@ -2993,7 +2993,7 @@ define i16 @release_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM70-NEXT: // in Loop: Header=BB64_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB64_1; ; SM70-NEXT: $L__BB64_3: // %partword.cmpxchg.end ; SM70-NEXT: st.param.b32 [func_retval0], %r14; @@ -3038,7 +3038,7 @@ define i16 @release_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM70-NEXT: // in Loop: Header=BB65_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB65_1; ; SM70-NEXT: $L__BB65_3: // %partword.cmpxchg.end ; SM70-NEXT: st.param.b32 [func_retval0], %r14; @@ -3083,7 +3083,7 @@ define i16 @release_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: // in Loop: Header=BB66_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB66_1; ; SM70-NEXT: $L__BB66_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -3129,7 +3129,7 @@ define i16 @release_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: // in Loop: Header=BB67_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB67_1; ; SM70-NEXT: $L__BB67_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -3175,7 +3175,7 @@ define i16 @release_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: // in Loop: Header=BB68_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB68_1; ; SM70-NEXT: $L__BB68_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -3221,7 +3221,7 @@ define i16 @release_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: // in Loop: Header=BB69_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB69_1; ; SM70-NEXT: $L__BB69_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -3267,7 +3267,7 @@ define i16 @release_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: // in Loop: Header=BB70_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB70_1; ; SM70-NEXT: $L__BB70_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -3313,7 +3313,7 @@ define i16 @release_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: // in Loop: Header=BB71_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB71_1; ; SM70-NEXT: $L__BB71_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -3359,7 +3359,7 @@ define i16 @acq_rel_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: // in Loop: Header=BB72_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB72_1; ; SM70-NEXT: $L__BB72_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -3405,7 +3405,7 @@ define i16 @acq_rel_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM70-NEXT: // in Loop: Header=BB73_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB73_1; ; SM70-NEXT: $L__BB73_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -3451,7 +3451,7 @@ define i16 @acq_rel_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM70-NEXT: // in Loop: Header=BB74_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB74_1; ; SM70-NEXT: $L__BB74_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -3497,7 +3497,7 @@ define i16 @acq_rel_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: // in Loop: Header=BB75_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB75_1; ; SM70-NEXT: $L__BB75_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -3543,7 +3543,7 @@ define i16 @acq_rel_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: // in Loop: Header=BB76_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB76_1; ; SM70-NEXT: $L__BB76_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -3589,7 +3589,7 @@ define i16 @acq_rel_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: // in Loop: Header=BB77_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB77_1; ; SM70-NEXT: $L__BB77_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -3635,7 +3635,7 @@ define i16 @acq_rel_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: // in Loop: Header=BB78_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB78_1; ; SM70-NEXT: $L__BB78_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -3681,7 +3681,7 @@ define i16 @acq_rel_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: // in Loop: Header=BB79_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB79_1; ; SM70-NEXT: $L__BB79_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -3727,7 +3727,7 @@ define i16 @acq_rel_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: // in Loop: Header=BB80_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB80_1; ; SM70-NEXT: $L__BB80_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -3773,7 +3773,7 @@ define i16 @seq_cst_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: // in Loop: Header=BB81_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB81_1; ; SM70-NEXT: $L__BB81_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -3819,7 +3819,7 @@ define i16 @seq_cst_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM70-NEXT: // in Loop: Header=BB82_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB82_1; ; SM70-NEXT: $L__BB82_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -3865,7 +3865,7 @@ define i16 @seq_cst_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM70-NEXT: // in Loop: Header=BB83_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB83_1; ; SM70-NEXT: $L__BB83_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -3911,7 +3911,7 @@ define i16 @seq_cst_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: // in Loop: Header=BB84_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB84_1; ; SM70-NEXT: $L__BB84_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -3957,7 +3957,7 @@ define i16 @seq_cst_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: // in Loop: Header=BB85_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB85_1; ; SM70-NEXT: $L__BB85_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -4003,7 +4003,7 @@ define i16 @seq_cst_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: // in Loop: Header=BB86_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB86_1; ; SM70-NEXT: $L__BB86_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -4049,7 +4049,7 @@ define i16 @seq_cst_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: // in Loop: Header=BB87_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB87_1; ; SM70-NEXT: $L__BB87_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -4095,7 +4095,7 @@ define i16 @seq_cst_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: // in Loop: Header=BB88_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB88_1; ; SM70-NEXT: $L__BB88_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -4141,7 +4141,7 @@ define i16 @seq_cst_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM70-NEXT: // in Loop: Header=BB89_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB89_1; ; SM70-NEXT: $L__BB89_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll index 5acb275a6f581..6df7b3d695f7d 100644 --- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll +++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll @@ -38,7 +38,7 @@ define i8 @monotonic_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB0_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB0_1; ; SM90-NEXT: $L__BB0_3: // %partword.cmpxchg.end ; SM90-NEXT: st.param.b32 [func_retval0], %r13; @@ -83,7 +83,7 @@ define i8 @monotonic_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %ne ; SM90-NEXT: // in Loop: Header=BB1_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB1_1; ; SM90-NEXT: $L__BB1_3: // %partword.cmpxchg.end ; SM90-NEXT: st.param.b32 [func_retval0], %r13; @@ -128,7 +128,7 @@ define i8 @monotonic_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %ne ; SM90-NEXT: // in Loop: Header=BB2_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB2_1; ; SM90-NEXT: $L__BB2_3: // %partword.cmpxchg.end ; SM90-NEXT: st.param.b32 [func_retval0], %r13; @@ -173,7 +173,7 @@ define i8 @monotonic_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB3_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB3_1; ; SM90-NEXT: $L__BB3_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -219,7 +219,7 @@ define i8 @monotonic_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: // in Loop: Header=BB4_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB4_1; ; SM90-NEXT: $L__BB4_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -265,7 +265,7 @@ define i8 @monotonic_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: // in Loop: Header=BB5_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB5_1; ; SM90-NEXT: $L__BB5_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -312,7 +312,7 @@ define i8 @monotonic_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB6_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB6_1; ; SM90-NEXT: $L__BB6_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -359,7 +359,7 @@ define i8 @monotonic_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: // in Loop: Header=BB7_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB7_1; ; SM90-NEXT: $L__BB7_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -406,7 +406,7 @@ define i8 @monotonic_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: // in Loop: Header=BB8_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB8_1; ; SM90-NEXT: $L__BB8_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -452,7 +452,7 @@ define i8 @acquire_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB9_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB9_1; ; SM90-NEXT: $L__BB9_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -498,7 +498,7 @@ define i8 @acquire_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: // in Loop: Header=BB10_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB10_1; ; SM90-NEXT: $L__BB10_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -544,7 +544,7 @@ define i8 @acquire_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: // in Loop: Header=BB11_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB11_1; ; SM90-NEXT: $L__BB11_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -590,7 +590,7 @@ define i8 @acquire_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB12_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB12_1; ; SM90-NEXT: $L__BB12_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -636,7 +636,7 @@ define i8 @acquire_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB13_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB13_1; ; SM90-NEXT: $L__BB13_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -682,7 +682,7 @@ define i8 @acquire_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB14_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB14_1; ; SM90-NEXT: $L__BB14_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -729,7 +729,7 @@ define i8 @acquire_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB15_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB15_1; ; SM90-NEXT: $L__BB15_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -776,7 +776,7 @@ define i8 @acquire_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB16_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB16_1; ; SM90-NEXT: $L__BB16_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -823,7 +823,7 @@ define i8 @acquire_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB17_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB17_1; ; SM90-NEXT: $L__BB17_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -870,7 +870,7 @@ define i8 @release_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB18_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB18_1; ; SM90-NEXT: $L__BB18_3: // %partword.cmpxchg.end ; SM90-NEXT: st.param.b32 [func_retval0], %r13; @@ -916,7 +916,7 @@ define i8 @release_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: // in Loop: Header=BB19_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB19_1; ; SM90-NEXT: $L__BB19_3: // %partword.cmpxchg.end ; SM90-NEXT: st.param.b32 [func_retval0], %r13; @@ -962,7 +962,7 @@ define i8 @release_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: // in Loop: Header=BB20_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB20_1; ; SM90-NEXT: $L__BB20_3: // %partword.cmpxchg.end ; SM90-NEXT: st.param.b32 [func_retval0], %r13; @@ -1008,7 +1008,7 @@ define i8 @release_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB21_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB21_1; ; SM90-NEXT: $L__BB21_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -1055,7 +1055,7 @@ define i8 @release_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB22_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB22_1; ; SM90-NEXT: $L__BB22_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -1102,7 +1102,7 @@ define i8 @release_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB23_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB23_1; ; SM90-NEXT: $L__BB23_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -1149,7 +1149,7 @@ define i8 @release_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB24_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB24_1; ; SM90-NEXT: $L__BB24_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -1196,7 +1196,7 @@ define i8 @release_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB25_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB25_1; ; SM90-NEXT: $L__BB25_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -1243,7 +1243,7 @@ define i8 @release_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB26_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB26_1; ; SM90-NEXT: $L__BB26_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -1290,7 +1290,7 @@ define i8 @acq_rel_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB27_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB27_1; ; SM90-NEXT: $L__BB27_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -1337,7 +1337,7 @@ define i8 @acq_rel_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: // in Loop: Header=BB28_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB28_1; ; SM90-NEXT: $L__BB28_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -1384,7 +1384,7 @@ define i8 @acq_rel_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: // in Loop: Header=BB29_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB29_1; ; SM90-NEXT: $L__BB29_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -1431,7 +1431,7 @@ define i8 @acq_rel_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB30_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB30_1; ; SM90-NEXT: $L__BB30_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -1478,7 +1478,7 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB31_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB31_1; ; SM90-NEXT: $L__BB31_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -1525,7 +1525,7 @@ define i8 @acq_rel_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB32_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB32_1; ; SM90-NEXT: $L__BB32_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -1572,7 +1572,7 @@ define i8 @acq_rel_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB33_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB33_1; ; SM90-NEXT: $L__BB33_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -1619,7 +1619,7 @@ define i8 @acq_rel_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB34_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB34_1; ; SM90-NEXT: $L__BB34_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -1666,7 +1666,7 @@ define i8 @acq_rel_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB35_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB35_1; ; SM90-NEXT: $L__BB35_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -1713,7 +1713,7 @@ define i8 @seq_cst_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB36_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB36_1; ; SM90-NEXT: $L__BB36_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -1760,7 +1760,7 @@ define i8 @seq_cst_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: // in Loop: Header=BB37_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB37_1; ; SM90-NEXT: $L__BB37_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -1807,7 +1807,7 @@ define i8 @seq_cst_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) ; SM90-NEXT: // in Loop: Header=BB38_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB38_1; ; SM90-NEXT: $L__BB38_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -1854,7 +1854,7 @@ define i8 @seq_cst_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB39_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB39_1; ; SM90-NEXT: $L__BB39_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -1901,7 +1901,7 @@ define i8 @seq_cst_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB40_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB40_1; ; SM90-NEXT: $L__BB40_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -1948,7 +1948,7 @@ define i8 @seq_cst_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB41_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB41_1; ; SM90-NEXT: $L__BB41_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -1995,7 +1995,7 @@ define i8 @seq_cst_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB42_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB42_1; ; SM90-NEXT: $L__BB42_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -2042,7 +2042,7 @@ define i8 @seq_cst_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB43_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB43_1; ; SM90-NEXT: $L__BB43_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -2089,7 +2089,7 @@ define i8 @seq_cst_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) { ; SM90-NEXT: // in Loop: Header=BB44_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM90-NEXT: mov.u32 %r20, %r8; +; SM90-NEXT: mov.b32 %r20, %r8; ; SM90-NEXT: @%p2 bra $L__BB44_1; ; SM90-NEXT: $L__BB44_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -2134,7 +2134,7 @@ define i16 @monotonic_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: // in Loop: Header=BB45_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB45_1; ; SM90-NEXT: $L__BB45_3: // %partword.cmpxchg.end ; SM90-NEXT: st.param.b32 [func_retval0], %r14; @@ -2178,7 +2178,7 @@ define i16 @monotonic_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 ; SM90-NEXT: // in Loop: Header=BB46_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB46_1; ; SM90-NEXT: $L__BB46_3: // %partword.cmpxchg.end ; SM90-NEXT: st.param.b32 [func_retval0], %r14; @@ -2222,7 +2222,7 @@ define i16 @monotonic_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 ; SM90-NEXT: // in Loop: Header=BB47_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB47_1; ; SM90-NEXT: $L__BB47_3: // %partword.cmpxchg.end ; SM90-NEXT: st.param.b32 [func_retval0], %r14; @@ -2266,7 +2266,7 @@ define i16 @monotonic_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: // in Loop: Header=BB48_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB48_1; ; SM90-NEXT: $L__BB48_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -2311,7 +2311,7 @@ define i16 @monotonic_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM90-NEXT: // in Loop: Header=BB49_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB49_1; ; SM90-NEXT: $L__BB49_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -2356,7 +2356,7 @@ define i16 @monotonic_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM90-NEXT: // in Loop: Header=BB50_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB50_1; ; SM90-NEXT: $L__BB50_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -2402,7 +2402,7 @@ define i16 @monotonic_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: // in Loop: Header=BB51_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB51_1; ; SM90-NEXT: $L__BB51_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -2448,7 +2448,7 @@ define i16 @monotonic_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM90-NEXT: // in Loop: Header=BB52_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB52_1; ; SM90-NEXT: $L__BB52_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -2494,7 +2494,7 @@ define i16 @monotonic_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM90-NEXT: // in Loop: Header=BB53_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB53_1; ; SM90-NEXT: $L__BB53_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -2539,7 +2539,7 @@ define i16 @acquire_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: // in Loop: Header=BB54_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB54_1; ; SM90-NEXT: $L__BB54_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -2584,7 +2584,7 @@ define i16 @acquire_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM90-NEXT: // in Loop: Header=BB55_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB55_1; ; SM90-NEXT: $L__BB55_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -2629,7 +2629,7 @@ define i16 @acquire_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM90-NEXT: // in Loop: Header=BB56_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB56_1; ; SM90-NEXT: $L__BB56_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -2674,7 +2674,7 @@ define i16 @acquire_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: // in Loop: Header=BB57_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB57_1; ; SM90-NEXT: $L__BB57_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -2719,7 +2719,7 @@ define i16 @acquire_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: // in Loop: Header=BB58_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB58_1; ; SM90-NEXT: $L__BB58_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -2764,7 +2764,7 @@ define i16 @acquire_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: // in Loop: Header=BB59_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB59_1; ; SM90-NEXT: $L__BB59_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -2810,7 +2810,7 @@ define i16 @acquire_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: // in Loop: Header=BB60_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB60_1; ; SM90-NEXT: $L__BB60_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -2856,7 +2856,7 @@ define i16 @acquire_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: // in Loop: Header=BB61_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB61_1; ; SM90-NEXT: $L__BB61_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -2902,7 +2902,7 @@ define i16 @acquire_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: // in Loop: Header=BB62_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB62_1; ; SM90-NEXT: $L__BB62_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -2948,7 +2948,7 @@ define i16 @release_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: // in Loop: Header=BB63_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB63_1; ; SM90-NEXT: $L__BB63_3: // %partword.cmpxchg.end ; SM90-NEXT: st.param.b32 [func_retval0], %r14; @@ -2993,7 +2993,7 @@ define i16 @release_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM90-NEXT: // in Loop: Header=BB64_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB64_1; ; SM90-NEXT: $L__BB64_3: // %partword.cmpxchg.end ; SM90-NEXT: st.param.b32 [func_retval0], %r14; @@ -3038,7 +3038,7 @@ define i16 @release_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM90-NEXT: // in Loop: Header=BB65_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB65_1; ; SM90-NEXT: $L__BB65_3: // %partword.cmpxchg.end ; SM90-NEXT: st.param.b32 [func_retval0], %r14; @@ -3083,7 +3083,7 @@ define i16 @release_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: // in Loop: Header=BB66_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB66_1; ; SM90-NEXT: $L__BB66_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -3129,7 +3129,7 @@ define i16 @release_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: // in Loop: Header=BB67_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB67_1; ; SM90-NEXT: $L__BB67_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -3175,7 +3175,7 @@ define i16 @release_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: // in Loop: Header=BB68_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB68_1; ; SM90-NEXT: $L__BB68_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -3221,7 +3221,7 @@ define i16 @release_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: // in Loop: Header=BB69_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB69_1; ; SM90-NEXT: $L__BB69_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -3267,7 +3267,7 @@ define i16 @release_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: // in Loop: Header=BB70_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB70_1; ; SM90-NEXT: $L__BB70_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -3313,7 +3313,7 @@ define i16 @release_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: // in Loop: Header=BB71_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB71_1; ; SM90-NEXT: $L__BB71_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -3359,7 +3359,7 @@ define i16 @acq_rel_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: // in Loop: Header=BB72_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB72_1; ; SM90-NEXT: $L__BB72_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -3405,7 +3405,7 @@ define i16 @acq_rel_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM90-NEXT: // in Loop: Header=BB73_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB73_1; ; SM90-NEXT: $L__BB73_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -3451,7 +3451,7 @@ define i16 @acq_rel_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM90-NEXT: // in Loop: Header=BB74_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB74_1; ; SM90-NEXT: $L__BB74_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -3497,7 +3497,7 @@ define i16 @acq_rel_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: // in Loop: Header=BB75_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB75_1; ; SM90-NEXT: $L__BB75_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -3543,7 +3543,7 @@ define i16 @acq_rel_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: // in Loop: Header=BB76_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB76_1; ; SM90-NEXT: $L__BB76_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -3589,7 +3589,7 @@ define i16 @acq_rel_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: // in Loop: Header=BB77_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB77_1; ; SM90-NEXT: $L__BB77_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -3635,7 +3635,7 @@ define i16 @acq_rel_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: // in Loop: Header=BB78_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB78_1; ; SM90-NEXT: $L__BB78_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -3681,7 +3681,7 @@ define i16 @acq_rel_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: // in Loop: Header=BB79_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB79_1; ; SM90-NEXT: $L__BB79_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -3727,7 +3727,7 @@ define i16 @acq_rel_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: // in Loop: Header=BB80_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB80_1; ; SM90-NEXT: $L__BB80_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -3773,7 +3773,7 @@ define i16 @seq_cst_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: // in Loop: Header=BB81_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB81_1; ; SM90-NEXT: $L__BB81_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -3819,7 +3819,7 @@ define i16 @seq_cst_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 % ; SM90-NEXT: // in Loop: Header=BB82_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB82_1; ; SM90-NEXT: $L__BB82_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -3865,7 +3865,7 @@ define i16 @seq_cst_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 % ; SM90-NEXT: // in Loop: Header=BB83_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB83_1; ; SM90-NEXT: $L__BB83_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -3911,7 +3911,7 @@ define i16 @seq_cst_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: // in Loop: Header=BB84_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB84_1; ; SM90-NEXT: $L__BB84_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -3957,7 +3957,7 @@ define i16 @seq_cst_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: // in Loop: Header=BB85_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB85_1; ; SM90-NEXT: $L__BB85_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -4003,7 +4003,7 @@ define i16 @seq_cst_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: // in Loop: Header=BB86_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB86_1; ; SM90-NEXT: $L__BB86_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -4049,7 +4049,7 @@ define i16 @seq_cst_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) { ; SM90-NEXT: // in Loop: Header=BB87_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB87_1; ; SM90-NEXT: $L__BB87_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -4095,7 +4095,7 @@ define i16 @seq_cst_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: // in Loop: Header=BB88_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB88_1; ; SM90-NEXT: $L__BB88_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; @@ -4141,7 +4141,7 @@ define i16 @seq_cst_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne ; SM90-NEXT: // in Loop: Header=BB89_1 Depth=1 ; SM90-NEXT: and.b32 %r8, %r7, %r2; ; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM90-NEXT: mov.u32 %r19, %r8; +; SM90-NEXT: mov.b32 %r19, %r8; ; SM90-NEXT: @%p2 bra $L__BB89_1; ; SM90-NEXT: $L__BB89_3: // %partword.cmpxchg.end ; SM90-NEXT: fence.acquire.sys; diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg.ll b/llvm/test/CodeGen/NVPTX/cmpxchg.ll index aaea0d2ee25ef..e5f05e49d2fef 100644 --- a/llvm/test/CodeGen/NVPTX/cmpxchg.ll +++ b/llvm/test/CodeGen/NVPTX/cmpxchg.ll @@ -45,7 +45,7 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM30-NEXT: // in Loop: Header=BB0_1 Depth=1 ; SM30-NEXT: and.b32 %r8, %r7, %r2; ; SM30-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM30-NEXT: mov.u32 %r20, %r8; +; SM30-NEXT: mov.b32 %r20, %r8; ; SM30-NEXT: @%p2 bra $L__BB0_1; ; SM30-NEXT: $L__BB0_3: // %partword.cmpxchg.end ; SM30-NEXT: st.param.b32 [func_retval0], %r13; @@ -86,7 +86,7 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB0_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB0_1; ; SM70-NEXT: $L__BB0_3: // %partword.cmpxchg.end ; SM70-NEXT: st.param.b32 [func_retval0], %r13; @@ -171,7 +171,7 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM30-NEXT: // in Loop: Header=BB1_1 Depth=1 ; SM30-NEXT: and.b32 %r8, %r7, %r2; ; SM30-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM30-NEXT: mov.u32 %r20, %r8; +; SM30-NEXT: mov.b32 %r20, %r8; ; SM30-NEXT: @%p2 bra $L__BB1_1; ; SM30-NEXT: $L__BB1_3: // %partword.cmpxchg.end ; SM30-NEXT: membar.sys; @@ -213,7 +213,7 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB1_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB1_1; ; SM70-NEXT: $L__BB1_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -301,7 +301,7 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM30-NEXT: // in Loop: Header=BB2_1 Depth=1 ; SM30-NEXT: and.b32 %r8, %r7, %r2; ; SM30-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM30-NEXT: mov.u32 %r20, %r8; +; SM30-NEXT: mov.b32 %r20, %r8; ; SM30-NEXT: @%p2 bra $L__BB2_1; ; SM30-NEXT: $L__BB2_3: // %partword.cmpxchg.end ; SM30-NEXT: st.param.b32 [func_retval0], %r13; @@ -343,7 +343,7 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB2_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB2_1; ; SM70-NEXT: $L__BB2_3: // %partword.cmpxchg.end ; SM70-NEXT: st.param.b32 [func_retval0], %r13; @@ -430,7 +430,7 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM30-NEXT: // in Loop: Header=BB3_1 Depth=1 ; SM30-NEXT: and.b32 %r8, %r7, %r2; ; SM30-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM30-NEXT: mov.u32 %r20, %r8; +; SM30-NEXT: mov.b32 %r20, %r8; ; SM30-NEXT: @%p2 bra $L__BB3_1; ; SM30-NEXT: $L__BB3_3: // %partword.cmpxchg.end ; SM30-NEXT: membar.sys; @@ -473,7 +473,7 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB3_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB3_1; ; SM70-NEXT: $L__BB3_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -562,7 +562,7 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM30-NEXT: // in Loop: Header=BB4_1 Depth=1 ; SM30-NEXT: and.b32 %r8, %r7, %r2; ; SM30-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM30-NEXT: mov.u32 %r20, %r8; +; SM30-NEXT: mov.b32 %r20, %r8; ; SM30-NEXT: @%p2 bra $L__BB4_1; ; SM30-NEXT: $L__BB4_3: // %partword.cmpxchg.end ; SM30-NEXT: membar.sys; @@ -605,7 +605,7 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) { ; SM70-NEXT: // in Loop: Header=BB4_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8; -; SM70-NEXT: mov.u32 %r20, %r8; +; SM70-NEXT: mov.b32 %r20, %r8; ; SM70-NEXT: @%p2 bra $L__BB4_1; ; SM70-NEXT: $L__BB4_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -693,7 +693,7 @@ define i16 @relaxed_sys_i16(ptr %addr, i16 %cmp, i16 %new) { ; SM30-NEXT: // in Loop: Header=BB5_1 Depth=1 ; SM30-NEXT: and.b32 %r8, %r7, %r2; ; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM30-NEXT: mov.u32 %r19, %r8; +; SM30-NEXT: mov.b32 %r19, %r8; ; SM30-NEXT: @%p2 bra $L__BB5_1; ; SM30-NEXT: $L__BB5_3: // %partword.cmpxchg.end ; SM30-NEXT: st.param.b32 [func_retval0], %r14; @@ -733,7 +733,7 @@ define i16 @relaxed_sys_i16(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: // in Loop: Header=BB5_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB5_1; ; SM70-NEXT: $L__BB5_3: // %partword.cmpxchg.end ; SM70-NEXT: st.param.b32 [func_retval0], %r14; @@ -816,7 +816,7 @@ define i16 @acquire_sys_i16(ptr %addr, i16 %cmp, i16 %new) { ; SM30-NEXT: // in Loop: Header=BB6_1 Depth=1 ; SM30-NEXT: and.b32 %r8, %r7, %r2; ; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM30-NEXT: mov.u32 %r19, %r8; +; SM30-NEXT: mov.b32 %r19, %r8; ; SM30-NEXT: @%p2 bra $L__BB6_1; ; SM30-NEXT: $L__BB6_3: // %partword.cmpxchg.end ; SM30-NEXT: membar.sys; @@ -857,7 +857,7 @@ define i16 @acquire_sys_i16(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: // in Loop: Header=BB6_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB6_1; ; SM70-NEXT: $L__BB6_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -943,7 +943,7 @@ define i16 @release_sys_i16(ptr %addr, i16 %cmp, i16 %new) { ; SM30-NEXT: // in Loop: Header=BB7_1 Depth=1 ; SM30-NEXT: and.b32 %r8, %r7, %r2; ; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM30-NEXT: mov.u32 %r19, %r8; +; SM30-NEXT: mov.b32 %r19, %r8; ; SM30-NEXT: @%p2 bra $L__BB7_1; ; SM30-NEXT: $L__BB7_3: // %partword.cmpxchg.end ; SM30-NEXT: st.param.b32 [func_retval0], %r14; @@ -984,7 +984,7 @@ define i16 @release_sys_i16(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: // in Loop: Header=BB7_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB7_1; ; SM70-NEXT: $L__BB7_3: // %partword.cmpxchg.end ; SM70-NEXT: st.param.b32 [func_retval0], %r14; @@ -1069,7 +1069,7 @@ define i16 @acq_rel_sys_i16(ptr %addr, i16 %cmp, i16 %new) { ; SM30-NEXT: // in Loop: Header=BB8_1 Depth=1 ; SM30-NEXT: and.b32 %r8, %r7, %r2; ; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM30-NEXT: mov.u32 %r19, %r8; +; SM30-NEXT: mov.b32 %r19, %r8; ; SM30-NEXT: @%p2 bra $L__BB8_1; ; SM30-NEXT: $L__BB8_3: // %partword.cmpxchg.end ; SM30-NEXT: membar.sys; @@ -1111,7 +1111,7 @@ define i16 @acq_rel_sys_i16(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: // in Loop: Header=BB8_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB8_1; ; SM70-NEXT: $L__BB8_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; @@ -1199,7 +1199,7 @@ define i16 @seq_cst_sys_i16(ptr %addr, i16 %cmp, i16 %new) { ; SM30-NEXT: // in Loop: Header=BB9_1 Depth=1 ; SM30-NEXT: and.b32 %r8, %r7, %r2; ; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM30-NEXT: mov.u32 %r19, %r8; +; SM30-NEXT: mov.b32 %r19, %r8; ; SM30-NEXT: @%p2 bra $L__BB9_1; ; SM30-NEXT: $L__BB9_3: // %partword.cmpxchg.end ; SM30-NEXT: membar.sys; @@ -1241,7 +1241,7 @@ define i16 @seq_cst_sys_i16(ptr %addr, i16 %cmp, i16 %new) { ; SM70-NEXT: // in Loop: Header=BB9_1 Depth=1 ; SM70-NEXT: and.b32 %r8, %r7, %r2; ; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8; -; SM70-NEXT: mov.u32 %r19, %r8; +; SM70-NEXT: mov.b32 %r19, %r8; ; SM70-NEXT: @%p2 bra $L__BB9_1; ; SM70-NEXT: $L__BB9_3: // %partword.cmpxchg.end ; SM70-NEXT: fence.acq_rel.sys; diff --git a/llvm/test/CodeGen/NVPTX/disjoint-or-addr.ll b/llvm/test/CodeGen/NVPTX/disjoint-or-addr.ll index 1b1bb91d5c79e..b0e2082621bff 100644 --- a/llvm/test/CodeGen/NVPTX/disjoint-or-addr.ll +++ b/llvm/test/CodeGen/NVPTX/disjoint-or-addr.ll @@ -12,7 +12,7 @@ define i32 @test_disjoint_or_addr(i16 %a) { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.u64 %rd1, a; +; CHECK-NEXT: mov.b64 %rd1, a; ; CHECK-NEXT: cvta.global.u64 %rd2, %rd1; ; CHECK-NEXT: ld.u32 %r1, [%rd2+8]; ; CHECK-NEXT: st.param.b32 [func_retval0], %r1; diff --git a/llvm/test/CodeGen/NVPTX/div.ll b/llvm/test/CodeGen/NVPTX/div.ll index 3d14d36ed599b..4f9d58758ca9e 100644 --- a/llvm/test/CodeGen/NVPTX/div.ll +++ b/llvm/test/CodeGen/NVPTX/div.ll @@ -11,10 +11,10 @@ define float @div_full(float %a, float %b) { ; CHECK-NEXT: ld.param.f32 %f1, [div_full_param_0]; ; CHECK-NEXT: ld.param.f32 %f2, [div_full_param_1]; ; CHECK-NEXT: div.full.f32 %f3, %f1, %f2; -; CHECK-NEXT: mov.f32 %f4, 0f40400000; +; CHECK-NEXT: mov.b32 %f4, 0f40400000; ; CHECK-NEXT: div.full.f32 %f5, %f3, %f4; ; CHECK-NEXT: div.full.ftz.f32 %f6, %f5, %f2; -; CHECK-NEXT: mov.f32 %f7, 0f40800000; +; CHECK-NEXT: mov.b32 %f7, 0f40800000; ; CHECK-NEXT: div.full.ftz.f32 %f8, %f6, %f7; ; CHECK-NEXT: st.param.f32 [func_retval0], %f8; ; CHECK-NEXT: ret; diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll index f78cfc3172621..70d1167bbb6e2 100644 --- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll @@ -138,7 +138,7 @@ define half @test_fsub(half %a, half %b) #0 { ; CHECK-F16-FTZ-NEXT: mov.b16 [[Z:%rs[0-9]+]], 0x0000 ; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%rs[0-9]+]], [[Z]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]] -; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000; +; CHECK-NOF16-DAG: mov.b32 [[Z:%f[0-9]+]], 0f00000000; ; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[Z]], [[A32]]; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK-NEXT: st.param.b16 [func_retval0], [[R]]; @@ -646,7 +646,7 @@ else: ; CHECK: ld.param.u64 %[[P1:rd[0-9]+]], [test_phi_param_0]; ; CHECK: ld.b16 {{%rs[0-9]+}}, [%[[P1]]]; ; CHECK: [[LOOP:\$L__BB[0-9_]+]]: -; CHECK: mov.u16 [[R:%rs[0-9]+]], [[AB:%rs[0-9]+]]; +; CHECK: mov.b16 [[R:%rs[0-9]+]], [[AB:%rs[0-9]+]]; ; CHECK: ld.b16 [[AB:%rs[0-9]+]], [%[[P1]]]; ; CHECK: { ; CHECK: st.param.b64 [param0], %[[P1]]; diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll index 1905fec8ab7a8..539e810c83cbd 100644 --- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll @@ -260,7 +260,7 @@ define <2 x half> @test_fneg(<2 x half> %a) #0 { ; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fneg_param_0]; ; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2; -; CHECK-NOF16-NEXT: mov.f32 %f2, 0f00000000; +; CHECK-NOF16-NEXT: mov.b32 %f2, 0f00000000; ; CHECK-NOF16-NEXT: sub.rn.f32 %f3, %f2, %f1; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %f3; ; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs1; diff --git a/llvm/test/CodeGen/NVPTX/fma.ll b/llvm/test/CodeGen/NVPTX/fma.ll index 3416420367beb..90fbd5ba9dfd6 100644 --- a/llvm/test/CodeGen/NVPTX/fma.ll +++ b/llvm/test/CodeGen/NVPTX/fma.ll @@ -50,7 +50,7 @@ define ptx_device float @f32_iir(float %x) { } define ptx_device float @f32_iii(float %x) { -; CHECK: mov.f32 %f{{[0-9]+}}, 0f41200000; +; CHECK: mov.b32 %f{{[0-9]+}}, 0f41200000; ; CHECK: ret; %r = call float @llvm.fma.f32(float 2.0, float 3.0, float 4.0) ret float %r diff --git a/llvm/test/CodeGen/NVPTX/i128.ll b/llvm/test/CodeGen/NVPTX/i128.ll index ca1b5fdabbf8f..546700c2b0335 100644 --- a/llvm/test/CodeGen/NVPTX/i128.ll +++ b/llvm/test/CodeGen/NVPTX/i128.ll @@ -77,7 +77,7 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: setp.gt.s32 %p16, %r10, 63; ; CHECK-NEXT: selp.b64 %rd124, %rd76, %rd75, %p16; ; CHECK-NEXT: shl.b64 %rd123, %rd3, %r10; -; CHECK-NEXT: mov.u64 %rd114, %rd117; +; CHECK-NEXT: mov.b64 %rd114, %rd117; ; CHECK-NEXT: @%p15 bra $L__BB0_4; ; CHECK-NEXT: // %bb.1: // %udiv-preheader ; CHECK-NEXT: cvt.u32.u64 %r13, %rd119; @@ -93,7 +93,7 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: add.cc.s64 %rd35, %rd5, -1; ; CHECK-NEXT: addc.cc.s64 %rd36, %rd6, -1; ; CHECK-NEXT: mov.b64 %rd114, 0; -; CHECK-NEXT: mov.u64 %rd117, %rd114; +; CHECK-NEXT: mov.b64 %rd117, %rd114; ; CHECK-NEXT: $L__BB0_2: // %udiv-do-while ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: shr.u64 %rd83, %rd121, 63; @@ -210,7 +210,7 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: setp.gt.s32 %p14, %r10, 63; ; CHECK-NEXT: selp.b64 %rd110, %rd66, %rd65, %p14; ; CHECK-NEXT: shl.b64 %rd109, %rd41, %r10; -; CHECK-NEXT: mov.u64 %rd100, %rd103; +; CHECK-NEXT: mov.b64 %rd100, %rd103; ; CHECK-NEXT: @%p13 bra $L__BB1_4; ; CHECK-NEXT: // %bb.1: // %udiv-preheader ; CHECK-NEXT: cvt.u32.u64 %r13, %rd105; @@ -226,7 +226,7 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: add.cc.s64 %rd33, %rd3, -1; ; CHECK-NEXT: addc.cc.s64 %rd34, %rd4, -1; ; CHECK-NEXT: mov.b64 %rd100, 0; -; CHECK-NEXT: mov.u64 %rd103, %rd100; +; CHECK-NEXT: mov.b64 %rd103, %rd100; ; CHECK-NEXT: $L__BB1_2: // %udiv-do-while ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: shr.u64 %rd73, %rd107, 63; @@ -386,7 +386,7 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: setp.gt.s32 %p16, %r10, 63; ; CHECK-NEXT: selp.b64 %rd119, %rd77, %rd76, %p16; ; CHECK-NEXT: shl.b64 %rd118, %rd1, %r10; -; CHECK-NEXT: mov.u64 %rd109, %rd112; +; CHECK-NEXT: mov.b64 %rd109, %rd112; ; CHECK-NEXT: @%p15 bra $L__BB4_4; ; CHECK-NEXT: // %bb.1: // %udiv-preheader ; CHECK-NEXT: cvt.u32.u64 %r13, %rd114; @@ -402,7 +402,7 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: add.cc.s64 %rd35, %rd3, -1; ; CHECK-NEXT: addc.cc.s64 %rd36, %rd4, -1; ; CHECK-NEXT: mov.b64 %rd109, 0; -; CHECK-NEXT: mov.u64 %rd112, %rd109; +; CHECK-NEXT: mov.b64 %rd112, %rd109; ; CHECK-NEXT: $L__BB4_2: // %udiv-do-while ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: shr.u64 %rd84, %rd116, 63; @@ -513,7 +513,7 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: setp.gt.s32 %p14, %r10, 63; ; CHECK-NEXT: selp.b64 %rd104, %rd66, %rd65, %p14; ; CHECK-NEXT: shl.b64 %rd103, %rd41, %r10; -; CHECK-NEXT: mov.u64 %rd94, %rd97; +; CHECK-NEXT: mov.b64 %rd94, %rd97; ; CHECK-NEXT: @%p13 bra $L__BB5_4; ; CHECK-NEXT: // %bb.1: // %udiv-preheader ; CHECK-NEXT: cvt.u32.u64 %r13, %rd99; @@ -529,7 +529,7 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: add.cc.s64 %rd33, %rd43, -1; ; CHECK-NEXT: addc.cc.s64 %rd34, %rd44, -1; ; CHECK-NEXT: mov.b64 %rd94, 0; -; CHECK-NEXT: mov.u64 %rd97, %rd94; +; CHECK-NEXT: mov.b64 %rd97, %rd94; ; CHECK-NEXT: $L__BB5_2: // %udiv-do-while ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: shr.u64 %rd73, %rd101, 63; diff --git a/llvm/test/CodeGen/NVPTX/indirect_byval.ll b/llvm/test/CodeGen/NVPTX/indirect_byval.ll index 3ae6300d8767d..4509fcfd1a9bc 100644 --- a/llvm/test/CodeGen/NVPTX/indirect_byval.ll +++ b/llvm/test/CodeGen/NVPTX/indirect_byval.ll @@ -20,7 +20,7 @@ define internal i32 @foo() { ; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: mov.u64 %SPL, __local_depot0; +; CHECK-NEXT: mov.b64 %SPL, __local_depot0; ; CHECK-NEXT: cvta.local.u64 %SP, %SPL; ; CHECK-NEXT: ld.global.u64 %rd1, [ptr]; ; CHECK-NEXT: add.u64 %rd3, %SPL, 1; @@ -63,7 +63,7 @@ define internal i32 @bar() { ; CHECK-NEXT: .reg .b64 %rd<6>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: mov.u64 %SPL, __local_depot1; +; CHECK-NEXT: mov.b64 %SPL, __local_depot1; ; CHECK-NEXT: cvta.local.u64 %SP, %SPL; ; CHECK-NEXT: ld.global.u64 %rd1, [ptr]; ; CHECK-NEXT: add.u64 %rd3, %SPL, 8; diff --git a/llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll b/llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll index eaf6cf59dd066..311741f737adc 100644 --- a/llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll +++ b/llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll @@ -16,7 +16,7 @@ define void @test_b128_input_from_const() { ; CHECK-NEXT: mov.b64 %rd2, 0; ; CHECK-NEXT: mov.b64 %rd3, 42; ; CHECK-NEXT: mov.b128 %rq1, {%rd3, %rd2}; -; CHECK-NEXT: mov.u64 %rd4, value; +; CHECK-NEXT: mov.b64 %rd4, value; ; CHECK-NEXT: cvta.global.u64 %rd1, %rd4; ; CHECK-NEXT: // begin inline asm ; CHECK-NEXT: { st.b128 [%rd1], %rq1; } @@ -38,7 +38,7 @@ define void @test_b128_input_from_load(ptr nocapture readonly %data) { ; CHECK-NEXT: ld.global.u64 %rd4, [%rd3+8]; ; CHECK-NEXT: ld.global.u64 %rd5, [%rd3]; ; CHECK-NEXT: mov.b128 %rq1, {%rd5, %rd4}; -; CHECK-NEXT: mov.u64 %rd6, value; +; CHECK-NEXT: mov.b64 %rd6, value; ; CHECK-NEXT: cvta.global.u64 %rd1, %rd6; ; CHECK-NEXT: // begin inline asm ; CHECK-NEXT: { st.b128 [%rd1], %rq1; } @@ -67,7 +67,7 @@ define void @test_b128_input_from_select(ptr nocapture readonly %flag) { ; CHECK-NEXT: selp.b64 %rd4, 24, 42, %p1; ; CHECK-NEXT: mov.b64 %rd5, 0; ; CHECK-NEXT: mov.b128 %rq1, {%rd4, %rd5}; -; CHECK-NEXT: mov.u64 %rd6, value; +; CHECK-NEXT: mov.b64 %rd6, value; ; CHECK-NEXT: cvta.global.u64 %rd1, %rd6; ; CHECK-NEXT: // begin inline asm ; CHECK-NEXT: { st.b128 [%rd1], %rq1; } diff --git a/llvm/test/CodeGen/NVPTX/inline-asm-b128-test2.ll b/llvm/test/CodeGen/NVPTX/inline-asm-b128-test2.ll index 8441c30e4c4d4..8ca863bba5f4a 100644 --- a/llvm/test/CodeGen/NVPTX/inline-asm-b128-test2.ll +++ b/llvm/test/CodeGen/NVPTX/inline-asm-b128-test2.ll @@ -25,7 +25,7 @@ define void @test_corner_values() { ; CHECK-NEXT: add.s64 %rd2, %rd1, 8; ; CHECK-NEXT: mov.b64 %rd13, -1; ; CHECK-NEXT: mov.b128 %rq1, {%rd13, %rd13}; -; CHECK-NEXT: mov.u64 %rd14, v_u128_max; +; CHECK-NEXT: mov.b64 %rd14, v_u128_max; ; CHECK-NEXT: cvta.global.u64 %rd3, %rd14; ; CHECK-NEXT: // begin inline asm ; CHECK-NEXT: { @@ -42,7 +42,7 @@ define void @test_corner_values() { ; CHECK-NEXT: add.s64 %rd5, %rd15, 24; ; CHECK-NEXT: mov.b64 %rd16, 9223372036854775807; ; CHECK-NEXT: mov.b128 %rq2, {%rd13, %rd16}; -; CHECK-NEXT: mov.u64 %rd17, v_i128_max; +; CHECK-NEXT: mov.b64 %rd17, v_i128_max; ; CHECK-NEXT: cvta.global.u64 %rd6, %rd17; ; CHECK-NEXT: // begin inline asm ; CHECK-NEXT: { @@ -60,7 +60,7 @@ define void @test_corner_values() { ; CHECK-NEXT: mov.b64 %rd19, -9223372036854775808; ; CHECK-NEXT: mov.b64 %rd20, 0; ; CHECK-NEXT: mov.b128 %rq3, {%rd20, %rd19}; -; CHECK-NEXT: mov.u64 %rd21, v_i128_min; +; CHECK-NEXT: mov.b64 %rd21, v_i128_min; ; CHECK-NEXT: cvta.global.u64 %rd9, %rd21; ; CHECK-NEXT: // begin inline asm ; CHECK-NEXT: { @@ -76,7 +76,7 @@ define void @test_corner_values() { ; CHECK-NEXT: add.s64 %rd10, %rd22, 48; ; CHECK-NEXT: add.s64 %rd11, %rd22, 56; ; CHECK-NEXT: mov.b128 %rq4, {%rd20, %rd20}; -; CHECK-NEXT: mov.u64 %rd23, v_u128_zero; +; CHECK-NEXT: mov.b64 %rd23, v_u128_zero; ; CHECK-NEXT: cvta.global.u64 %rd12, %rd23; ; CHECK-NEXT: // begin inline asm ; CHECK-NEXT: { diff --git a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll index 3523ffe6ae3ca..f49053485fa29 100644 --- a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll +++ b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll @@ -15,7 +15,7 @@ define void @foo(i32 %a) { ; PTX32-NEXT: .reg .b32 %r<4>; ; PTX32-EMPTY: ; PTX32-NEXT: // %bb.0: -; PTX32-NEXT: mov.u32 %SPL, __local_depot0; +; PTX32-NEXT: mov.b32 %SPL, __local_depot0; ; PTX32-NEXT: ld.param.u32 %r1, [foo_param_0]; ; PTX32-NEXT: add.u32 %r3, %SPL, 0; ; PTX32-NEXT: st.local.u32 [%r3], %r1; @@ -30,7 +30,7 @@ define void @foo(i32 %a) { ; PTX64-NEXT: .reg .b64 %rd<3>; ; PTX64-EMPTY: ; PTX64-NEXT: // %bb.0: -; PTX64-NEXT: mov.u64 %SPL, __local_depot0; +; PTX64-NEXT: mov.b64 %SPL, __local_depot0; ; PTX64-NEXT: ld.param.u32 %r1, [foo_param_0]; ; PTX64-NEXT: add.u64 %rd2, %SPL, 0; ; PTX64-NEXT: st.local.u32 [%rd2], %r1; @@ -49,7 +49,7 @@ define ptx_kernel void @foo2(i32 %a) { ; PTX32-NEXT: .reg .b32 %r<4>; ; PTX32-EMPTY: ; PTX32-NEXT: // %bb.0: -; PTX32-NEXT: mov.u32 %SPL, __local_depot1; +; PTX32-NEXT: mov.b32 %SPL, __local_depot1; ; PTX32-NEXT: cvta.local.u32 %SP, %SPL; ; PTX32-NEXT: ld.param.u32 %r1, [foo2_param_0]; ; PTX32-NEXT: add.u32 %r2, %SP, 0; @@ -75,7 +75,7 @@ define ptx_kernel void @foo2(i32 %a) { ; PTX64-NEXT: .reg .b64 %rd<3>; ; PTX64-EMPTY: ; PTX64-NEXT: // %bb.0: -; PTX64-NEXT: mov.u64 %SPL, __local_depot1; +; PTX64-NEXT: mov.b64 %SPL, __local_depot1; ; PTX64-NEXT: cvta.local.u64 %SP, %SPL; ; PTX64-NEXT: ld.param.u32 %r1, [foo2_param_0]; ; PTX64-NEXT: add.u64 %rd1, %SP, 0; @@ -108,7 +108,7 @@ define void @foo3(i32 %a) { ; PTX32-NEXT: .reg .b32 %r<6>; ; PTX32-EMPTY: ; PTX32-NEXT: // %bb.0: -; PTX32-NEXT: mov.u32 %SPL, __local_depot2; +; PTX32-NEXT: mov.b32 %SPL, __local_depot2; ; PTX32-NEXT: ld.param.u32 %r1, [foo3_param_0]; ; PTX32-NEXT: add.u32 %r3, %SPL, 0; ; PTX32-NEXT: shl.b32 %r4, %r1, 2; @@ -125,7 +125,7 @@ define void @foo3(i32 %a) { ; PTX64-NEXT: .reg .b64 %rd<5>; ; PTX64-EMPTY: ; PTX64-NEXT: // %bb.0: -; PTX64-NEXT: mov.u64 %SPL, __local_depot2; +; PTX64-NEXT: mov.b64 %SPL, __local_depot2; ; PTX64-NEXT: ld.param.u32 %r1, [foo3_param_0]; ; PTX64-NEXT: add.u64 %rd2, %SPL, 0; ; PTX64-NEXT: mul.wide.s32 %rd3, %r1, 4; @@ -147,7 +147,7 @@ define void @foo4() { ; PTX32-NEXT: .reg .b32 %r<6>; ; PTX32-EMPTY: ; PTX32-NEXT: // %bb.0: -; PTX32-NEXT: mov.u32 %SPL, __local_depot3; +; PTX32-NEXT: mov.b32 %SPL, __local_depot3; ; PTX32-NEXT: cvta.local.u32 %SP, %SPL; ; PTX32-NEXT: add.u32 %r1, %SP, 0; ; PTX32-NEXT: add.u32 %r2, %SPL, 0; @@ -185,7 +185,7 @@ define void @foo4() { ; PTX64-NEXT: .reg .b64 %rd<5>; ; PTX64-EMPTY: ; PTX64-NEXT: // %bb.0: -; PTX64-NEXT: mov.u64 %SPL, __local_depot3; +; PTX64-NEXT: mov.b64 %SPL, __local_depot3; ; PTX64-NEXT: cvta.local.u64 %SP, %SPL; ; PTX64-NEXT: add.u64 %rd1, %SP, 0; ; PTX64-NEXT: add.u64 %rd2, %SPL, 0; diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll index 90f9306d036cd..e4e1f40d0d8b2 100644 --- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll +++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll @@ -33,7 +33,7 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly ; PTX-NEXT: ld.param.u8 %rs1, [non_kernel_function_param_1]; ; PTX-NEXT: and.b16 %rs2, %rs1, 1; ; PTX-NEXT: setp.eq.b16 %p1, %rs2, 1; -; PTX-NEXT: mov.u64 %rd3, gi; +; PTX-NEXT: mov.b64 %rd3, gi; ; PTX-NEXT: cvta.global.u64 %rd4, %rd3; ; PTX-NEXT: selp.b64 %rd5, %rd2, %rd4, %p1; ; PTX-NEXT: ld.param.s32 %rd6, [non_kernel_function_param_2]; @@ -81,7 +81,6 @@ define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %inpu ; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]] ; OPT-NEXT: store i32 [[ADD]], ptr [[OUT3]], align 4 ; OPT-NEXT: ret void -; %tmp = load i32, ptr %input1, align 4 %add = add i32 %tmp, %input2 store i32 %add, ptr %out @@ -116,7 +115,6 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p ; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]] ; OPT-NEXT: store i32 [[ADD]], ptr [[OUT5]], align 4 ; OPT-NEXT: ret void -; %gep1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0 %gep2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1 %int1 = load i32, ptr %gep1 @@ -134,9 +132,9 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) { ; PTX-EMPTY: ; PTX-NEXT: // %bb.0: ; PTX-NEXT: mov.b64 %rd2, grid_const_escape_param_0; -; PTX-NEXT: mov.u64 %rd3, %rd2; +; PTX-NEXT: mov.b64 %rd3, %rd2; ; PTX-NEXT: cvta.param.u64 %rd4, %rd3; -; PTX-NEXT: mov.u64 %rd1, escape; +; PTX-NEXT: mov.b64 %rd1, escape; ; PTX-NEXT: { // callseq 0, 0 ; PTX-NEXT: .param .b64 param0; ; PTX-NEXT: st.param.b64 [param0], %rd4; @@ -157,7 +155,6 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) { ; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]]) ; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]]) ; OPT-NEXT: ret void -; %call = call i32 @escape(ptr %input) ret void } @@ -172,19 +169,19 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 ; PTX-NEXT: .reg .b64 %rd<10>; ; PTX-EMPTY: ; PTX-NEXT: // %bb.0: -; PTX-NEXT: mov.u64 %SPL, __local_depot4; +; PTX-NEXT: mov.b64 %SPL, __local_depot4; ; PTX-NEXT: cvta.local.u64 %SP, %SPL; ; PTX-NEXT: mov.b64 %rd2, multiple_grid_const_escape_param_0; ; PTX-NEXT: mov.b64 %rd3, multiple_grid_const_escape_param_2; -; PTX-NEXT: mov.u64 %rd4, %rd3; +; PTX-NEXT: mov.b64 %rd4, %rd3; ; PTX-NEXT: ld.param.u32 %r1, [multiple_grid_const_escape_param_1]; ; PTX-NEXT: cvta.param.u64 %rd5, %rd4; -; PTX-NEXT: mov.u64 %rd6, %rd2; +; PTX-NEXT: mov.b64 %rd6, %rd2; ; PTX-NEXT: cvta.param.u64 %rd7, %rd6; ; PTX-NEXT: add.u64 %rd8, %SP, 0; ; PTX-NEXT: add.u64 %rd9, %SPL, 0; ; PTX-NEXT: st.local.u32 [%rd9], %r1; -; PTX-NEXT: mov.u64 %rd1, escape3; +; PTX-NEXT: mov.b64 %rd1, escape3; ; PTX-NEXT: { // callseq 1, 0 ; PTX-NEXT: .param .b64 param0; ; PTX-NEXT: st.param.b64 [param0], %rd7; @@ -215,7 +212,6 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 ; OPT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 ; OPT-NEXT: [[CALL:%.*]] = call i32 @escape3(ptr [[INPUT_PARAM_GEN]], ptr [[A_ADDR]], ptr [[B_PARAM_GEN]]) ; OPT-NEXT: ret void -; %a.addr = alloca i32, align 4 store i32 %a, ptr %a.addr, align 4 %call = call i32 @escape3(ptr %input, ptr %a.addr, ptr %b) @@ -231,7 +227,7 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i ; PTX-NEXT: mov.b64 %rd1, grid_const_memory_escape_param_0; ; PTX-NEXT: ld.param.u64 %rd2, [grid_const_memory_escape_param_1]; ; PTX-NEXT: cvta.to.global.u64 %rd3, %rd2; -; PTX-NEXT: mov.u64 %rd4, %rd1; +; PTX-NEXT: mov.b64 %rd4, %rd1; ; PTX-NEXT: cvta.param.u64 %rd5, %rd4; ; PTX-NEXT: st.global.u64 [%rd3], %rd5; ; PTX-NEXT: ret; @@ -243,7 +239,6 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i ; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]]) ; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR5]], align 8 ; OPT-NEXT: ret void -; store ptr %input, ptr %addr, align 8 ret void } @@ -257,7 +252,7 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 ; PTX-NEXT: mov.b64 %rd4, grid_const_inlineasm_escape_param_0; ; PTX-NEXT: ld.param.u64 %rd5, [grid_const_inlineasm_escape_param_1]; ; PTX-NEXT: cvta.to.global.u64 %rd6, %rd5; -; PTX-NEXT: mov.u64 %rd7, %rd4; +; PTX-NEXT: mov.b64 %rd7, %rd4; ; PTX-NEXT: cvta.param.u64 %rd2, %rd7; ; PTX-NEXT: add.s64 %rd3, %rd2, 4; ; PTX-NEXT: // begin inline asm @@ -277,7 +272,6 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 ; OPT-NEXT: [[TMP2:%.*]] = call i64 asm "add.s64 $0, $1, $2 ; OPT-NEXT: store i64 [[TMP2]], ptr [[RESULT5]], align 8 ; OPT-NEXT: ret void -; %tmpptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0 %tmpptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1 %1 = call i64 asm "add.s64 $0, $1, $2;", "=l,l,l"(ptr %tmpptr1, ptr %tmpptr2) #1 @@ -295,12 +289,12 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou ; PTX-NEXT: mov.b64 %rd2, grid_const_partial_escape_param_0; ; PTX-NEXT: ld.param.u64 %rd3, [grid_const_partial_escape_param_1]; ; PTX-NEXT: cvta.to.global.u64 %rd4, %rd3; -; PTX-NEXT: mov.u64 %rd5, %rd2; +; PTX-NEXT: mov.b64 %rd5, %rd2; ; PTX-NEXT: cvta.param.u64 %rd6, %rd5; ; PTX-NEXT: ld.u32 %r1, [%rd6]; ; PTX-NEXT: add.s32 %r2, %r1, %r1; ; PTX-NEXT: st.global.u32 [%rd4], %r2; -; PTX-NEXT: mov.u64 %rd1, escape; +; PTX-NEXT: mov.b64 %rd1, escape; ; PTX-NEXT: { // callseq 2, 0 ; PTX-NEXT: .param .b64 param0; ; PTX-NEXT: st.param.b64 [param0], %rd6; @@ -326,7 +320,6 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou ; OPT-NEXT: store i32 [[TWICE]], ptr [[OUTPUT5]], align 4 ; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT1_GEN]]) ; OPT-NEXT: ret void -; %val = load i32, ptr %input %twice = add i32 %val, %val store i32 %twice, ptr %output @@ -344,13 +337,13 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ; PTX-NEXT: mov.b64 %rd2, grid_const_partial_escapemem_param_0; ; PTX-NEXT: ld.param.u64 %rd3, [grid_const_partial_escapemem_param_1]; ; PTX-NEXT: cvta.to.global.u64 %rd4, %rd3; -; PTX-NEXT: mov.u64 %rd5, %rd2; +; PTX-NEXT: mov.b64 %rd5, %rd2; ; PTX-NEXT: cvta.param.u64 %rd6, %rd5; ; PTX-NEXT: ld.u32 %r1, [%rd6]; ; PTX-NEXT: ld.u32 %r2, [%rd6+4]; ; PTX-NEXT: st.global.u64 [%rd4], %rd6; ; PTX-NEXT: add.s32 %r3, %r1, %r2; -; PTX-NEXT: mov.u64 %rd1, escape; +; PTX-NEXT: mov.b64 %rd1, escape; ; PTX-NEXT: { // callseq 3, 0 ; PTX-NEXT: .param .b64 param0; ; PTX-NEXT: st.param.b64 [param0], %rd6; @@ -380,7 +373,6 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ; OPT-NEXT: [[ADD:%.*]] = add i32 [[VAL1]], [[VAL2]] ; OPT-NEXT: [[CALL2:%.*]] = call i32 @escape(ptr [[PTR1]]) ; OPT-NEXT: ret i32 [[ADD]] -; %ptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0 %val1 = load i32, ptr %ptr1 %ptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1 @@ -402,7 +394,7 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr ; PTX-NEXT: mov.b64 %rd5, grid_const_phi_param_0; ; PTX-NEXT: ld.param.u64 %rd6, [grid_const_phi_param_1]; ; PTX-NEXT: cvta.to.global.u64 %rd1, %rd6; -; PTX-NEXT: mov.u64 %rd7, %rd5; +; PTX-NEXT: mov.b64 %rd7, %rd5; ; PTX-NEXT: cvta.param.u64 %rd8, %rd7; ; PTX-NEXT: ld.global.u32 %r1, [%rd1]; ; PTX-NEXT: setp.lt.s32 %p1, %r1, 0; @@ -433,7 +425,6 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr ; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 ; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4 ; OPT-NEXT: ret void -; %val = load i32, ptr %inout %less = icmp slt i32 %val, 0 @@ -463,14 +454,14 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1, ; PTX-NEXT: mov.b64 %rd6, grid_const_phi_ngc_param_0; ; PTX-NEXT: ld.param.u64 %rd7, [grid_const_phi_ngc_param_2]; ; PTX-NEXT: cvta.to.global.u64 %rd1, %rd7; -; PTX-NEXT: mov.u64 %rd10, %rd6; +; PTX-NEXT: mov.b64 %rd10, %rd6; ; PTX-NEXT: cvta.param.u64 %rd11, %rd10; ; PTX-NEXT: ld.global.u32 %r1, [%rd1]; ; PTX-NEXT: setp.lt.s32 %p1, %r1, 0; ; PTX-NEXT: @%p1 bra $L__BB10_2; ; PTX-NEXT: // %bb.1: // %second ; PTX-NEXT: mov.b64 %rd8, grid_const_phi_ngc_param_1; -; PTX-NEXT: mov.u64 %rd9, %rd8; +; PTX-NEXT: mov.b64 %rd9, %rd8; ; PTX-NEXT: cvta.param.u64 %rd2, %rd9; ; PTX-NEXT: add.s64 %rd11, %rd2, 4; ; PTX-NEXT: $L__BB10_2: // %merge @@ -499,7 +490,6 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1, ; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 ; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4 ; OPT-NEXT: ret void -; %val = load i32, ptr %inout %less = icmp slt i32 %val, 0 br i1 %less, label %first, label %second @@ -529,9 +519,9 @@ define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 %input1, ptr by ; PTX-NEXT: ld.param.u64 %rd2, [grid_const_select_param_2]; ; PTX-NEXT: cvta.to.global.u64 %rd3, %rd2; ; PTX-NEXT: mov.b64 %rd4, grid_const_select_param_1; -; PTX-NEXT: mov.u64 %rd5, %rd4; +; PTX-NEXT: mov.b64 %rd5, %rd4; ; PTX-NEXT: cvta.param.u64 %rd6, %rd5; -; PTX-NEXT: mov.u64 %rd7, %rd1; +; PTX-NEXT: mov.b64 %rd7, %rd1; ; PTX-NEXT: cvta.param.u64 %rd8, %rd7; ; PTX-NEXT: ld.global.u32 %r1, [%rd3]; ; PTX-NEXT: setp.lt.s32 %p1, %r1, 0; @@ -553,7 +543,6 @@ define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 %input1, ptr by ; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 ; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4 ; OPT-NEXT: ret void -; %val = load i32, ptr %inout %less = icmp slt i32 %val, 0 %ptrnew = select i1 %less, ptr %input1, ptr %input2 @@ -570,7 +559,7 @@ define ptx_kernel i32 @grid_const_ptrtoint(ptr byval(i32) %input) { ; PTX-EMPTY: ; PTX-NEXT: // %bb.0: ; PTX-NEXT: mov.b64 %rd1, grid_const_ptrtoint_param_0; -; PTX-NEXT: mov.u64 %rd2, %rd1; +; PTX-NEXT: mov.b64 %rd2, %rd1; ; PTX-NEXT: ld.param.u32 %r1, [grid_const_ptrtoint_param_0]; ; PTX-NEXT: cvta.param.u64 %rd3, %rd2; ; PTX-NEXT: cvt.u32.u64 %r2, %rd3; @@ -585,7 +574,6 @@ define ptx_kernel i32 @grid_const_ptrtoint(ptr byval(i32) %input) { ; OPT-NEXT: [[PTRVAL:%.*]] = ptrtoint ptr [[INPUT1]] to i32 ; OPT-NEXT: [[KEEPALIVE:%.*]] = add i32 [[INPUT3]], [[PTRVAL]] ; OPT-NEXT: ret i32 [[KEEPALIVE]] -; %val = load i32, ptr %input %ptrval = ptrtoint ptr %input to i32 %keepalive = add i32 %val, %ptrval diff --git a/llvm/test/CodeGen/NVPTX/no-extra-parens.ll b/llvm/test/CodeGen/NVPTX/no-extra-parens.ll index 16c397116dc38..214e3a57f912c 100644 --- a/llvm/test/CodeGen/NVPTX/no-extra-parens.ll +++ b/llvm/test/CodeGen/NVPTX/no-extra-parens.ll @@ -9,7 +9,7 @@ declare void @str2(ptr %str) define void @str1() { entry: -;; CHECK: mov.u64 %rd{{[0-9]+}}, $str; +;; CHECK: mov.b64 %rd{{[0-9]+}}, $str; tail call void @str2(ptr addrspacecast (ptr addrspace(1) @"$str" to ptr)) ret void } diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll index 41372c531de23..885c711d31f01 100644 --- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll +++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll @@ -143,15 +143,15 @@ return: } ; SM_52: .visible .func (.param .b32 func_retval0) phi() -; SM_52: mov.f32 %[[REG:.+]], 0f00000000; +; SM_52: mov.b32 %[[REG:.+]], 0f00000000; ; SM_52-NEXT: st.param.f32 [func_retval0], %[[REG]]; ; SM_52-NEXT: ret; ; SM_70: .visible .func (.param .b32 func_retval0) phi() -; SM_70: mov.f32 %[[REG:.+]], 0f00000000; +; SM_70: mov.b32 %[[REG:.+]], 0f00000000; ; SM_70-NEXT: st.param.f32 [func_retval0], %[[REG]]; ; SM_70-NEXT: ret; ; SM_90: .visible .func (.param .b32 func_retval0) phi() -; SM_90: mov.f32 %[[REG:.+]], 0f00000000; +; SM_90: mov.b32 %[[REG:.+]], 0f00000000; ; SM_90-NEXT: st.param.f32 [func_retval0], %[[REG]]; ; SM_90-NEXT: ret; define float @phi() { diff --git a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll index da1a449c5d51f..b95a3287474c4 100644 --- a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll +++ b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll @@ -110,7 +110,7 @@ define float @check_f32() { ; PTX-DAG: ld.param.f32 [[LD:%f[0-9]+]], [retval0]; ; PTX-DAG: } // callseq {{[0-9]+}} - ; PTX-WITHOUT-DAG: mov.f32 [[PROXY:%f[0-9]+]], [[LD]]; + ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%f[0-9]+]], [[LD]]; ; PTX-WITHOUT-DAG: st.param.f32 [func_retval0], [[PROXY]]; ; PTX-WITH-DAG: st.param.f32 [func_retval0], [[LD]]; @@ -125,7 +125,7 @@ define double @check_f64() { ; PTX-DAG: ld.param.f64 [[LD:%fd[0-9]+]], [retval0]; ; PTX-DAG: } // callseq {{[0-9]+}} - ; PTX-WITHOUT-DAG: mov.f64 [[PROXY:%fd[0-9]+]], [[LD]]; + ; PTX-WITHOUT-DAG: mov.b64 [[PROXY:%fd[0-9]+]], [[LD]]; ; PTX-WITHOUT-DAG: st.param.f64 [func_retval0], [[PROXY]]; ; PTX-WITH-DAG: st.param.f64 [func_retval0], [[LD]]; @@ -173,8 +173,8 @@ define <2 x double> @check_vec_f64() { ; PTX-DAG: ld.param.v2.f64 {[[LD0:%fd[0-9]+]], [[LD1:%fd[0-9]+]]}, [retval0]; ; PTX-DAG: } // callseq {{[0-9]+}} - ; PTX-WITHOUT-DAG: mov.f64 [[PROXY0:%fd[0-9]+]], [[LD0]]; - ; PTX-WITHOUT-DAG: mov.f64 [[PROXY1:%fd[0-9]+]], [[LD1]]; + ; PTX-WITHOUT-DAG: mov.b64 [[PROXY0:%fd[0-9]+]], [[LD0]]; + ; PTX-WITHOUT-DAG: mov.b64 [[PROXY1:%fd[0-9]+]], [[LD1]]; ; PTX-WITHOUT-DAG: st.param.v2.f64 [func_retval0], {[[PROXY0]], [[PROXY1]]}; ; PTX-WITH-DAG: st.param.v2.f64 [func_retval0], {[[LD0]], [[LD1]]}; diff --git a/llvm/test/CodeGen/NVPTX/variadics-backend.ll b/llvm/test/CodeGen/NVPTX/variadics-backend.ll index 5a7e40ce898df..35db4894c1b49 100644 --- a/llvm/test/CodeGen/NVPTX/variadics-backend.ll +++ b/llvm/test/CodeGen/NVPTX/variadics-backend.ll @@ -109,7 +109,7 @@ define dso_local i32 @foo() { ; CHECK-PTX-NEXT: .reg .b64 %rd<5>; ; CHECK-PTX-EMPTY: ; CHECK-PTX-NEXT: // %bb.0: // %entry -; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot1; +; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot1; ; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL; ; CHECK-PTX-NEXT: mov.b64 %rd1, 4294967297; ; CHECK-PTX-NEXT: st.u64 [%SP], %rd1; @@ -156,7 +156,7 @@ define dso_local i32 @variadics2(i32 noundef %first, ...) { ; CHECK-PTX-NEXT: .reg .b64 %rd<9>; ; CHECK-PTX-EMPTY: ; CHECK-PTX-NEXT: // %bb.0: // %entry -; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot2; +; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot2; ; CHECK-PTX-NEXT: ld.param.u32 %r1, [variadics2_param_0]; ; CHECK-PTX-NEXT: ld.param.u64 %rd1, [variadics2_param_1]; ; CHECK-PTX-NEXT: add.u64 %rd3, %SPL, 0; @@ -217,7 +217,7 @@ define dso_local i32 @bar() { ; CHECK-PTX-NEXT: .reg .b64 %rd<5>; ; CHECK-PTX-EMPTY: ; CHECK-PTX-NEXT: // %bb.0: // %entry -; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot3; +; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot3; ; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL; ; CHECK-PTX-NEXT: add.u64 %rd2, %SPL, 0; ; CHECK-PTX-NEXT: ld.global.nc.u8 %rs1, [__const_$_bar_$_s1+7]; @@ -308,7 +308,7 @@ define dso_local i32 @baz() { ; CHECK-PTX-NEXT: .reg .b64 %rd<2>; ; CHECK-PTX-EMPTY: ; CHECK-PTX-NEXT: // %bb.0: // %entry -; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot5; +; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot5; ; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL; ; CHECK-PTX-NEXT: mov.b32 %r1, 1; ; CHECK-PTX-NEXT: st.v4.u32 [%SP], {%r1, %r1, %r1, %r1}; @@ -382,7 +382,7 @@ define dso_local void @qux() { ; CHECK-PTX-NEXT: .reg .b64 %rd<9>; ; CHECK-PTX-EMPTY: ; CHECK-PTX-NEXT: // %bb.0: // %entry -; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot7; +; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot7; ; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL; ; CHECK-PTX-NEXT: add.u64 %rd2, %SPL, 0; ; CHECK-PTX-NEXT: ld.global.nc.u64 %rd3, [__const_$_qux_$_s+8]; diff --git a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll index 31517939a4b75..8a9052c6f98f9 100644 --- a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll +++ b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll @@ -5,7 +5,7 @@ ; CHECK: .visible .func use_dbg_declare() ; CHECK: .local .align 8 .b8 __local_depot0[8]; -; CHECK: mov.u64 %SPL, __local_depot0; +; CHECK: mov.b64 %SPL, __local_depot0; ; CHECK: add.u64 %rd1, %SP, 0; ; CHECK: .loc 1 5 3 // t.c:5:3 ; CHECK: { // callseq 0, 0