Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -994,7 +994,7 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
Opc = TM.is64Bit() ? NVPTX::cvta_to_local_64 : NVPTX::cvta_to_local;
break;
case ADDRESS_SPACE_PARAM:
Opc = TM.is64Bit() ? NVPTX::IMOV64rr : NVPTX::IMOV32rr;
Opc = TM.is64Bit() ? NVPTX::IMOV64r : NVPTX::IMOV32r;
break;
}

Expand Down Expand Up @@ -2151,10 +2151,10 @@ bool NVPTXDAGToDAGISel::tryBF16ArithToFMA(SDNode *N) {
auto API = APF.bitcastToAPInt();
API = API.concat(API);
auto Const = CurDAG->getTargetConstant(API, DL, MVT::i32);
return SDValue(CurDAG->getMachineNode(NVPTX::IMOV32ri, DL, VT, Const), 0);
return SDValue(CurDAG->getMachineNode(NVPTX::IMOV32i, DL, VT, Const), 0);
}
auto Const = CurDAG->getTargetConstantFP(APF, DL, VT);
return SDValue(CurDAG->getMachineNode(NVPTX::BFMOV16ri, DL, VT, Const), 0);
return SDValue(CurDAG->getMachineNode(NVPTX::BFMOV16i, DL, VT, Const), 0);
};

switch (N->getOpcode()) {
Expand Down
14 changes: 7 additions & 7 deletions llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,22 +40,22 @@ void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,

unsigned Op;
if (DestRC == &NVPTX::Int1RegsRegClass) {
Op = NVPTX::IMOV1rr;
Op = NVPTX::IMOV1r;
} else if (DestRC == &NVPTX::Int16RegsRegClass) {
Op = NVPTX::IMOV16rr;
Op = NVPTX::MOV16r;
} else if (DestRC == &NVPTX::Int32RegsRegClass) {
Op = (SrcRC == &NVPTX::Int32RegsRegClass ? NVPTX::IMOV32rr
Op = (SrcRC == &NVPTX::Int32RegsRegClass ? NVPTX::IMOV32r
: NVPTX::BITCONVERT_32_F2I);
} else if (DestRC == &NVPTX::Int64RegsRegClass) {
Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64rr
Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64r
: NVPTX::BITCONVERT_64_F2I);
} else if (DestRC == &NVPTX::Int128RegsRegClass) {
Op = NVPTX::IMOV128rr;
Op = NVPTX::IMOV128r;
} else if (DestRC == &NVPTX::Float32RegsRegClass) {
Op = (SrcRC == &NVPTX::Float32RegsRegClass ? NVPTX::FMOV32rr
Op = (SrcRC == &NVPTX::Float32RegsRegClass ? NVPTX::FMOV32r
: NVPTX::BITCONVERT_32_I2F);
} else if (DestRC == &NVPTX::Float64RegsRegClass) {
Op = (SrcRC == &NVPTX::Float64RegsRegClass ? NVPTX::FMOV64rr
Op = (SrcRC == &NVPTX::Float64RegsRegClass ? NVPTX::FMOV64r
: NVPTX::BITCONVERT_64_I2F);
} else {
llvm_unreachable("Bad register copy");
Expand Down
89 changes: 37 additions & 52 deletions llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1945,68 +1945,53 @@ def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;

// Load a memory address into a u32 or u64 register.
def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins ADDR_base:$a),
"mov.u32 \t$dst, $a;",
"mov.b32 \t$dst, $a;",
[(set i32:$dst, (Wrapper tglobaladdr:$a))]>;
def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins ADDR_base:$a),
"mov.u64 \t$dst, $a;",
"mov.b64 \t$dst, $a;",
[(set i64:$dst, (Wrapper tglobaladdr:$a))]>;

// Get pointer to local stack.
let hasSideEffects = false in {
def MOV_DEPOT_ADDR : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num),
"mov.u32 \t$d, __local_depot$num;", []>;
"mov.b32 \t$d, __local_depot$num;", []>;
def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num),
"mov.u64 \t$d, __local_depot$num;", []>;
"mov.b64 \t$d, __local_depot$num;", []>;
}


// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
let hasSideEffects=0, isAsCheapAsAMove=1 in {
def IMOV1rr : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
"mov.pred \t$dst, $sss;", []>;
def IMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
"mov.u16 \t$dst, $sss;", []>;
def IMOV32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
"mov.u32 \t$dst, $sss;", []>;
def IMOV64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
"mov.u64 \t$dst, $sss;", []>;
def IMOV128rr : NVPTXInst<(outs Int128Regs:$dst), (ins Int128Regs:$sss),
"mov.b128 \t$dst, $sss;", []>;

def FMOV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
"mov.f32 \t$dst, $src;", []>;
def FMOV64rr : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
"mov.f64 \t$dst, $src;", []>;

def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
"mov.pred \t$dst, $src;",
[(set i1:$dst, imm:$src)]>;
def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
"mov.b16 \t$dst, $src;",
[(set i16:$dst, imm:$src)]>;
def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
"mov.b32 \t$dst, $src;",
[(set i32:$dst, imm:$src)]>;
def IMOV64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
"mov.b64 \t$dst, $src;",
[(set i64:$dst, imm:$src)]>;

def FMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins f16imm:$src),
"mov.b16 \t$dst, $src;",
[(set f16:$dst, fpimm:$src)]>;
def BFMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins bf16imm:$src),
"mov.b16 \t$dst, $src;",
[(set bf16:$dst, fpimm:$src)]>;
def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
"mov.f32 \t$dst, $src;",
[(set f32:$dst, fpimm:$src)]>;
def FMOV64ri : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
"mov.f64 \t$dst, $src;",
[(set f64:$dst, fpimm:$src)]>;
}

def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64ri texternalsym:$dst)>;
let hasSideEffects = false, isAsCheapAsAMove = true in {
// Class for register-to-register moves
class MOVr<RegisterClass RC, string OpStr> :
NVPTXInst<(outs RC:$dst), (ins RC:$src),
"mov." # OpStr # " \t$dst, $src;", []>;

// Class for immediate-to-register moves
class MOVi<RegisterClass RC, string OpStr, ValueType VT, Operand IMMType, SDNode ImmNode> :
NVPTXInst<(outs RC:$dst), (ins IMMType:$src),
"mov." # OpStr # " \t$dst, $src;",
[(set VT:$dst, ImmNode:$src)]>;
}

def IMOV1r : MOVr<Int1Regs, "pred">;
def IMOV1i : MOVi<Int1Regs, "pred", i1, i1imm, imm>;
def MOV16r : MOVr<Int16Regs, "b16">;
def IMOV16i : MOVi<Int16Regs, "b16", i16, i16imm, imm>;
def IMOV32r : MOVr<Int32Regs, "b32">;
def IMOV32i : MOVi<Int32Regs, "b32", i32, i32imm, imm>;
def IMOV64r : MOVr<Int64Regs, "b64">;
def IMOV64i : MOVi<Int64Regs, "b64", i64, i64imm, imm>;
def IMOV128r : MOVr<Int128Regs, "b128">;
def FMOV16i : MOVi<Int16Regs, "b16", f16, f16imm, fpimm>;
def BFMOV16i : MOVi<Int16Regs, "b16", bf16, bf16imm, fpimm>;
def FMOV32r : MOVr<Float32Regs, "b32">;
def FMOV32i : MOVi<Float32Regs, "b32", f32, f32imm, fpimm>;
def FMOV64r : MOVr<Float64Regs, "b64">;
def FMOV64i : MOVi<Float64Regs, "b64", f64, f64imm, fpimm>;

def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32i texternalsym:$dst)>;
def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64i texternalsym:$dst)>;

//---- Copy Frame Index ----
def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins ADDR:$addr),
Expand Down Expand Up @@ -2717,8 +2702,8 @@ def ProxyRegI1 : ProxyRegInst<"pred", i1, Int1Regs>;
def ProxyRegI16 : ProxyRegInst<"b16", i16, Int16Regs>;
def ProxyRegI32 : ProxyRegInst<"b32", i32, Int32Regs>;
def ProxyRegI64 : ProxyRegInst<"b64", i64, Int64Regs>;
def ProxyRegF32 : ProxyRegInst<"f32", f32, Float32Regs>;
def ProxyRegF64 : ProxyRegInst<"f64", f64, Float64Regs>;
def ProxyRegF32 : ProxyRegInst<"b32", f32, Float32Regs>;
def ProxyRegF64 : ProxyRegInst<"b64", f64, Float64Regs>;

foreach vt = [f16, bf16] in {
def: Pat<(vt (ProxyReg vt:$src)), (ProxyRegI16 $src)>;
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/NVPTX/atomics-sm70.ll
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: or.b32 %r32, %r31, %r30;
; CHECKPTX62-NEXT: atom.cas.b32 %r6, [%r1], %r54, %r32;
; CHECKPTX62-NEXT: setp.ne.s32 %p1, %r6, %r54;
; CHECKPTX62-NEXT: mov.u32 %r54, %r6;
; CHECKPTX62-NEXT: mov.b32 %r54, %r6;
; CHECKPTX62-NEXT: @%p1 bra $L__BB0_1;
; CHECKPTX62-NEXT: // %bb.2: // %atomicrmw.end44
; CHECKPTX62-NEXT: ld.u32 %r55, [%r1];
Expand All @@ -88,7 +88,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: or.b32 %r37, %r36, %r35;
; CHECKPTX62-NEXT: atom.cas.b32 %r9, [%r1], %r55, %r37;
; CHECKPTX62-NEXT: setp.ne.s32 %p2, %r9, %r55;
; CHECKPTX62-NEXT: mov.u32 %r55, %r9;
; CHECKPTX62-NEXT: mov.b32 %r55, %r9;
; CHECKPTX62-NEXT: @%p2 bra $L__BB0_3;
; CHECKPTX62-NEXT: // %bb.4: // %atomicrmw.end26
; CHECKPTX62-NEXT: and.b32 %r10, %r22, -4;
Expand All @@ -109,7 +109,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: or.b32 %r45, %r44, %r43;
; CHECKPTX62-NEXT: atom.global.cas.b32 %r15, [%r10], %r56, %r45;
; CHECKPTX62-NEXT: setp.ne.s32 %p3, %r15, %r56;
; CHECKPTX62-NEXT: mov.u32 %r56, %r15;
; CHECKPTX62-NEXT: mov.b32 %r56, %r15;
; CHECKPTX62-NEXT: @%p3 bra $L__BB0_5;
; CHECKPTX62-NEXT: // %bb.6: // %atomicrmw.end8
; CHECKPTX62-NEXT: and.b32 %r16, %r23, -4;
Expand All @@ -130,7 +130,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: or.b32 %r53, %r52, %r51;
; CHECKPTX62-NEXT: atom.shared.cas.b32 %r21, [%r16], %r57, %r53;
; CHECKPTX62-NEXT: setp.ne.s32 %p4, %r21, %r57;
; CHECKPTX62-NEXT: mov.u32 %r57, %r21;
; CHECKPTX62-NEXT: mov.b32 %r57, %r21;
; CHECKPTX62-NEXT: @%p4 bra $L__BB0_7;
; CHECKPTX62-NEXT: // %bb.8: // %atomicrmw.end
; CHECKPTX62-NEXT: ret;
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/NVPTX/atomics-sm90.ll
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: or.b32 %r32, %r31, %r30;
; CHECKPTX71-NEXT: atom.relaxed.cas.b32 %r6, [%r1], %r54, %r32;
; CHECKPTX71-NEXT: setp.ne.s32 %p1, %r6, %r54;
; CHECKPTX71-NEXT: mov.u32 %r54, %r6;
; CHECKPTX71-NEXT: mov.b32 %r54, %r6;
; CHECKPTX71-NEXT: @%p1 bra $L__BB0_1;
; CHECKPTX71-NEXT: // %bb.2: // %atomicrmw.end44
; CHECKPTX71-NEXT: ld.u32 %r55, [%r1];
Expand All @@ -89,7 +89,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: or.b32 %r37, %r36, %r35;
; CHECKPTX71-NEXT: atom.relaxed.cas.b32 %r9, [%r1], %r55, %r37;
; CHECKPTX71-NEXT: setp.ne.s32 %p2, %r9, %r55;
; CHECKPTX71-NEXT: mov.u32 %r55, %r9;
; CHECKPTX71-NEXT: mov.b32 %r55, %r9;
; CHECKPTX71-NEXT: @%p2 bra $L__BB0_3;
; CHECKPTX71-NEXT: // %bb.4: // %atomicrmw.end26
; CHECKPTX71-NEXT: and.b32 %r10, %r22, -4;
Expand All @@ -111,7 +111,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: or.b32 %r45, %r44, %r43;
; CHECKPTX71-NEXT: atom.relaxed.global.cas.b32 %r15, [%r10], %r56, %r45;
; CHECKPTX71-NEXT: setp.ne.s32 %p3, %r15, %r56;
; CHECKPTX71-NEXT: mov.u32 %r56, %r15;
; CHECKPTX71-NEXT: mov.b32 %r56, %r15;
; CHECKPTX71-NEXT: @%p3 bra $L__BB0_5;
; CHECKPTX71-NEXT: // %bb.6: // %atomicrmw.end8
; CHECKPTX71-NEXT: and.b32 %r16, %r23, -4;
Expand All @@ -133,7 +133,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: or.b32 %r53, %r52, %r51;
; CHECKPTX71-NEXT: atom.relaxed.shared.cas.b32 %r21, [%r16], %r57, %r53;
; CHECKPTX71-NEXT: setp.ne.s32 %p4, %r21, %r57;
; CHECKPTX71-NEXT: mov.u32 %r57, %r21;
; CHECKPTX71-NEXT: mov.b32 %r57, %r21;
; CHECKPTX71-NEXT: @%p4 bra $L__BB0_7;
; CHECKPTX71-NEXT: // %bb.8: // %atomicrmw.end
; CHECKPTX71-NEXT: ret;
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/NVPTX/atomics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ define half @atomicrmw_add_f16_generic(ptr %addr, half %val) {
; CHECK-NEXT: membar.sys;
; CHECK-NEXT: atom.cas.b32 %r5, [%rd1], %r16, %r14;
; CHECK-NEXT: setp.ne.s32 %p1, %r5, %r16;
; CHECK-NEXT: mov.u32 %r16, %r5;
; CHECK-NEXT: mov.b32 %r16, %r5;
; CHECK-NEXT: @%p1 bra $L__BB22_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
; CHECK-NEXT: shr.u32 %r15, %r5, %r1;
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ entry:
%buf = alloca [16 x i8], align 4

; CHECK: .local .align 4 .b8 __local_depot0[16]
; CHECK: mov.u64 %SPL
; CHECK: mov.b64 %SPL

; CHECK: ld.param.u64 %rd[[A_REG:[0-9]+]], [kernel_func_param_0]
; CHECK: cvta.to.global.u64 %rd[[A1_REG:[0-9]+]], %rd[[A_REG]]
Expand Down
Loading
Loading