Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 14 additions & 25 deletions llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1994,22 +1994,15 @@ let IsSimpleMove=1, hasSideEffects=0 in {
def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
"mov.pred \t$dst, $src;",
[(set i1:$dst, imm:$src)]>;
def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
"mov.u16 \t$dst, $src;",
[(set i16:$dst, imm:$src)]>;
def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
"mov.u32 \t$dst, $src;",
[(set i32:$dst, imm:$src)]>;
def IMOV64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
"mov.u64 \t$dst, $src;",
[(set i64:$dst, imm:$src)]>;

def IMOVB16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
"mov.b16 \t$dst, $src;", []>;
"mov.b16 \t$dst, $src;",
[(set i16:$dst, imm:$src)]>;
def IMOVB32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
"mov.b32 \t$dst, $src;", []>;
"mov.b32 \t$dst, $src;",
[(set i32:$dst, imm:$src)]>;
def IMOVB64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
"mov.b64 \t$dst, $src;", []>;
"mov.b64 \t$dst, $src;",
[(set i64:$dst, imm:$src)]>;

def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
"mov.f32 \t$dst, $src;",
Expand All @@ -2018,8 +2011,8 @@ def FMOV64ri : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
"mov.f64 \t$dst, $src;",
[(set f64:$dst, fpimm:$src)]>;

def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64ri texternalsym:$dst)>;
def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOVB32ri texternalsym:$dst)>;
def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOVB64ri texternalsym:$dst)>;

//---- Copy Frame Index ----
def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
Expand Down Expand Up @@ -3104,21 +3097,17 @@ def: Pat<(f32 (bitconvert vt:$a)),
(BITCONVERT_32_I2F Int32Regs:$a)>;
}
foreach vt = [f16, bf16] in {
def: Pat<(vt (bitconvert (i16 UInt16Const:$a))),
(IMOVB16ri UInt16Const:$a)>;
def: Pat<(vt (bitconvert i16:$a)),
(ProxyRegI16 Int16Regs:$a)>;
def: Pat<(i16 (bitconvert vt:$a)),
(ProxyRegI16 Int16Regs:$a)>;
def: Pat<(vt (bitconvert i16:$a)),
(vt Int16Regs:$a)>;
def: Pat<(i16 (bitconvert vt:$a)),
(i16 Int16Regs:$a)>;
}

foreach ta = [v2f16, v2bf16, v2i16, v4i8, i32] in {
def: Pat<(ta (bitconvert (i32 UInt32Const:$a))),
(IMOVB32ri UInt32Const:$a)>;
foreach tb = [v2f16, v2bf16, v2i16, v4i8, i32] in {
if !ne(ta, tb) then {
def: Pat<(ta (bitconvert (tb Int32Regs:$a))),
(ProxyRegI32 Int32Regs:$a)>;
def: Pat<(ta (bitconvert tb:$a)),
(ta Int32Regs:$a)>;
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -2803,10 +2803,10 @@ def : Pat<(int_nvvm_ptr_param_to_gen i64:$src),

// nvvm.ptr.gen.to.param
def : Pat<(int_nvvm_ptr_gen_to_param i32:$src),
(IMOV32rr Int32Regs:$src)>;
(i32 Int32Regs:$src)>;

def : Pat<(int_nvvm_ptr_gen_to_param i64:$src),
(IMOV64rr Int64Regs:$src)>;
(i64 Int64Regs:$src)>;

// nvvm.move intrinsicc
def nvvm_move_i16 : NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
Expand Down
26 changes: 13 additions & 13 deletions llvm/test/CodeGen/NVPTX/atomics-sm70.ll
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-LABEL: test(
; CHECKPTX62: {
; CHECKPTX62-NEXT: .reg .pred %p<5>;
; CHECKPTX62-NEXT: .reg .b16 %rs<19>;
; CHECKPTX62-NEXT: .reg .b16 %rs<11>;
; CHECKPTX62-NEXT: .reg .b32 %r<58>;
; CHECKPTX62-EMPTY:
; CHECKPTX62-NEXT: // %bb.0:
Expand All @@ -65,8 +65,8 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1
; CHECKPTX62-NEXT: shr.u32 %r28, %r54, %r2;
; CHECKPTX62-NEXT: cvt.u16.u32 %rs2, %r28;
; CHECKPTX62-NEXT: add.rn.f16 %rs4, %rs2, %rs1;
; CHECKPTX62-NEXT: cvt.u32.u16 %r29, %rs4;
; CHECKPTX62-NEXT: add.rn.f16 %rs3, %rs2, %rs1;
; CHECKPTX62-NEXT: cvt.u32.u16 %r29, %rs3;
; CHECKPTX62-NEXT: shl.b32 %r30, %r29, %r2;
; CHECKPTX62-NEXT: and.b32 %r31, %r54, %r3;
; CHECKPTX62-NEXT: or.b32 %r32, %r31, %r30;
Expand All @@ -79,10 +79,10 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: $L__BB0_3: // %atomicrmw.start27
; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1
; CHECKPTX62-NEXT: shr.u32 %r33, %r55, %r2;
; CHECKPTX62-NEXT: cvt.u16.u32 %rs6, %r33;
; CHECKPTX62-NEXT: mov.b16 %rs8, 0x3C00;
; CHECKPTX62-NEXT: add.rn.f16 %rs9, %rs6, %rs8;
; CHECKPTX62-NEXT: cvt.u32.u16 %r34, %rs9;
; CHECKPTX62-NEXT: cvt.u16.u32 %rs4, %r33;
; CHECKPTX62-NEXT: mov.b16 %rs5, 0x3C00;
; CHECKPTX62-NEXT: add.rn.f16 %rs6, %rs4, %rs5;
; CHECKPTX62-NEXT: cvt.u32.u16 %r34, %rs6;
; CHECKPTX62-NEXT: shl.b32 %r35, %r34, %r2;
; CHECKPTX62-NEXT: and.b32 %r36, %r55, %r3;
; CHECKPTX62-NEXT: or.b32 %r37, %r36, %r35;
Expand All @@ -100,9 +100,9 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: $L__BB0_5: // %atomicrmw.start9
; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1
; CHECKPTX62-NEXT: shr.u32 %r41, %r56, %r11;
; CHECKPTX62-NEXT: cvt.u16.u32 %rs11, %r41;
; CHECKPTX62-NEXT: add.rn.f16 %rs13, %rs11, %rs1;
; CHECKPTX62-NEXT: cvt.u32.u16 %r42, %rs13;
; CHECKPTX62-NEXT: cvt.u16.u32 %rs7, %r41;
; CHECKPTX62-NEXT: add.rn.f16 %rs8, %rs7, %rs1;
; CHECKPTX62-NEXT: cvt.u32.u16 %r42, %rs8;
; CHECKPTX62-NEXT: shl.b32 %r43, %r42, %r11;
; CHECKPTX62-NEXT: and.b32 %r44, %r56, %r12;
; CHECKPTX62-NEXT: or.b32 %r45, %r44, %r43;
Expand All @@ -120,9 +120,9 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: $L__BB0_7: // %atomicrmw.start
; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1
; CHECKPTX62-NEXT: shr.u32 %r49, %r57, %r17;
; CHECKPTX62-NEXT: cvt.u16.u32 %rs15, %r49;
; CHECKPTX62-NEXT: add.rn.f16 %rs17, %rs15, %rs1;
; CHECKPTX62-NEXT: cvt.u32.u16 %r50, %rs17;
; CHECKPTX62-NEXT: cvt.u16.u32 %rs9, %r49;
; CHECKPTX62-NEXT: add.rn.f16 %rs10, %rs9, %rs1;
; CHECKPTX62-NEXT: cvt.u32.u16 %r50, %rs10;
; CHECKPTX62-NEXT: shl.b32 %r51, %r50, %r17;
; CHECKPTX62-NEXT: and.b32 %r52, %r57, %r18;
; CHECKPTX62-NEXT: or.b32 %r53, %r52, %r51;
Expand Down
48 changes: 24 additions & 24 deletions llvm/test/CodeGen/NVPTX/atomics-sm90.ll
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-LABEL: test(
; CHECKPTX71: {
; CHECKPTX71-NEXT: .reg .pred %p<5>;
; CHECKPTX71-NEXT: .reg .b16 %rs<34>;
; CHECKPTX71-NEXT: .reg .b16 %rs<22>;
; CHECKPTX71-NEXT: .reg .b32 %r<4>;
; CHECKPTX71-NEXT: .reg .f32 %f<12>;
; CHECKPTX71-EMPTY:
Expand All @@ -55,49 +55,49 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: ld.param.u32 %r3, [test_param_2];
; CHECKPTX71-NEXT: ld.param.u32 %r2, [test_param_1];
; CHECKPTX71-NEXT: ld.param.u32 %r1, [test_param_0];
; CHECKPTX71-NEXT: ld.b16 %rs30, [%r1];
; CHECKPTX71-NEXT: ld.b16 %rs18, [%r1];
; CHECKPTX71-NEXT: cvt.f32.bf16 %f1, %rs13;
; CHECKPTX71-NEXT: $L__BB0_1: // %atomicrmw.start14
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
; CHECKPTX71-NEXT: cvt.f32.bf16 %f2, %rs30;
; CHECKPTX71-NEXT: cvt.f32.bf16 %f2, %rs18;
; CHECKPTX71-NEXT: add.rn.f32 %f3, %f2, %f1;
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs14, %f3;
; CHECKPTX71-NEXT: atom.cas.b16 %rs17, [%r1], %rs30, %rs14;
; CHECKPTX71-NEXT: setp.ne.s16 %p1, %rs17, %rs30;
; CHECKPTX71-NEXT: mov.u16 %rs30, %rs17;
; CHECKPTX71-NEXT: atom.cas.b16 %rs3, [%r1], %rs18, %rs14;
; CHECKPTX71-NEXT: setp.ne.s16 %p1, %rs3, %rs18;
; CHECKPTX71-NEXT: mov.u16 %rs18, %rs3;
; CHECKPTX71-NEXT: @%p1 bra $L__BB0_1;
; CHECKPTX71-NEXT: // %bb.2: // %atomicrmw.end13
; CHECKPTX71-NEXT: ld.b16 %rs31, [%r1];
; CHECKPTX71-NEXT: ld.b16 %rs19, [%r1];
; CHECKPTX71-NEXT: $L__BB0_3: // %atomicrmw.start8
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
; CHECKPTX71-NEXT: cvt.f32.bf16 %f4, %rs31;
; CHECKPTX71-NEXT: cvt.f32.bf16 %f4, %rs19;
; CHECKPTX71-NEXT: add.rn.f32 %f5, %f4, 0f3F800000;
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs18, %f5;
; CHECKPTX71-NEXT: atom.cas.b16 %rs21, [%r1], %rs31, %rs18;
; CHECKPTX71-NEXT: setp.ne.s16 %p2, %rs21, %rs31;
; CHECKPTX71-NEXT: mov.u16 %rs31, %rs21;
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs15, %f5;
; CHECKPTX71-NEXT: atom.cas.b16 %rs6, [%r1], %rs19, %rs15;
; CHECKPTX71-NEXT: setp.ne.s16 %p2, %rs6, %rs19;
; CHECKPTX71-NEXT: mov.u16 %rs19, %rs6;
; CHECKPTX71-NEXT: @%p2 bra $L__BB0_3;
; CHECKPTX71-NEXT: // %bb.4: // %atomicrmw.end7
; CHECKPTX71-NEXT: ld.global.b16 %rs32, [%r2];
; CHECKPTX71-NEXT: ld.global.b16 %rs20, [%r2];
; CHECKPTX71-NEXT: $L__BB0_5: // %atomicrmw.start2
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
; CHECKPTX71-NEXT: cvt.f32.bf16 %f7, %rs32;
; CHECKPTX71-NEXT: cvt.f32.bf16 %f7, %rs20;
; CHECKPTX71-NEXT: add.rn.f32 %f8, %f7, %f1;
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs22, %f8;
; CHECKPTX71-NEXT: atom.global.cas.b16 %rs25, [%r2], %rs32, %rs22;
; CHECKPTX71-NEXT: setp.ne.s16 %p3, %rs25, %rs32;
; CHECKPTX71-NEXT: mov.u16 %rs32, %rs25;
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs16, %f8;
; CHECKPTX71-NEXT: atom.global.cas.b16 %rs9, [%r2], %rs20, %rs16;
; CHECKPTX71-NEXT: setp.ne.s16 %p3, %rs9, %rs20;
; CHECKPTX71-NEXT: mov.u16 %rs20, %rs9;
; CHECKPTX71-NEXT: @%p3 bra $L__BB0_5;
; CHECKPTX71-NEXT: // %bb.6: // %atomicrmw.end1
; CHECKPTX71-NEXT: ld.shared.b16 %rs33, [%r3];
; CHECKPTX71-NEXT: ld.shared.b16 %rs21, [%r3];
; CHECKPTX71-NEXT: $L__BB0_7: // %atomicrmw.start
; CHECKPTX71-NEXT: // =>This Inner Loop Header: Depth=1
; CHECKPTX71-NEXT: cvt.f32.bf16 %f10, %rs33;
; CHECKPTX71-NEXT: cvt.f32.bf16 %f10, %rs21;
; CHECKPTX71-NEXT: add.rn.f32 %f11, %f10, %f1;
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs26, %f11;
; CHECKPTX71-NEXT: atom.shared.cas.b16 %rs29, [%r3], %rs33, %rs26;
; CHECKPTX71-NEXT: setp.ne.s16 %p4, %rs29, %rs33;
; CHECKPTX71-NEXT: mov.u16 %rs33, %rs29;
; CHECKPTX71-NEXT: cvt.rn.bf16.f32 %rs17, %f11;
; CHECKPTX71-NEXT: atom.shared.cas.b16 %rs12, [%r3], %rs21, %rs17;
; CHECKPTX71-NEXT: setp.ne.s16 %p4, %rs12, %rs21;
; CHECKPTX71-NEXT: mov.u16 %rs21, %rs12;
; CHECKPTX71-NEXT: @%p4 bra $L__BB0_7;
; CHECKPTX71-NEXT: // %bb.8: // %atomicrmw.end
; CHECKPTX71-NEXT: ret;
Expand Down
Loading
Loading