@@ -1177,17 +1177,6 @@ def NegDoubleConst : SDNodeXForm<fpimm, [{
11771177 SDLoc(N), MVT::f64);
11781178}]>;
11791179
1180- // Loads FP16 constant into a register.
1181- //
1182- // ptxas does not have hex representation for fp16, so we can't use
1183- // fp16 immediate values in .f16 instructions. Instead we have to load
1184- // the constant into a register using mov.b16.
1185- def LOAD_CONST_F16 :
1186- NVPTXInst<(outs Int16Regs:$dst), (ins f16imm:$a),
1187- "mov.b16 \t$dst, $a;", []>;
1188- def LOAD_CONST_BF16 :
1189- NVPTXInst<(outs Int16Regs:$dst), (ins bf16imm:$a),
1190- "mov.b16 \t$dst, $a;", []>;
11911180defm FADD : F3_fma_component<"add", fadd>;
11921181defm FSUB : F3_fma_component<"sub", fsub>;
11931182defm FMUL : F3_fma_component<"mul", fmul>;
@@ -1963,7 +1952,7 @@ let hasSideEffects = false in {
19631952
19641953
19651954// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
1966- let IsSimpleMove=1, hasSideEffects=0 in {
1955+ let IsSimpleMove=1, hasSideEffects=0, isAsCheapAsAMove=1 in {
19671956 def IMOV1rr : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
19681957 "mov.pred \t$dst, $sss;", []>;
19691958 def IMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
@@ -1975,48 +1964,37 @@ let IsSimpleMove=1, hasSideEffects=0 in {
19751964 def IMOV128rr : NVPTXInst<(outs Int128Regs:$dst), (ins Int128Regs:$sss),
19761965 "mov.b128 \t$dst, $sss;", []>;
19771966
1978- def IMOVB16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
1979- "mov.b16 \t$dst, $sss;", []>;
1980- def IMOVB32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
1981- "mov.b32 \t$dst, $sss;", []>;
1982- def IMOVB64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
1983- "mov.b64 \t$dst, $sss;", []>;
1984-
1985- def FMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
1986- // We have to use .b16 here as there's no mov.f16.
1987- "mov.b16 \t$dst, $src;", []>;
19881967 def FMOV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
19891968 "mov.f32 \t$dst, $src;", []>;
19901969 def FMOV64rr : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
19911970 "mov.f64 \t$dst, $src;", []>;
1992- }
19931971
1994- def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
1995- "mov.pred \t$dst, $src;",
1996- [(set i1:$dst, imm:$src)]>;
1997- def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
1998- "mov.u16 \t$dst, $src;",
1999- [(set i16:$dst, imm:$src)]>;
2000- def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
2001- "mov.u32 \t$dst, $src;",
2002- [(set i32:$dst, imm:$src)]>;
2003- def IMOV64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
2004- "mov.u64 \t$dst, $src;",
2005- [(set i64:$dst, imm:$src)]>;
2006-
2007- def IMOVB16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm :$src),
2008- "mov.b16 \t$dst, $src;", []>;
2009- def IMOVB32ri : NVPTXInst<(outs Int32Regs :$dst), (ins i32imm :$src),
2010- "mov.b32 \t $dst, $src;", []>;
2011- def IMOVB64ri : NVPTXInst<(outs Int64Regs: $dst), (ins i64imm: $src) ,
2012- "mov.b64 \t $dst, $src;", [ ]>;
2013-
2014- def FMOV32ri : NVPTXInst<(outs Float32Regs: $dst), (ins f32imm: $src) ,
2015- "mov.f32 \t $dst, $src;",
2016- [(set f32 :$dst, fpimm :$src)]>;
2017- def FMOV64ri : NVPTXInst<(outs Float64Regs: $dst), (ins f64imm: $src) ,
2018- "mov.f64 \t $dst, $src;",
2019- [(set f64:$dst, fpimm:$src)]>;
1972+ def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src), // Selected for an i1 'imm'; printed as mov.pred into Int1Regs.
1973+ "mov.pred \t$dst, $src;",
1974+ [(set i1:$dst, imm:$src)]>;
1975+ def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src), // Selected for an i16 'imm'; printed as mov.b16 into Int16Regs.
1976+ "mov.b16 \t$dst, $src;",
1977+ [(set i16:$dst, imm:$src)]>;
1978+ def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src), // Selected for an i32 'imm'; printed as mov.b32 into Int32Regs.
1979+ "mov.b32 \t$dst, $src;",
1980+ [(set i32:$dst, imm:$src)]>;
1981+ def IMOV64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src), // Selected for an i64 'imm'; printed as mov.b64 into Int64Regs.
1982+ "mov.b64 \t$dst, $src;",
1983+ [(set i64:$dst, imm:$src)]>;
1984+
1985+ def FMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins f16imm:$src), // Selected for an f16 'fpimm'; printed as mov.b16 into Int16Regs.
1986+ "mov.b16 \t$dst, $src;",
1987+ [(set f16:$dst, fpimm:$src)]>;
1988+ def BFMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins bf16imm:$src), // Selected for a bf16 'fpimm'; printed as mov.b16 into Int16Regs.
1989+ "mov.b16 \t$dst, $src;",
1990+ [(set bf16:$dst, fpimm:$src)]>;
1991+ def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src), // Selected for an f32 'fpimm'; printed as mov.f32 into Float32Regs.
1992+ "mov.f32 \t$dst, $src;",
1993+ [(set f32:$dst, fpimm:$src)]>;
1994+ def FMOV64ri : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src), // Selected for an f64 'fpimm'; printed as mov.f64 into Float64Regs.
1995+ "mov.f64 \t$dst, $src;",
1996+ [(set f64:$dst, fpimm:$src)]>;
1997+ }
20201998
20211999def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
20222000def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64ri texternalsym:$dst)>;
@@ -2215,18 +2193,6 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
22152193 def : Pat<(i1 (OpNode f16:$a, f16:$b)),
22162194 (SETP_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
22172195 Requires<[useFP16Math]>;
2218- def : Pat<(i1 (OpNode f16:$a, fpimm:$b)),
2219- (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
2220- Requires<[useFP16Math,doF32FTZ]>;
2221- def : Pat<(i1 (OpNode f16:$a, fpimm:$b)),
2222- (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
2223- Requires<[useFP16Math]>;
2224- def : Pat<(i1 (OpNode fpimm:$a, f16:$b)),
2225- (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
2226- Requires<[useFP16Math,doF32FTZ]>;
2227- def : Pat<(i1 (OpNode fpimm:$a, f16:$b)),
2228- (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>,
2229- Requires<[useFP16Math]>;
22302196
22312197 // bf16 -> pred
22322198 def : Pat<(i1 (OpNode bf16:$a, bf16:$b)),
@@ -2235,18 +2201,6 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
22352201 def : Pat<(i1 (OpNode bf16:$a, bf16:$b)),
22362202 (SETP_bf16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
22372203 Requires<[hasBF16Math]>;
2238- def : Pat<(i1 (OpNode bf16:$a, fpimm:$b)),
2239- (SETP_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), ModeFTZ)>,
2240- Requires<[hasBF16Math,doF32FTZ]>;
2241- def : Pat<(i1 (OpNode bf16:$a, fpimm:$b)),
2242- (SETP_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), Mode)>,
2243- Requires<[hasBF16Math]>;
2244- def : Pat<(i1 (OpNode fpimm:$a, bf16:$b)),
2245- (SETP_bf16rr (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
2246- Requires<[hasBF16Math,doF32FTZ]>;
2247- def : Pat<(i1 (OpNode fpimm:$a, bf16:$b)),
2248- (SETP_bf16rr (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, Mode)>,
2249- Requires<[hasBF16Math]>;
22502204
22512205 // f32 -> pred
22522206 def : Pat<(i1 (OpNode f32:$a, f32:$b)),
@@ -2280,18 +2234,6 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
22802234 def : Pat<(i32 (OpNode f16:$a, f16:$b)),
22812235 (SET_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
22822236 Requires<[useFP16Math]>;
2283- def : Pat<(i32 (OpNode f16:$a, fpimm:$b)),
2284- (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
2285- Requires<[useFP16Math, doF32FTZ]>;
2286- def : Pat<(i32 (OpNode f16:$a, fpimm:$b)),
2287- (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
2288- Requires<[useFP16Math]>;
2289- def : Pat<(i32 (OpNode fpimm:$a, f16:$b)),
2290- (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
2291- Requires<[useFP16Math, doF32FTZ]>;
2292- def : Pat<(i32 (OpNode fpimm:$a, f16:$b)),
2293- (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>,
2294- Requires<[useFP16Math]>;
22952237
22962238 // bf16 -> i32
22972239 def : Pat<(i32 (OpNode bf16:$a, bf16:$b)),
@@ -2300,18 +2242,6 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
23002242 def : Pat<(i32 (OpNode bf16:$a, bf16:$b)),
23012243 (SET_bf16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
23022244 Requires<[hasBF16Math]>;
2303- def : Pat<(i32 (OpNode bf16:$a, fpimm:$b)),
2304- (SET_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), ModeFTZ)>,
2305- Requires<[hasBF16Math, doF32FTZ]>;
2306- def : Pat<(i32 (OpNode bf16:$a, fpimm:$b)),
2307- (SET_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), Mode)>,
2308- Requires<[hasBF16Math]>;
2309- def : Pat<(i32 (OpNode fpimm:$a, bf16:$b)),
2310- (SET_bf16ir (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
2311- Requires<[hasBF16Math, doF32FTZ]>;
2312- def : Pat<(i32 (OpNode fpimm:$a, bf16:$b)),
2313- (SET_bf16ir (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, Mode)>,
2314- Requires<[hasBF16Math]>;
23152245
23162246 // f32 -> i32
23172247 def : Pat<(i32 (OpNode f32:$a, f32:$b)),
@@ -3104,21 +3034,17 @@ def: Pat<(f32 (bitconvert vt:$a)),
31043034 (BITCONVERT_32_I2F Int32Regs:$a)>;
31053035}
31063036foreach vt = [f16, bf16] in {
3107- def: Pat<(vt (bitconvert (i16 UInt16Const:$a))),
3108- (IMOVB16ri UInt16Const:$a)>;
3109- def: Pat<(vt (bitconvert i16:$a)),
3110- (ProxyRegI16 Int16Regs:$a)>;
3111- def: Pat<(i16 (bitconvert vt:$a)),
3112- (ProxyRegI16 Int16Regs:$a)>;
3037+ def: Pat<(vt (bitconvert i16:$a)), // i16 -> vt bitcast: the output names no instruction,
3038+ (vt Int16Regs:$a)>; // only the same Int16Regs operand retyped as vt.
3039+ def: Pat<(i16 (bitconvert vt:$a)), // vt -> i16 bitcast: likewise, the result is just
3040+ (i16 Int16Regs:$a)>; // the Int16Regs operand retyped as i16.
31133041}
31143042
31153043foreach ta = [v2f16, v2bf16, v2i16, v4i8, i32] in {
3116- def: Pat<(ta (bitconvert (i32 UInt32Const:$a))),
3117- (IMOVB32ri UInt32Const:$a)>;
31183044 foreach tb = [v2f16, v2bf16, v2i16, v4i8, i32] in {
31193045 if !ne(ta, tb) then {
3120- def: Pat<(ta (bitconvert (tb Int32Regs :$a) )),
3121- (ProxyRegI32 Int32Regs:$a)>;
3046+ def: Pat<(ta (bitconvert tb:$a)), // tb -> ta bitcast (only emitted when ta != tb): the output names
3047+ (ta Int32Regs:$a)>; // no instruction, only the same Int32Regs operand retyped as ta.
31223048 }
31233049 }
31243050}
0 commit comments