@@ -1177,17 +1177,6 @@ def NegDoubleConst : SDNodeXForm<fpimm, [{
11771177 SDLoc(N), MVT::f64);
11781178}]>;
11791179
1180- // Loads FP16 constant into a register.
1181- //
1182- // ptxas does not have hex representation for fp16, so we can't use
1183- // fp16 immediate values in .f16 instructions. Instead we have to load
1184- // the constant into a register using mov.b16.
1185- def LOAD_CONST_F16 :
1186- NVPTXInst<(outs Int16Regs:$dst), (ins f16imm:$a),
1187- "mov.b16 \t$dst, $a;", []>;
1188- def LOAD_CONST_BF16 :
1189- NVPTXInst<(outs Int16Regs:$dst), (ins bf16imm:$a),
1190- "mov.b16 \t$dst, $a;", []>;
11911180defm FADD : F3_fma_component<"add", fadd>;
11921181defm FSUB : F3_fma_component<"sub", fsub>;
11931182defm FMUL : F3_fma_component<"mul", fmul>;
@@ -1963,7 +1952,7 @@ let hasSideEffects = false in {
19631952
19641953
19651954// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
1966- let IsSimpleMove=1, hasSideEffects=0 in {
1955+ let IsSimpleMove=1, hasSideEffects=0, isAsCheapAsAMove=1 in {
19671956 def IMOV1rr : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
19681957 "mov.pred \t$dst, $sss;", []>;
19691958 def IMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
@@ -1975,44 +1964,40 @@ let IsSimpleMove=1, hasSideEffects=0 in {
19751964 def IMOV128rr : NVPTXInst<(outs Int128Regs:$dst), (ins Int128Regs:$sss),
19761965 "mov.b128 \t$dst, $sss;", []>;
19771966
1978- def IMOVB16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
1979- "mov.b16 \t$dst, $sss;", []>;
1980- def IMOVB32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
1981- "mov.b32 \t$dst, $sss;", []>;
1982- def IMOVB64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
1983- "mov.b64 \t$dst, $sss;", []>;
1984-
1985- def FMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
1986- // We have to use .b16 here as there's no mov.f16.
1987- "mov.b16 \t$dst, $src;", []>;
19881967 def FMOV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
19891968 "mov.f32 \t$dst, $src;", []>;
19901969 def FMOV64rr : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
19911970 "mov.f64 \t$dst, $src;", []>;
1992- }
19931971
1994- def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
1995- "mov.pred \t$dst, $src;",
1996- [(set i1:$dst, imm:$src)]>;
1997- def IMOVB16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
1998- "mov.b16 \t$dst, $src;",
1999- [(set i16:$dst, imm:$src)]>;
2000- def IMOVB32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
2001- "mov.b32 \t$dst, $src;",
2002- [(set i32:$dst, imm:$src)]>;
2003- def IMOVB64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
2004- "mov.b64 \t$dst, $src;",
2005- [(set i64:$dst, imm:$src)]>;
2006-
2007- def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
2008- "mov.f32 \t$dst, $src;",
2009- [(set f32:$dst, fpimm:$src)]>;
2010- def FMOV64ri : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
2011- "mov.f64 \t$dst, $src;",
2012- [(set f64:$dst, fpimm:$src)]>;
2013-
2014- def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOVB32ri texternalsym:$dst)>;
2015- def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOVB64ri texternalsym:$dst)>;
1972+ def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
1973+ "mov.pred \t$dst, $src;",
1974+ [(set i1:$dst, imm:$src)]>;
1975+ def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
1976+ "mov.b16 \t$dst, $src;",
1977+ [(set i16:$dst, imm:$src)]>;
1978+ def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
1979+ "mov.b32 \t$dst, $src;",
1980+ [(set i32:$dst, imm:$src)]>;
1981+ def IMOV64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
1982+ "mov.b64 \t$dst, $src;",
1983+ [(set i64:$dst, imm:$src)]>;
1984+
1985+ def FMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins f16imm:$src),
1986+ "mov.b16 \t$dst, $src;",
1987+ [(set f16:$dst, fpimm:$src)]>;
1988+ def BFMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins bf16imm:$src),
1989+ "mov.b16 \t$dst, $src;",
1990+ [(set bf16:$dst, fpimm:$src)]>;
1991+ def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
1992+ "mov.f32 \t$dst, $src;",
1993+ [(set f32:$dst, fpimm:$src)]>;
1994+ def FMOV64ri : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
1995+ "mov.f64 \t$dst, $src;",
1996+ [(set f64:$dst, fpimm:$src)]>;
1997+ }
1998+
1999+ def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
2000+ def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64ri texternalsym:$dst)>;
20162001
20172002//---- Copy Frame Index ----
20182003def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
@@ -2208,18 +2193,6 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
22082193 def : Pat<(i1 (OpNode f16:$a, f16:$b)),
22092194 (SETP_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
22102195 Requires<[useFP16Math]>;
2211- def : Pat<(i1 (OpNode f16:$a, fpimm:$b)),
2212- (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
2213- Requires<[useFP16Math,doF32FTZ]>;
2214- def : Pat<(i1 (OpNode f16:$a, fpimm:$b)),
2215- (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
2216- Requires<[useFP16Math]>;
2217- def : Pat<(i1 (OpNode fpimm:$a, f16:$b)),
2218- (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
2219- Requires<[useFP16Math,doF32FTZ]>;
2220- def : Pat<(i1 (OpNode fpimm:$a, f16:$b)),
2221- (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>,
2222- Requires<[useFP16Math]>;
22232196
22242197 // bf16 -> pred
22252198 def : Pat<(i1 (OpNode bf16:$a, bf16:$b)),
@@ -2228,18 +2201,6 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
22282201 def : Pat<(i1 (OpNode bf16:$a, bf16:$b)),
22292202 (SETP_bf16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
22302203 Requires<[hasBF16Math]>;
2231- def : Pat<(i1 (OpNode bf16:$a, fpimm:$b)),
2232- (SETP_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), ModeFTZ)>,
2233- Requires<[hasBF16Math,doF32FTZ]>;
2234- def : Pat<(i1 (OpNode bf16:$a, fpimm:$b)),
2235- (SETP_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), Mode)>,
2236- Requires<[hasBF16Math]>;
2237- def : Pat<(i1 (OpNode fpimm:$a, bf16:$b)),
2238- (SETP_bf16rr (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
2239- Requires<[hasBF16Math,doF32FTZ]>;
2240- def : Pat<(i1 (OpNode fpimm:$a, bf16:$b)),
2241- (SETP_bf16rr (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, Mode)>,
2242- Requires<[hasBF16Math]>;
22432204
22442205 // f32 -> pred
22452206 def : Pat<(i1 (OpNode f32:$a, f32:$b)),
@@ -2273,18 +2234,6 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
22732234 def : Pat<(i32 (OpNode f16:$a, f16:$b)),
22742235 (SET_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
22752236 Requires<[useFP16Math]>;
2276- def : Pat<(i32 (OpNode f16:$a, fpimm:$b)),
2277- (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
2278- Requires<[useFP16Math, doF32FTZ]>;
2279- def : Pat<(i32 (OpNode f16:$a, fpimm:$b)),
2280- (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
2281- Requires<[useFP16Math]>;
2282- def : Pat<(i32 (OpNode fpimm:$a, f16:$b)),
2283- (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
2284- Requires<[useFP16Math, doF32FTZ]>;
2285- def : Pat<(i32 (OpNode fpimm:$a, f16:$b)),
2286- (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>,
2287- Requires<[useFP16Math]>;
22882237
22892238 // bf16 -> i32
22902239 def : Pat<(i32 (OpNode bf16:$a, bf16:$b)),
@@ -2293,18 +2242,6 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
22932242 def : Pat<(i32 (OpNode bf16:$a, bf16:$b)),
22942243 (SET_bf16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
22952244 Requires<[hasBF16Math]>;
2296- def : Pat<(i32 (OpNode bf16:$a, fpimm:$b)),
2297- (SET_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), ModeFTZ)>,
2298- Requires<[hasBF16Math, doF32FTZ]>;
2299- def : Pat<(i32 (OpNode bf16:$a, fpimm:$b)),
2300- (SET_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), Mode)>,
2301- Requires<[hasBF16Math]>;
2302- def : Pat<(i32 (OpNode fpimm:$a, bf16:$b)),
2303- (SET_bf16ir (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
2304- Requires<[hasBF16Math, doF32FTZ]>;
2305- def : Pat<(i32 (OpNode fpimm:$a, bf16:$b)),
2306- (SET_bf16ir (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, Mode)>,
2307- Requires<[hasBF16Math]>;
23082245
23092246 // f32 -> i32
23102247 def : Pat<(i32 (OpNode f32:$a, f32:$b)),
0 commit comments