Skip to content

Commit 76a5510

Browse files
committed
Add simd fpcvt codegen for fptoi(_sat)
1 parent f3f9e7b commit 76a5510

File tree

7 files changed

+2305
-143
lines changed

7 files changed

+2305
-143
lines changed

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5302,7 +5302,7 @@ multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
53025302
}
53035303

53045304
multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm,
5305-
SDPatternOperator OpN = null_frag> {
5305+
SDPatternOperator OpN> {
53065306
// double-precision to 32-bit SIMD/FPR
53075307
def SDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, FPR32, asm,
53085308
[(set FPR32:$Rd, (i32 (OpN (f64 FPR64:$Rn))))]> {

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 193 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -5239,114 +5239,11 @@ let Predicates = [HasNEON, HasFPRCVT] in{
52395239
defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu", int_aarch64_neon_fcvtnu>;
52405240
defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps", int_aarch64_neon_fcvtps>;
52415241
defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu", int_aarch64_neon_fcvtpu>;
5242-
defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs">;
5243-
defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu">;
5242+
defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs", any_fp_to_sint>;
5243+
defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu", any_fp_to_uint>;
52445244
}
52455245

52465246

5247-
// AArch64's FCVT instructions saturate when out of range.
5248-
multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
5249-
let Predicates = [HasFullFP16] in {
5250-
def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
5251-
(!cast<Instruction>(INST # UWHr) f16:$Rn)>;
5252-
def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
5253-
(!cast<Instruction>(INST # UXHr) f16:$Rn)>;
5254-
}
5255-
def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
5256-
(!cast<Instruction>(INST # UWSr) f32:$Rn)>;
5257-
def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
5258-
(!cast<Instruction>(INST # UXSr) f32:$Rn)>;
5259-
def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
5260-
(!cast<Instruction>(INST # UWDr) f64:$Rn)>;
5261-
def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
5262-
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;
5263-
5264-
let Predicates = [HasFullFP16] in {
5265-
def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
5266-
(!cast<Instruction>(INST # UWHr) f16:$Rn)>;
5267-
def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
5268-
(!cast<Instruction>(INST # UXHr) f16:$Rn)>;
5269-
}
5270-
def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
5271-
(!cast<Instruction>(INST # UWSr) f32:$Rn)>;
5272-
def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
5273-
(!cast<Instruction>(INST # UXSr) f32:$Rn)>;
5274-
def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
5275-
(!cast<Instruction>(INST # UWDr) f64:$Rn)>;
5276-
def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
5277-
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;
5278-
5279-
let Predicates = [HasFullFP16] in {
5280-
def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
5281-
(!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
5282-
def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
5283-
(!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
5284-
}
5285-
def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
5286-
(!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
5287-
def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
5288-
(!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
5289-
def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
5290-
(!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
5291-
def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
5292-
(!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
5293-
5294-
let Predicates = [HasFullFP16] in {
5295-
def : Pat<(i32 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
5296-
(!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
5297-
def : Pat<(i64 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
5298-
(!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
5299-
}
5300-
def : Pat<(i32 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
5301-
(!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
5302-
def : Pat<(i64 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
5303-
(!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
5304-
def : Pat<(i32 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
5305-
(!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
5306-
def : Pat<(i64 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
5307-
(!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
5308-
}
5309-
5310-
defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
5311-
defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
5312-
5313-
multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
5314-
def : Pat<(i32 (to_int (round f32:$Rn))),
5315-
(!cast<Instruction>(INST # UWSr) f32:$Rn)>;
5316-
def : Pat<(i64 (to_int (round f32:$Rn))),
5317-
(!cast<Instruction>(INST # UXSr) f32:$Rn)>;
5318-
def : Pat<(i32 (to_int (round f64:$Rn))),
5319-
(!cast<Instruction>(INST # UWDr) f64:$Rn)>;
5320-
def : Pat<(i64 (to_int (round f64:$Rn))),
5321-
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;
5322-
5323-
// These instructions saturate like fp_to_[su]int_sat.
5324-
let Predicates = [HasFullFP16] in {
5325-
def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
5326-
(!cast<Instruction>(INST # UWHr) f16:$Rn)>;
5327-
def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
5328-
(!cast<Instruction>(INST # UXHr) f16:$Rn)>;
5329-
}
5330-
def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
5331-
(!cast<Instruction>(INST # UWSr) f32:$Rn)>;
5332-
def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
5333-
(!cast<Instruction>(INST # UXSr) f32:$Rn)>;
5334-
def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
5335-
(!cast<Instruction>(INST # UWDr) f64:$Rn)>;
5336-
def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
5337-
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;
5338-
}
5339-
5340-
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil, "FCVTPS">;
5341-
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil, "FCVTPU">;
5342-
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
5343-
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
5344-
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
5345-
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
5346-
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
5347-
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;
5348-
5349-
53505247

53515248
let Predicates = [HasFullFP16] in {
53525249
def : Pat<(i32 (any_lround f16:$Rn)),
@@ -6553,8 +6450,8 @@ defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu", int_aarch64_neon_fcvtn
65536450
defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps", int_aarch64_neon_fcvtps>;
65546451
defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu", int_aarch64_neon_fcvtpu>;
65556452
def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
6556-
defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
6557-
defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
6453+
defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
6454+
defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;
65586455
defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">;
65596456
defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">;
65606457
defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">;
@@ -6596,6 +6493,8 @@ defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtns, "FCVTNS">;
65966493
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtnu, "FCVTNU">;
65976494
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtps, "FCVTPS">;
65986495
defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtpu, "FCVTPU">;
6496+
defm: FPToIntegerSIMDScalarPatterns<any_fp_to_sint, "FCVTZS">;
6497+
defm: FPToIntegerSIMDScalarPatterns<any_fp_to_uint, "FCVTZU">;
65996498

66006499
multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
66016500
let Predicates = [HasFullFP16] in {
@@ -6652,6 +6551,193 @@ multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
66526551
defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
66536552
defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;
66546553

6554+
// AArch64's FCVT instructions saturate when the result is out of range, so
// they can be selected directly for the saturating fp_to_[su]int_sat nodes.
6555+
multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
6556+
let Predicates = [HasFullFP16] in {
6557+
def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
6558+
(!cast<Instruction>(INST # UWHr) f16:$Rn)>;
6559+
def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
6560+
(!cast<Instruction>(INST # UXHr) f16:$Rn)>;
6561+
}
6562+
def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
6563+
(!cast<Instruction>(INST # UWSr) f32:$Rn)>;
6564+
def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
6565+
(!cast<Instruction>(INST # UXSr) f32:$Rn)>;
6566+
def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
6567+
(!cast<Instruction>(INST # UWDr) f64:$Rn)>;
6568+
def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
6569+
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;
6570+
6571+
let Predicates = [HasFullFP16] in {
6572+
def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
6573+
(!cast<Instruction>(INST # UWHr) f16:$Rn)>;
6574+
def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
6575+
(!cast<Instruction>(INST # UXHr) f16:$Rn)>;
6576+
}
6577+
def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
6578+
(!cast<Instruction>(INST # UWSr) f32:$Rn)>;
6579+
def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
6580+
(!cast<Instruction>(INST # UXSr) f32:$Rn)>;
6581+
def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
6582+
(!cast<Instruction>(INST # UWDr) f64:$Rn)>;
6583+
def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
6584+
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;
6585+
6586+
// For global-isel we can use register classes to determine
6587+
// which FCVT instruction to use.
6588+
let Predicates = [HasFPRCVT] in {
6589+
def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
6590+
(!cast<Instruction>(INST # SHr) f16:$Rn)>;
6591+
def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
6592+
(!cast<Instruction>(INST # DHr) f16:$Rn)>;
6593+
def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
6594+
(!cast<Instruction>(INST # DSr) f32:$Rn)>;
6595+
def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
6596+
(!cast<Instruction>(INST # SDr) f64:$Rn)>;
6597+
}
6598+
def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
6599+
(!cast<Instruction>(INST # v1i32) f32:$Rn)>;
6600+
def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
6601+
(!cast<Instruction>(INST # v1i64) f64:$Rn)>;
6602+
6603+
let Predicates = [HasFPRCVT] in {
6604+
def : Pat<(f32 (bitconvert (i32 (to_int_sat f16:$Rn, i32)))),
6605+
(!cast<Instruction>(INST # SHr) f16:$Rn)>;
6606+
def : Pat<(f64 (bitconvert (i64 (to_int_sat f16:$Rn, i64)))),
6607+
(!cast<Instruction>(INST # DHr) f16:$Rn)>;
6608+
def : Pat<(f64 (bitconvert (i64 (to_int_sat f32:$Rn, i64)))),
6609+
(!cast<Instruction>(INST # DSr) f32:$Rn)>;
6610+
def : Pat<(f32 (bitconvert (i32 (to_int_sat f64:$Rn, i32)))),
6611+
(!cast<Instruction>(INST # SDr) f64:$Rn)>;
6612+
6613+
def : Pat<(f32 (bitconvert (i32 (to_int_sat_gi f16:$Rn)))),
6614+
(!cast<Instruction>(INST # SHr) f16:$Rn)>;
6615+
def : Pat<(f64 (bitconvert (i64 (to_int_sat_gi f16:$Rn)))),
6616+
(!cast<Instruction>(INST # DHr) f16:$Rn)>;
6617+
def : Pat<(f64 (bitconvert (i64 (to_int_sat_gi f32:$Rn)))),
6618+
(!cast<Instruction>(INST # DSr) f32:$Rn)>;
6619+
def : Pat<(f32 (bitconvert (i32 (to_int_sat_gi f64:$Rn)))),
6620+
(!cast<Instruction>(INST # SDr) f64:$Rn)>;
6621+
}
6622+
def : Pat<(f32 (bitconvert (i32 (to_int_sat f32:$Rn, i32)))),
6623+
(!cast<Instruction>(INST # v1i32) f32:$Rn)>;
6624+
def : Pat<(f64 (bitconvert (i64 (to_int_sat f64:$Rn, i64)))),
6625+
(!cast<Instruction>(INST # v1i64) f64:$Rn)>;
6626+
6627+
def : Pat<(f32 (bitconvert (i32 (to_int_sat_gi f32:$Rn)))),
6628+
(!cast<Instruction>(INST # v1i32) f32:$Rn)>;
6629+
def : Pat<(f64 (bitconvert (i64 (to_int_sat_gi f64:$Rn)))),
6630+
(!cast<Instruction>(INST # v1i64) f64:$Rn)>;
6631+
6632+
let Predicates = [HasFullFP16] in {
6633+
def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
6634+
(!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
6635+
def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
6636+
(!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
6637+
}
6638+
def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
6639+
(!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
6640+
def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
6641+
(!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
6642+
def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
6643+
(!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
6644+
def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
6645+
(!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
6646+
6647+
let Predicates = [HasFullFP16] in {
6648+
def : Pat<(i32 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
6649+
(!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
6650+
def : Pat<(i64 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
6651+
(!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
6652+
}
6653+
def : Pat<(i32 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
6654+
(!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
6655+
def : Pat<(i64 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
6656+
(!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
6657+
def : Pat<(i32 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
6658+
(!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
6659+
def : Pat<(i64 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
6660+
(!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
6661+
}
6662+
6663+
defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
6664+
defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
6665+
6666+
multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
6667+
def : Pat<(i32 (to_int (round f32:$Rn))),
6668+
(!cast<Instruction>(INST # UWSr) f32:$Rn)>;
6669+
def : Pat<(i64 (to_int (round f32:$Rn))),
6670+
(!cast<Instruction>(INST # UXSr) f32:$Rn)>;
6671+
def : Pat<(i32 (to_int (round f64:$Rn))),
6672+
(!cast<Instruction>(INST # UWDr) f64:$Rn)>;
6673+
def : Pat<(i64 (to_int (round f64:$Rn))),
6674+
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;
6675+
6676+
// For global-isel we can use register classes to determine
6677+
// which FCVT instruction to use.
6678+
def : Pat<(i32 (to_int (round f32:$Rn))),
6679+
(!cast<Instruction>(INST # v1i32) f32:$Rn)>;
6680+
let Predicates = [HasFPRCVT] in {
6681+
def : Pat<(i64 (to_int (round f32:$Rn))),
6682+
(!cast<Instruction>(INST # DSr) f32:$Rn)>;
6683+
def : Pat<(i32 (to_int (round f64:$Rn))),
6684+
(!cast<Instruction>(INST # SDr) f64:$Rn)>;
6685+
}
6686+
def : Pat<(i64 (to_int (round f64:$Rn))),
6687+
(!cast<Instruction>(INST # v1i64) f64:$Rn)>;
6688+
6689+
let Predicates = [HasFPRCVT] in {
6690+
def : Pat<(f64 (bitconvert (i64 (to_int (round f32:$Rn))))),
6691+
(!cast<Instruction>(INST # DSr) f32:$Rn)>;
6692+
def : Pat<(f32 (bitconvert (i32 (to_int (round f64:$Rn))))),
6693+
(!cast<Instruction>(INST # SDr) f64:$Rn)>;
6694+
}
6695+
def : Pat<(f32 (bitconvert (i32 (to_int (round f32:$Rn))))),
6696+
(!cast<Instruction>(INST # v1i32) f32:$Rn)>;
6697+
def : Pat<(f64 (bitconvert (i64 (to_int (round f64:$Rn))))),
6698+
(!cast<Instruction>(INST # v1i64) f64:$Rn)>;
6699+
6700+
// These instructions saturate like fp_to_[su]int_sat, so a rounding operation
// feeding a saturating conversion selects to the same single instruction.
6701+
let Predicates = [HasFullFP16] in {
6702+
def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
6703+
(!cast<Instruction>(INST # UWHr) f16:$Rn)>;
6704+
def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
6705+
(!cast<Instruction>(INST # UXHr) f16:$Rn)>;
6706+
}
6707+
def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
6708+
(!cast<Instruction>(INST # UWSr) f32:$Rn)>;
6709+
def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
6710+
(!cast<Instruction>(INST # UXSr) f32:$Rn)>;
6711+
def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
6712+
(!cast<Instruction>(INST # UWDr) f64:$Rn)>;
6713+
def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
6714+
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;
6715+
6716+
let Predicates = [HasFPRCVT] in {
6717+
def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f16:$Rn), i32)))),
6718+
(!cast<Instruction>(INST # SHr) f16:$Rn)>;
6719+
def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f16:$Rn), i64)))),
6720+
(!cast<Instruction>(INST # DHr) f16:$Rn)>;
6721+
def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f32:$Rn), i64)))),
6722+
(!cast<Instruction>(INST # DSr) f32:$Rn)>;
6723+
def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f64:$Rn), i32)))),
6724+
(!cast<Instruction>(INST # SDr) f64:$Rn)>;
6725+
}
6726+
def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f32:$Rn), i32)))),
6727+
(!cast<Instruction>(INST # v1i32) f32:$Rn)>;
6728+
def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f64:$Rn), i64)))),
6729+
(!cast<Instruction>(INST # v1i64) f64:$Rn)>;
6730+
}
6731+
6732+
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil, "FCVTPS">;
6733+
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil, "FCVTPU">;
6734+
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
6735+
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
6736+
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
6737+
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
6738+
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
6739+
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;
6740+
66556741
// f16 -> s16 conversions
66566742
let Predicates = [HasFullFP16] in {
66576743
def : Pat<(i16(fp_to_sint_sat_gi f16:$Rn)), (FCVTZSv1f16 f16:$Rn)>;

llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -852,19 +852,29 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
852852
break;
853853
}
854854
case TargetOpcode::G_FPTOSI_SAT:
855-
case TargetOpcode::G_FPTOUI_SAT: {
855+
case TargetOpcode::G_FPTOUI_SAT:
856+
case TargetOpcode::G_FPTOSI:
857+
case TargetOpcode::G_FPTOUI: {
856858
LLT DstType = MRI.getType(MI.getOperand(0).getReg());
857859
if (DstType.isVector())
858860
break;
859861
if (DstType == LLT::scalar(16)) {
860862
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
861863
break;
862864
}
863-
OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
865+
TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
866+
TypeSize SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, TRI);
867+
if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) &&
868+
all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
869+
[&](const MachineInstr &UseMI) {
870+
return onlyUsesFP(UseMI, MRI, TRI) ||
871+
prefersFPUse(UseMI, MRI, TRI);
872+
}))
873+
OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR};
874+
else
875+
OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
864876
break;
865877
}
866-
case TargetOpcode::G_FPTOSI:
867-
case TargetOpcode::G_FPTOUI:
868878
case TargetOpcode::G_INTRINSIC_LRINT:
869879
case TargetOpcode::G_INTRINSIC_LLRINT:
870880
if (MRI.getType(MI.getOperand(0).getReg()).isVector())

llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ body: |
9696
; CHECK-NEXT: [[SITOFP:%[0-9]+]]:fpr(s32) = G_SITOFP [[COPY1]](s32)
9797
; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr(s32) = COPY [[COPY2]](s32)
9898
; CHECK-NEXT: [[SELECT:%[0-9]+]]:fpr(s32) = G_SELECT [[COPY2]](s32), [[COPY3]], [[SITOFP]]
99-
; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:gpr(s32) = G_FPTOSI [[SELECT]](s32)
99+
; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:fpr(s32) = G_FPTOSI [[SELECT]](s32)
100100
%0:_(s32) = COPY $w0
101101
%2:_(s32) = COPY $w1
102102
%3:_(s32) = COPY $w2

0 commit comments

Comments
 (0)