-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AArch64][GlobalISel] SIMD fpcvt codegen for fptoi(_sat) #160831
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-globalisel Author: None (Lukacma) ChangesThis is followup patch to #157680, which allows simd fpcvt instructions to be generated from fptoi(_sat) nodes. Patch is 120.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160831.diff 8 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 78d683a4b4256..957d28a1ec308 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -5301,28 +5301,29 @@ multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
}
}
-multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm> {
+multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm,
+ SDPatternOperator OpN> {
// double-precision to 32-bit SIMD/FPR
def SDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, FPR32, asm,
- []> {
+ [(set FPR32:$Rd, (i32 (OpN (f64 FPR64:$Rn))))]> {
let Inst{31} = 0; // 32-bit FPR flag
}
// half-precision to 32-bit SIMD/FPR
def SHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR32, asm,
- []> {
+ [(set FPR32:$Rd, (i32 (OpN (f16 FPR16:$Rn))))]> {
let Inst{31} = 0; // 32-bit FPR flag
}
// half-precision to 64-bit SIMD/FPR
def DHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR64, asm,
- []> {
+ [(set FPR64:$Rd, (i64 (OpN (f16 FPR16:$Rn))))]> {
let Inst{31} = 1; // 64-bit FPR flag
}
// single-precision to 64-bit SIMD/FPR
def DSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, FPR64, asm,
- []> {
+ [(set FPR64:$Rd, (i64 (OpN (f32 FPR32:$Rn))))]> {
let Inst{31} = 1; // 64-bit FPR flag
}
}
@@ -7940,14 +7941,18 @@ multiclass SIMDTwoScalarD<bit U, bits<5> opc, string asm,
}
}
-let mayRaiseFPException = 1, Uses = [FPCR] in
-multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm> {
+let mayRaiseFPException = 1, Uses = [FPCR], FastISelShouldIgnore = 1 in
+multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm,
+ SDPatternOperator OpN = null_frag> {
let Predicates = [HasNEONandIsStreamingSafe] in {
- def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,[]>;
- def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,[]>;
+ def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,
+ [(set (i64 FPR64:$Rd), (OpN (f64 FPR64:$Rn)))]>;
+ def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,
+ [(set FPR32:$Rd, (i32 (OpN (f32 FPR32:$Rn))))]>;
}
let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
- def v1f16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,[]>;
+ def v1f16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,
+ [(set FPR16:$Rd, (i16 (OpN (f16 FPR16:$Rn))))]>;
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 980636c1b562b..f45816b7fcff5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5231,149 +5231,18 @@ defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
let Predicates = [HasNEON, HasFPRCVT] in{
- defm FCVTAS : FPToIntegerSIMDScalar<0b11, 0b010, "fcvtas">;
- defm FCVTAU : FPToIntegerSIMDScalar<0b11, 0b011, "fcvtau">;
- defm FCVTMS : FPToIntegerSIMDScalar<0b10, 0b100, "fcvtms">;
- defm FCVTMU : FPToIntegerSIMDScalar<0b10, 0b101, "fcvtmu">;
- defm FCVTNS : FPToIntegerSIMDScalar<0b01, 0b010, "fcvtns">;
- defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu">;
- defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps">;
- defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu">;
- defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs">;
- defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu">;
+ defm FCVTAS : FPToIntegerSIMDScalar<0b11, 0b010, "fcvtas", int_aarch64_neon_fcvtas>;
+ defm FCVTAU : FPToIntegerSIMDScalar<0b11, 0b011, "fcvtau", int_aarch64_neon_fcvtau>;
+ defm FCVTMS : FPToIntegerSIMDScalar<0b10, 0b100, "fcvtms", int_aarch64_neon_fcvtms>;
+ defm FCVTMU : FPToIntegerSIMDScalar<0b10, 0b101, "fcvtmu", int_aarch64_neon_fcvtmu>;
+ defm FCVTNS : FPToIntegerSIMDScalar<0b01, 0b010, "fcvtns", int_aarch64_neon_fcvtns>;
+ defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu", int_aarch64_neon_fcvtnu>;
+ defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps", int_aarch64_neon_fcvtps>;
+ defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu", int_aarch64_neon_fcvtpu>;
+ defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs", any_fp_to_sint>;
+ defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu", any_fp_to_uint>;
}
-// AArch64's FCVT instructions saturate when out of range.
-multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
- (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
- def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
- (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
- }
- def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
- (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
- def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
- (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
- def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
- (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
- def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
- (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
- (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
- def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
- (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
- }
- def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
- (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
- def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
- (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
- def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
- (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
- def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
- (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
- (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
- (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
- }
- def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
- (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
- (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
- def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
- (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
- (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
-
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
- (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat_gi (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
- (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
- }
- def : Pat<(i32 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
- (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat_gi (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
- (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
- def : Pat<(i32 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
- (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
- def : Pat<(i64 (to_int_sat_gi (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
- (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
-}
-
-defm : FPToIntegerSatPats<fp_to_sint_sat, fp_to_sint_sat_gi, "FCVTZS">;
-defm : FPToIntegerSatPats<fp_to_uint_sat, fp_to_uint_sat_gi, "FCVTZU">;
-
-multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
- def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
- }
- def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
- def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
- def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
- def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;
-
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
- (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
- def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
- (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
- }
- def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
- (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
- def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
- (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
- def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
- (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
- def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
- (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
-}
-
-defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
-defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;
-
-multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
- def : Pat<(i32 (to_int (round f32:$Rn))),
- (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
- def : Pat<(i64 (to_int (round f32:$Rn))),
- (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
- def : Pat<(i32 (to_int (round f64:$Rn))),
- (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
- def : Pat<(i64 (to_int (round f64:$Rn))),
- (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-
- // These instructions saturate like fp_to_[su]int_sat.
- let Predicates = [HasFullFP16] in {
- def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
- (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
- def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
- (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
- }
- def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
- (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
- def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
- (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
- def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
- (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
- def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
- (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
-}
-
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil, "FCVTPS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil, "FCVTPU">;
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
-defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
-defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;
-
let Predicates = [HasFullFP16] in {
@@ -6572,17 +6441,17 @@ defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
-defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">;
-defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">;
-defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">;
-defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">;
-defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">;
-defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">;
-defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">;
-defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">;
+defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas", int_aarch64_neon_fcvtas>;
+defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau", int_aarch64_neon_fcvtau>;
+defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms", int_aarch64_neon_fcvtms>;
+defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu", int_aarch64_neon_fcvtmu>;
+defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns", int_aarch64_neon_fcvtns>;
+defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu", int_aarch64_neon_fcvtnu>;
+defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps", int_aarch64_neon_fcvtps>;
+defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu", int_aarch64_neon_fcvtpu>;
def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
-defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
-defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
+defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
+defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;
defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">;
defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">;
defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">;
@@ -6600,6 +6469,275 @@ defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar
defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
int_aarch64_neon_usqadd>;
+// Floating-point conversion patterns.
+multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> {
+ def : Pat<(f32 (bitconvert (i32 (OpN (f64 FPR64:$Rn))))),
+ (!cast<Instruction>(INST # SDr) FPR64:$Rn)>;
+ def : Pat<(f32 (bitconvert (i32 (OpN (f16 FPR16:$Rn))))),
+ (!cast<Instruction>(INST # SHr) FPR16:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (OpN (f16 FPR16:$Rn))))),
+ (!cast<Instruction>(INST # DHr) FPR16:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (OpN (f32 FPR32:$Rn))))),
+ (!cast<Instruction>(INST # DSr) FPR32:$Rn)>;
+ def : Pat<(f32 (bitconvert (i32 (OpN (f32 FPR32:$Rn))))),
+ (!cast<Instruction>(INST # v1i32) FPR32:$Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (OpN (f64 FPR64:$Rn))))),
+ (!cast<Instruction>(INST # v1i64) FPR64:$Rn)>;
+
+}
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtas, "FCVTAS">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtau, "FCVTAU">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtms, "FCVTMS">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtmu, "FCVTMU">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtns, "FCVTNS">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtnu, "FCVTNU">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtps, "FCVTPS">;
+defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtpu, "FCVTPU">;
+defm: FPToIntegerSIMDScalarPatterns<any_fp_to_sint, "FCVTZS">;
+defm: FPToIntegerSIMDScalarPatterns<any_fp_to_uint, "FCVTZU">;
+
+multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
+ def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
+ }
+ def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
+ def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
+ def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
+ def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;
+
+ // For global-isel we can use register classes to determine
+ // which FCVT instruction to use.
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # SHr) $Rn)>;
+ def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # DHr) $Rn)>;
+ def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # DSr) $Rn)>;
+ def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # SDr) $Rn)>;
+ }
+ def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # v1i32) $Rn)>;
+ def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # v1i64) $Rn)>;
+
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(f32 (bitconvert (i32 (round f16:$Rn)))),
+ (!cast<Instruction>(INST # SHr) $Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (round f16:$Rn)))),
+ (!cast<Instruction>(INST # DHr) $Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (round f32:$Rn)))),
+ (!cast<Instruction>(INST # DSr) $Rn)>;
+ def : Pat<(f32 (bitconvert (i32 (round f64:$Rn)))),
+ (!cast<Instruction>(INST # SDr) $Rn)>;
+ }
+ def : Pat<(f32 (bitconvert (i32 (round f32:$Rn)))),
+ (!cast<Instruction>(INST # v1i32) $Rn)>;
+ def : Pat<(f64 (bitconvert (i64 (round f64:$Rn)))),
+ (!cast<Instruction>(INST # v1i64) $Rn)>;
+
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
+ (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
+ def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
+ (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+ }
+ def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
+ (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
+ def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
+ (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
+ def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
+ (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
+ def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
+ (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
+}
+
+defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
+defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;
+
+// AArch64's FCVT instructions saturate when out of range.
+multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
+ (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
+ (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+ }
+ def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
+ (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+ def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
+ (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
+ (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+ def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
+ (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
+ (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
+ (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+ }
+ def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
+ (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
+ (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
+ (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
+ (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+ // For global-isel we can use register classes to determine
+ // which FCVT instruction to use.
+ let Predicates = [HasFPRCVT] in {
+ def : Pat<(i32 (to_int_sat_gi f16:$Rn)),
+ (!cast<Instruction>(INST # SHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f16:$Rn)),
+ (!cast<Instruction>(INST # DHr) f16:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f32:$Rn)),
+ (!cast<Instruction>(INST # DSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int_sat_gi f64:$Rn)),
+ (!cast<Instruction>(INST # SDr) f64:$Rn)>;
+ }
+ def : Pat<(i32 (to_int_sat_gi f32:$Rn)),
+ (!cast<Instruction>(INST # v1i32) f32:$Rn)>;
+ def : Pat<(i64 (to_int_sat_gi f64:$Rn)),
+ ...
[truncated]
|
CarolineConcatto
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I noticed that we added some patterns for globalISel that I am not sure will ever be used.
If they will be used can you give maybe a comment on them. I usually dont like to add patterns that are not followed by a tests in a PR
| ; CHECK-GI-LABEL: fcvtzu_1d: | ||
| ; CHECK-GI: // %bb.0: | ||
| ; CHECK-GI-NEXT: fcvtzu d0, d0 | ||
| ; CHECK-GI-NEXT: ret |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When we will be able to optimise this for the selection DAG?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not the case, I am handling as part of this work. In this series I am working on fixing scalar types. So fixing this would be separate work.
| def : Pat<(f32 (bitconvert (i32 (to_int_sat f64:$Rn, i32)))), | ||
| (!cast<Instruction>(INST # SDr) f64:$Rn)>; | ||
|
|
||
| def : Pat<(f32 (bitconvert (i32 (to_int_sat_gi f16:$Rn)))), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't understand why we need this patterns, it should all be covered 6589 to 6596 and also the other patter for globalIsel with neon vector 6622 to 6625
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes these patterns are redundant. I forgot to remove them when adding the ones at the top.
| (!cast<Instruction>(INST # UXDr) f64:$Rn)>; | ||
|
|
||
| // For global-isel we can use register classes to determine | ||
| // which FCVT instruction to use. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I dont think these patterns are being used by globalised
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You are right. I will remove them.
| def : Pat<(i32 (to_int_sat_gi f64:$Rn)), | ||
| (!cast<Instruction>(INST # SDr) f64:$Rn)>; | ||
| } | ||
| def : Pat<(i32 (to_int_sat_gi f32:$Rn)), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am not sure why this is better than what we had before
fcvtzs v0.2s, v0.2s with the patterns becomes fcvtzs s0, s0
Do you have any reason to replace that?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you give an example LLVM-IR here, please ? I tried removing the highlighted patterns and I have never gotten fcvtzs v0.2s, v0.2s to be emitted.
| ; CHECK-SD-NEXT: fcvtas d0, d0 | ||
| ; CHECK-SD-NEXT: ret | ||
| ; | ||
| ; CHECK-GI-LABEL: fcvtas_dd_simd: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So if you remove the patterns in line 6598 this code is better for Global ISel.
I don't see the str and ldr only fcvtas d0, d0
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same case as above
| ; CHECK-SD-NEXT: fcvtas d0, s0 | ||
| ; CHECK-SD-NEXT: ret | ||
| ; | ||
| ; CHECK-GI-LABEL: fcvtas_ds_round_simd: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This code can be improved, I am not sure why it is adding str and ldr, but I can see if we remove some of the patterns this go away.
For instance this one:
defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;
Looks like it is adding the ldr and str.
Is there a way so you can remove it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So what you are observing here is just GlobalISel falling back to SelectionDAG. With the patterns removed GlobalISel doesn't know how to lower these nodes, so it fails and because you probably had global-isel-abort enabled it fallsback onto SelectionDAG and produces the optimal code. If you look at for example fcvtas_ds_round_simd you can see that ldr and str are present there even with patterns removed. I think its just that GlobalISel isn't capable of merging the rounding operation with cvt yet so it just generates call. Fixing this is beyond the scope of this patch I think
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you change all these to use @llvm.round, as opposed to the @roundf functions?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks that made it work with GlobalISel after adding patterns. Is using these llvm intrinsics the preferred way for handling rounding library calls or is there still value in testing with roundf?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe the frontend produces the llvm.round intrinsics. It could be a missed optimization to not have gisel do the same thing, but gisel needn't do it if it should be done elsewhere, like the mid/front-end.
Lukacma
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the comments. Thanks to them I noticed I have forgotten to properly feature guard patterns! I now added a check for that as well
| ; CHECK-SD-NEXT: fcvtas d0, s0 | ||
| ; CHECK-SD-NEXT: ret | ||
| ; | ||
| ; CHECK-GI-LABEL: fcvtas_ds_round_simd: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So what you are observing here is just GlobalISel falling back to SelectionDAG. With the patterns removed GlobalISel doesn't know how to lower these nodes, so it fails and because you probably had global-isel-abort enabled it fallsback onto SelectionDAG and produces the optimal code. If you look at for example fcvtas_ds_round_simd you can see that ldr and str are present there even with patterns removed. I think its just that GlobalISel isn't capable of merging the rounding operation with cvt yet so it just generates call. Fixing this is beyond the scope of this patch I think
| ; CHECK-SD-NEXT: fcvtas d0, d0 | ||
| ; CHECK-SD-NEXT: ret | ||
| ; | ||
| ; CHECK-GI-LABEL: fcvtas_dd_simd: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same case as above
| def : Pat<(i32 (to_int_sat_gi f64:$Rn)), | ||
| (!cast<Instruction>(INST # SDr) f64:$Rn)>; | ||
| } | ||
| def : Pat<(i32 (to_int_sat_gi f32:$Rn)), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you give an example LLVM-IR here, please ? I tried removing the highlighted patterns and I have never gotten fcvtzs v0.2s, v0.2s to be emitted.
| ; CHECK-SD-NEXT: fcvtas d0, s0 | ||
| ; CHECK-SD-NEXT: ret | ||
| ; | ||
| ; CHECK-GI-LABEL: fcvtas_ds_round_simd: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you change all these to use @llvm.round, as opposed to the @roundf functions?
davemgreen
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This is followup patch to llvm#157680, which allows simd fpcvt instructions to be generated from fptoi(_sat) nodes.
This is followup patch to llvm#157680, which allows simd fpcvt instructions to be generated from fptoi(_sat) nodes.
This is followup patch to llvm#157680, which allows simd fpcvt instructions to be generated from fptoi(_sat) nodes.
This is followup patch to #157680, which allows simd fpcvt instructions to be generated from fptoi(_sat) nodes.