From 8534e98a2e0b86ca6849f8213380b70b37cb176c Mon Sep 17 00:00:00 2001 From: Marian Lukac Date: Thu, 4 Sep 2025 13:04:57 +0000 Subject: [PATCH 1/4] [AArch64][GlobalISel] Add codegen for simd fpcvt instructions --- .../lib/Target/AArch64/AArch64InstrFormats.td | 26 +- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 251 +++- .../AArch64/GISel/AArch64RegisterBankInfo.cpp | 54 +- .../AArch64/GlobalISel/regbank-fp-use-def.mir | 2 +- .../AArch64/GlobalISel/regbank-llround.mir | 4 +- .../AArch64/GlobalISel/regbank-lround.mir | 4 +- .../CodeGen/AArch64/arm64-cvt-simd-fptoi.ll | 1306 +++++++++++++++++ .../AArch64/arm64-cvt-simd-intrinsics.ll | 612 ++++++++ .../AArch64/arm64-cvt-simd-round-rint.ll | 428 ++++++ llvm/test/CodeGen/AArch64/arm64-neon-copy.ll | 57 +- llvm/test/CodeGen/AArch64/arm64-vcvt.ll | 22 +- .../test/CodeGen/AArch64/fptosi-sat-vector.ll | 21 +- .../test/CodeGen/AArch64/fptoui-sat-vector.ll | 21 +- llvm/test/CodeGen/AArch64/vector-lrint.ll | 18 +- 14 files changed, 2735 insertions(+), 91 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll create mode 100644 llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll create mode 100644 llvm/test/CodeGen/AArch64/arm64-cvt-simd-round-rint.ll diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 8958ad129269c..690cb5500875f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -5299,28 +5299,29 @@ multiclass FPToIntegerUnscaled rmode, bits<3> opcode, string asm, } } -multiclass FPToIntegerSIMDScalar rmode, bits<3> opcode, string asm> { +multiclass FPToIntegerSIMDScalar rmode, bits<3> opcode, string asm, + SDPatternOperator OpN> { // double-precision to 32-bit SIMD/FPR def SDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, FPR32, asm, - []> { + [(set FPR32:$Rd, (i32 (OpN (f64 FPR64:$Rn))))]> { let Inst{31} = 0; // 32-bit FPR flag } // half-precision to 32-bit 
SIMD/FPR def SHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR32, asm, - []> { + [(set FPR32:$Rd, (i32 (OpN (f16 FPR16:$Rn))))]> { let Inst{31} = 0; // 32-bit FPR flag } // half-precision to 64-bit SIMD/FPR def DHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR64, asm, - []> { + [(set FPR64:$Rd, (i64 (OpN (f16 FPR16:$Rn))))]> { let Inst{31} = 1; // 64-bit FPR flag } // single-precision to 64-bit SIMD/FPR def DSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, FPR64, asm, - []> { + [(set FPR64:$Rd, (i64 (OpN (f32 FPR32:$Rn))))]> { let Inst{31} = 1; // 64-bit FPR flag } } @@ -7949,6 +7950,21 @@ multiclass SIMDFPTwoScalar opc, string asm> { } } +let mayRaiseFPException = 1, Uses = [FPCR] in +multiclass SIMDFPTwoScalarFCVT opc, string asm, + SDPatternOperator OpN> { + let Predicates = [HasNEONandIsStreamingSafe], FastISelShouldIgnore = 1 in { + def v1i64 : BaseSIMDTwoScalar; + def v1i32 : BaseSIMDTwoScalar; + } + let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in { + def v1f16 : BaseSIMDTwoScalar; + } +} + let mayRaiseFPException = 1, Uses = [FPCR] in multiclass SIMDFPTwoScalarCVT opc, string asm, SDPatternOperator OpNode> { diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 62b26b5239365..34e55dcafcd06 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -5212,18 +5212,55 @@ defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>; defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>; defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>; +defm FCVTAS : SIMDFPTwoScalarFCVT< 0, 0, 0b11100, "fcvtas", int_aarch64_neon_fcvtas>; +defm FCVTAU : SIMDFPTwoScalarFCVT< 1, 0, 0b11100, "fcvtau", int_aarch64_neon_fcvtau>; +defm FCVTMS : SIMDFPTwoScalarFCVT< 0, 0, 0b11011, "fcvtms", int_aarch64_neon_fcvtms>; +defm FCVTMU : SIMDFPTwoScalarFCVT< 1, 0, 0b11011, "fcvtmu", 
int_aarch64_neon_fcvtmu>; +defm FCVTNS : SIMDFPTwoScalarFCVT< 0, 0, 0b11010, "fcvtns", int_aarch64_neon_fcvtns>; +defm FCVTNU : SIMDFPTwoScalarFCVT< 1, 0, 0b11010, "fcvtnu", int_aarch64_neon_fcvtnu>; +defm FCVTPS : SIMDFPTwoScalarFCVT< 0, 1, 0b11010, "fcvtps", int_aarch64_neon_fcvtps>; +defm FCVTPU : SIMDFPTwoScalarFCVT< 1, 1, 0b11010, "fcvtpu", int_aarch64_neon_fcvtpu>; +defm FCVTZS : SIMDFPTwoScalarFCVT< 0, 1, 0b11011, "fcvtzs", any_fp_to_sint>; +defm FCVTZU : SIMDFPTwoScalarFCVT< 1, 1, 0b11011, "fcvtzu", any_fp_to_uint>; + let Predicates = [HasNEON, HasFPRCVT] in{ - defm FCVTAS : FPToIntegerSIMDScalar<0b11, 0b010, "fcvtas">; - defm FCVTAU : FPToIntegerSIMDScalar<0b11, 0b011, "fcvtau">; - defm FCVTMS : FPToIntegerSIMDScalar<0b10, 0b100, "fcvtms">; - defm FCVTMU : FPToIntegerSIMDScalar<0b10, 0b101, "fcvtmu">; - defm FCVTNS : FPToIntegerSIMDScalar<0b01, 0b010, "fcvtns">; - defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu">; - defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps">; - defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu">; - defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs">; - defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu">; -} + defm FCVTAS : FPToIntegerSIMDScalar<0b11, 0b010, "fcvtas", int_aarch64_neon_fcvtas>; + defm FCVTAU : FPToIntegerSIMDScalar<0b11, 0b011, "fcvtau", int_aarch64_neon_fcvtau>; + defm FCVTMS : FPToIntegerSIMDScalar<0b10, 0b100, "fcvtms", int_aarch64_neon_fcvtms>; + defm FCVTMU : FPToIntegerSIMDScalar<0b10, 0b101, "fcvtmu", int_aarch64_neon_fcvtmu>; + defm FCVTNS : FPToIntegerSIMDScalar<0b01, 0b010, "fcvtns", int_aarch64_neon_fcvtns>; + defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu", int_aarch64_neon_fcvtnu>; + defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps", int_aarch64_neon_fcvtps>; + defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu", int_aarch64_neon_fcvtpu>; + defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs", any_fp_to_sint>; + defm FCVTZU : 
FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu", any_fp_to_uint>; +} + +multiclass FPToIntegerSIMDScalarPatterns { + def : Pat<(f32 (bitconvert (i32 (OpN (f64 FPR64:$Rn))))), + (!cast(INST # SDr) FPR64:$Rn)>; + def : Pat<(f32 (bitconvert (i32 (OpN (f16 FPR16:$Rn))))), + (!cast(INST # SHr) FPR16:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (OpN (f16 FPR16:$Rn))))), + (!cast(INST # DHr) FPR16:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (OpN (f32 FPR32:$Rn))))), + (!cast(INST # DSr) FPR32:$Rn)>; + def : Pat<(f32 (bitconvert (i32 (OpN (f32 FPR32:$Rn))))), + (!cast(INST # v1i32) FPR32:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (OpN (f64 FPR64:$Rn))))), + (!cast(INST # v1i64) FPR64:$Rn)>; + +} +defm: FPToIntegerSIMDScalarPatterns; +defm: FPToIntegerSIMDScalarPatterns; +defm: FPToIntegerSIMDScalarPatterns; +defm: FPToIntegerSIMDScalarPatterns; +defm: FPToIntegerSIMDScalarPatterns; +defm: FPToIntegerSIMDScalarPatterns; +defm: FPToIntegerSIMDScalarPatterns; +defm: FPToIntegerSIMDScalarPatterns; +defm: FPToIntegerSIMDScalarPatterns; +defm: FPToIntegerSIMDScalarPatterns; // AArch64's FCVT instructions saturate when out of range. multiclass FPToIntegerSatPats { @@ -5257,6 +5294,52 @@ multiclass FPToIntegerSatPats(INST # UXDr) f64:$Rn)>; + // For global-isel we can use register classes to determine + // which FCVT instruction to use. 
+ let Predicates = [HasFPRCVT] in { + def : Pat<(i32 (to_int_sat_gi f16:$Rn)), + (!cast(INST # SHr) f16:$Rn)>; + def : Pat<(i64 (to_int_sat_gi f16:$Rn)), + (!cast(INST # DHr) f16:$Rn)>; + def : Pat<(i64 (to_int_sat_gi f32:$Rn)), + (!cast(INST # DSr) f32:$Rn)>; + def : Pat<(i32 (to_int_sat_gi f64:$Rn)), + (!cast(INST # SDr) f64:$Rn)>; + } + def : Pat<(i32 (to_int_sat_gi f32:$Rn)), + (!cast(INST # v1i32) f32:$Rn)>; + def : Pat<(i64 (to_int_sat_gi f64:$Rn)), + (!cast(INST # v1i64) f64:$Rn)>; + + let Predicates = [HasFPRCVT] in { + def : Pat<(f32 (bitconvert (i32 (to_int_sat f16:$Rn, i32)))), + (!cast(INST # SHr) f16:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (to_int_sat f16:$Rn, i64)))), + (!cast(INST # DHr) f16:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (to_int_sat f32:$Rn, i64)))), + (!cast(INST # DSr) f32:$Rn)>; + def : Pat<(f32 (bitconvert (i32 (to_int_sat f64:$Rn, i32)))), + (!cast(INST # SDr) f64:$Rn)>; + + def : Pat<(f32 (bitconvert (i32 (to_int_sat_gi f16:$Rn)))), + (!cast(INST # SHr) f16:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (to_int_sat_gi f16:$Rn)))), + (!cast(INST # DHr) f16:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (to_int_sat_gi f32:$Rn)))), + (!cast(INST # DSr) f32:$Rn)>; + def : Pat<(f32 (bitconvert (i32 (to_int_sat_gi f64:$Rn)))), + (!cast(INST # SDr) f64:$Rn)>; + } + def : Pat<(f32 (bitconvert (i32 (to_int_sat f32:$Rn, i32)))), + (!cast(INST # v1i32) f32:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (to_int_sat f64:$Rn, i64)))), + (!cast(INST # v1i64) f64:$Rn)>; + + def : Pat<(f32 (bitconvert (i32 (to_int_sat_gi f32:$Rn)))), + (!cast(INST # v1i32) f32:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (to_int_sat_gi f64:$Rn)))), + (!cast(INST # v1i64) f64:$Rn)>; + let Predicates = [HasFullFP16] in { def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)), (!cast(INST # SWHri) $Rn, $scale)>; @@ -5301,6 +5384,32 @@ multiclass FPToIntegerIntPats { def : Pat<(i32 (round f64:$Rn)), (!cast(INST # UWDr) $Rn)>; def : Pat<(i64 (round f64:$Rn)), (!cast(INST 
# UXDr) $Rn)>; + // For global-isel we can use register classes to determine + // which FCVT instruction to use. + let Predicates = [HasFPRCVT] in { + def : Pat<(i32 (round f16:$Rn)), (!cast(INST # SHr) $Rn)>; + def : Pat<(i64 (round f16:$Rn)), (!cast(INST # DHr) $Rn)>; + def : Pat<(i64 (round f32:$Rn)), (!cast(INST # DSr) $Rn)>; + def : Pat<(i32 (round f64:$Rn)), (!cast(INST # SDr) $Rn)>; + } + def : Pat<(i32 (round f32:$Rn)), (!cast(INST # v1i32) $Rn)>; + def : Pat<(i64 (round f64:$Rn)), (!cast(INST # v1i64) $Rn)>; + + let Predicates = [HasFPRCVT] in { + def : Pat<(f32 (bitconvert (i32 (round f16:$Rn)))), + (!cast(INST # SHr) $Rn)>; + def : Pat<(f64 (bitconvert (i64 (round f16:$Rn)))), + (!cast(INST # DHr) $Rn)>; + def : Pat<(f64 (bitconvert (i64 (round f32:$Rn)))), + (!cast(INST # DSr) $Rn)>; + def : Pat<(f32 (bitconvert (i32 (round f64:$Rn)))), + (!cast(INST # SDr) $Rn)>; + } + def : Pat<(f32 (bitconvert (i32 (round f32:$Rn)))), + (!cast(INST # v1i32) $Rn)>; + def : Pat<(f64 (bitconvert (i64 (round f64:$Rn)))), + (!cast(INST # v1i64) $Rn)>; + let Predicates = [HasFullFP16] in { def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))), (!cast(INST # SWHri) $Rn, $scale)>; @@ -5330,6 +5439,30 @@ multiclass FPToIntegerPats(INST # UXDr) f64:$Rn)>; + // For global-isel we can use register classes to determine + // which FCVT instruction to use. 
+ def : Pat<(i32 (to_int (round f32:$Rn))), + (!cast(INST # v1i32) f32:$Rn)>; + let Predicates = [HasFPRCVT] in { + def : Pat<(i64 (to_int (round f32:$Rn))), + (!cast(INST # DSr) f32:$Rn)>; + def : Pat<(i32 (to_int (round f64:$Rn))), + (!cast(INST # SDr) f64:$Rn)>; + } + def : Pat<(i64 (to_int (round f64:$Rn))), + (!cast(INST # v1i64) f64:$Rn)>; + + let Predicates = [HasFPRCVT] in { + def : Pat<(f64 (bitconvert (i64 (to_int (round f32:$Rn))))), + (!cast(INST # DSr) f32:$Rn)>; + def : Pat<(f32 (bitconvert (i32 (to_int (round f64:$Rn))))), + (!cast(INST # SDr) f64:$Rn)>; + } + def : Pat<(f32 (bitconvert (i32 (to_int (round f32:$Rn))))), + (!cast(INST # v1i32) f32:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (to_int (round f64:$Rn))))), + (!cast(INST # v1i64) f64:$Rn)>; + // These instructions saturate like fp_to_[su]int_sat. let Predicates = [HasFullFP16] in { def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)), @@ -5345,6 +5478,21 @@ multiclass FPToIntegerPats(INST # UWDr) f64:$Rn)>; def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)), (!cast(INST # UXDr) f64:$Rn)>; + + let Predicates = [HasFPRCVT] in { + def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f16:$Rn), i32)))), + (!cast(INST # SHr) f16:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f16:$Rn), i64)))), + (!cast(INST # DHr) f16:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f32:$Rn), i64)))), + (!cast(INST # DSr) f32:$Rn)>; + def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f64:$Rn), i32)))), + (!cast(INST # SDr) f64:$Rn)>; + } + def : Pat<(f32 (bitconvert (i32 (to_int_sat (round f32:$Rn), i32)))), + (!cast(INST # v1i32) f32:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (to_int_sat (round f64:$Rn), i64)))), + (!cast(INST # v1i64) f64:$Rn)>; } defm : FPToIntegerPats; @@ -5379,6 +5527,39 @@ def : Pat<(i64 (any_llround f32:$Rn)), def : Pat<(i64 (any_llround f64:$Rn)), (FCVTASUXDr f64:$Rn)>; +// For global-isel we can use register classes to determine +// which FCVT instruction to use. 
+let Predicates = [HasFPRCVT] in { +def : Pat<(i64 (any_lround f32:$Rn)), + (FCVTASDSr f32:$Rn)>; +def : Pat<(i64 (any_llround f32:$Rn)), + (FCVTASDSr f32:$Rn)>; +} +def : Pat<(i64 (any_lround f64:$Rn)), + (FCVTASv1i64 f64:$Rn)>; +def : Pat<(i64 (any_llround f64:$Rn)), + (FCVTASv1i64 f64:$Rn)>; + +let Predicates = [HasFPRCVT] in { + def : Pat<(f32 (bitconvert (i32 (any_lround f16:$Rn)))), + (FCVTASSHr f16:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (any_lround f16:$Rn)))), + (FCVTASDHr f16:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (any_llround f16:$Rn)))), + (FCVTASDHr f16:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (any_lround f32:$Rn)))), + (FCVTASDSr f32:$Rn)>; + def : Pat<(f32 (bitconvert (i32 (any_lround f64:$Rn)))), + (FCVTASSDr f64:$Rn)>; + def : Pat<(f64 (bitconvert (i64 (any_llround f32:$Rn)))), + (FCVTASDSr f32:$Rn)>; +} +def : Pat<(f32 (bitconvert (i32 (any_lround f32:$Rn)))), + (FCVTASv1i32 f32:$Rn)>; +def : Pat<(f64 (bitconvert (i64 (any_lround f64:$Rn)))), + (FCVTASv1i64 f64:$Rn)>; +def : Pat<(f64 (bitconvert (i64 (any_llround f64:$Rn)))), + (FCVTASv1i64 f64:$Rn)>; //===----------------------------------------------------------------------===// // Scaled integer to floating point conversion instructions. //===----------------------------------------------------------------------===// @@ -5524,6 +5705,44 @@ def : Pat<(i64 (any_llrint f32:$Rn)), def : Pat<(i64 (any_llrint f64:$Rn)), (FCVTZSUXDr (FRINTXDr f64:$Rn))>; +// For global-isel we can use register classes to determine +// which FCVT instruction to use. 
+let Predicates = [HasFPRCVT] in { +def : Pat<(i64 (any_lrint f16:$Rn)), + (FCVTZSDHr (FRINTXHr f16:$Rn))>; +def : Pat<(i64 (any_llrint f16:$Rn)), + (FCVTZSDHr (FRINTXHr f16:$Rn))>; +def : Pat<(i64 (any_lrint f32:$Rn)), + (FCVTZSDSr (FRINTXSr f32:$Rn))>; +def : Pat<(i64 (any_llrint f32:$Rn)), + (FCVTZSDSr (FRINTXSr f32:$Rn))>; +} +def : Pat<(i64 (any_lrint f64:$Rn)), + (FCVTZSv1i64 (FRINTXDr f64:$Rn))>; +def : Pat<(i64 (any_llrint f64:$Rn)), + (FCVTZSv1i64 (FRINTXDr f64:$Rn))>; + +let Predicates = [HasFPRCVT] in { + def : Pat<(f32 (bitconvert (i32 (any_lrint f16:$Rn)))), + (FCVTZSSHr (FRINTXHr f16:$Rn))>; + def : Pat<(f64 (bitconvert (i64 (any_lrint f16:$Rn)))), + (FCVTZSDHr (FRINTXHr f16:$Rn))>; + def : Pat<(f64 (bitconvert (i64 (any_llrint f16:$Rn)))), + (FCVTZSDHr (FRINTXHr f16:$Rn))>; + def : Pat<(f64 (bitconvert (i64 (any_lrint f32:$Rn)))), + (FCVTZSDSr (FRINTXSr f32:$Rn))>; + def : Pat<(f32 (bitconvert (i32 (any_lrint f64:$Rn)))), + (FCVTZSSDr (FRINTXDr f64:$Rn))>; + def : Pat<(f64 (bitconvert (i64 (any_llrint f32:$Rn)))), + (FCVTZSDSr (FRINTXSr f32:$Rn))>; +} +def : Pat<(f32 (bitconvert (i32 (any_lrint f32:$Rn)))), + (FCVTZSv1i32 (FRINTXSr f32:$Rn))>; +def : Pat<(f64 (bitconvert (i64 (any_lrint f64:$Rn)))), + (FCVTZSv1i64 (FRINTXDr f64:$Rn))>; +def : Pat<(f64 (bitconvert (i64 (any_llrint f64:$Rn)))), + (FCVTZSv1i64 (FRINTXDr f64:$Rn))>; + //===----------------------------------------------------------------------===// // Floating point two operand instructions. 
//===----------------------------------------------------------------------===// @@ -6549,17 +6768,7 @@ defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>; defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>; defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; -defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">; -defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">; -defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">; -defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">; -defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">; -defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">; -defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">; -defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">; def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">; -defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">; -defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">; defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">; defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">; defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index cf391c446a955..c75a3c406f60d 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -568,9 +568,7 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI, case Intrinsic::aarch64_neon_fcvtnu: case Intrinsic::aarch64_neon_fcvtps: case Intrinsic::aarch64_neon_fcvtpu: - // Force FPR register bank for half types, as those types otherwise - // don't get legalized correctly resulting in fp16 <-> gpr32 COPY's. 
- return MRI.getType(MI.getOperand(2).getReg()) == LLT::float16(); + return true; default: break; } @@ -864,10 +862,24 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case TargetOpcode::G_FPTOUI: case TargetOpcode::G_INTRINSIC_LRINT: case TargetOpcode::G_INTRINSIC_LLRINT: + case TargetOpcode::G_LROUND: + case TargetOpcode::G_LLROUND: { if (MRI.getType(MI.getOperand(0).getReg()).isVector()) break; - OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR}; + TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI); + TypeSize SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, TRI); + if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) & + all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()), + [&](const MachineInstr &UseMI) { + return onlyUsesFP(UseMI, MRI, TRI) || + prefersFPUse(UseMI, MRI, TRI); + })) + OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; + else + OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR}; break; + } + case TargetOpcode::G_FCMP: { // If the result is a vector, it must use a FPR. 
AArch64GenRegisterBankInfo::PartialMappingIdx Idx0 = @@ -1143,6 +1155,34 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case TargetOpcode::G_INTRINSIC: case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: { switch (cast(MI).getIntrinsicID()) { + case Intrinsic::aarch64_neon_fcvtas: + case Intrinsic::aarch64_neon_fcvtau: + case Intrinsic::aarch64_neon_fcvtzs: + case Intrinsic::aarch64_neon_fcvtzu: + case Intrinsic::aarch64_neon_fcvtms: + case Intrinsic::aarch64_neon_fcvtmu: + case Intrinsic::aarch64_neon_fcvtns: + case Intrinsic::aarch64_neon_fcvtnu: + case Intrinsic::aarch64_neon_fcvtps: + case Intrinsic::aarch64_neon_fcvtpu: { + OpRegBankIdx[2] = PMI_FirstFPR; + if (MRI.getType(MI.getOperand(0).getReg()).isVector()) { + OpRegBankIdx[0] = PMI_FirstFPR; + break; + } + TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI); + TypeSize SrcSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, TRI); + if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) & + all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()), + [&](const MachineInstr &UseMI) { + return onlyUsesFP(UseMI, MRI, TRI) || + prefersFPUse(UseMI, MRI, TRI); + })) + OpRegBankIdx[0] = PMI_FirstFPR; + else + OpRegBankIdx[0] = PMI_FirstGPR; + break; + } case Intrinsic::aarch64_neon_vcvtfxs2fp: case Intrinsic::aarch64_neon_vcvtfxu2fp: case Intrinsic::aarch64_neon_vcvtfp2fxs: @@ -1179,12 +1219,6 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } break; } - case TargetOpcode::G_LROUND: - case TargetOpcode::G_LLROUND: { - // Source is always floating point and destination is always integer. - OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR}; - break; - } } // Finally construct the computed mapping. 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir index b2528840a39cf..46dbc1556fb1d 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir @@ -96,7 +96,7 @@ body: | ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:fpr(s32) = G_SITOFP [[COPY1]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[SELECT:%[0-9]+]]:fpr(s32) = G_SELECT [[COPY2]](s32), [[COPY3]], [[SITOFP]] - ; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:gpr(s32) = G_FPTOSI [[SELECT]](s32) + ; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:fpr(s32) = G_FPTOSI [[SELECT]](s32) %0:_(s32) = COPY $w0 %2:_(s32) = COPY $w1 %3:_(s32) = COPY $w2 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-llround.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-llround.mir index 420c7cfb07b74..16100f01017a6 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-llround.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-llround.mir @@ -14,7 +14,7 @@ body: | ; CHECK: liveins: $d0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %fpr:fpr(s64) = COPY $d0 - ; CHECK-NEXT: %llround:gpr(s64) = G_LLROUND %fpr(s64) + ; CHECK-NEXT: %llround:fpr(s64) = G_LLROUND %fpr(s64) ; CHECK-NEXT: $d0 = COPY %llround(s64) ; CHECK-NEXT: RET_ReallyLR implicit $s0 %fpr:_(s64) = COPY $d0 @@ -35,7 +35,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %gpr:gpr(s64) = COPY $x0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY %gpr(s64) - ; CHECK-NEXT: %llround:gpr(s64) = G_LLROUND [[COPY]](s64) + ; CHECK-NEXT: %llround:fpr(s64) = G_LLROUND [[COPY]](s64) ; CHECK-NEXT: $d0 = COPY %llround(s64) ; CHECK-NEXT: RET_ReallyLR implicit $s0 %gpr:_(s64) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-lround.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-lround.mir index 775c6ca773c68..5cb93f7c4646d 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-lround.mir +++ 
b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-lround.mir @@ -14,7 +14,7 @@ body: | ; CHECK: liveins: $d0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %fpr:fpr(s64) = COPY $d0 - ; CHECK-NEXT: %lround:gpr(s64) = G_LROUND %fpr(s64) + ; CHECK-NEXT: %lround:fpr(s64) = G_LROUND %fpr(s64) ; CHECK-NEXT: $d0 = COPY %lround(s64) ; CHECK-NEXT: RET_ReallyLR implicit $s0 %fpr:_(s64) = COPY $d0 @@ -35,7 +35,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %gpr:gpr(s64) = COPY $x0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr(s64) = COPY %gpr(s64) - ; CHECK-NEXT: %lround:gpr(s64) = G_LROUND [[COPY]](s64) + ; CHECK-NEXT: %lround:fpr(s64) = G_LROUND [[COPY]](s64) ; CHECK-NEXT: $d0 = COPY %lround(s64) ; CHECK-NEXT: RET_ReallyLR implicit $s0 %gpr:_(s64) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll new file mode 100644 index 0000000000000..936bdfc164810 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll @@ -0,0 +1,1306 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for fptosi_i32_f16_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f16_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f32_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f64_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f64_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f32_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for 
fptoui_i32_f16_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f16_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f32_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f64_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f64_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f32_simd + +; +; FPTOI +; + +define float @test_fptosi_f16_i32_simd(half %a) { +; CHECK-SD-LABEL: test_fptosi_f16_i32_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs s0, h0 +; CHECK-SD-NEXT: ret + %r = fptosi half %a to i32 + %bc = bitcast i32 %r to float + ret float %bc +} + +define double @test_fptosi_f16_i64_simd(half %a) { +; CHECK-SD-LABEL: test_fptosi_f16_i64_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs d0, h0 +; CHECK-SD-NEXT: ret + %r = fptosi half %a to i64 + %bc = bitcast i64 %r to double + ret double %bc +} + +define float @test_fptosi_f64_i32_simd(double %a) { +; CHECK-SD-LABEL: test_fptosi_f64_i32_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs s0, d0 +; CHECK-SD-NEXT: ret + %r = fptosi double %a to i32 + %bc = bitcast i32 %r to float + ret float %bc +} + +define double @test_fptosi_f32_i64_simd(float %a) { +; CHECK-SD-LABEL: test_fptosi_f32_i64_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs d0, s0 +; CHECK-SD-NEXT: ret + %r = fptosi float %a to i64 + %bc = bitcast i64 %r to double + ret double %bc +} + +define double @test_fptosi_f64_i64_simd(double %a) { +; CHECK-SD-LABEL: test_fptosi_f64_i64_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs d0, d0 +; CHECK-SD-NEXT: ret + %r = fptosi double %a to i64 + %bc = bitcast i64 %r to double + ret double %bc +} + + +define float @test_fptosi_f32_i32_simd(float %a) { +; CHECK-SD-LABEL: test_fptosi_f32_i32_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs s0, s0 +; CHECK-SD-NEXT: ret + %r = fptosi float %a to i32 
+ %bc = bitcast i32 %r to float + ret float %bc +} + +define float @test_fptoui_f16_i32_simd(half %a) { +; CHECK-SD-LABEL: test_fptoui_f16_i32_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu s0, h0 +; CHECK-SD-NEXT: ret + %r = fptoui half %a to i32 + %bc = bitcast i32 %r to float + ret float %bc +} + +define double @test_fptoui_f16_i64_simd(half %a) { +; CHECK-SD-LABEL: test_fptoui_f16_i64_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu d0, h0 +; CHECK-SD-NEXT: ret + %r = fptoui half %a to i64 + %bc = bitcast i64 %r to double + ret double %bc +} + +define float @test_fptoui_f64_i32_simd(double %a) { +; CHECK-SD-LABEL: test_fptoui_f64_i32_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu s0, d0 +; CHECK-SD-NEXT: ret + %r = fptoui double %a to i32 + %bc = bitcast i32 %r to float + ret float %bc +} + +define double @test_fptoui_f32_i64_simd(float %a) { +; CHECK-SD-LABEL: test_fptoui_f32_i64_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu d0, s0 +; CHECK-SD-NEXT: ret + %r = fptoui float %a to i64 + %bc = bitcast i64 %r to double + ret double %bc +} + +define double @test_fptoui_f64_i64_simd(double %a) { +; CHECK-SD-LABEL: test_fptoui_f64_i64_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu d0, d0 +; CHECK-SD-NEXT: ret + %r = fptoui double %a to i64 + %bc = bitcast i64 %r to double + ret double %bc +} + + +define float @test_fptoui_f32_i32_simd(float %a) { +; CHECK-SD-LABEL: test_fptoui_f32_i32_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu s0, s0 +; CHECK-SD-NEXT: ret + %r = fptoui float %a to i32 + %bc = bitcast i32 %r to float + ret float %bc +} + + +; +; FPTOI experimental +; + +define float @fptosi_i32_f16_simd(half %x) { +; CHECK-SD-LABEL: fptosi_i32_f16_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs s0, h0 +; CHECK-SD-NEXT: ret + %val = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict") + %sum = bitcast i32 %val to float + ret float %sum +} + +define double 
@fptosi_i64_f16_simd(half %x) { +; CHECK-SD-LABEL: fptosi_i64_f16_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs d0, h0 +; CHECK-SD-NEXT: ret + %val = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict") + %sum = bitcast i64 %val to double + ret double %sum +} + +define double @fptosi_i64_f32_simd(float %x) { +; CHECK-SD-LABEL: fptosi_i64_f32_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs d0, s0 +; CHECK-SD-NEXT: ret + %val = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %x, metadata !"fpexcept.strict") + %bc = bitcast i64 %val to double + ret double %bc +} + +define float @fptosi_i32_f64_simd(double %x) { +; CHECK-SD-LABEL: fptosi_i32_f64_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs s0, d0 +; CHECK-SD-NEXT: ret + %val = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict") + %bc = bitcast i32 %val to float + ret float %bc +} + +define double @fptosi_i64_f64_simd(double %x) { +; CHECK-SD-LABEL: fptosi_i64_f64_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs d0, d0 +; CHECK-SD-NEXT: ret + %val = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %x, metadata !"fpexcept.strict") + %bc = bitcast i64 %val to double + ret double %bc +} + +define float @fptosi_i32_f32_simd(float %x) { +; CHECK-SD-LABEL: fptosi_i32_f32_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs s0, s0 +; CHECK-SD-NEXT: ret + %val = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict") + %bc = bitcast i32 %val to float + ret float %bc +} + + + +define float @fptoui_i32_f16_simd(half %x) { +; CHECK-SD-LABEL: fptoui_i32_f16_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu s0, h0 +; CHECK-SD-NEXT: ret + %val = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") + %sum = bitcast i32 %val to float + ret float %sum +} + +define double @fptoui_i64_f16_simd(half %x) { +; CHECK-SD-LABEL: 
fptoui_i64_f16_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu d0, h0 +; CHECK-SD-NEXT: ret + %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict") + %sum = bitcast i64 %val to double + ret double %sum +} + +define double @fptoui_i64_f32_simd(float %x) { +; CHECK-SD-LABEL: fptoui_i64_f32_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu d0, s0 +; CHECK-SD-NEXT: ret + %val = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %x, metadata !"fpexcept.strict") + %bc = bitcast i64 %val to double + ret double %bc +} + +define float @fptoui_i32_f64_simd(double %x) { +; CHECK-SD-LABEL: fptoui_i32_f64_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu s0, d0 +; CHECK-SD-NEXT: ret + %val = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x, metadata !"fpexcept.strict") + %bc = bitcast i32 %val to float + ret float %bc +} + +define double @fptoui_i64_f64_simd(double %x) { +; CHECK-SD-LABEL: fptoui_i64_f64_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu d0, d0 +; CHECK-SD-NEXT: ret + %val = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x, metadata !"fpexcept.strict") + %bc = bitcast i64 %val to double + ret double %bc +} + +define float @fptoui_i32_f32_simd(float %x) { +; CHECK-SD-LABEL: fptoui_i32_f32_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu s0, s0 +; CHECK-SD-NEXT: ret + %val = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") + %bc = bitcast i32 %val to float + ret float %bc +} + +; +; FPTOI rounding +; + + +define double @fcvtas_ds_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtas_ds_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas d0, s0 +; CHECK-SD-NEXT: ret + %r = call float @roundf(float %a) nounwind readnone + %i = fptosi float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtas_sd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtas_sd_round_simd: +; CHECK-SD: // 
%bb.0: +; CHECK-SD-NEXT: fcvtas s0, d0 +; CHECK-SD-NEXT: ret + %r = call double @round(double %a) nounwind readnone + %i = fptosi double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtas_ss_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtas_ss_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas s0, s0 +; CHECK-SD-NEXT: ret + %r = call float @roundf(float %a) nounwind readnone + %i = fptosi float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtas_dd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtas_dd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas d0, d0 +; CHECK-SD-NEXT: ret + %r = call double @round(double %a) nounwind readnone + %i = fptosi double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + + +define double @fcvtau_ds_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtau_ds_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtau d0, s0 +; CHECK-SD-NEXT: ret + %r = call float @roundf(float %a) nounwind readnone + %i = fptoui float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtau_sd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtau_sd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtau s0, d0 +; CHECK-SD-NEXT: ret + %r = call double @round(double %a) nounwind readnone + %i = fptoui double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtau_ss_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtau_ss_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtau s0, s0 +; CHECK-SD-NEXT: ret + %r = call float @roundf(float %a) nounwind readnone + %i = fptoui float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtau_dd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtau_dd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtau d0, d0 +; CHECK-SD-NEXT: ret + %r = call double @round(double %a) nounwind readnone + %i = fptoui double %r to i64 + %bc = bitcast 
i64 %i to double + ret double %bc +} + + +define double @fcvtms_ds_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtms_ds_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtms d0, s0 +; CHECK-SD-NEXT: ret + %r = call float @floorf(float %a) nounwind readnone + %i = fptosi float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtms_sd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtms_sd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtms s0, d0 +; CHECK-SD-NEXT: ret + %r = call double @floor(double %a) nounwind readnone + %i = fptosi double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtms_ss_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtms_ss_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtms s0, s0 +; CHECK-SD-NEXT: ret + %r = call float @floorf(float %a) nounwind readnone + %i = fptosi float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtms_dd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtms_dd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtms d0, d0 +; CHECK-SD-NEXT: ret + %r = call double @floor(double %a) nounwind readnone + %i = fptosi double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + + + +define double @fcvtmu_ds_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtmu_ds_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtmu d0, s0 +; CHECK-SD-NEXT: ret + %r = call float @floorf(float %a) nounwind readnone + %i = fptoui float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtmu_sd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtmu_sd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtmu s0, d0 +; CHECK-SD-NEXT: ret + %r = call double @floor(double %a) nounwind readnone + %i = fptoui double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtmu_ss_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtmu_ss_round_simd: +; CHECK-SD: // %bb.0: +; 
CHECK-SD-NEXT: fcvtmu s0, s0 +; CHECK-SD-NEXT: ret + %r = call float @floorf(float %a) nounwind readnone + %i = fptoui float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtmu_dd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtmu_dd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtmu d0, d0 +; CHECK-SD-NEXT: ret + %r = call double @floor(double %a) nounwind readnone + %i = fptoui double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + + +define double @fcvtps_ds_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtps_ds_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtps d0, s0 +; CHECK-SD-NEXT: ret + %r = call float @ceilf(float %a) nounwind readnone + %i = fptosi float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtps_sd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtps_sd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtps s0, d0 +; CHECK-SD-NEXT: ret + %r = call double @ceil(double %a) nounwind readnone + %i = fptosi double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtps_ss_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtps_ss_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtps s0, s0 +; CHECK-SD-NEXT: ret + %r = call float @ceilf(float %a) nounwind readnone + %i = fptosi float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtps_dd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtps_dd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtps d0, d0 +; CHECK-SD-NEXT: ret + %r = call double @ceil(double %a) nounwind readnone + %i = fptosi double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + + +define double @fcvtpu_ds_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtpu_ds_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtpu d0, s0 +; CHECK-SD-NEXT: ret + %r = call float @ceilf(float %a) nounwind readnone + %i = fptoui float %r to i64 + %bc = bitcast i64 %i to 
double + ret double %bc +} + +define float @fcvtpu_sd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtpu_sd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtpu s0, d0 +; CHECK-SD-NEXT: ret + %r = call double @ceil(double %a) nounwind readnone + %i = fptoui double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtpu_ss_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtpu_ss_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtpu s0, s0 +; CHECK-SD-NEXT: ret + %r = call float @ceilf(float %a) nounwind readnone + %i = fptoui float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtpu_dd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtpu_dd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtpu d0, d0 +; CHECK-SD-NEXT: ret + %r = call double @ceil(double %a) nounwind readnone + %i = fptoui double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + + +define double @fcvtzs_ds_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtzs_ds_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs d0, s0 +; CHECK-SD-NEXT: ret + %r = call float @truncf(float %a) nounwind readnone + %i = fptosi float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzs_sd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtzs_sd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs s0, d0 +; CHECK-SD-NEXT: ret + %r = call double @trunc(double %a) nounwind readnone + %i = fptosi double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtzs_ss_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtzs_ss_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs s0, s0 +; CHECK-SD-NEXT: ret + %r = call float @truncf(float %a) nounwind readnone + %i = fptosi float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzs_dd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtzs_dd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs 
d0, d0 +; CHECK-SD-NEXT: ret + %r = call double @trunc(double %a) nounwind readnone + %i = fptosi double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtzu_ds_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtzu_ds_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu d0, s0 +; CHECK-SD-NEXT: ret + %r = call float @truncf(float %a) nounwind readnone + %i = fptoui float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzu_sd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtzu_sd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu s0, d0 +; CHECK-SD-NEXT: ret + %r = call double @trunc(double %a) nounwind readnone + %i = fptoui double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtzu_ss_round_simd(float %a) { +; CHECK-SD-LABEL: fcvtzu_ss_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu s0, s0 +; CHECK-SD-NEXT: ret + %r = call float @truncf(float %a) nounwind readnone + %i = fptoui float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzu_dd_round_simd(double %a) { +; CHECK-SD-LABEL: fcvtzu_dd_round_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu d0, d0 +; CHECK-SD-NEXT: ret + %r = call double @trunc(double %a) nounwind readnone + %i = fptoui double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + + +; +; FPTOI saturating +; + +define float @fcvtzs_sh_sat_simd(half %a) { +; CHECK-SD-LABEL: fcvtzs_sh_sat_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs s0, h0 +; CHECK-SD-NEXT: ret + %i = call i32 @llvm.fptosi.sat.i32.f16(half %a) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzs_dh_sat_simd(half %a) { +; CHECK-SD-LABEL: fcvtzs_dh_sat_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs d0, h0 +; CHECK-SD-NEXT: ret + %i = call i64 @llvm.fptosi.sat.i64.f16(half %a) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtzs_ds_sat_simd(float %a) 
{ +; CHECK-SD-LABEL: fcvtzs_ds_sat_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs d0, s0 +; CHECK-SD-NEXT: ret + %i = call i64 @llvm.fptosi.sat.i64.f32(float %a) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzs_sd_sat_simd(double %a) { +; CHECK-SD-LABEL: fcvtzs_sd_sat_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs s0, d0 +; CHECK-SD-NEXT: ret + %i = call i32 @llvm.fptosi.sat.i32.f64(double %a) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtzs_ss_sat_simd(float %a) { +; CHECK-SD-LABEL: fcvtzs_ss_sat_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs s0, s0 +; CHECK-SD-NEXT: ret + %i = call i32 @llvm.fptosi.sat.i32.f32(float %a) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzs_dd_sat_simd(double %a) { +; CHECK-SD-LABEL: fcvtzs_dd_sat_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs d0, d0 +; CHECK-SD-NEXT: ret + %i = call i64 @llvm.fptosi.sat.i64.f64(double %a) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzu_sh_sat_simd(half %a) { +; CHECK-SD-LABEL: fcvtzu_sh_sat_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu s0, h0 +; CHECK-SD-NEXT: ret + %i = call i32 @llvm.fptoui.sat.i32.f16(half %a) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzu_dh_sat_simd(half %a) { +; CHECK-SD-LABEL: fcvtzu_dh_sat_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu d0, h0 +; CHECK-SD-NEXT: ret + %i = call i64 @llvm.fptoui.sat.i64.f16(half %a) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtzu_ds_sat_simd(float %a) { +; CHECK-SD-LABEL: fcvtzu_ds_sat_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu d0, s0 +; CHECK-SD-NEXT: ret + %i = call i64 @llvm.fptoui.sat.i64.f32(float %a) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzu_sd_sat_simd(double %a) { +; CHECK-SD-LABEL: fcvtzu_sd_sat_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu s0, d0 +; CHECK-SD-NEXT: ret + %i = 
call i32 @llvm.fptoui.sat.i32.f64(double %a) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtzu_ss_sat_simd(float %a) { +; CHECK-SD-LABEL: fcvtzu_ss_sat_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu s0, s0 +; CHECK-SD-NEXT: ret + %i = call i32 @llvm.fptoui.sat.i32.f32(float %a) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzu_dd_sat_simd(double %a) { +; CHECK-SD-LABEL: fcvtzu_dd_sat_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu d0, d0 +; CHECK-SD-NEXT: ret + %i = call i64 @llvm.fptoui.sat.i64.f64(double %a) + %bc = bitcast i64 %i to double + ret double %bc +} + +; +; FPTOI saturating with rounding +; + +define float @fcvtas_sh_simd(half %a) { +; CHECK-SD-LABEL: fcvtas_sh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas s0, h0 +; CHECK-SD-NEXT: ret + %r = call half @llvm.round.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtas_dh_simd(half %a) { +; CHECK-SD-LABEL: fcvtas_dh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas d0, h0 +; CHECK-SD-NEXT: ret + %r = call half @llvm.round.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtas_ds_simd(float %a) { +; CHECK-SD-LABEL: fcvtas_ds_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas d0, s0 +; CHECK-SD-NEXT: ret + %r = call float @roundf(float %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtas_sd_simd(double %a) { +; CHECK-SD-LABEL: fcvtas_sd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas s0, d0 +; CHECK-SD-NEXT: ret + %r = call double @round(double %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtas_ss_simd(float %a) { +; 
CHECK-SD-LABEL: fcvtas_ss_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas s0, s0 +; CHECK-SD-NEXT: ret + %r = call float @roundf(float %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtas_dd_simd(double %a) { +; CHECK-SD-LABEL: fcvtas_dd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtas d0, d0 +; CHECK-SD-NEXT: ret + %r = call double @round(double %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtau_sh_simd(half %a) { +; CHECK-SD-LABEL: fcvtau_sh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtau s0, h0 +; CHECK-SD-NEXT: ret + %r = call half @llvm.round.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtau_dh_simd(half %a) { +; CHECK-SD-LABEL: fcvtau_dh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtau d0, h0 +; CHECK-SD-NEXT: ret + %r = call half @llvm.round.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtau_ds_simd(float %a) { +; CHECK-SD-LABEL: fcvtau_ds_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtau d0, s0 +; CHECK-SD-NEXT: ret + %r = call float @roundf(float %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtau_sd_simd(double %a) { +; CHECK-SD-LABEL: fcvtau_sd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtau s0, d0 +; CHECK-SD-NEXT: ret + %r = call double @round(double %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtau_ss_simd(float %a) { +; CHECK-SD-LABEL: fcvtau_ss_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtau s0, s0 +; CHECK-SD-NEXT: 
ret + %r = call float @roundf(float %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtau_dd_simd(double %a) { +; CHECK-SD-LABEL: fcvtau_dd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtau d0, d0 +; CHECK-SD-NEXT: ret + %r = call double @round(double %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtms_sh_simd(half %a) { +; CHECK-SD-LABEL: fcvtms_sh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtms s0, h0 +; CHECK-SD-NEXT: ret + %r = call half @llvm.floor.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtms_dh_simd(half %a) { +; CHECK-SD-LABEL: fcvtms_dh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtms d0, h0 +; CHECK-SD-NEXT: ret + %r = call half @llvm.floor.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtms_ds_simd(float %a) { +; CHECK-SD-LABEL: fcvtms_ds_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtms d0, s0 +; CHECK-SD-NEXT: ret + %r = call float @floorf(float %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtms_sd_simd(double %a) { +; CHECK-SD-LABEL: fcvtms_sd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtms s0, d0 +; CHECK-SD-NEXT: ret + %r = call double @floor(double %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtms_ss_simd(float %a) { +; CHECK-SD-LABEL: fcvtms_ss_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtms s0, s0 +; CHECK-SD-NEXT: ret + %r = call float @floorf(float %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f32(float 
%r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtms_dd_simd(double %a) { +; CHECK-SD-LABEL: fcvtms_dd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtms d0, d0 +; CHECK-SD-NEXT: ret + %r = call double @floor(double %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtmu_sh_simd(half %a) { +; CHECK-SD-LABEL: fcvtmu_sh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtmu s0, h0 +; CHECK-SD-NEXT: ret + %r = call half @llvm.floor.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtmu_dh_simd(half %a) { +; CHECK-SD-LABEL: fcvtmu_dh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtmu d0, h0 +; CHECK-SD-NEXT: ret + %r = call half @llvm.floor.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtmu_ds_simd(float %a) { +; CHECK-SD-LABEL: fcvtmu_ds_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtmu d0, s0 +; CHECK-SD-NEXT: ret + %r = call float @floorf(float %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtmu_sd_simd(double %a) { +; CHECK-SD-LABEL: fcvtmu_sd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtmu s0, d0 +; CHECK-SD-NEXT: ret + %r = call double @floor(double %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtmu_ss_simd(float %a) { +; CHECK-SD-LABEL: fcvtmu_ss_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtmu s0, s0 +; CHECK-SD-NEXT: ret + %r = call float @floorf(float %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtmu_dd_simd(double %a) { +; 
CHECK-SD-LABEL: fcvtmu_dd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtmu d0, d0 +; CHECK-SD-NEXT: ret + %r = call double @floor(double %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtps_sh_simd(half %a) { +; CHECK-SD-LABEL: fcvtps_sh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtps s0, h0 +; CHECK-SD-NEXT: ret + %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtps_dh_simd(half %a) { +; CHECK-SD-LABEL: fcvtps_dh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtps d0, h0 +; CHECK-SD-NEXT: ret + %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtps_ds_simd(float %a) { +; CHECK-SD-LABEL: fcvtps_ds_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtps d0, s0 +; CHECK-SD-NEXT: ret + %r = call float @ceilf(float %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtps_sd_simd(double %a) { +; CHECK-SD-LABEL: fcvtps_sd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtps s0, d0 +; CHECK-SD-NEXT: ret + %r = call double @ceil(double %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtps_ss_simd(float %a) { +; CHECK-SD-LABEL: fcvtps_ss_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtps s0, s0 +; CHECK-SD-NEXT: ret + %r = call float @ceilf(float %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtps_dd_simd(double %a) { +; CHECK-SD-LABEL: fcvtps_dd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtps d0, d0 +; CHECK-SD-NEXT: ret 
+ %r = call double @ceil(double %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtpu_sh_simd(half %a) { +; CHECK-SD-LABEL: fcvtpu_sh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtpu s0, h0 +; CHECK-SD-NEXT: ret + %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtpu_dh_simd(half %a) { +; CHECK-SD-LABEL: fcvtpu_dh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtpu d0, h0 +; CHECK-SD-NEXT: ret + %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtpu_ds_simd(float %a) { +; CHECK-SD-LABEL: fcvtpu_ds_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtpu d0, s0 +; CHECK-SD-NEXT: ret + %r = call float @ceilf(float %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtpu_sd_simd(double %a) { +; CHECK-SD-LABEL: fcvtpu_sd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtpu s0, d0 +; CHECK-SD-NEXT: ret + %r = call double @ceil(double %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtpu_ss_simd(float %a) { +; CHECK-SD-LABEL: fcvtpu_ss_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtpu s0, s0 +; CHECK-SD-NEXT: ret + %r = call float @ceilf(float %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtpu_dd_simd(double %a) { +; CHECK-SD-LABEL: fcvtpu_dd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtpu d0, d0 +; CHECK-SD-NEXT: ret + %r = call double @ceil(double %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) + 
%bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzs_sh_simd(half %a) { +; CHECK-SD-LABEL: fcvtzs_sh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs s0, h0 +; CHECK-SD-NEXT: ret + %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzs_dh_simd(half %a) { +; CHECK-SD-LABEL: fcvtzs_dh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs d0, h0 +; CHECK-SD-NEXT: ret + %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtzs_ds_simd(float %a) { +; CHECK-SD-LABEL: fcvtzs_ds_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs d0, s0 +; CHECK-SD-NEXT: ret + %r = call float @truncf(float %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzs_sd_simd(double %a) { +; CHECK-SD-LABEL: fcvtzs_sd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs s0, d0 +; CHECK-SD-NEXT: ret + %r = call double @trunc(double %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtzs_ss_simd(float %a) { +; CHECK-SD-LABEL: fcvtzs_ss_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs s0, s0 +; CHECK-SD-NEXT: ret + %r = call float @truncf(float %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzs_dd_simd(double %a) { +; CHECK-SD-LABEL: fcvtzs_dd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs d0, d0 +; CHECK-SD-NEXT: ret + %r = call double @trunc(double %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzu_sh_simd(half %a) { +; 
CHECK-SD-LABEL: fcvtzu_sh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu s0, h0 +; CHECK-SD-NEXT: ret + %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzu_dh_simd(half %a) { +; CHECK-SD-LABEL: fcvtzu_dh_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu d0, h0 +; CHECK-SD-NEXT: ret + %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtzu_ds_simd(float %a) { +; CHECK-SD-LABEL: fcvtzu_ds_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu d0, s0 +; CHECK-SD-NEXT: ret + %r = call float @truncf(float %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzu_sd_simd(double %a) { +; CHECK-SD-LABEL: fcvtzu_sd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu s0, d0 +; CHECK-SD-NEXT: ret + %r = call double @trunc(double %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtzu_ss_simd(float %a) { +; CHECK-SD-LABEL: fcvtzu_ss_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu s0, s0 +; CHECK-SD-NEXT: ret + %r = call float @truncf(float %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzu_dd_simd(double %a) { +; CHECK-SD-LABEL: fcvtzu_dd_simd: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu d0, d0 +; CHECK-SD-NEXT: ret + %r = call double @trunc(double %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +declare half @llvm.floor.f16(half) nounwind readnone +declare half @llvm.ceil.f16(half) nounwind readnone +declare half @llvm.trunc.f16(half) 
nounwind readnone +declare half @llvm.round.f16(half) nounwind readnone +declare float @floorf(float) nounwind readnone +declare float @ceilf(float) nounwind readnone +declare float @truncf(float) nounwind readnone +declare float @roundf(float) nounwind readnone +declare double @floor(double) nounwind readnone +declare double @ceil(double) nounwind readnone +declare double @trunc(double) nounwind readnone +declare double @round(double) nounwind readnone +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} +; CHECK-GI: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll new file mode 100644 index 0000000000000..ae4f83a5bd261 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-intrinsics.ll @@ -0,0 +1,612 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + + +; +; Intriniscs +; + +define float @fcvtas_1s1d_simd(double %A) nounwind { +; CHECK-LABEL: fcvtas_1s1d_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas s0, d0 +; CHECK-NEXT: ret + %i = call i32 @llvm.aarch64.neon.fcvtas.i32.f64(double %A) + %f = bitcast i32 %i to float + ret float %f +} + +define double @fcvtas_1d1s_simd(float %A) nounwind { +; CHECK-LABEL: fcvtas_1d1s_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas d0, s0 +; CHECK-NEXT: ret + %i = call i64 @llvm.aarch64.neon.fcvtas.i64.f32(float %A) + %d = bitcast i64 %i to double + ret double %d +} + +define dso_local float @fcvtas_1s1h_simd(half %a) { +; CHECK-LABEL: fcvtas_1s1h_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas s0, h0 +; CHECK-NEXT: ret + %fcvt = tail call i32 
@llvm.aarch64.neon.fcvtas.i32.f16(half %a) + %f = bitcast i32 %fcvt to float + ret float %f +} + +define dso_local double @fcvtas_1d1h_simd(half %a) { +; CHECK-LABEL: fcvtas_1d1h_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas d0, h0 +; CHECK-NEXT: ret + %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f16(half %a) + %d = bitcast i64 %vcvtah_s64_f16 to double + ret double %d +} + +define dso_local double @fcvtas_1d1d_simd(double %a) { +; CHECK-LABEL: fcvtas_1d1d_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas d0, d0 +; CHECK-NEXT: ret + %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtas.i64.f64(double %a) + %d = bitcast i64 %vcvtah_s64_f64 to double + ret double %d +} + +define dso_local float @fcvtas_1s1s_simd(float %a) { +; CHECK-LABEL: fcvtas_1s1s_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas s0, s0 +; CHECK-NEXT: ret + %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtas.i32.f32(float %a) + %d = bitcast i32 %vcvtah_s32_f32 to float + ret float %d +} + + +define float @fcvtau_1s1d_simd(double %A) nounwind { +; CHECK-LABEL: fcvtau_1s1d_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtau s0, d0 +; CHECK-NEXT: ret + %i = call i32 @llvm.aarch64.neon.fcvtau.i32.f64(double %A) + %f = bitcast i32 %i to float + ret float %f +} + +define double @fcvtau_1d1s_simd(float %A) nounwind { +; CHECK-LABEL: fcvtau_1d1s_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtau d0, s0 +; CHECK-NEXT: ret + %i = call i64 @llvm.aarch64.neon.fcvtau.i64.f32(float %A) + %d = bitcast i64 %i to double + ret double %d +} + +define dso_local float @fcvtau_1s1h_simd(half %a) { +; CHECK-LABEL: fcvtau_1s1h_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtau s0, h0 +; CHECK-NEXT: ret + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtau.i32.f16(half %a) + %f = bitcast i32 %fcvt to float + ret float %f +} + +define dso_local double @fcvtau_1d1h_simd(half %a) { +; CHECK-LABEL: fcvtau_1d1h_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtau d0, h0 +; CHECK-NEXT: ret + %vcvtah_s64_f16 = tail call 
i64 @llvm.aarch64.neon.fcvtau.i64.f16(half %a) + %d = bitcast i64 %vcvtah_s64_f16 to double + ret double %d +} + +define dso_local double @fcvtau_1d1d_simd(double %a) { +; CHECK-LABEL: fcvtau_1d1d_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtau d0, d0 +; CHECK-NEXT: ret + %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtau.i64.f64(double %a) + %d = bitcast i64 %vcvtah_s64_f64 to double + ret double %d +} + +define dso_local float @fcvtau_1s1s_simd(float %a) { +; CHECK-LABEL: fcvtau_1s1s_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtau s0, s0 +; CHECK-NEXT: ret + %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtau.i32.f32(float %a) + %d = bitcast i32 %vcvtah_s32_f32 to float + ret float %d +} + +define float @fcvtms_1s1d_simd(double %A) nounwind { +; CHECK-LABEL: fcvtms_1s1d_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms s0, d0 +; CHECK-NEXT: ret + %i = call i32 @llvm.aarch64.neon.fcvtms.i32.f64(double %A) + %f = bitcast i32 %i to float + ret float %f +} + +define double @fcvtms_1d1s_simd(float %A) nounwind { +; CHECK-LABEL: fcvtms_1d1s_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms d0, s0 +; CHECK-NEXT: ret + %i = call i64 @llvm.aarch64.neon.fcvtms.i64.f32(float %A) + %d = bitcast i64 %i to double + ret double %d +} + +define dso_local float @fcvtms_1s1h_simd(half %a) { +; CHECK-LABEL: fcvtms_1s1h_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms s0, h0 +; CHECK-NEXT: ret + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtms.i32.f16(half %a) + %f = bitcast i32 %fcvt to float + ret float %f +} + +define dso_local double @fcvtms_1d1h_simd(half %a) { +; CHECK-LABEL: fcvtms_1d1h_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms d0, h0 +; CHECK-NEXT: ret + %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f16(half %a) + %d = bitcast i64 %vcvtah_s64_f16 to double + ret double %d +} + +define dso_local double @fcvtms_1d1d_simd(double %a) { +; CHECK-LABEL: fcvtms_1d1d_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms d0, d0 +; CHECK-NEXT: ret + 
%vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtms.i64.f64(double %a) + %d = bitcast i64 %vcvtah_s64_f64 to double + ret double %d +} + +define dso_local float @fcvtms_1s1s_simd(float %a) { +; CHECK-LABEL: fcvtms_1s1s_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms s0, s0 +; CHECK-NEXT: ret + %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtms.i32.f32(float %a) + %d = bitcast i32 %vcvtah_s32_f32 to float + ret float %d +} + +define float @fcvtmu_1s1d_simd(double %A) nounwind { +; CHECK-LABEL: fcvtmu_1s1d_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtmu s0, d0 +; CHECK-NEXT: ret + %i = call i32 @llvm.aarch64.neon.fcvtmu.i32.f64(double %A) + %f = bitcast i32 %i to float + ret float %f +} + +define double @fcvtmu_1d1s_simd(float %A) nounwind { +; CHECK-LABEL: fcvtmu_1d1s_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtmu d0, s0 +; CHECK-NEXT: ret + %i = call i64 @llvm.aarch64.neon.fcvtmu.i64.f32(float %A) + %d = bitcast i64 %i to double + ret double %d +} + +define dso_local float @fcvtmu_1s1h_simd(half %a) { +; CHECK-LABEL: fcvtmu_1s1h_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtmu s0, h0 +; CHECK-NEXT: ret + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtmu.i32.f16(half %a) + %f = bitcast i32 %fcvt to float + ret float %f +} + +define dso_local double @fcvtmu_1d1h_simd(half %a) { +; CHECK-LABEL: fcvtmu_1d1h_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtmu d0, h0 +; CHECK-NEXT: ret + %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f16(half %a) + %d = bitcast i64 %vcvtah_s64_f16 to double + ret double %d +} + +define dso_local double @fcvtmu_1d1d_simd(double %a) { +; CHECK-LABEL: fcvtmu_1d1d_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtmu d0, d0 +; CHECK-NEXT: ret + %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtmu.i64.f64(double %a) + %d = bitcast i64 %vcvtah_s64_f64 to double + ret double %d +} + +define dso_local float @fcvtmu_1s1s_simd(float %a) { +; CHECK-LABEL: fcvtmu_1s1s_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtmu s0, s0 +; 
CHECK-NEXT: ret + %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtmu.i32.f32(float %a) + %d = bitcast i32 %vcvtah_s32_f32 to float + ret float %d +} + +define float @fcvtns_1s1d_simd(double %A) nounwind { +; CHECK-LABEL: fcvtns_1s1d_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtns s0, d0 +; CHECK-NEXT: ret + %i = call i32 @llvm.aarch64.neon.fcvtns.i32.f64(double %A) + %f = bitcast i32 %i to float + ret float %f +} + +define double @fcvtns_1d1s_simd(float %A) nounwind { +; CHECK-LABEL: fcvtns_1d1s_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtns d0, s0 +; CHECK-NEXT: ret + %i = call i64 @llvm.aarch64.neon.fcvtns.i64.f32(float %A) + %d = bitcast i64 %i to double + ret double %d +} + +define dso_local float @fcvtns_1s1h_simd(half %a) { +; CHECK-LABEL: fcvtns_1s1h_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtns s0, h0 +; CHECK-NEXT: ret + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtns.i32.f16(half %a) + %f = bitcast i32 %fcvt to float + ret float %f +} + +define dso_local double @fcvtns_1d1h_simd(half %a) { +; CHECK-LABEL: fcvtns_1d1h_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtns d0, h0 +; CHECK-NEXT: ret + %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f16(half %a) + %d = bitcast i64 %vcvtah_s64_f16 to double + ret double %d +} + +define dso_local double @fcvtns_1d1d_simd(double %a) { +; CHECK-LABEL: fcvtns_1d1d_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtns d0, d0 +; CHECK-NEXT: ret + %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtns.i64.f64(double %a) + %d = bitcast i64 %vcvtah_s64_f64 to double + ret double %d +} + +define dso_local float @fcvtns_1s1s_simd(float %a) { +; CHECK-LABEL: fcvtns_1s1s_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtns s0, s0 +; CHECK-NEXT: ret + %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtns.i32.f32(float %a) + %d = bitcast i32 %vcvtah_s32_f32 to float + ret float %d +} + +define float @fcvtnu_1s1d_simd(double %A) nounwind { +; CHECK-LABEL: fcvtnu_1s1d_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: 
fcvtnu s0, d0 +; CHECK-NEXT: ret + %i = call i32 @llvm.aarch64.neon.fcvtnu.i32.f64(double %A) + %f = bitcast i32 %i to float + ret float %f +} + +define double @fcvtnu_1d1s_simd(float %A) nounwind { +; CHECK-LABEL: fcvtnu_1d1s_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtnu d0, s0 +; CHECK-NEXT: ret + %i = call i64 @llvm.aarch64.neon.fcvtnu.i64.f32(float %A) + %d = bitcast i64 %i to double + ret double %d +} + +define dso_local float @fcvtnu_1s1h_simd(half %a) { +; CHECK-LABEL: fcvtnu_1s1h_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtnu s0, h0 +; CHECK-NEXT: ret + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtnu.i32.f16(half %a) + %f = bitcast i32 %fcvt to float + ret float %f +} + +define dso_local double @fcvtnu_1d1h_simd(half %a) { +; CHECK-LABEL: fcvtnu_1d1h_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtnu d0, h0 +; CHECK-NEXT: ret + %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f16(half %a) + %d = bitcast i64 %vcvtah_s64_f16 to double + ret double %d +} + +define dso_local double @fcvtnu_1d1d_simd(double %a) { +; CHECK-LABEL: fcvtnu_1d1d_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtnu d0, d0 +; CHECK-NEXT: ret + %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtnu.i64.f64(double %a) + %d = bitcast i64 %vcvtah_s64_f64 to double + ret double %d +} + +define dso_local float @fcvtnu_1s1s_simd(float %a) { +; CHECK-LABEL: fcvtnu_1s1s_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtnu s0, s0 +; CHECK-NEXT: ret + %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtnu.i32.f32(float %a) + %d = bitcast i32 %vcvtah_s32_f32 to float + ret float %d +} + +define float @fcvtps_1s1d_simd(double %A) nounwind { +; CHECK-LABEL: fcvtps_1s1d_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtps s0, d0 +; CHECK-NEXT: ret + %i = call i32 @llvm.aarch64.neon.fcvtps.i32.f64(double %A) + %f = bitcast i32 %i to float + ret float %f +} + +define double @fcvtps_1d1s_simd(float %A) nounwind { +; CHECK-LABEL: fcvtps_1d1s_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtps d0, s0 +; 
CHECK-NEXT: ret + %i = call i64 @llvm.aarch64.neon.fcvtps.i64.f32(float %A) + %d = bitcast i64 %i to double + ret double %d +} + +define dso_local float @fcvtps_1s1h_simd(half %a) { +; CHECK-LABEL: fcvtps_1s1h_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtps s0, h0 +; CHECK-NEXT: ret + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtps.i32.f16(half %a) + %f = bitcast i32 %fcvt to float + ret float %f +} + +define dso_local double @fcvtps_1d1h_simd(half %a) { +; CHECK-LABEL: fcvtps_1d1h_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtps d0, h0 +; CHECK-NEXT: ret + %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f16(half %a) + %d = bitcast i64 %vcvtah_s64_f16 to double + ret double %d +} + +define dso_local double @fcvtps_1d1d_simd(double %a) { +; CHECK-LABEL: fcvtps_1d1d_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtps d0, d0 +; CHECK-NEXT: ret + %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtps.i64.f64(double %a) + %d = bitcast i64 %vcvtah_s64_f64 to double + ret double %d +} + +define dso_local float @fcvtps_1s1s_simd(float %a) { +; CHECK-LABEL: fcvtps_1s1s_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtps s0, s0 +; CHECK-NEXT: ret + %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtps.i32.f32(float %a) + %d = bitcast i32 %vcvtah_s32_f32 to float + ret float %d +} + +define float @fcvtpu_1s1d_simd(double %A) nounwind { +; CHECK-LABEL: fcvtpu_1s1d_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtpu s0, d0 +; CHECK-NEXT: ret + %i = call i32 @llvm.aarch64.neon.fcvtpu.i32.f64(double %A) + %f = bitcast i32 %i to float + ret float %f +} + +define double @fcvtpu_1d1s_simd(float %A) nounwind { +; CHECK-LABEL: fcvtpu_1d1s_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtpu d0, s0 +; CHECK-NEXT: ret + %i = call i64 @llvm.aarch64.neon.fcvtpu.i64.f32(float %A) + %d = bitcast i64 %i to double + ret double %d +} + +define dso_local float @fcvtpu_1s1h_simd(half %a) { +; CHECK-LABEL: fcvtpu_1s1h_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtpu s0, h0 +; CHECK-NEXT: ret + 
%fcvt = tail call i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half %a) + %f = bitcast i32 %fcvt to float + ret float %f +} + +define dso_local double @fcvtpu_1d1h_simd(half %a) { +; CHECK-LABEL: fcvtpu_1d1h_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtpu d0, h0 +; CHECK-NEXT: ret + %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half %a) + %d = bitcast i64 %vcvtah_s64_f16 to double + ret double %d +} + +define dso_local double @fcvtpu_1d1d_simd(double %a) { +; CHECK-LABEL: fcvtpu_1d1d_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtpu d0, d0 +; CHECK-NEXT: ret + %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtpu.i64.f64(double %a) + %d = bitcast i64 %vcvtah_s64_f64 to double + ret double %d +} + +define dso_local float @fcvtpu_1s1s_simd(float %a) { +; CHECK-LABEL: fcvtpu_1s1s_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtpu s0, s0 +; CHECK-NEXT: ret + %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtpu.i32.f32(float %a) + %d = bitcast i32 %vcvtah_s32_f32 to float + ret float %d +} + +define float @fcvtzs_1s1d_simd(double %A) nounwind { +; CHECK-LABEL: fcvtzs_1s1d_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, d0 +; CHECK-NEXT: ret + %i = call i32 @llvm.aarch64.neon.fcvtzs.i32.f64(double %A) + %f = bitcast i32 %i to float + ret float %f +} + +define double @fcvtzs_1d1s_simd(float %A) nounwind { +; CHECK-LABEL: fcvtzs_1d1s_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: ret + %i = call i64 @llvm.aarch64.neon.fcvtzs.i64.f32(float %A) + %d = bitcast i64 %i to double + ret double %d +} + +define dso_local float @fcvtzs_1s1h_simd(half %a) { +; CHECK-LABEL: fcvtzs_1s1h_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, h0 +; CHECK-NEXT: ret + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a) + %f = bitcast i32 %fcvt to float + ret float %f +} + +define dso_local double @fcvtzs_1d1h_simd(half %a) { +; CHECK-LABEL: fcvtzs_1d1h_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, h0 +; CHECK-NEXT: ret + 
%vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f16(half %a) + %d = bitcast i64 %vcvtah_s64_f16 to double + ret double %d +} + +define dso_local double @fcvtzs_1d1d_simd(double %a) { +; CHECK-LABEL: fcvtzs_1d1d_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: ret + %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double %a) + %d = bitcast i64 %vcvtah_s64_f64 to double + ret double %d +} + +define dso_local float @fcvtzs_1s1s_simd(float %a) { +; CHECK-LABEL: fcvtzs_1s1s_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: ret + %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a) + %d = bitcast i32 %vcvtah_s32_f32 to float + ret float %d +} + +define float @fcvtzu_1s1d_simd(double %A) nounwind { +; CHECK-LABEL: fcvtzu_1s1d_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, d0 +; CHECK-NEXT: ret + %i = call i32 @llvm.aarch64.neon.fcvtzu.i32.f64(double %A) + %f = bitcast i32 %i to float + ret float %f +} + +define double @fcvtzu_1d1s_simd(float %A) nounwind { +; CHECK-LABEL: fcvtzu_1d1s_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, s0 +; CHECK-NEXT: ret + %i = call i64 @llvm.aarch64.neon.fcvtzu.i64.f32(float %A) + %d = bitcast i64 %i to double + ret double %d +} + +define dso_local float @fcvtzu_1s1h_simd(half %a) { +; CHECK-LABEL: fcvtzu_1s1h_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, h0 +; CHECK-NEXT: ret + %fcvt = tail call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a) + %f = bitcast i32 %fcvt to float + ret float %f +} + +define dso_local double @fcvtzu_1d1h_simd(half %a) { +; CHECK-LABEL: fcvtzu_1d1h_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, h0 +; CHECK-NEXT: ret + %vcvtah_s64_f16 = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f16(half %a) + %d = bitcast i64 %vcvtah_s64_f16 to double + ret double %d +} + +define dso_local double @fcvtzu_1d1d_simd(double %a) { +; CHECK-LABEL: fcvtzu_1d1d_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, d0 +; 
CHECK-NEXT: ret + %vcvtah_s64_f64 = tail call i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double %a) + %d = bitcast i64 %vcvtah_s64_f64 to double + ret double %d +} + +define dso_local float @fcvtzu_1s1s_simd(float %a) { +; CHECK-LABEL: fcvtzu_1s1s_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, s0 +; CHECK-NEXT: ret + %vcvtah_s32_f32 = tail call i32 @llvm.aarch64.neon.fcvtzu.i32.f32(float %a) + %d = bitcast i32 %vcvtah_s32_f32 to float + ret float %d +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-GI: {{.*}} +; CHECK-SD: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-round-rint.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-round-rint.ll new file mode 100644 index 0000000000000..000ff64131ccf --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-round-rint.ll @@ -0,0 +1,428 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for lround_i32_f16_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_i64_f16_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_i32_f64_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_i32_f32_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llround_i64_f16_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_i32_f16_simd_exp +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_i64_f16_simd_exp +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for 
lround_i64_f32_simd_exp +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_i32_f64_simd_exp +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_i32_f32_simd_exp +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lround_i64_f64_simd_exp +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llround_i64_f16_simd_exp +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llround_i64_f32_simd_exp +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llround_i64_f64_simd_exp +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_i32_f16_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_i32_f64_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_i32_f32_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_i32_f16_simd_exp +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_i64_f16_simd_exp +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_i64_f32_simd_exp +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_i32_f64_simd_exp +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_i32_f32_simd_exp +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_i64_f64_simd_exp +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llrint_i64_f16_simd_exp +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llrint_i64_f32_simd_exp +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for llrint_i64_f64_simd_exp + +; +; (L/LL)Round +; + +define float @lround_i32_f16_simd(half %x) { +; CHECK-LABEL: lround_i32_f16_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas s0, h0 +; CHECK-NEXT: ret + %val = call i32 @llvm.lround.i32.f16(half %x) + %sum = bitcast i32 %val to float + ret 
float %sum +} + +define double @lround_i64_f16_simd(half %x) { +; CHECK-LABEL: lround_i64_f16_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas d0, h0 +; CHECK-NEXT: ret + %val = call i64 @llvm.lround.i64.f16(half %x) + %bc = bitcast i64 %val to double + ret double %bc +} + +define double @lround_i64_f32_simd(float %x) { +; CHECK-LABEL: lround_i64_f32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas d0, s0 +; CHECK-NEXT: ret + %val = call i64 @llvm.lround.i64.f32(float %x) + %bc = bitcast i64 %val to double + ret double %bc +} + +define float @lround_i32_f64_simd(double %x) { +; CHECK-LABEL: lround_i32_f64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas s0, d0 +; CHECK-NEXT: ret + %val = call i32 @llvm.lround.i32.f64(double %x) + %bc = bitcast i32 %val to float + ret float %bc +} + +define float @lround_i32_f32_simd(float %x) { +; CHECK-LABEL: lround_i32_f32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas s0, s0 +; CHECK-NEXT: ret + %val = call i32 @llvm.lround.i32.f32(float %x) + %bc = bitcast i32 %val to float + ret float %bc +} + +define double @lround_i64_f64_simd(double %x) { +; CHECK-LABEL: lround_i64_f64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas d0, d0 +; CHECK-NEXT: ret + %val = call i64 @llvm.lround.i64.f64(double %x) + %bc = bitcast i64 %val to double + ret double %bc +} + +define double @llround_i64_f16_simd(half %x) { +; CHECK-LABEL: llround_i64_f16_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas d0, h0 +; CHECK-NEXT: ret + %val = call i64 @llvm.llround.i64.f16(half %x) + %sum = bitcast i64 %val to double + ret double %sum +} + +define double @llround_i64_f32_simd(float %x) { +; CHECK-LABEL: llround_i64_f32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas d0, s0 +; CHECK-NEXT: ret + %val = call i64 @llvm.llround.i64.f32(float %x) + %bc = bitcast i64 %val to double + ret double %bc +} + +define double @llround_i64_f64_simd(double %x) { +; CHECK-LABEL: llround_i64_f64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas d0, d0 +; CHECK-NEXT: ret + %val = 
call i64 @llvm.llround.i64.f64(double %x) + %bc = bitcast i64 %val to double + ret double %bc +} + + +; +; (L/LL)Round experimental +; + +define float @lround_i32_f16_simd_exp(half %x) { +; CHECK-LABEL: lround_i32_f16_simd_exp: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas s0, h0 +; CHECK-NEXT: ret + %val = call i32 @llvm.experimental.constrained.lround.i32.f16(half %x, metadata !"fpexcept.strict") + %sum = bitcast i32 %val to float + ret float %sum +} + +define double @lround_i64_f16_simd_exp(half %x) { +; CHECK-LABEL: lround_i64_f16_simd_exp: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas d0, h0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.lround.i64.f16(half %x, metadata !"fpexcept.strict") + %bc = bitcast i64 %val to double + ret double %bc +} + +define double @lround_i64_f32_simd_exp(float %x) { +; CHECK-LABEL: lround_i64_f32_simd_exp: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas d0, s0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.lround.i64.f32(float %x, metadata !"fpexcept.strict") + %bc = bitcast i64 %val to double + ret double %bc +} + +define float @lround_i32_f64_simd_exp(double %x) { +; CHECK-LABEL: lround_i32_f64_simd_exp: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas s0, d0 +; CHECK-NEXT: ret + %val = call i32 @llvm.experimental.constrained.lround.i32.f64(double %x, metadata !"fpexcept.strict") + %bc = bitcast i32 %val to float + ret float %bc +} + +define float @lround_i32_f32_simd_exp(float %x) { +; CHECK-LABEL: lround_i32_f32_simd_exp: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas s0, s0 +; CHECK-NEXT: ret + %val = call i32 @llvm.experimental.constrained.lround.i32.f32(float %x, metadata !"fpexcept.strict") + %bc = bitcast i32 %val to float + ret float %bc +} + +define double @lround_i64_f64_simd_exp(double %x) { +; CHECK-LABEL: lround_i64_f64_simd_exp: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas d0, d0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.lround.i64.f64(double %x, metadata !"fpexcept.strict") 
+ %bc = bitcast i64 %val to double + ret double %bc +} + +define double @llround_i64_f16_simd_exp(half %x) { +; CHECK-LABEL: llround_i64_f16_simd_exp: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas d0, h0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.llround.i64.f16(half %x, metadata !"fpexcept.strict") + %sum = bitcast i64 %val to double + ret double %sum +} + +define double @llround_i64_f32_simd_exp(float %x) { +; CHECK-LABEL: llround_i64_f32_simd_exp: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas d0, s0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.llround.i64.f32(float %x, metadata !"fpexcept.strict") + %bc = bitcast i64 %val to double + ret double %bc +} + +define double @llround_i64_f64_simd_exp(double %x) { +; CHECK-LABEL: llround_i64_f64_simd_exp: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas d0, d0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.llround.i64.f64(double %x, metadata !"fpexcept.strict") + %bc = bitcast i64 %val to double + ret double %bc +} + +; +; (L/LL)Rint +; + +define float @lrint_i32_f16_simd(half %x) { +; CHECK-LABEL: lrint_i32_f16_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx h0, h0 +; CHECK-NEXT: fcvtzs s0, h0 +; CHECK-NEXT: ret + %val = call i32 @llvm.lrint.i32.f16(half %x) + %sum = bitcast i32 %val to float + ret float %sum +} + +define double @lrint_i64_f16_simd(half %x) { +; CHECK-LABEL: lrint_i64_f16_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx h0, h0 +; CHECK-NEXT: fcvtzs d0, h0 +; CHECK-NEXT: ret + %val = call i64 @llvm.lrint.i64.f16(half %x) + %bc = bitcast i64 %val to double + ret double %bc +} + +define double @lrint_i64_f32_simd(float %x) { +; CHECK-LABEL: lrint_i64_f32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: ret + %val = call i64 @llvm.lrint.i64.f32(float %x) + %bc = bitcast i64 %val to double + ret double %bc +} + +define float @lrint_i32_f64_simd(double %x) { +; CHECK-LABEL: lrint_i32_f64_simd: +; CHECK: // 
%bb.0: +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: fcvtzs s0, d0 +; CHECK-NEXT: ret + %val = call i32 @llvm.lrint.i32.f64(double %x) + %bc = bitcast i32 %val to float + ret float %bc +} + +define float @lrint_i32_f32_simd(float %x) { +; CHECK-LABEL: lrint_i32_f32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: ret + %val = call i32 @llvm.lrint.i32.f32(float %x) + %bc = bitcast i32 %val to float + ret float %bc +} + +define double @lrint_i64_f64_simd(double %x) { +; CHECK-LABEL: lrint_i64_f64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: ret + %val = call i64 @llvm.lrint.i64.f64(double %x) + %bc = bitcast i64 %val to double + ret double %bc +} + +define double @llrint_i64_f16_simd(half %x) { +; CHECK-LABEL: llrint_i64_f16_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx h0, h0 +; CHECK-NEXT: fcvtzs d0, h0 +; CHECK-NEXT: ret + %val = call i64 @llvm.llrint.i64.f16(half %x) + %sum = bitcast i64 %val to double + ret double %sum +} + +define double @llrint_i64_f32_simd(float %x) { +; CHECK-LABEL: llrint_i64_f32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: ret + %val = call i64 @llvm.llrint.i64.f32(float %x) + %bc = bitcast i64 %val to double + ret double %bc +} + +define double @llrint_i64_f64_simd(double %x) { +; CHECK-LABEL: llrint_i64_f64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: ret + %val = call i64 @llvm.llrint.i64.f64(double %x) + %bc = bitcast i64 %val to double + ret double %bc +} + +; +; (L/LL)Rint experimental +; + +define float @lrint_i32_f16_simd_exp(half %x) { +; CHECK-LABEL: lrint_i32_f16_simd_exp: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx h0, h0 +; CHECK-NEXT: fcvtzs s0, h0 +; CHECK-NEXT: ret + %val = call i32 @llvm.experimental.constrained.lrint.i32.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") + %sum = bitcast i32 %val 
to float + ret float %sum +} + +define double @lrint_i64_f16_simd_exp(half %x) { +; CHECK-LABEL: lrint_i64_f16_simd_exp: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx h0, h0 +; CHECK-NEXT: fcvtzs d0, h0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.lrint.i64.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") + %bc = bitcast i64 %val to double + ret double %bc +} + +define double @lrint_i64_f32_simd_exp(float %x) { +; CHECK-LABEL: lrint_i64_f32_simd_exp: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.lrint.i64.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") + %bc = bitcast i64 %val to double + ret double %bc +} + +define float @lrint_i32_f64_simd_exp(double %x) { +; CHECK-LABEL: lrint_i32_f64_simd_exp: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: fcvtzs s0, d0 +; CHECK-NEXT: ret + %val = call i32 @llvm.experimental.constrained.lrint.i32.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") + %bc = bitcast i32 %val to float + ret float %bc +} + +define float @lrint_i32_f32_simd_exp(float %x) { +; CHECK-LABEL: lrint_i32_f32_simd_exp: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: ret + %val = call i32 @llvm.experimental.constrained.lrint.i32.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") + %bc = bitcast i32 %val to float + ret float %bc +} + +define double @lrint_i64_f64_simd_exp(double %x) { +; CHECK-LABEL: lrint_i64_f64_simd_exp: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.lrint.i64.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") + %bc = bitcast i64 %val to double + ret double %bc +} + +define double @llrint_i64_f16_simd_exp(half %x) { +; CHECK-LABEL: llrint_i64_f16_simd_exp: 
+; CHECK: // %bb.0: +; CHECK-NEXT: frintx h0, h0 +; CHECK-NEXT: fcvtzs d0, h0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.llrint.i64.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") + %sum = bitcast i64 %val to double + ret double %sum +} + +define double @llrint_i64_f32_simd_exp(float %x) { +; CHECK-LABEL: llrint_i64_f32_simd_exp: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx s0, s0 +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.llrint.i64.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") + %bc = bitcast i64 %val to double + ret double %bc +} + +define double @llrint_i64_f64_simd_exp(double %x) { +; CHECK-LABEL: llrint_i64_f64_simd_exp: +; CHECK: // %bb.0: +; CHECK-NEXT: frintx d0, d0 +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.llrint.i64.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") + %bc = bitcast i64 %val to double + ret double %bc +} +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK-GI: {{.*}} +; CHECK-SD: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll index 367105f783817..649071900eb0c 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -978,12 +978,18 @@ define <1 x double> @test_bitcasti64tov1f64(i64 %in) { } define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 { -; CHECK-LABEL: test_bitcastv8i8tov1f64: -; CHECK: // %bb.0: -; CHECK-NEXT: neg v0.8b, v0.8b -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_bitcastv8i8tov1f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: neg v0.8b, v0.8b +; CHECK-SD-NEXT: fcvtzs x8, d0 +; CHECK-SD-NEXT: fmov d0, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_bitcastv8i8tov1f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: neg v0.8b, v0.8b +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ret %sub.i = sub <8 x i8> zeroinitializer, %a %1 = bitcast <8 x i8> %sub.i to <1 x double> %vcvt.i = fptosi <1 x double> %1 to <1 x i64> @@ -991,12 +997,18 @@ define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 { } define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 { -; CHECK-LABEL: test_bitcastv4i16tov1f64: -; CHECK: // %bb.0: -; CHECK-NEXT: neg v0.4h, v0.4h -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_bitcastv4i16tov1f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: neg v0.4h, v0.4h +; CHECK-SD-NEXT: fcvtzs x8, d0 +; CHECK-SD-NEXT: fmov d0, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_bitcastv4i16tov1f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: neg v0.4h, v0.4h +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ret %sub.i = sub <4 x i16> zeroinitializer, %a %1 = bitcast <4 x i16> %sub.i to <1 x double> %vcvt.i = fptosi <1 x double> %1 to <1 x i64> @@ -1004,12 +1016,18 @@ define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 { } define 
<1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 { -; CHECK-LABEL: test_bitcastv2i32tov1f64: -; CHECK: // %bb.0: -; CHECK-NEXT: neg v0.2s, v0.2s -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_bitcastv2i32tov1f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: neg v0.2s, v0.2s +; CHECK-SD-NEXT: fcvtzs x8, d0 +; CHECK-SD-NEXT: fmov d0, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_bitcastv2i32tov1f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: neg v0.2s, v0.2s +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ret %sub.i = sub <2 x i32> zeroinitializer, %a %1 = bitcast <2 x i32> %sub.i to <1 x double> %vcvt.i = fptosi <1 x double> %1 to <1 x i64> @@ -1029,8 +1047,7 @@ define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 { ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: neg x8, x8 ; CHECK-GI-NEXT: fmov d0, x8 -; CHECK-GI-NEXT: fcvtzs x8, d0 -; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: fcvtzs d0, d0 ; CHECK-GI-NEXT: ret %sub.i = sub <1 x i64> zeroinitializer, %a %1 = bitcast <1 x i64> %sub.i to <1 x double> diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll index be21776e26f8e..c3a95a45b7ba6 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=arm64-eabi -pass-remarks-missed=gisel-* \ ; RUN: -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | \ -; RUN: FileCheck %s --check-prefixes=FALLBACK,CHECK +; RUN: FileCheck %s --check-prefixes=FALLBACK,CHECK,CHECK-GI ; FALLBACK-NOT: remark{{.*}}fcvtas_2s define <2 x i32> @fcvtas_2s(<2 x float> %A) nounwind { @@ -365,9 +365,12 @@ define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind { define <1 x i64> @fcvtzs_1d(<1 x double> %A) nounwind { 
;CHECK-LABEL: fcvtzs_1d: ;CHECK-NOT: ld1 -;CHECK: fcvtzs x8, d0 -;CHECK-NEXT: mov d0, x8 -;CHECK-NEXT: ret +;CHECK-SD: fcvtzs x8, d0 +;CHECK-SD-NEXT: mov d0, x8 +;CHECK-SD-NEXT: ret + +;CHECK-GI: fcvtzs d0, d0 +;CHECK-GI-NEXT: ret %tmp3 = fptosi <1 x double> %A to <1 x i64> ret <1 x i64> %tmp3 } @@ -444,9 +447,12 @@ define <2 x i64> @fcvtzu_2d(<2 x double> %A) nounwind { define <1 x i64> @fcvtzu_1d(<1 x double> %A) nounwind { ;CHECK-LABEL: fcvtzu_1d: ;CHECK-NOT: ld1 -;CHECK: fcvtzu x8, d0 -;CHECK-NEXT: mov d0, x8 -;CHECK-NEXT: ret +;CHECK-SD: fcvtzu x8, d0 +;CHECK-SD-NEXT: mov d0, x8 +;CHECK-SD-NEXT: ret + +;CHECK-GI: fcvtzu d0, d0 +;CHECK-GI-NEXT: ret %tmp3 = fptoui <1 x double> %A to <1 x i64> ret <1 x i64> %tmp3 } diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index c74112937ba53..b963acd8cb2a1 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -31,8 +31,7 @@ define <1 x i32> @test_signed_v1f32_v1i32(<1 x float> %f) { ; ; CHECK-GI-LABEL: test_signed_v1f32_v1i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fcvtzs w8, s0 -; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: fcvtzs s0, s0 ; CHECK-GI-NEXT: ret %x = call <1 x i32> @llvm.fptosi.sat.v1f32.v1i32(<1 x float> %f) ret <1 x i32> %x @@ -1162,18 +1161,24 @@ declare <7 x i32> @llvm.fptosi.sat.v7f16.v7i32 (<7 x half>) declare <8 x i32> @llvm.fptosi.sat.v8f16.v8i32 (<8 x half>) define <1 x i32> @test_signed_v1f16_v1i32(<1 x half> %f) { -; CHECK-CVT-LABEL: test_signed_v1f16_v1i32: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvtzs w8, s0 -; CHECK-CVT-NEXT: fmov s0, w8 -; CHECK-CVT-NEXT: ret +; CHECK-SD-CVT-LABEL: test_signed_v1f16_v1i32: +; CHECK-SD-CVT: // %bb.0: +; CHECK-SD-CVT-NEXT: fcvt s0, h0 +; CHECK-SD-CVT-NEXT: fcvtzs w8, s0 +; CHECK-SD-CVT-NEXT: fmov s0, w8 +; CHECK-SD-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v1f16_v1i32: ; CHECK-FP16: // %bb.0: ; 
CHECK-FP16-NEXT: fcvtzs w8, h0 ; CHECK-FP16-NEXT: fmov s0, w8 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-CVT-LABEL: test_signed_v1f16_v1i32: +; CHECK-GI-CVT: // %bb.0: +; CHECK-GI-CVT-NEXT: fcvt s0, h0 +; CHECK-GI-CVT-NEXT: fcvtzs s0, s0 +; CHECK-GI-CVT-NEXT: ret %x = call <1 x i32> @llvm.fptosi.sat.v1f16.v1i32(<1 x half> %f) ret <1 x i32> %x } diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll index efe0a1bedbc9e..5a66b68af8e96 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -31,8 +31,7 @@ define <1 x i32> @test_unsigned_v1f32_v1i32(<1 x float> %f) { ; ; CHECK-GI-LABEL: test_unsigned_v1f32_v1i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fcvtzu w8, s0 -; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: fcvtzu s0, s0 ; CHECK-GI-NEXT: ret %x = call <1 x i32> @llvm.fptoui.sat.v1f32.v1i32(<1 x float> %f) ret <1 x i32> %x @@ -993,18 +992,24 @@ declare <7 x i32> @llvm.fptoui.sat.v7f16.v7i32 (<7 x half>) declare <8 x i32> @llvm.fptoui.sat.v8f16.v8i32 (<8 x half>) define <1 x i32> @test_unsigned_v1f16_v1i32(<1 x half> %f) { -; CHECK-CVT-LABEL: test_unsigned_v1f16_v1i32: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvtzu w8, s0 -; CHECK-CVT-NEXT: fmov s0, w8 -; CHECK-CVT-NEXT: ret +; CHECK-SD-CVT-LABEL: test_unsigned_v1f16_v1i32: +; CHECK-SD-CVT: // %bb.0: +; CHECK-SD-CVT-NEXT: fcvt s0, h0 +; CHECK-SD-CVT-NEXT: fcvtzu w8, s0 +; CHECK-SD-CVT-NEXT: fmov s0, w8 +; CHECK-SD-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v1f16_v1i32: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcvtzu w8, h0 ; CHECK-FP16-NEXT: fmov s0, w8 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-CVT-LABEL: test_unsigned_v1f16_v1i32: +; CHECK-GI-CVT: // %bb.0: +; CHECK-GI-CVT-NEXT: fcvt s0, h0 +; CHECK-GI-CVT-NEXT: fcvtzu s0, s0 +; CHECK-GI-CVT-NEXT: ret %x = call <1 x i32> @llvm.fptoui.sat.v1f16.v1i32(<1 x half> %f) ret <1 x i32> %x } diff --git 
a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll index 927c6142138b3..057a927422432 100644 --- a/llvm/test/CodeGen/AArch64/vector-lrint.ll +++ b/llvm/test/CodeGen/AArch64/vector-lrint.ll @@ -995,12 +995,18 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) nounwind { ; CHECK-i32-NEXT: fmov s0, w8 ; CHECK-i32-NEXT: ret ; -; CHECK-i64-LABEL: lrint_v1f64: -; CHECK-i64: // %bb.0: -; CHECK-i64-NEXT: frintx d0, d0 -; CHECK-i64-NEXT: fcvtzs x8, d0 -; CHECK-i64-NEXT: fmov d0, x8 -; CHECK-i64-NEXT: ret +; CHECK-i64-SD-LABEL: lrint_v1f64: +; CHECK-i64-SD: // %bb.0: +; CHECK-i64-SD-NEXT: frintx d0, d0 +; CHECK-i64-SD-NEXT: fcvtzs x8, d0 +; CHECK-i64-SD-NEXT: fmov d0, x8 +; CHECK-i64-SD-NEXT: ret +; +; CHECK-i64-GI-LABEL: lrint_v1f64: +; CHECK-i64-GI: // %bb.0: +; CHECK-i64-GI-NEXT: frintx d0, d0 +; CHECK-i64-GI-NEXT: fcvtzs d0, d0 +; CHECK-i64-GI-NEXT: ret %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x) ret <1 x iXLen> %a } From 81958ac1f3d7d550e62157d18d8a7dc1a5738654 Mon Sep 17 00:00:00 2001 From: Marian Lukac Date: Thu, 4 Sep 2025 14:19:57 +0000 Subject: [PATCH 2/4] Update tests --- .../test/CodeGen/AArch64/fptosi-sat-vector.ll | 21 +++++++------------ .../test/CodeGen/AArch64/fptoui-sat-vector.ll | 21 +++++++------------ 2 files changed, 16 insertions(+), 26 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index b963acd8cb2a1..c74112937ba53 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -31,7 +31,8 @@ define <1 x i32> @test_signed_v1f32_v1i32(<1 x float> %f) { ; ; CHECK-GI-LABEL: test_signed_v1f32_v1i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: fcvtzs w8, s0 +; CHECK-GI-NEXT: fmov s0, w8 ; CHECK-GI-NEXT: ret %x = call <1 x i32> @llvm.fptosi.sat.v1f32.v1i32(<1 x float> %f) ret <1 x i32> %x @@ -1161,24 +1162,18 @@ declare <7 x i32> 
@llvm.fptosi.sat.v7f16.v7i32 (<7 x half>) declare <8 x i32> @llvm.fptosi.sat.v8f16.v8i32 (<8 x half>) define <1 x i32> @test_signed_v1f16_v1i32(<1 x half> %f) { -; CHECK-SD-CVT-LABEL: test_signed_v1f16_v1i32: -; CHECK-SD-CVT: // %bb.0: -; CHECK-SD-CVT-NEXT: fcvt s0, h0 -; CHECK-SD-CVT-NEXT: fcvtzs w8, s0 -; CHECK-SD-CVT-NEXT: fmov s0, w8 -; CHECK-SD-CVT-NEXT: ret +; CHECK-CVT-LABEL: test_signed_v1f16_v1i32: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzs w8, s0 +; CHECK-CVT-NEXT: fmov s0, w8 +; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v1f16_v1i32: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcvtzs w8, h0 ; CHECK-FP16-NEXT: fmov s0, w8 ; CHECK-FP16-NEXT: ret -; -; CHECK-GI-CVT-LABEL: test_signed_v1f16_v1i32: -; CHECK-GI-CVT: // %bb.0: -; CHECK-GI-CVT-NEXT: fcvt s0, h0 -; CHECK-GI-CVT-NEXT: fcvtzs s0, s0 -; CHECK-GI-CVT-NEXT: ret %x = call <1 x i32> @llvm.fptosi.sat.v1f16.v1i32(<1 x half> %f) ret <1 x i32> %x } diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll index 5a66b68af8e96..efe0a1bedbc9e 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -31,7 +31,8 @@ define <1 x i32> @test_unsigned_v1f32_v1i32(<1 x float> %f) { ; ; CHECK-GI-LABEL: test_unsigned_v1f32_v1i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fcvtzu s0, s0 +; CHECK-GI-NEXT: fcvtzu w8, s0 +; CHECK-GI-NEXT: fmov s0, w8 ; CHECK-GI-NEXT: ret %x = call <1 x i32> @llvm.fptoui.sat.v1f32.v1i32(<1 x float> %f) ret <1 x i32> %x @@ -992,24 +993,18 @@ declare <7 x i32> @llvm.fptoui.sat.v7f16.v7i32 (<7 x half>) declare <8 x i32> @llvm.fptoui.sat.v8f16.v8i32 (<8 x half>) define <1 x i32> @test_unsigned_v1f16_v1i32(<1 x half> %f) { -; CHECK-SD-CVT-LABEL: test_unsigned_v1f16_v1i32: -; CHECK-SD-CVT: // %bb.0: -; CHECK-SD-CVT-NEXT: fcvt s0, h0 -; CHECK-SD-CVT-NEXT: fcvtzu w8, s0 -; CHECK-SD-CVT-NEXT: fmov s0, w8 -; CHECK-SD-CVT-NEXT: ret +; 
CHECK-CVT-LABEL: test_unsigned_v1f16_v1i32: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzu w8, s0 +; CHECK-CVT-NEXT: fmov s0, w8 +; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v1f16_v1i32: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcvtzu w8, h0 ; CHECK-FP16-NEXT: fmov s0, w8 ; CHECK-FP16-NEXT: ret -; -; CHECK-GI-CVT-LABEL: test_unsigned_v1f16_v1i32: -; CHECK-GI-CVT: // %bb.0: -; CHECK-GI-CVT-NEXT: fcvt s0, h0 -; CHECK-GI-CVT-NEXT: fcvtzu s0, s0 -; CHECK-GI-CVT-NEXT: ret %x = call <1 x i32> @llvm.fptoui.sat.v1f16.v1i32(<1 x half> %f) ret <1 x i32> %x } From ad701bbfbfe8273af5269e80b8a2e0019f9a7f17 Mon Sep 17 00:00:00 2001 From: Marian Lukac Date: Fri, 5 Sep 2025 14:21:23 +0000 Subject: [PATCH 3/4] Fix bitwise and error --- llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index c75a3c406f60d..2f607f1d8cf30 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -868,7 +868,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI); TypeSize SrcSize = getSizeInBits(MI.getOperand(1).getReg(), MRI, TRI); - if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) & + if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) && all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()), [&](const MachineInstr &UseMI) { return onlyUsesFP(UseMI, MRI, TRI) || @@ -1172,7 +1172,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } TypeSize DstSize = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI); TypeSize SrcSize = getSizeInBits(MI.getOperand(2).getReg(), MRI, TRI); - if (((DstSize == SrcSize) || 
STI.hasFeature(AArch64::FeatureFPRCVT)) & + if (((DstSize == SrcSize) || STI.hasFeature(AArch64::FeatureFPRCVT)) && all_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()), [&](const MachineInstr &UseMI) { return onlyUsesFP(UseMI, MRI, TRI) || From b0ecf9b34e2b18edb229cf7cc729db4d62e376f1 Mon Sep 17 00:00:00 2001 From: Marian Lukac Date: Mon, 8 Sep 2025 16:00:50 +0000 Subject: [PATCH 4/4] Fix GlobalISel for sat fptoi and regenerate tests --- .../AArch64/GISel/AArch64RegisterBankInfo.cpp | 12 +- .../CodeGen/AArch64/arm64-cvt-simd-fptoi.ll | 1027 ++++++++++++++--- 2 files changed, 881 insertions(+), 158 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp index 2f607f1d8cf30..6e954a1f6611b 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -847,17 +847,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; } case TargetOpcode::G_FPTOSI_SAT: - case TargetOpcode::G_FPTOUI_SAT: { - LLT DstType = MRI.getType(MI.getOperand(0).getReg()); - if (DstType.isVector()) - break; - if (DstType == LLT::scalar(16)) { - OpRegBankIdx = {PMI_FirstFPR, PMI_FirstFPR}; - break; - } - OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR}; - break; - } + case TargetOpcode::G_FPTOUI_SAT: case TargetOpcode::G_FPTOSI: case TargetOpcode::G_FPTOUI: case TargetOpcode::G_INTRINSIC_LRINT: diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll index 936bdfc164810..4a6b1f1f1d9d2 100644 --- a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll +++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll @@ -20,50 +20,50 @@ ; define float @test_fptosi_f16_i32_simd(half %a) { -; CHECK-SD-LABEL: test_fptosi_f16_i32_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzs s0, h0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: test_fptosi_f16_i32_simd: +; CHECK: // 
%bb.0: +; CHECK-NEXT: fcvtzs s0, h0 +; CHECK-NEXT: ret %r = fptosi half %a to i32 %bc = bitcast i32 %r to float ret float %bc } define double @test_fptosi_f16_i64_simd(half %a) { -; CHECK-SD-LABEL: test_fptosi_f16_i64_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzs d0, h0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: test_fptosi_f16_i64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, h0 +; CHECK-NEXT: ret %r = fptosi half %a to i64 %bc = bitcast i64 %r to double ret double %bc } define float @test_fptosi_f64_i32_simd(double %a) { -; CHECK-SD-LABEL: test_fptosi_f64_i32_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzs s0, d0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: test_fptosi_f64_i32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, d0 +; CHECK-NEXT: ret %r = fptosi double %a to i32 %bc = bitcast i32 %r to float ret float %bc } define double @test_fptosi_f32_i64_simd(float %a) { -; CHECK-SD-LABEL: test_fptosi_f32_i64_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzs d0, s0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: test_fptosi_f32_i64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: ret %r = fptosi float %a to i64 %bc = bitcast i64 %r to double ret double %bc } define double @test_fptosi_f64_i64_simd(double %a) { -; CHECK-SD-LABEL: test_fptosi_f64_i64_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzs d0, d0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: test_fptosi_f64_i64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: ret %r = fptosi double %a to i64 %bc = bitcast i64 %r to double ret double %bc @@ -71,60 +71,60 @@ define double @test_fptosi_f64_i64_simd(double %a) { define float @test_fptosi_f32_i32_simd(float %a) { -; CHECK-SD-LABEL: test_fptosi_f32_i32_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzs s0, s0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: test_fptosi_f32_i32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: ret %r = fptosi float %a to i32 %bc = bitcast i32 %r to float ret float %bc } 
define float @test_fptoui_f16_i32_simd(half %a) { -; CHECK-SD-LABEL: test_fptoui_f16_i32_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzu s0, h0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: test_fptoui_f16_i32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, h0 +; CHECK-NEXT: ret %r = fptoui half %a to i32 %bc = bitcast i32 %r to float ret float %bc } define double @test_fptoui_f16_i64_simd(half %a) { -; CHECK-SD-LABEL: test_fptoui_f16_i64_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzu d0, h0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: test_fptoui_f16_i64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, h0 +; CHECK-NEXT: ret %r = fptoui half %a to i64 %bc = bitcast i64 %r to double ret double %bc } define float @test_fptoui_f64_i32_simd(double %a) { -; CHECK-SD-LABEL: test_fptoui_f64_i32_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzu s0, d0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: test_fptoui_f64_i32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, d0 +; CHECK-NEXT: ret %r = fptoui double %a to i32 %bc = bitcast i32 %r to float ret float %bc } define double @test_fptoui_f32_i64_simd(float %a) { -; CHECK-SD-LABEL: test_fptoui_f32_i64_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzu d0, s0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: test_fptoui_f32_i64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, s0 +; CHECK-NEXT: ret %r = fptoui float %a to i64 %bc = bitcast i64 %r to double ret double %bc } define double @test_fptoui_f64_i64_simd(double %a) { -; CHECK-SD-LABEL: test_fptoui_f64_i64_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzu d0, d0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: test_fptoui_f64_i64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, d0 +; CHECK-NEXT: ret %r = fptoui double %a to i64 %bc = bitcast i64 %r to double ret double %bc @@ -132,10 +132,10 @@ define double @test_fptoui_f64_i64_simd(double %a) { define float @test_fptoui_f32_i32_simd(float %a) { -; CHECK-SD-LABEL: test_fptoui_f32_i32_simd: -; CHECK-SD: // %bb.0: -; 
CHECK-SD-NEXT: fcvtzu s0, s0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: test_fptoui_f32_i32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, s0 +; CHECK-NEXT: ret %r = fptoui float %a to i32 %bc = bitcast i32 %r to float ret float %bc @@ -147,60 +147,60 @@ define float @test_fptoui_f32_i32_simd(float %a) { ; define float @fptosi_i32_f16_simd(half %x) { -; CHECK-SD-LABEL: fptosi_i32_f16_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzs s0, h0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fptosi_i32_f16_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, h0 +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict") %sum = bitcast i32 %val to float ret float %sum } define double @fptosi_i64_f16_simd(half %x) { -; CHECK-SD-LABEL: fptosi_i64_f16_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzs d0, h0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fptosi_i64_f16_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, h0 +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict") %sum = bitcast i64 %val to double ret double %sum } define double @fptosi_i64_f32_simd(float %x) { -; CHECK-SD-LABEL: fptosi_i64_f32_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzs d0, s0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fptosi_i64_f32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %x, metadata !"fpexcept.strict") %bc = bitcast i64 %val to double ret double %bc } define float @fptosi_i32_f64_simd(double %x) { -; CHECK-SD-LABEL: fptosi_i32_f64_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzs s0, d0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fptosi_i32_f64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, d0 +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict") %bc = bitcast i32 %val to float ret float %bc } define double 
@fptosi_i64_f64_simd(double %x) { -; CHECK-SD-LABEL: fptosi_i64_f64_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzs d0, d0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fptosi_i64_f64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %x, metadata !"fpexcept.strict") %bc = bitcast i64 %val to double ret double %bc } define float @fptosi_i32_f32_simd(float %x) { -; CHECK-SD-LABEL: fptosi_i32_f32_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzs s0, s0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fptosi_i32_f32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict") %bc = bitcast i32 %val to float ret float %bc @@ -209,60 +209,60 @@ define float @fptosi_i32_f32_simd(float %x) { define float @fptoui_i32_f16_simd(half %x) { -; CHECK-SD-LABEL: fptoui_i32_f16_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzu s0, h0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fptoui_i32_f16_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, h0 +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") %sum = bitcast i32 %val to float ret float %sum } define double @fptoui_i64_f16_simd(half %x) { -; CHECK-SD-LABEL: fptoui_i64_f16_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzu d0, h0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fptoui_i64_f16_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, h0 +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict") %sum = bitcast i64 %val to double ret double %sum } define double @fptoui_i64_f32_simd(float %x) { -; CHECK-SD-LABEL: fptoui_i64_f32_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzu d0, s0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fptoui_i64_f32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, s0 +; CHECK-NEXT: ret 
%val = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %x, metadata !"fpexcept.strict") %bc = bitcast i64 %val to double ret double %bc } define float @fptoui_i32_f64_simd(double %x) { -; CHECK-SD-LABEL: fptoui_i32_f64_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzu s0, d0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fptoui_i32_f64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, d0 +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x, metadata !"fpexcept.strict") %bc = bitcast i32 %val to float ret float %bc } define double @fptoui_i64_f64_simd(double %x) { -; CHECK-SD-LABEL: fptoui_i64_f64_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzu d0, d0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fptoui_i64_f64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, d0 +; CHECK-NEXT: ret %val = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x, metadata !"fpexcept.strict") %bc = bitcast i64 %val to double ret double %bc } define float @fptoui_i32_f32_simd(float %x) { -; CHECK-SD-LABEL: fptoui_i32_f32_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzu s0, s0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fptoui_i32_f32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, s0 +; CHECK-NEXT: ret %val = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") %bc = bitcast i32 %val to float ret float %bc @@ -278,6 +278,16 @@ define double @fcvtas_ds_round_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtas d0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtas_ds_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl roundf +; CHECK-GI-NEXT: fcvtzs d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @roundf(float %a) nounwind readnone %i = fptosi float %r to i64 %bc = bitcast i64 %i to double @@ -289,6 +299,16 @@ define float @fcvtas_sd_round_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtas s0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtas_sd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl round +; CHECK-GI-NEXT: fcvtzs s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @round(double %a) nounwind readnone %i = fptosi double %r to i32 %bc = bitcast i32 %i to float @@ -300,6 +320,16 @@ define float @fcvtas_ss_round_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtas s0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtas_ss_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl roundf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @roundf(float %a) nounwind readnone %i = fptosi float %r to i32 %bc = bitcast i32 %i to float @@ -311,6 +341,16 @@ define double @fcvtas_dd_round_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtas d0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtas_dd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl round +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @round(double %a) nounwind readnone %i = fptosi double %r to i64 %bc = bitcast i64 %i to double @@ -323,6 +363,16 @@ define double @fcvtau_ds_round_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtau d0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtau_ds_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl roundf +; CHECK-GI-NEXT: fcvtzu d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @roundf(float %a) nounwind readnone %i = fptoui float %r to i64 %bc = bitcast i64 %i to double @@ -334,6 +384,16 @@ define float @fcvtau_sd_round_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtau s0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtau_sd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl round +; CHECK-GI-NEXT: fcvtzu s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @round(double %a) nounwind readnone %i = fptoui double %r to i32 %bc = bitcast i32 %i to float @@ -345,6 +405,16 @@ define float @fcvtau_ss_round_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtas s0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtau_ss_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl roundf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @roundf(float %a) nounwind readnone %i = fptosi float %r to i32 %bc = bitcast i32 %i to float @@ -356,6 +426,16 @@ define double @fcvtau_dd_round_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtas d0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtau_dd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl round +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @round(double %a) nounwind readnone %i = fptosi double %r to i64 %bc = bitcast i64 %i to double @@ -368,6 +448,16 @@ define double @fcvtms_ds_round_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtms d0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtms_ds_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floorf +; CHECK-GI-NEXT: fcvtzs d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @floorf(float %a) nounwind readnone %i = fptosi float %r to i64 %bc = bitcast i64 %i to double @@ -379,6 +469,16 @@ define float @fcvtms_sd_round_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtms s0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtms_sd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floor +; CHECK-GI-NEXT: fcvtzs s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @floor(double %a) nounwind readnone %i = fptosi double %r to i32 %bc = bitcast i32 %i to float @@ -390,6 +490,16 @@ define float @fcvtms_ss_round_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtms s0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtms_ss_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floorf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @floorf(float %a) nounwind readnone %i = fptosi float %r to i32 %bc = bitcast i32 %i to float @@ -401,6 +511,16 @@ define double @fcvtms_dd_round_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtms d0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtms_dd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floor +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @floor(double %a) nounwind readnone %i = fptosi double %r to i64 %bc = bitcast i64 %i to double @@ -414,6 +534,16 @@ define double @fcvtmu_ds_round_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtmu d0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtmu_ds_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floorf +; CHECK-GI-NEXT: fcvtzu d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @floorf(float %a) nounwind readnone %i = fptoui float %r to i64 %bc = bitcast i64 %i to double @@ -425,6 +555,16 @@ define float @fcvtmu_sd_round_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtmu s0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtmu_sd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floor +; CHECK-GI-NEXT: fcvtzu s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @floor(double %a) nounwind readnone %i = fptoui double %r to i32 %bc = bitcast i32 %i to float @@ -436,6 +576,16 @@ define float @fcvtmu_ss_round_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtms s0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtmu_ss_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floorf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @floorf(float %a) nounwind readnone %i = fptosi float %r to i32 %bc = bitcast i32 %i to float @@ -447,6 +597,16 @@ define double @fcvtmu_dd_round_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtms d0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtmu_dd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floor +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @floor(double %a) nounwind readnone %i = fptosi double %r to i64 %bc = bitcast i64 %i to double @@ -459,6 +619,16 @@ define double @fcvtps_ds_round_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtps d0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtps_ds_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceilf +; CHECK-GI-NEXT: fcvtzs d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @ceilf(float %a) nounwind readnone %i = fptosi float %r to i64 %bc = bitcast i64 %i to double @@ -470,6 +640,16 @@ define float @fcvtps_sd_round_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtps s0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtps_sd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceil +; CHECK-GI-NEXT: fcvtzs s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @ceil(double %a) nounwind readnone %i = fptosi double %r to i32 %bc = bitcast i32 %i to float @@ -481,6 +661,16 @@ define float @fcvtps_ss_round_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtps s0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtps_ss_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceilf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @ceilf(float %a) nounwind readnone %i = fptosi float %r to i32 %bc = bitcast i32 %i to float @@ -492,6 +682,16 @@ define double @fcvtps_dd_round_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtps d0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtps_dd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceil +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @ceil(double %a) nounwind readnone %i = fptosi double %r to i64 %bc = bitcast i64 %i to double @@ -504,6 +704,16 @@ define double @fcvtpu_ds_round_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtpu d0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtpu_ds_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceilf +; CHECK-GI-NEXT: fcvtzu d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @ceilf(float %a) nounwind readnone %i = fptoui float %r to i64 %bc = bitcast i64 %i to double @@ -515,6 +725,16 @@ define float @fcvtpu_sd_round_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtpu s0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtpu_sd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceil +; CHECK-GI-NEXT: fcvtzu s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @ceil(double %a) nounwind readnone %i = fptoui double %r to i32 %bc = bitcast i32 %i to float @@ -526,6 +746,16 @@ define float @fcvtpu_ss_round_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtps s0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtpu_ss_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceilf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @ceilf(float %a) nounwind readnone %i = fptosi float %r to i32 %bc = bitcast i32 %i to float @@ -537,6 +767,16 @@ define double @fcvtpu_dd_round_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtps d0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtpu_dd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceil +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @ceil(double %a) nounwind readnone %i = fptosi double %r to i64 %bc = bitcast i64 %i to double @@ -549,6 +789,16 @@ define double @fcvtzs_ds_round_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtzs d0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_ds_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl truncf +; CHECK-GI-NEXT: fcvtzs d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @truncf(float %a) nounwind readnone %i = fptosi float %r to i64 %bc = bitcast i64 %i to double @@ -560,6 +810,16 @@ define float @fcvtzs_sd_round_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtzs s0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_sd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl trunc +; CHECK-GI-NEXT: fcvtzs s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @trunc(double %a) nounwind readnone %i = fptosi double %r to i32 %bc = bitcast i32 %i to float @@ -571,6 +831,16 @@ define float @fcvtzs_ss_round_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtzs s0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_ss_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl truncf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @truncf(float %a) nounwind readnone %i = fptosi float %r to i32 %bc = bitcast i32 %i to float @@ -582,6 +852,16 @@ define double @fcvtzs_dd_round_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtzs d0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_dd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl trunc +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @trunc(double %a) nounwind readnone %i = fptosi double %r to i64 %bc = bitcast i64 %i to double @@ -593,6 +873,16 @@ define double @fcvtzu_ds_round_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtzu d0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_ds_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl truncf +; CHECK-GI-NEXT: fcvtzu d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @truncf(float %a) nounwind readnone %i = fptoui float %r to i64 %bc = bitcast i64 %i to double @@ -604,6 +894,16 @@ define float @fcvtzu_sd_round_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtzu s0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_sd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl trunc +; CHECK-GI-NEXT: fcvtzu s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @trunc(double %a) nounwind readnone %i = fptoui double %r to i32 %bc = bitcast i32 %i to float @@ -615,6 +915,16 @@ define float @fcvtzu_ss_round_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtzs s0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_ss_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl truncf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @truncf(float %a) nounwind readnone %i = fptosi float %r to i32 %bc = bitcast i32 %i to float @@ -626,6 +936,16 @@ define double @fcvtzu_dd_round_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtzs d0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_dd_round_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl trunc +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @trunc(double %a) nounwind readnone %i = fptosi double %r to i64 %bc = bitcast i64 %i to double @@ -638,120 +958,120 @@ define double @fcvtzu_dd_round_simd(double %a) { ; define float @fcvtzs_sh_sat_simd(half %a) { -; CHECK-SD-LABEL: fcvtzs_sh_sat_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzs s0, h0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fcvtzs_sh_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, h0 +; CHECK-NEXT: ret %i = call i32 @llvm.fptosi.sat.i32.f16(half %a) %bc = bitcast i32 %i to float ret float %bc } define double @fcvtzs_dh_sat_simd(half %a) { -; CHECK-SD-LABEL: fcvtzs_dh_sat_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzs d0, h0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fcvtzs_dh_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, h0 +; CHECK-NEXT: ret %i = call i64 @llvm.fptosi.sat.i64.f16(half %a) %bc = bitcast i64 %i to double ret double %bc } define double @fcvtzs_ds_sat_simd(float %a) { -; CHECK-SD-LABEL: fcvtzs_ds_sat_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzs d0, s0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fcvtzs_ds_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs 
d0, s0 +; CHECK-NEXT: ret %i = call i64 @llvm.fptosi.sat.i64.f32(float %a) %bc = bitcast i64 %i to double ret double %bc } define float @fcvtzs_sd_sat_simd(double %a) { -; CHECK-SD-LABEL: fcvtzs_sd_sat_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzs s0, d0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fcvtzs_sd_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, d0 +; CHECK-NEXT: ret %i = call i32 @llvm.fptosi.sat.i32.f64(double %a) %bc = bitcast i32 %i to float ret float %bc } define float @fcvtzs_ss_sat_simd(float %a) { -; CHECK-SD-LABEL: fcvtzs_ss_sat_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzs s0, s0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fcvtzs_ss_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: ret %i = call i32 @llvm.fptosi.sat.i32.f32(float %a) %bc = bitcast i32 %i to float ret float %bc } define double @fcvtzs_dd_sat_simd(double %a) { -; CHECK-SD-LABEL: fcvtzs_dd_sat_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzs d0, d0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fcvtzs_dd_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: ret %i = call i64 @llvm.fptosi.sat.i64.f64(double %a) %bc = bitcast i64 %i to double ret double %bc } define float @fcvtzu_sh_sat_simd(half %a) { -; CHECK-SD-LABEL: fcvtzu_sh_sat_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzu s0, h0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fcvtzu_sh_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, h0 +; CHECK-NEXT: ret %i = call i32 @llvm.fptoui.sat.i32.f16(half %a) %bc = bitcast i32 %i to float ret float %bc } define double @fcvtzu_dh_sat_simd(half %a) { -; CHECK-SD-LABEL: fcvtzu_dh_sat_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzu d0, h0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fcvtzu_dh_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, h0 +; CHECK-NEXT: ret %i = call i64 @llvm.fptoui.sat.i64.f16(half %a) %bc = bitcast i64 %i to double ret double %bc } define double @fcvtzu_ds_sat_simd(float %a) { -; CHECK-SD-LABEL: 
fcvtzu_ds_sat_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzu d0, s0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fcvtzu_ds_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, s0 +; CHECK-NEXT: ret %i = call i64 @llvm.fptoui.sat.i64.f32(float %a) %bc = bitcast i64 %i to double ret double %bc } define float @fcvtzu_sd_sat_simd(double %a) { -; CHECK-SD-LABEL: fcvtzu_sd_sat_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzu s0, d0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fcvtzu_sd_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, d0 +; CHECK-NEXT: ret %i = call i32 @llvm.fptoui.sat.i32.f64(double %a) %bc = bitcast i32 %i to float ret float %bc } define float @fcvtzu_ss_sat_simd(float %a) { -; CHECK-SD-LABEL: fcvtzu_ss_sat_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzs s0, s0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fcvtzu_ss_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: ret %i = call i32 @llvm.fptosi.sat.i32.f32(float %a) %bc = bitcast i32 %i to float ret float %bc } define double @fcvtzu_dd_sat_simd(double %a) { -; CHECK-SD-LABEL: fcvtzu_dd_sat_simd: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcvtzs d0, d0 -; CHECK-SD-NEXT: ret +; CHECK-LABEL: fcvtzu_dd_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: ret %i = call i64 @llvm.fptosi.sat.i64.f64(double %a) %bc = bitcast i64 %i to double ret double %bc @@ -766,6 +1086,12 @@ define float @fcvtas_sh_simd(half %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtas s0, h0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtas_sh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frinta h0, h0 +; CHECK-GI-NEXT: fcvtzs s0, h0 +; CHECK-GI-NEXT: ret %r = call half @llvm.round.f16(half %a) nounwind readnone %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) %bc = bitcast i32 %i to float @@ -777,6 +1103,12 @@ define double @fcvtas_dh_simd(half %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtas d0, h0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtas_dh_simd: +; CHECK-GI: // %bb.0: 
+; CHECK-GI-NEXT: frinta h0, h0 +; CHECK-GI-NEXT: fcvtzs d0, h0 +; CHECK-GI-NEXT: ret %r = call half @llvm.round.f16(half %a) nounwind readnone %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) %bc = bitcast i64 %i to double @@ -788,6 +1120,16 @@ define double @fcvtas_ds_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtas d0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtas_ds_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl roundf +; CHECK-GI-NEXT: fcvtzs d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @roundf(float %a) nounwind readnone %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) %bc = bitcast i64 %i to double @@ -799,6 +1141,16 @@ define float @fcvtas_sd_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtas s0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtas_sd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl round +; CHECK-GI-NEXT: fcvtzs s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @round(double %a) nounwind readnone %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) %bc = bitcast i32 %i to float @@ -810,6 +1162,16 @@ define float @fcvtas_ss_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtas s0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtas_ss_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl roundf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @roundf(float %a) nounwind readnone %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) %bc = bitcast i32 %i to float @@ -821,6 +1183,16 @@ define double @fcvtas_dd_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtas d0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtas_dd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl round +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @round(double %a) nounwind readnone %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) %bc = bitcast i64 %i to double @@ -832,6 +1204,12 @@ define float @fcvtau_sh_simd(half %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtau s0, h0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtau_sh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frinta h0, h0 +; CHECK-GI-NEXT: fcvtzu s0, h0 +; CHECK-GI-NEXT: ret %r = call half @llvm.round.f16(half %a) nounwind readnone %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) %bc = bitcast i32 %i to float @@ -843,6 +1221,12 @@ define double @fcvtau_dh_simd(half %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtau d0, h0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtau_dh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frinta h0, h0 +; CHECK-GI-NEXT: fcvtzu d0, h0 +; CHECK-GI-NEXT: ret %r = call half @llvm.round.f16(half %a) nounwind readnone %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) %bc = bitcast i64 %i to double @@ -854,6 +1238,16 @@ define double @fcvtau_ds_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtau d0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: 
fcvtau_ds_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl roundf +; CHECK-GI-NEXT: fcvtzu d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @roundf(float %a) nounwind readnone %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) %bc = bitcast i64 %i to double @@ -865,6 +1259,16 @@ define float @fcvtau_sd_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtau s0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtau_sd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl round +; CHECK-GI-NEXT: fcvtzu s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @round(double %a) nounwind readnone %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) %bc = bitcast i32 %i to float @@ -876,6 +1280,16 @@ define float @fcvtau_ss_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtas s0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtau_ss_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl roundf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @roundf(float %a) nounwind readnone %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) %bc = bitcast i32 %i to float @@ -887,6 +1301,16 @@ define double @fcvtau_dd_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtas d0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtau_dd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl round +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @round(double %a) nounwind readnone %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) %bc = bitcast i64 %i to double @@ -898,6 +1322,12 @@ define float @fcvtms_sh_simd(half %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtms s0, h0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtms_sh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frintm h0, h0 +; CHECK-GI-NEXT: fcvtzs s0, h0 +; CHECK-GI-NEXT: ret %r = call half @llvm.floor.f16(half %a) nounwind readnone %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) %bc = bitcast i32 %i to float @@ -909,6 +1339,12 @@ define double @fcvtms_dh_simd(half %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtms d0, h0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtms_dh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frintm h0, h0 +; CHECK-GI-NEXT: fcvtzs d0, h0 +; CHECK-GI-NEXT: ret %r = call half @llvm.floor.f16(half %a) nounwind readnone %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) %bc = bitcast i64 %i to double @@ -920,6 +1356,16 @@ define double @fcvtms_ds_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtms d0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtms_ds_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floorf +; CHECK-GI-NEXT: fcvtzs d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @floorf(float %a) nounwind readnone %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) %bc = bitcast i64 %i to double @@ -931,6 +1377,16 @@ define float @fcvtms_sd_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtms s0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtms_sd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floor +; CHECK-GI-NEXT: fcvtzs s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @floor(double %a) nounwind readnone %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) %bc = bitcast i32 %i to float @@ -942,6 +1398,16 @@ define float @fcvtms_ss_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtms s0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtms_ss_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floorf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @floorf(float %a) nounwind readnone %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) %bc = bitcast i32 %i to float @@ -953,6 +1419,16 @@ define double @fcvtms_dd_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtms d0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtms_dd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floor +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @floor(double %a) nounwind readnone %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) %bc = bitcast i64 %i to double @@ -964,6 +1440,12 @@ define float @fcvtmu_sh_simd(half %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtmu s0, h0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtmu_sh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frintm h0, h0 +; CHECK-GI-NEXT: fcvtzu s0, h0 +; CHECK-GI-NEXT: ret %r = call half @llvm.floor.f16(half %a) nounwind readnone %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) %bc = bitcast i32 %i to float @@ -975,6 +1457,12 @@ define double @fcvtmu_dh_simd(half %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtmu d0, h0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtmu_dh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frintm h0, h0 +; CHECK-GI-NEXT: fcvtzu d0, h0 +; CHECK-GI-NEXT: ret %r = call half @llvm.floor.f16(half %a) nounwind readnone %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) %bc = bitcast i64 %i to double @@ -986,6 +1474,16 @@ define double @fcvtmu_ds_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtmu d0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtmu_ds_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floorf +; CHECK-GI-NEXT: fcvtzu d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @floorf(float %a) nounwind readnone %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) %bc = bitcast i64 %i to double @@ -997,6 +1495,16 @@ define float @fcvtmu_sd_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtmu s0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtmu_sd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floor +; CHECK-GI-NEXT: fcvtzu s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @floor(double %a) nounwind readnone %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) %bc = bitcast i32 %i to float @@ -1008,6 +1516,16 @@ define float @fcvtmu_ss_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtms s0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtmu_ss_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floorf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @floorf(float %a) nounwind readnone %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) %bc = bitcast i32 %i to float @@ -1019,6 +1537,16 @@ define double @fcvtmu_dd_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtms d0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtmu_dd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl floor +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @floor(double %a) nounwind readnone %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) %bc = bitcast i64 %i to double @@ -1030,6 +1558,12 @@ define float @fcvtps_sh_simd(half %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtps s0, h0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtps_sh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frintp h0, h0 +; CHECK-GI-NEXT: fcvtzs s0, h0 +; CHECK-GI-NEXT: ret %r = call half @llvm.ceil.f16(half %a) nounwind readnone %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) %bc = bitcast i32 %i to float @@ -1041,6 +1575,12 @@ define double @fcvtps_dh_simd(half %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtps d0, h0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtps_dh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frintp h0, h0 +; CHECK-GI-NEXT: fcvtzs d0, h0 +; CHECK-GI-NEXT: ret %r = call half @llvm.ceil.f16(half %a) nounwind readnone %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) %bc = bitcast i64 %i to double @@ -1052,6 +1592,16 @@ define double @fcvtps_ds_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtps d0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtps_ds_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceilf +; CHECK-GI-NEXT: fcvtzs d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @ceilf(float %a) nounwind readnone %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) %bc = bitcast i64 %i to double @@ -1063,6 +1613,16 @@ define float @fcvtps_sd_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtps s0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtps_sd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceil +; CHECK-GI-NEXT: fcvtzs s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @ceil(double %a) nounwind readnone %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) %bc = bitcast i32 %i to float @@ -1074,6 +1634,16 @@ define float @fcvtps_ss_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtps s0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtps_ss_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceilf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @ceilf(float %a) nounwind readnone %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) %bc = bitcast i32 %i to float @@ -1085,6 +1655,16 @@ define double @fcvtps_dd_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtps d0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtps_dd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceil +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @ceil(double %a) nounwind readnone %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) %bc = bitcast i64 %i to double @@ -1096,6 +1676,12 @@ define float @fcvtpu_sh_simd(half %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtpu s0, h0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtpu_sh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frintp h0, h0 +; CHECK-GI-NEXT: fcvtzu s0, h0 +; CHECK-GI-NEXT: ret %r = call half @llvm.ceil.f16(half %a) nounwind readnone %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) %bc = bitcast i32 %i to float @@ -1107,6 +1693,12 @@ define double @fcvtpu_dh_simd(half %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtpu d0, h0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtpu_dh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frintp h0, h0 +; CHECK-GI-NEXT: fcvtzu d0, h0 +; CHECK-GI-NEXT: ret %r = call half @llvm.ceil.f16(half %a) nounwind readnone %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) %bc = bitcast i64 %i to double @@ -1118,6 +1710,16 @@ define double @fcvtpu_ds_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtpu d0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtpu_ds_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceilf +; CHECK-GI-NEXT: fcvtzu d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @ceilf(float %a) nounwind readnone %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) %bc = bitcast i64 %i to double @@ -1129,6 +1731,16 @@ define float @fcvtpu_sd_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtpu s0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtpu_sd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceil +; CHECK-GI-NEXT: fcvtzu s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @ceil(double %a) nounwind readnone %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) %bc = bitcast i32 %i to float @@ -1140,6 +1752,16 @@ define float @fcvtpu_ss_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtps s0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtpu_ss_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceilf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @ceilf(float %a) nounwind readnone %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) %bc = bitcast i32 %i to float @@ -1151,6 +1773,16 @@ define double @fcvtpu_dd_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtps d0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtpu_dd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl ceil +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @ceil(double %a) nounwind readnone %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) %bc = bitcast i64 %i to double @@ -1162,6 +1794,12 @@ define float @fcvtzs_sh_simd(half %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtzs s0, h0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_sh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frintz h0, h0 +; CHECK-GI-NEXT: fcvtzs s0, h0 +; CHECK-GI-NEXT: ret %r = call half @llvm.trunc.f16(half %a) nounwind readnone %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) %bc = bitcast i32 %i to float @@ -1173,6 +1811,12 @@ define double @fcvtzs_dh_simd(half %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtzs d0, h0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_dh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frintz h0, h0 +; CHECK-GI-NEXT: fcvtzs d0, h0 +; CHECK-GI-NEXT: ret %r = call half @llvm.trunc.f16(half %a) nounwind readnone %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) %bc = bitcast i64 %i to double @@ -1184,6 +1828,16 @@ define double @fcvtzs_ds_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtzs d0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_ds_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl truncf +; CHECK-GI-NEXT: fcvtzs d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @truncf(float %a) nounwind readnone %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) %bc = bitcast i64 %i to double @@ -1195,6 +1849,16 @@ define float @fcvtzs_sd_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtzs s0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_sd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl trunc +; CHECK-GI-NEXT: fcvtzs s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @trunc(double %a) nounwind readnone %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) %bc = bitcast i32 %i to float @@ -1206,6 +1870,16 @@ define float @fcvtzs_ss_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtzs s0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_ss_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl truncf +; CHECK-GI-NEXT: fcvtzs s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @truncf(float %a) nounwind readnone %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) %bc = bitcast i32 %i to float @@ -1217,6 +1891,16 @@ define double @fcvtzs_dd_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtzs d0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_dd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl trunc +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @trunc(double %a) nounwind readnone %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) %bc = bitcast i64 %i to double @@ -1228,6 +1912,12 @@ define float @fcvtzu_sh_simd(half %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtzu s0, h0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_sh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frintz h0, h0 +; CHECK-GI-NEXT: fcvtzu s0, h0 +; CHECK-GI-NEXT: ret %r = call half @llvm.trunc.f16(half %a) nounwind readnone %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) %bc = bitcast i32 %i to float @@ -1239,6 +1929,12 @@ define double @fcvtzu_dh_simd(half %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtzu d0, h0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_dh_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: frintz h0, h0 +; CHECK-GI-NEXT: fcvtzu d0, h0 +; CHECK-GI-NEXT: ret %r = call half @llvm.trunc.f16(half %a) nounwind readnone %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) %bc = bitcast i64 %i to double @@ -1250,6 +1946,16 @@ define double @fcvtzu_ds_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtzu d0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_ds_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl truncf +; CHECK-GI-NEXT: fcvtzu d0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @truncf(float %a) nounwind readnone %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) %bc = bitcast i64 %i to double @@ -1261,6 +1967,16 @@ define float @fcvtzu_sd_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtzu s0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_sd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl trunc +; CHECK-GI-NEXT: fcvtzu s0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @trunc(double %a) nounwind readnone %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) %bc = bitcast i32 %i to float @@ -1272,6 +1988,16 @@ define float @fcvtzu_ss_simd(float %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtzu s0, s0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_ss_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl truncf +; CHECK-GI-NEXT: fcvtzu s0, s0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call float @truncf(float %a) nounwind readnone %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) %bc = bitcast i32 %i to float @@ -1283,6 +2009,16 @@ define double @fcvtzu_dd_simd(double %a) { ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fcvtzu d0, d0 ; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_dd_simd: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: bl trunc +; CHECK-GI-NEXT: fcvtzu d0, d0 +; CHECK-GI-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-GI-NEXT: ret %r = call double @trunc(double %a) nounwind readnone %i = call i64 @llvm.fptoui.sat.i64.f64(double %r) %bc = bitcast i64 %i to double @@ -1301,6 +2037,3 @@ declare double @floor(double) nounwind readnone declare double @ceil(double) nounwind readnone declare double @trunc(double) nounwind readnone declare double @round(double) nounwind readnone -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK: {{.*}} -; CHECK-GI: {{.*}}