-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[AArch64][GlobalISel] Add codegen for simd fpcvt intrinsics #157680
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
48a59b1
0b5f73c
fac8de9
d0b7331
1aed880
44fa93a
fd84e5e
d087878
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5232,19 +5232,54 @@ defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>; | |
| defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>; | ||
| defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>; | ||
|
|
||
| defm FCVTAS : SIMDFPTwoScalarFCVT< 0, 0, 0b11100, "fcvtas", int_aarch64_neon_fcvtas>; | ||
|
||
| defm FCVTAU : SIMDFPTwoScalarFCVT< 1, 0, 0b11100, "fcvtau", int_aarch64_neon_fcvtau>; | ||
| defm FCVTMS : SIMDFPTwoScalarFCVT< 0, 0, 0b11011, "fcvtms", int_aarch64_neon_fcvtms>; | ||
| defm FCVTMU : SIMDFPTwoScalarFCVT< 1, 0, 0b11011, "fcvtmu", int_aarch64_neon_fcvtmu>; | ||
| defm FCVTNS : SIMDFPTwoScalarFCVT< 0, 0, 0b11010, "fcvtns", int_aarch64_neon_fcvtns>; | ||
| defm FCVTNU : SIMDFPTwoScalarFCVT< 1, 0, 0b11010, "fcvtnu", int_aarch64_neon_fcvtnu>; | ||
| defm FCVTPS : SIMDFPTwoScalarFCVT< 0, 1, 0b11010, "fcvtps", int_aarch64_neon_fcvtps>; | ||
| defm FCVTPU : SIMDFPTwoScalarFCVT< 1, 1, 0b11010, "fcvtpu", int_aarch64_neon_fcvtpu>; | ||
| defm FCVTZS : SIMDFPTwoScalarFCVT< 0, 1, 0b11011, "fcvtzs">; | ||
| defm FCVTZU : SIMDFPTwoScalarFCVT< 1, 1, 0b11011, "fcvtzu">; | ||
|
|
||
| let Predicates = [HasNEON, HasFPRCVT] in{ | ||
| defm FCVTAS : FPToIntegerSIMDScalar<0b11, 0b010, "fcvtas">; | ||
| defm FCVTAU : FPToIntegerSIMDScalar<0b11, 0b011, "fcvtau">; | ||
| defm FCVTMS : FPToIntegerSIMDScalar<0b10, 0b100, "fcvtms">; | ||
| defm FCVTMU : FPToIntegerSIMDScalar<0b10, 0b101, "fcvtmu">; | ||
| defm FCVTNS : FPToIntegerSIMDScalar<0b01, 0b010, "fcvtns">; | ||
| defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu">; | ||
| defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps">; | ||
| defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu">; | ||
| defm FCVTAS : FPToIntegerSIMDScalar<0b11, 0b010, "fcvtas", int_aarch64_neon_fcvtas>; | ||
| defm FCVTAU : FPToIntegerSIMDScalar<0b11, 0b011, "fcvtau", int_aarch64_neon_fcvtau>; | ||
| defm FCVTMS : FPToIntegerSIMDScalar<0b10, 0b100, "fcvtms", int_aarch64_neon_fcvtms>; | ||
| defm FCVTMU : FPToIntegerSIMDScalar<0b10, 0b101, "fcvtmu", int_aarch64_neon_fcvtmu>; | ||
| defm FCVTNS : FPToIntegerSIMDScalar<0b01, 0b010, "fcvtns", int_aarch64_neon_fcvtns>; | ||
| defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu", int_aarch64_neon_fcvtnu>; | ||
| defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps", int_aarch64_neon_fcvtps>; | ||
| defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu", int_aarch64_neon_fcvtpu>; | ||
| defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs">; | ||
| defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu">; | ||
| } | ||
|
|
||
| // Patterns for an FP-to-integer conversion whose integer result is | ||
| // immediately bitcast back to a floating-point type of the same width. | ||
| //   OpN  - the conversion operator/intrinsic being matched. | ||
| //   INST - instruction-name prefix; one of the suffixes SDr, SHr, DHr, DSr, | ||
| //          v1i32 or v1i64 is appended to select the concrete instruction. | ||
| multiclass FPToIntegerSIMDScalarPatterns<SDPatternOperator OpN, string INST> { | ||
| // The SDr/SHr/DHr/DSr variants are guarded by HasFPRCVT everywhere else | ||
| // they are selected (see FPToIntegerIntPats), so guard them here too; | ||
| // otherwise these patterns could match on subtargets without the feature. | ||
| let Predicates = [HasFPRCVT] in { | ||
| def : Pat<(f32 (bitconvert (i32 (OpN (f64 FPR64:$Rn))))), | ||
| (!cast<Instruction>(INST # SDr) FPR64:$Rn)>; | ||
| def : Pat<(f32 (bitconvert (i32 (OpN (f16 FPR16:$Rn))))), | ||
| (!cast<Instruction>(INST # SHr) FPR16:$Rn)>; | ||
| def : Pat<(f64 (bitconvert (i64 (OpN (f16 FPR16:$Rn))))), | ||
| (!cast<Instruction>(INST # DHr) FPR16:$Rn)>; | ||
| def : Pat<(f64 (bitconvert (i64 (OpN (f32 FPR32:$Rn))))), | ||
| (!cast<Instruction>(INST # DSr) FPR32:$Rn)>; | ||
| } | ||
| // Same-width conversions use the v1i32/v1i64 forms and need no extra guard. | ||
| def : Pat<(f32 (bitconvert (i32 (OpN (f32 FPR32:$Rn))))), | ||
| (!cast<Instruction>(INST # v1i32) FPR32:$Rn)>; | ||
| def : Pat<(f64 (bitconvert (i64 (OpN (f64 FPR64:$Rn))))), | ||
| (!cast<Instruction>(INST # v1i64) FPR64:$Rn)>; | ||
|
|
||
| } | ||
| // Instantiate FPToIntegerSIMDScalarPatterns once per conversion intrinsic; | ||
| // the string is the instruction-name prefix that the multiclass extends | ||
| // with a suffix (SDr, SHr, DHr, DSr, v1i32, v1i64). | ||
| defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtas, "FCVTAS">; | ||
| defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtau, "FCVTAU">; | ||
| defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtms, "FCVTMS">; | ||
| defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtmu, "FCVTMU">; | ||
| defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtns, "FCVTNS">; | ||
| defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtnu, "FCVTNU">; | ||
| defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtps, "FCVTPS">; | ||
| defm: FPToIntegerSIMDScalarPatterns<int_aarch64_neon_fcvtpu, "FCVTPU">; | ||
|
|
||
| // AArch64's FCVT instructions saturate when out of range. | ||
| multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> { | ||
| let Predicates = [HasFullFP16] in { | ||
|
|
@@ -5321,6 +5356,32 @@ multiclass FPToIntegerIntPats<Intrinsic round, string INST> { | |
| def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>; | ||
| def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>; | ||
|
|
||
| // For global-isel we can use register classes to determine | ||
| // which FCVT instruction to use. | ||
| let Predicates = [HasFPRCVT] in { | ||
| def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # SHr) $Rn)>; | ||
| def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # DHr) $Rn)>; | ||
| def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # DSr) $Rn)>; | ||
| def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # SDr) $Rn)>; | ||
| } | ||
| def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # v1i32) $Rn)>; | ||
| def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # v1i64) $Rn)>; | ||
|
|
||
| let Predicates = [HasFPRCVT] in { | ||
| def : Pat<(f32 (bitconvert (i32 (round f16:$Rn)))), | ||
| (!cast<Instruction>(INST # SHr) $Rn)>; | ||
| def : Pat<(f64 (bitconvert (i64 (round f16:$Rn)))), | ||
| (!cast<Instruction>(INST # DHr) $Rn)>; | ||
| def : Pat<(f64 (bitconvert (i64 (round f32:$Rn)))), | ||
| (!cast<Instruction>(INST # DSr) $Rn)>; | ||
| def : Pat<(f32 (bitconvert (i32 (round f64:$Rn)))), | ||
| (!cast<Instruction>(INST # SDr) $Rn)>; | ||
| } | ||
| def : Pat<(f32 (bitconvert (i32 (round f32:$Rn)))), | ||
| (!cast<Instruction>(INST # v1i32) $Rn)>; | ||
| def : Pat<(f64 (bitconvert (i64 (round f64:$Rn)))), | ||
| (!cast<Instruction>(INST # v1i64) $Rn)>; | ||
|
|
||
| let Predicates = [HasFullFP16] in { | ||
| def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))), | ||
| (!cast<Instruction>(INST # SWHri) $Rn, $scale)>; | ||
|
|
@@ -6569,17 +6630,7 @@ defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>; | |
| defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; | ||
| defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>; | ||
| defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; | ||
| defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">; | ||
| defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">; | ||
| defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">; | ||
| defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">; | ||
| defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">; | ||
| defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">; | ||
| defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">; | ||
| defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">; | ||
| def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">; | ||
| defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">; | ||
| defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">; | ||
| defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">; | ||
| defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">; | ||
| defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you can fold this into SIMDFPTwoScalar, provided its other users pass null_frag.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That will disable FastISel selection for every instruction that uses that class, but if that is fine I can do it.