[ARM] Disable strict node mutation and use correct lowering for several strict ops #170136
Conversation
@llvm/pr-subscribers-backend-arm

Author: Erik Enikeev (Varnike)

Changes

Patch is 64.25 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/170136.diff

4 Files Affected:
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 32f3e5fa3c842..1a8c470600394 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -546,16 +546,24 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT})
setOperationAction(Op, MVT::f64, Legal);
+
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
}
}
if (Subtarget->hasFullFP16()) {
+ for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
+ ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT})
+ setOperationAction(Op, MVT::f16, Legal);
+
addRegisterClass(MVT::f16, &ARM::HPRRegClass);
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_FMINNUM, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_FMAXNUM, MVT::f16, Legal);
}
if (Subtarget->hasBF16()) {
@@ -865,13 +873,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
}
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
+
if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Custom);
@@ -879,11 +888,16 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
}
+ } else {
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
}
if (!Subtarget->hasFP16()) {
setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Custom);
+ } else {
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
}
computeRegisterProperties(Subtarget->getRegisterInfo());
@@ -1223,16 +1237,16 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
- setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, LibCall);
- setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, LibCall);
+ setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f64, Expand);
+ setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f64, Expand);
}
// fp16 is a special v7 extension that adds f16 <-> f32 conversions.
if (!Subtarget->hasFP16()) {
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
- setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, LibCall);
- setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, LibCall);
+ setOperationAction(ISD::STRICT_FP16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::STRICT_FP_TO_FP16, MVT::f32, Expand);
}
// Strict floating-point comparisons need custom lowering.
@@ -1248,34 +1262,26 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// FP-ARMv8 implements a lot of rounding-like FP operations.
- if (Subtarget->hasFPARMv8Base()) {
- setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
- setOperationAction(ISD::FCEIL, MVT::f32, Legal);
- setOperationAction(ISD::FROUND, MVT::f32, Legal);
- setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
- setOperationAction(ISD::FRINT, MVT::f32, Legal);
- setOperationAction(ISD::FROUNDEVEN, MVT::f32, Legal);
- setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+ if (Subtarget->hasFPARMv8Base()) {
+ for (auto Op :
+ {ISD::FFLOOR, ISD::FCEIL, ISD::FROUND,
+ ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT,
+ ISD::FROUNDEVEN, ISD::FMINNUM, ISD::FMAXNUM,
+ ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL, ISD::STRICT_FROUND,
+ ISD::STRICT_FTRUNC, ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
+ ISD::STRICT_FROUNDEVEN, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM}) {
+ setOperationAction(Op, MVT::f32, Legal);
+
+ if (Subtarget->hasFP64())
+ setOperationAction(Op, MVT::f64, Legal);
+ }
+
if (Subtarget->hasNEON()) {
setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
}
-
- if (Subtarget->hasFP64()) {
- setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
- setOperationAction(ISD::FCEIL, MVT::f64, Legal);
- setOperationAction(ISD::FROUND, MVT::f64, Legal);
- setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
- setOperationAction(ISD::FRINT, MVT::f64, Legal);
- setOperationAction(ISD::FROUNDEVEN, MVT::f64, Legal);
- setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
- }
}
// FP16 often need to be promoted to call lib functions
@@ -1430,6 +1436,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
Align(1ULL << Subtarget->getPreferBranchLogAlignment()));
setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
+
+ IsStrictFPEnabled = true;
}
bool ARMTargetLowering::useSoftFloat() const {
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 65c61c259d465..5f5f703fbabf1 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -814,7 +814,7 @@ def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
def : FP16Pat<(f32 (any_fpextend (f16 HPR:$Sm))),
(VCVTBHS (COPY_TO_REGCLASS (f16 HPR:$Sm), SPR))>;
-def : FP16Pat<(f16_to_fp GPR:$a),
+def : FP16Pat<(any_f16_to_fp GPR:$a),
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
let hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPSCR_RM] in
@@ -826,7 +826,7 @@ def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sda,
def : FP16Pat<(f16 (any_fpround SPR:$Sm)),
(COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$Sm), HPR)>;
-def : FP16Pat<(fp_to_f16 SPR:$a),
+def : FP16Pat<(any_fp_to_f16 SPR:$a),
(i32 (COPY_TO_REGCLASS (VCVTBSH (IMPLICIT_DEF), SPR:$a), GPR))>;
def : FP16Pat<(insertelt (v8f16 MQPR:$src1), (f16 (any_fpround (f32 SPR:$src2))), imm_even:$lane),
(v8f16 (INSERT_SUBREG (v8f16 MQPR:$src1),
@@ -891,7 +891,7 @@ def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
def : FullFP16Pat<(f64 (any_fpextend (f16 HPR:$Sm))),
(VCVTBHD (COPY_TO_REGCLASS (f16 HPR:$Sm), SPR))>,
Requires<[HasFPARMv8, HasDPVFP]>;
-def : FP16Pat<(f64 (f16_to_fp GPR:$a)),
+def : FP16Pat<(f64 (any_f16_to_fp GPR:$a)),
(VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>,
Requires<[HasFPARMv8, HasDPVFP]>;
@@ -917,7 +917,7 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
def : FullFP16Pat<(f16 (any_fpround DPR:$Dm)),
(COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$Dm), HPR)>,
Requires<[HasFPARMv8, HasDPVFP]>;
-def : FP16Pat<(fp_to_f16 (f64 DPR:$a)),
+def : FP16Pat<(any_fp_to_f16 (f64 DPR:$a)),
(i32 (COPY_TO_REGCLASS (VCVTBDH (IMPLICIT_DEF), DPR:$a), GPR))>,
Requires<[HasFPARMv8, HasDPVFP]>;
diff --git a/llvm/test/CodeGen/ARM/fp-intrinsics-vector.ll b/llvm/test/CodeGen/ARM/fp-intrinsics-vector.ll
new file mode 100644
index 0000000000000..d4b94b97acad8
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/fp-intrinsics-vector.ll
@@ -0,0 +1,1499 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=armv7a-none-eabihf -mattr=+neon,+vfp4 %s -o - | FileCheck %s
+
+define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) #0 {
+; CHECK-LABEL: add_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vadd.f32 s11, s3, s7
+; CHECK-NEXT: vadd.f32 s10, s2, s6
+; CHECK-NEXT: vadd.f32 s9, s1, s5
+; CHECK-NEXT: vadd.f32 s8, s0, s4
+; CHECK-NEXT: vorr q0, q2, q2
+; CHECK-NEXT: bx lr
+ %val = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %x, <4 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) #0 {
+; CHECK-LABEL: sub_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vsub.f32 s11, s3, s7
+; CHECK-NEXT: vsub.f32 s10, s2, s6
+; CHECK-NEXT: vsub.f32 s9, s1, s5
+; CHECK-NEXT: vsub.f32 s8, s0, s4
+; CHECK-NEXT: vorr q0, q2, q2
+; CHECK-NEXT: bx lr
+ %val = call <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float> %x, <4 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) #0 {
+; CHECK-LABEL: mul_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vmul.f32 s11, s3, s7
+; CHECK-NEXT: vmul.f32 s10, s2, s6
+; CHECK-NEXT: vmul.f32 s9, s1, s5
+; CHECK-NEXT: vmul.f32 s8, s0, s4
+; CHECK-NEXT: vorr q0, q2, q2
+; CHECK-NEXT: bx lr
+ %val = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float> %x, <4 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) #0 {
+; CHECK-LABEL: div_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vdiv.f32 s11, s3, s7
+; CHECK-NEXT: vdiv.f32 s10, s2, s6
+; CHECK-NEXT: vdiv.f32 s9, s1, s5
+; CHECK-NEXT: vdiv.f32 s8, s0, s4
+; CHECK-NEXT: vorr q0, q2, q2
+; CHECK-NEXT: bx lr
+ %val = call <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float> %x, <4 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @fma_v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z) #0 {
+; CHECK-LABEL: fma_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vfma.f32 s11, s3, s7
+; CHECK-NEXT: vfma.f32 s10, s2, s6
+; CHECK-NEXT: vfma.f32 s9, s1, s5
+; CHECK-NEXT: vfma.f32 s8, s0, s4
+; CHECK-NEXT: vorr q0, q2, q2
+; CHECK-NEXT: bx lr
+ %val = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %x, <4 x float> %y, <4 x float> %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x i32> @fptosi_v4i32_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: fptosi_v4i32_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcvt.s32.f32 s4, s2
+; CHECK-NEXT: vcvt.s32.f32 s6, s0
+; CHECK-NEXT: vcvt.s32.f32 s0, s1
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vcvt.s32.f32 s4, s3
+; CHECK-NEXT: vmov.32 d17[0], r0
+; CHECK-NEXT: vmov r0, s6
+; CHECK-NEXT: vmov.32 d16[0], r0
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vmov.32 d17[1], r0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vmov.32 d16[1], r0
+; CHECK-NEXT: vorr q0, q8, q8
+; CHECK-NEXT: bx lr
+ %val = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
+ ret <4 x i32> %val
+}
+
+define <4 x i32> @fptoui_v4i32_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: fptoui_v4i32_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcvt.u32.f32 s4, s2
+; CHECK-NEXT: vcvt.u32.f32 s6, s0
+; CHECK-NEXT: vcvt.u32.f32 s0, s1
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vcvt.u32.f32 s4, s3
+; CHECK-NEXT: vmov.32 d17[0], r0
+; CHECK-NEXT: vmov r0, s6
+; CHECK-NEXT: vmov.32 d16[0], r0
+; CHECK-NEXT: vmov r0, s4
+; CHECK-NEXT: vmov.32 d17[1], r0
+; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: vmov.32 d16[1], r0
+; CHECK-NEXT: vorr q0, q8, q8
+; CHECK-NEXT: bx lr
+ %val = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
+ ret <4 x i32> %val
+}
+
+define <4 x i64> @fptosi_v4i64_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: fptosi_v4i64_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vmov r0, s19
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: vmov r1, s16
+; CHECK-NEXT: vmov r5, s17
+; CHECK-NEXT: vmov r6, s18
+; CHECK-NEXT: vmov.32 d9[0], r0
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: vmov.32 d10[0], r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r7, r1
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: vmov.32 d11[0], r0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: bl __aeabi_f2lz
+; CHECK-NEXT: vmov.32 d8[0], r0
+; CHECK-NEXT: vmov.32 d11[1], r5
+; CHECK-NEXT: vmov.32 d9[1], r4
+; CHECK-NEXT: vmov.32 d10[1], r7
+; CHECK-NEXT: vmov.32 d8[1], r1
+; CHECK-NEXT: vorr q0, q5, q5
+; CHECK-NEXT: vorr q1, q4, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
+ %val = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
+ ret <4 x i64> %val
+}
+
+define <4 x i64> @fptoui_v4i64_v4f32(<4 x float> %x) #0 {
+; CHECK-LABEL: fptoui_v4i64_v4f32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q4, q0, q0
+; CHECK-NEXT: vmov r0, s19
+; CHECK-NEXT: bl __aeabi_f2ulz
+; CHECK-NEXT: mov r4, r1
+; CHECK-NEXT: vmov r1, s16
+; CHECK-NEXT: vmov r5, s17
+; CHECK-NEXT: vmov r6, s18
+; CHECK-NEXT: vmov.32 d9[0], r0
+; CHECK-NEXT: mov r0, r1
+; CHECK-NEXT: bl __aeabi_f2ulz
+; CHECK-NEXT: vmov.32 d10[0], r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r7, r1
+; CHECK-NEXT: bl __aeabi_f2ulz
+; CHECK-NEXT: vmov.32 d11[0], r0
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: bl __aeabi_f2ulz
+; CHECK-NEXT: vmov.32 d8[0], r0
+; CHECK-NEXT: vmov.32 d11[1], r5
+; CHECK-NEXT: vmov.32 d9[1], r4
+; CHECK-NEXT: vmov.32 d10[1], r7
+; CHECK-NEXT: vmov.32 d8[1], r1
+; CHECK-NEXT: vorr q0, q5, q5
+; CHECK-NEXT: vorr q1, q4, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc}
+ %val = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float> %x, metadata !"fpexcept.strict") #0
+ ret <4 x i64> %val
+}
+
+define <4 x float> @sitofp_v4f32_v4i32(<4 x i32> %x) #0 {
+; CHECK-LABEL: sitofp_v4f32_v4i32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .pad #32
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: vmov r12, r1, d0
+; CHECK-NEXT: movw r0, #0
+; CHECK-NEXT: vmov r2, r3, d1
+; CHECK-NEXT: movt r0, #17200
+; CHECK-NEXT: str r0, [sp, #20]
+; CHECK-NEXT: vldr d16, .LCPI9_0
+; CHECK-NEXT: eor r1, r1, #-2147483648
+; CHECK-NEXT: str r1, [sp, #16]
+; CHECK-NEXT: str r0, [sp, #12]
+; CHECK-NEXT: eor r1, r2, #-2147483648
+; CHECK-NEXT: vldr d17, [sp, #16]
+; CHECK-NEXT: stmib sp, {r0, r1}
+; CHECK-NEXT: eor r1, r3, #-2147483648
+; CHECK-NEXT: vsub.f64 d17, d17, d16
+; CHECK-NEXT: vldr d18, [sp, #8]
+; CHECK-NEXT: str r1, [sp]
+; CHECK-NEXT: str r0, [sp, #28]
+; CHECK-NEXT: eor r0, r12, #-2147483648
+; CHECK-NEXT: vldr d19, [sp]
+; CHECK-NEXT: str r0, [sp, #24]
+; CHECK-NEXT: vsub.f64 d18, d18, d16
+; CHECK-NEXT: vsub.f64 d19, d19, d16
+; CHECK-NEXT: vldr d20, [sp, #24]
+; CHECK-NEXT: vcvt.f32.f64 s3, d19
+; CHECK-NEXT: vsub.f64 d16, d20, d16
+; CHECK-NEXT: vcvt.f32.f64 s2, d18
+; CHECK-NEXT: vcvt.f32.f64 s1, d17
+; CHECK-NEXT: vcvt.f32.f64 s0, d16
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI9_0:
+; CHECK-NEXT: .long 2147483648 @ double 4503601774854144
+; CHECK-NEXT: .long 1127219200
+ %val = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @uitofp_v4f32_v4i32(<4 x i32> %x) #0 {
+; CHECK-LABEL: uitofp_v4f32_v4i32:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .pad #32
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: vmov r0, r1, d1
+; CHECK-NEXT: movw r2, #0
+; CHECK-NEXT: vmov r12, r3, d0
+; CHECK-NEXT: movt r2, #17200
+; CHECK-NEXT: stm sp, {r1, r2}
+; CHECK-NEXT: vldr d17, [sp]
+; CHECK-NEXT: vldr d16, .LCPI10_0
+; CHECK-NEXT: str r2, [sp, #12]
+; CHECK-NEXT: vsub.f64 d17, d17, d16
+; CHECK-NEXT: vcvt.f32.f64 s3, d17
+; CHECK-NEXT: str r0, [sp, #8]
+; CHECK-NEXT: vldr d18, [sp, #8]
+; CHECK-NEXT: str r2, [sp, #20]
+; CHECK-NEXT: str r3, [sp, #16]
+; CHECK-NEXT: vsub.f64 d18, d18, d16
+; CHECK-NEXT: vldr d19, [sp, #16]
+; CHECK-NEXT: str r2, [sp, #28]
+; CHECK-NEXT: vcvt.f32.f64 s2, d18
+; CHECK-NEXT: str r12, [sp, #24]
+; CHECK-NEXT: vldr d20, [sp, #24]
+; CHECK-NEXT: vsub.f64 d19, d19, d16
+; CHECK-NEXT: vsub.f64 d16, d20, d16
+; CHECK-NEXT: vcvt.f32.f64 s1, d19
+; CHECK-NEXT: vcvt.f32.f64 s0, d16
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: bx lr
+; CHECK-NEXT: .p2align 3
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI10_0:
+; CHECK-NEXT: .long 0 @ double 4503599627370496
+; CHECK-NEXT: .long 1127219200
+ %val = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @sitofp_v4f32_v4i64(<4 x i64> %x) #0 {
+; CHECK-LABEL: sitofp_v4f32_v4i64:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: .save {r4, r5, r6, lr}
+; CHECK-NEXT: push {r4, r5, r6, lr}
+; CHECK-NEXT: .vsave {d8, d9, d10, d11}
+; CHECK-NEXT: vpush {d8, d9, d10, d11}
+; CHECK-NEXT: vorr q4, q1, q1
+; CHECK-NEXT: vorr q5, q0, q0
+; CHECK-NEXT: vmov r0, r1, d8
+; CHECK-NEXT: bl __aeabi_l2f
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: vmov r0, r1, d9
+; CHECK-NEXT: bl __aeabi_l2f
+; CHECK-NEXT: vmov r2, r1, d11
+; CHECK-NEXT: vmov s19, r0
+; CHECK-NEXT: vmov r5, r6, d10
+; CHECK-NEXT: vmov s18, r4
+; CHECK-NEXT: mov r0, r2
+; CHECK-NEXT: bl __aeabi_l2f
+; CHECK-NEXT: vmov s17, r0
+; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: bl __aeabi_l2f
+; CHECK-NEXT: vmov s16, r0
+; CHECK-NEXT: vorr q0, q4, q4
+; CHECK-NEXT: vpop {d8, d9, d10, d11}
+; CHECK-NEXT: pop {r4, r5, r6, pc}
+ %val = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64> %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+ ret <4 x float> %val
+}
+
+define <4 x float> @uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
+; CHECK-LABEL: uitofp_v4f32_v4i64:
+; CHECK: @ %bb.0:
+; CHEC...
[truncated]
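With IsStrictFPEnabled set, the backend stops mutating strict FP nodes into their non-strict counterparts, so the any_* patterns in ARMInstrVFP.td above have to match the strict nodes directly during instruction selection. A minimal sketch of the kind of IR this path handles (illustrative only, not taken from this PR's test files):

; Illustrative sketch: a strict half-to-float extend. The STRICT_FP_EXTEND
; node now survives to instruction selection, where the any_fpextend /
; any_f16_to_fp patterns above can match it (e.g. selecting vcvtb.f32.f16).
declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata)

define float @strict_fpext_f16(half %x) #0 {
  %v = call float @llvm.experimental.constrained.fpext.f32.f16(half %x, metadata !"fpexcept.strict") #0
  ret float %v
}

attributes #0 = { strictfp }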
You can test this locally with the following command:

git-clang-format --diff origin/main HEAD --extensions cpp -- llvm/lib/Target/ARM/ARMISelLowering.cpp --diff_from_common_commit
View the diff from clang-format here.

diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 1a8c47060..3b6f98c46 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -872,7 +872,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
- setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
@@ -1262,14 +1262,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
// FP-ARMv8 implements a lot of rounding-like FP operations.
- if (Subtarget->hasFPARMv8Base()) {
+ if (Subtarget->hasFPARMv8Base()) {
for (auto Op :
- {ISD::FFLOOR, ISD::FCEIL, ISD::FROUND,
- ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT,
- ISD::FROUNDEVEN, ISD::FMINNUM, ISD::FMAXNUM,
- ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL, ISD::STRICT_FROUND,
- ISD::STRICT_FTRUNC, ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
- ISD::STRICT_FROUNDEVEN, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM}) {
+ {ISD::FFLOOR, ISD::FCEIL, ISD::FROUND, ISD::FTRUNC, ISD::FNEARBYINT,
+ ISD::FRINT, ISD::FROUNDEVEN, ISD::FMINNUM, ISD::FMAXNUM,
+ ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL, ISD::STRICT_FROUND,
+ ISD::STRICT_FTRUNC, ISD::STRICT_FNEARBYINT, ISD::STRICT_FRINT,
+ ISD::STRICT_FROUNDEVEN, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM}) {
setOperationAction(Op, MVT::f32, Legal);
if (Subtarget->hasFP64())
|
davemgreen left a comment:
Thanks - LGTM
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/169/builds/17617

Here is the relevant piece of the build log for reference.
Changes in this PR were discussed and reviewed in #137101.
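For readers who want to poke at the new lowering locally, a quick check along these lines should work (the triple, attribute spelling, and file name here are assumptions, not taken from the PR):

; Hypothetical local check, assuming an FP16-capable ARM subtarget:
;   llc -mtriple=armv8.2a-none-eabihf -mattr=+fullfp16 strict-f16.ll -o -
; With this patch the strict f16 add is marked Legal under FullFP16, so it
; should select vadd.f16 instead of being expanded through a libcall.
declare half @llvm.experimental.constrained.fadd.f16(half, half, metadata, metadata)

define half @strict_fadd_f16(half %x, half %y) #0 {
  %v = call half @llvm.experimental.constrained.fadd.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  ret half %v
}

attributes #0 = { strictfp }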