diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f502a536d43b1..09d73633462b6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5752,6 +5752,34 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts,
     }
     return isKnownNeverNaN(Src, SNaN, Depth + 1);
   }
+  case ISD::INSERT_SUBVECTOR: {
+    SDValue BaseVector = Op.getOperand(0);
+    SDValue SubVector = Op.getOperand(1);
+    EVT BaseVectorVT = BaseVector.getValueType();
+    if (BaseVectorVT.isFixedLengthVector()) {
+      unsigned Idx = Op.getConstantOperandVal(2);
+      unsigned NumBaseElts = BaseVectorVT.getVectorNumElements();
+      unsigned NumSubElts = SubVector.getValueType().getVectorNumElements();
+
+      // Clear/Extract the bits at the position where the subvector will be
+      // inserted.
+      APInt DemandedMask =
+          APInt::getBitsSet(NumBaseElts, Idx, Idx + NumSubElts);
+      APInt DemandedSrcElts = DemandedElts & ~DemandedMask;
+      APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+
+      bool NeverNaN = true;
+      if (!DemandedSrcElts.isZero())
+        NeverNaN &=
+            isKnownNeverNaN(BaseVector, DemandedSrcElts, SNaN, Depth + 1);
+      if (NeverNaN && !DemandedSubElts.isZero())
+        NeverNaN &=
+            isKnownNeverNaN(SubVector, DemandedSubElts, SNaN, Depth + 1);
+      return NeverNaN;
+    }
+    return isKnownNeverNaN(BaseVector, SNaN, Depth + 1) &&
+           isKnownNeverNaN(SubVector, SNaN, Depth + 1);
+  }
   case ISD::BUILD_VECTOR: {
     unsigned NumElts = Op.getNumOperands();
     for (unsigned I = 0; I != NumElts; ++I)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
index 04e73ac1ea956..c6cd366497218 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
@@ -357,3 +357,52 @@ define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
   %v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %c)
   ret <2 x half> %v
 }
+
+declare <4 x half> @llvm.vector.insert.v4f16.v2f16(<4 x half>, <2 x half>, i64)
+
+define <4 x half> @vfmax_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half> %b, <4 x half> %c) {
+; ZVFH-LABEL: vfmax_v2f16_vv_nnan_insert_subvector:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfadd.vv v8, v8, v8
+; ZVFH-NEXT:    vfadd.vv v9, v9, v9
+; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFH-NEXT:    vslideup.vi v8, v9, 2
+; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
+; ZVFH-NEXT:    vfmax.vv v8, v8, v10
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnan_insert_subvector:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfadd.vv v9, v11, v11
+; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vslideup.vi v11, v9, 2
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vmerge.vvm v10, v8, v9, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v9, v8, v0
+; ZVFHMIN-NEXT:    vfmax.vv v9, v8, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+  %d = fadd nnan <2 x half> %a, %a
+  %e = fadd nnan <2 x half> %b, %b
+  %f = call <4 x half> @llvm.vector.insert.v4f16.v2f16(<4 x half> undef, <2 x half> %d, i64 0)
+  %g = call <4 x half> @llvm.vector.insert.v4f16.v2f16(<4 x half> %f, <2 x half> %e, i64 2)
+  %v = call <4 x half> @llvm.maximum.v4f16(<4 x half> %g, <4 x half> %c)
+  ret <4 x half> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
index a0334a9a5d20a..568923db83591 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
@@ -357,3 +357,52 @@ define <2 x half> @vfmin_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
   %v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %c)
   ret <2 x half> %v
 }
+
+declare <4 x half> @llvm.vector.insert.v4f16.v2f16(<4 x half>, <2 x half>, i64)
+
+define <4 x half> @vfmin_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half> %b, <4 x half> %c) {
+; ZVFH-LABEL: vfmin_v2f16_vv_nnan_insert_subvector:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT:    vfadd.vv v8, v8, v8
+; ZVFH-NEXT:    vfadd.vv v9, v9, v9
+; ZVFH-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT:    vmfeq.vv v0, v10, v10
+; ZVFH-NEXT:    vslideup.vi v8, v9, 2
+; ZVFH-NEXT:    vmerge.vvm v8, v10, v8, v0
+; ZVFH-NEXT:    vfmin.vv v8, v8, v10
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vfmin_v2f16_vv_nnan_insert_subvector:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v11, v8
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v9
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfadd.vv v9, v11, v11
+; ZVFHMIN-NEXT:    vfadd.vv v8, v8, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v11, v9
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v9, v8
+; ZVFHMIN-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vslideup.vi v11, v9, 2
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v8, v11
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v8, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v9, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; ZVFHMIN-NEXT:    vmerge.vvm v10, v8, v9, v0
+; ZVFHMIN-NEXT:    vmfeq.vv v0, v9, v9
+; ZVFHMIN-NEXT:    vmerge.vvm v8, v9, v8, v0
+; ZVFHMIN-NEXT:    vfmin.vv v9, v8, v10
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT:    vfncvt.f.f.w v8, v9
+; ZVFHMIN-NEXT:    ret
+  %d = fadd nnan <2 x half> %a, %a
+  %e = fadd nnan <2 x half> %b, %b
+  %f = call <4 x half> @llvm.vector.insert.v4f16.v2f16(<4 x half> undef, <2 x half> %d, i64 0)
+  %g = call <4 x half> @llvm.vector.insert.v4f16.v2f16(<4 x half> %f, <2 x half> %e, i64 2)
+  %v = call <4 x half> @llvm.minimum.v4f16(<4 x half> %g, <4 x half> %c)
+  ret <4 x half> %v
+}
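
Side note (not part of the patch): the demanded-element split performed by the new `ISD::INSERT_SUBVECTOR` case can be reproduced in isolation with plain `APInt` arithmetic. The sketch below assumes the shapes used in the tests (a 4-element base vector with a 2-element subvector inserted at index 2) and simply mirrors the `getBitsSet`, masked-AND, and `extractBits` computations; it is a standalone illustration, not code from the change.

```cpp
// Standalone sketch: demanded-element bookkeeping of the new
// ISD::INSERT_SUBVECTOR case for NumBaseElts = 4, NumSubElts = 2, Idx = 2
// (the <4 x half> / <2 x half> shapes from the tests). Compile and link
// against LLVM's Support library.
#include "llvm/ADT/APInt.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  using llvm::APInt;
  const unsigned NumBaseElts = 4, NumSubElts = 2, Idx = 2;

  // Assume the caller demands every lane of the 4-element result.
  APInt DemandedElts = APInt::getAllOnes(NumBaseElts);

  // Lanes [Idx, Idx + NumSubElts) are produced by the subvector operand.
  APInt DemandedMask = APInt::getBitsSet(NumBaseElts, Idx, Idx + NumSubElts);

  // Lanes still taken from the base vector: demanded lanes outside the
  // inserted window.
  APInt DemandedSrcElts = DemandedElts & ~DemandedMask;

  // Demanded lanes of the subvector, renumbered to start at bit 0.
  APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);

  // Prints "src=3 sub=3": the base vector is queried only for lanes 0 and 1,
  // and the subvector for both of its lanes -- the two isKnownNeverNaN
  // queries issued by the new case.
  llvm::outs() << "src=" << DemandedSrcElts.getZExtValue()
               << " sub=" << DemandedSubElts.getZExtValue() << "\n";
  return 0;
}
```

Because both subvectors in the tests are produced by `nnan` fadds, this per-lane reasoning is what lets the maximum/minimum lowering keep the `vmfeq`/`vmerge` NaN canonicalization only for the unknown operand `%c`, as the updated CHECK lines show.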