Skip to content

Commit 5d30565

Browse files
committed
[RISCV] Improve vector fround lowering by changing FRM.
This is a follow-up to D133238, which did this for ceil/floor. Reviewed By: arcbbb, frasercrmck Differential Revision: https://reviews.llvm.org/D133335
1 parent acb767f commit 5d30565

File tree

5 files changed

+128
-229
lines changed

5 files changed

+128
-229
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 27 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -1836,11 +1836,24 @@ static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
18361836
return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
18371837
}
18381838

1839-
// Expand vector FTRUNC, FCEIL, and FFLOOR by converting to the integer domain
1840-
// and back. Taking care to avoid converting values that are nan or already
1841-
// correct.
1842-
static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG,
1843-
const RISCVSubtarget &Subtarget) {
1839+
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
1840+
switch (Opc) {
1841+
case ISD::FROUNDEVEN: return RISCVFPRndMode::RNE;
1842+
case ISD::FTRUNC: return RISCVFPRndMode::RTZ;
1843+
case ISD::FFLOOR: return RISCVFPRndMode::RDN;
1844+
case ISD::FCEIL: return RISCVFPRndMode::RUP;
1845+
case ISD::FROUND: return RISCVFPRndMode::RMM;
1846+
}
1847+
1848+
return RISCVFPRndMode::Invalid;
1849+
}
1850+
1851+
// Expand vector FTRUNC, FCEIL, FFLOOR, and FROUND by converting to the integer
1852+
// domain and back. Taking care to avoid converting values that are nan or
1853+
// already correct.
1854+
static SDValue
1855+
lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
1856+
const RISCVSubtarget &Subtarget) {
18441857
MVT VT = Op.getSimpleValueType();
18451858
assert(VT.isVector() && "Unexpected type");
18461859

@@ -1892,15 +1905,14 @@ static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG,
18921905
default:
18931906
llvm_unreachable("Unexpected opcode");
18941907
case ISD::FCEIL:
1895-
Truncated =
1896-
DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask,
1897-
DAG.getTargetConstant(RISCVFPRndMode::RUP, DL, XLenVT), VL);
1898-
break;
18991908
case ISD::FFLOOR:
1900-
Truncated =
1901-
DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask,
1902-
DAG.getTargetConstant(RISCVFPRndMode::RDN, DL, XLenVT), VL);
1909+
case ISD::FROUND: {
1910+
RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
1911+
assert(FRM != RISCVFPRndMode::Invalid);
1912+
Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask,
1913+
DAG.getTargetConstant(FRM, DL, XLenVT), VL);
19031914
break;
1915+
}
19041916
case ISD::FTRUNC:
19051917
Truncated = DAG.getNode(RISCVISD::FP_TO_SINT_VL, DL, IntVT, Src, Mask, VL);
19061918
break;
@@ -1919,88 +1931,6 @@ static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG,
19191931
return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
19201932
}
19211933

1922-
// ISD::FROUND is defined to round to nearest with ties rounding away from 0.
1923-
// This mode isn't supported in vector hardware on RISCV. But as long as we
1924-
// aren't compiling with trapping math, we can emulate this with
1925-
// floor(X + copysign(nextafter(0.5, 0.0), X)).
1926-
// FIXME: Could be shorter by changing rounding mode, but we don't have FRM
1927-
// dependencies modeled yet.
1928-
static SDValue lowerFROUND(SDValue Op, SelectionDAG &DAG,
1929-
const RISCVSubtarget &Subtarget) {
1930-
MVT VT = Op.getSimpleValueType();
1931-
assert(VT.isVector() && "Unexpected type");
1932-
1933-
SDLoc DL(Op);
1934-
1935-
SDValue Src = Op.getOperand(0);
1936-
1937-
MVT ContainerVT = VT;
1938-
if (VT.isFixedLengthVector()) {
1939-
ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
1940-
Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
1941-
}
1942-
1943-
SDValue TrueMask, VL;
1944-
std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
1945-
1946-
// Freeze the source since we are increasing the number of uses.
1947-
Src = DAG.getFreeze(Src);
1948-
1949-
// We do the conversion on the absolute value and fix the sign at the end.
1950-
SDValue Abs =
1951-
DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, TrueMask, VL);
1952-
1953-
// Determine the largest integer that can be represented exactly. This and
1954-
// values larger than it don't have any fractional bits so don't need to
1955-
// be converted.
1956-
const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
1957-
unsigned Precision = APFloat::semanticsPrecision(FltSem);
1958-
APFloat MaxVal = APFloat(FltSem);
1959-
MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
1960-
/*IsSigned*/ false, APFloat::rmNearestTiesToEven);
1961-
SDValue MaxValNode =
1962-
DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
1963-
SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
1964-
DAG.getUNDEF(ContainerVT), MaxValNode, VL);
1965-
1966-
// If abs(Src) was larger than MaxVal or nan, keep it.
1967-
MVT SetccVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
1968-
SDValue Mask = DAG.getNode(RISCVISD::SETCC_VL, DL, SetccVT,
1969-
{Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT),
1970-
DAG.getUNDEF(SetccVT), TrueMask, VL});
1971-
1972-
bool Ignored;
1973-
APFloat Point5Pred = APFloat(0.5f);
1974-
Point5Pred.convert(FltSem, APFloat::rmNearestTiesToEven, &Ignored);
1975-
Point5Pred.next(/*nextDown*/ true);
1976-
SDValue SplatVal =
1977-
DAG.getConstantFP(Point5Pred, DL, ContainerVT.getVectorElementType());
1978-
SDValue Splat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
1979-
DAG.getUNDEF(ContainerVT), SplatVal, VL);
1980-
1981-
// Add the adjustment.
1982-
SDValue Adjust = DAG.getNode(RISCVISD::FADD_VL, DL, ContainerVT, Abs, Splat,
1983-
DAG.getUNDEF(ContainerVT), Mask, VL);
1984-
1985-
// Truncate to integer and convert back to fp.
1986-
MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
1987-
SDValue Truncated =
1988-
DAG.getNode(RISCVISD::FP_TO_SINT_VL, DL, IntVT, Adjust, Mask, VL);
1989-
1990-
Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
1991-
Mask, VL);
1992-
1993-
// Restore the original sign and merge the original source to masked off
1994-
// lanes.
1995-
Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
1996-
Src, Src, Mask, VL);
1997-
1998-
if (!VT.isFixedLengthVector())
1999-
return Truncated;
2000-
2001-
return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
2002-
}
2003-
20041934
struct VIDSequence {
20051935
int64_t StepNumerator;
20061936
unsigned StepDenominator;
@@ -3493,9 +3423,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
34933423
case ISD::FTRUNC:
34943424
case ISD::FCEIL:
34953425
case ISD::FFLOOR:
3496-
return lowerFTRUNC_FCEIL_FFLOOR(Op, DAG, Subtarget);
34973426
case ISD::FROUND:
3498-
return lowerFROUND(Op, DAG, Subtarget);
3427+
return lowerFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
34993428
case ISD::VECREDUCE_ADD:
35003429
case ISD::VECREDUCE_UMAX:
35013430
case ISD::VECREDUCE_SMAX:
@@ -8844,18 +8773,6 @@ static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
88448773
return DAG.getNode(WMulOpc, DL, VT, Op0, Op1, Merge, Mask, VL);
88458774
}
88468775

8847-
static RISCVFPRndMode::RoundingMode matchRoundingOp(SDValue Op) {
8848-
switch (Op.getOpcode()) {
8849-
case ISD::FROUNDEVEN: return RISCVFPRndMode::RNE;
8850-
case ISD::FTRUNC: return RISCVFPRndMode::RTZ;
8851-
case ISD::FFLOOR: return RISCVFPRndMode::RDN;
8852-
case ISD::FCEIL: return RISCVFPRndMode::RUP;
8853-
case ISD::FROUND: return RISCVFPRndMode::RMM;
8854-
}
8855-
8856-
return RISCVFPRndMode::Invalid;
8857-
}
8858-
88598776
// Fold
88608777
// (fp_to_int (froundeven X)) -> fcvt X, rne
88618778
// (fp_to_int (ftrunc X)) -> fcvt X, rtz
@@ -8885,7 +8802,7 @@ static SDValue performFP_TO_INTCombine(SDNode *N,
88858802
if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
88868803
return SDValue();
88878804

8888-
RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
8805+
RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
88898806
if (FRM == RISCVFPRndMode::Invalid)
88908807
return SDValue();
88918808

@@ -8934,7 +8851,7 @@ static SDValue performFP_TO_INT_SATCombine(SDNode *N,
89348851

89358852
EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
89368853

8937-
RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
8854+
RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src.getOpcode());
89388855
if (FRM == RISCVFPRndMode::Invalid)
89398856
return SDValue();
89408857

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -312,22 +312,22 @@ static const CostTblEntry VectorIntrinsicCostTable[]{
312312
{Intrinsic::trunc, MVT::nxv2f64, 7},
313313
{Intrinsic::trunc, MVT::nxv4f64, 7},
314314
{Intrinsic::trunc, MVT::nxv8f64, 7},
315-
{Intrinsic::round, MVT::v2f32, 10},
316-
{Intrinsic::round, MVT::v4f32, 10},
317-
{Intrinsic::round, MVT::v8f32, 10},
318-
{Intrinsic::round, MVT::v16f32, 10},
319-
{Intrinsic::round, MVT::nxv2f32, 10},
320-
{Intrinsic::round, MVT::nxv4f32, 10},
321-
{Intrinsic::round, MVT::nxv8f32, 10},
322-
{Intrinsic::round, MVT::nxv16f32, 10},
323-
{Intrinsic::round, MVT::v2f64, 10},
324-
{Intrinsic::round, MVT::v4f64, 10},
325-
{Intrinsic::round, MVT::v8f64, 10},
326-
{Intrinsic::round, MVT::v16f64, 10},
327-
{Intrinsic::round, MVT::nxv1f64, 10},
328-
{Intrinsic::round, MVT::nxv2f64, 10},
329-
{Intrinsic::round, MVT::nxv4f64, 10},
330-
{Intrinsic::round, MVT::nxv8f64, 10},
315+
{Intrinsic::round, MVT::v2f32, 9},
316+
{Intrinsic::round, MVT::v4f32, 9},
317+
{Intrinsic::round, MVT::v8f32, 9},
318+
{Intrinsic::round, MVT::v16f32, 9},
319+
{Intrinsic::round, MVT::nxv2f32, 9},
320+
{Intrinsic::round, MVT::nxv4f32, 9},
321+
{Intrinsic::round, MVT::nxv8f32, 9},
322+
{Intrinsic::round, MVT::nxv16f32, 9},
323+
{Intrinsic::round, MVT::v2f64, 9},
324+
{Intrinsic::round, MVT::v4f64, 9},
325+
{Intrinsic::round, MVT::v8f64, 9},
326+
{Intrinsic::round, MVT::v16f64, 9},
327+
{Intrinsic::round, MVT::nxv1f64, 9},
328+
{Intrinsic::round, MVT::nxv2f64, 9},
329+
{Intrinsic::round, MVT::nxv4f64, 9},
330+
{Intrinsic::round, MVT::nxv8f64, 9},
331331
{Intrinsic::fabs, MVT::v2f32, 1},
332332
{Intrinsic::fabs, MVT::v4f32, 1},
333333
{Intrinsic::fabs, MVT::v8f32, 1},

llvm/test/Analysis/CostModel/RISCV/fround.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -219,23 +219,23 @@ define void @nearbyint() {
219219
define void @round() {
220220
; CHECK-LABEL: 'round'
221221
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %1 = call float @llvm.round.f32(float undef)
222-
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %2 = call <2 x float> @llvm.round.v2f32(<2 x float> undef)
223-
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %3 = call <4 x float> @llvm.round.v4f32(<4 x float> undef)
224-
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %4 = call <8 x float> @llvm.round.v8f32(<8 x float> undef)
225-
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %5 = call <16 x float> @llvm.round.v16f32(<16 x float> undef)
226-
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %6 = call <vscale x 2 x float> @llvm.round.nxv2f32(<vscale x 2 x float> undef)
227-
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %7 = call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> undef)
228-
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %8 = call <vscale x 8 x float> @llvm.round.nxv8f32(<vscale x 8 x float> undef)
229-
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %9 = call <vscale x 16 x float> @llvm.round.nxv16f32(<vscale x 16 x float> undef)
222+
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %2 = call <2 x float> @llvm.round.v2f32(<2 x float> undef)
223+
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %3 = call <4 x float> @llvm.round.v4f32(<4 x float> undef)
224+
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %4 = call <8 x float> @llvm.round.v8f32(<8 x float> undef)
225+
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %5 = call <16 x float> @llvm.round.v16f32(<16 x float> undef)
226+
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %6 = call <vscale x 2 x float> @llvm.round.nxv2f32(<vscale x 2 x float> undef)
227+
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %7 = call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> undef)
228+
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %8 = call <vscale x 8 x float> @llvm.round.nxv8f32(<vscale x 8 x float> undef)
229+
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %9 = call <vscale x 16 x float> @llvm.round.nxv16f32(<vscale x 16 x float> undef)
230230
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %10 = call double @llvm.round.f64(double undef)
231-
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %11 = call <2 x double> @llvm.round.v2f64(<2 x double> undef)
232-
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %12 = call <4 x double> @llvm.round.v4f64(<4 x double> undef)
233-
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %13 = call <8 x double> @llvm.round.v8f64(<8 x double> undef)
234-
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %14 = call <16 x double> @llvm.round.v16f64(<16 x double> undef)
235-
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %15 = call <vscale x 1 x double> @llvm.round.nxv1f64(<vscale x 1 x double> undef)
236-
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %16 = call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> undef)
237-
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %17 = call <vscale x 4 x double> @llvm.round.nxv4f64(<vscale x 4 x double> undef)
238-
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %18 = call <vscale x 8 x double> @llvm.round.nxv8f64(<vscale x 8 x double> undef)
231+
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %11 = call <2 x double> @llvm.round.v2f64(<2 x double> undef)
232+
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %12 = call <4 x double> @llvm.round.v4f64(<4 x double> undef)
233+
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %13 = call <8 x double> @llvm.round.v8f64(<8 x double> undef)
234+
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %14 = call <16 x double> @llvm.round.v16f64(<16 x double> undef)
235+
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %15 = call <vscale x 1 x double> @llvm.round.nxv1f64(<vscale x 1 x double> undef)
236+
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> undef)
237+
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 4 x double> @llvm.round.nxv4f64(<vscale x 4 x double> undef)
238+
; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 8 x double> @llvm.round.nxv8f64(<vscale x 8 x double> undef)
239239
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
240240
;
241241
call float @llvm.round.f32(float undef)

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2171,12 +2171,11 @@ define void @round_v8f16(<8 x half>* %x) {
21712171
; CHECK-NEXT: vle16.v v8, (a0)
21722172
; CHECK-NEXT: lui a1, %hi(.LCPI100_0)
21732173
; CHECK-NEXT: flh ft0, %lo(.LCPI100_0)(a1)
2174-
; CHECK-NEXT: lui a1, %hi(.LCPI100_1)
2175-
; CHECK-NEXT: flh ft1, %lo(.LCPI100_1)(a1)
21762174
; CHECK-NEXT: vfabs.v v9, v8
21772175
; CHECK-NEXT: vmflt.vf v0, v9, ft0
2178-
; CHECK-NEXT: vfadd.vf v9, v9, ft1, v0.t
2179-
; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v9, v0.t
2176+
; CHECK-NEXT: fsrmi a1, 4
2177+
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
2178+
; CHECK-NEXT: fsrm a1
21802179
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
21812180
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
21822181
; CHECK-NEXT: vse16.v v8, (a0)
@@ -2195,12 +2194,11 @@ define void @round_v4f32(<4 x float>* %x) {
21952194
; CHECK-NEXT: vle32.v v8, (a0)
21962195
; CHECK-NEXT: lui a1, %hi(.LCPI101_0)
21972196
; CHECK-NEXT: flw ft0, %lo(.LCPI101_0)(a1)
2198-
; CHECK-NEXT: lui a1, %hi(.LCPI101_1)
2199-
; CHECK-NEXT: flw ft1, %lo(.LCPI101_1)(a1)
22002197
; CHECK-NEXT: vfabs.v v9, v8
22012198
; CHECK-NEXT: vmflt.vf v0, v9, ft0
2202-
; CHECK-NEXT: vfadd.vf v9, v9, ft1, v0.t
2203-
; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v9, v0.t
2199+
; CHECK-NEXT: fsrmi a1, 4
2200+
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
2201+
; CHECK-NEXT: fsrm a1
22042202
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
22052203
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
22062204
; CHECK-NEXT: vse32.v v8, (a0)
@@ -2219,12 +2217,11 @@ define void @round_v2f64(<2 x double>* %x) {
22192217
; CHECK-NEXT: vle64.v v8, (a0)
22202218
; CHECK-NEXT: lui a1, %hi(.LCPI102_0)
22212219
; CHECK-NEXT: fld ft0, %lo(.LCPI102_0)(a1)
2222-
; CHECK-NEXT: lui a1, %hi(.LCPI102_1)
2223-
; CHECK-NEXT: fld ft1, %lo(.LCPI102_1)(a1)
22242220
; CHECK-NEXT: vfabs.v v9, v8
22252221
; CHECK-NEXT: vmflt.vf v0, v9, ft0
2226-
; CHECK-NEXT: vfadd.vf v9, v9, ft1, v0.t
2227-
; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v9, v0.t
2222+
; CHECK-NEXT: fsrmi a1, 4
2223+
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
2224+
; CHECK-NEXT: fsrm a1
22282225
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
22292226
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
22302227
; CHECK-NEXT: vse64.v v8, (a0)

0 commit comments

Comments
 (0)