Skip to content

Commit 45f3263

Browse files
committed
[APFloat] Properly implement DoubleAPFloat::convertFromAPInt
The old implementation converted to the legacy semantics, inducing rounding and not properly handling inputs like (2^1000 + 2^200) which have more precision than the legacy semantics can represent. Instead, we convert the integer into two floats and an error. The error is used to implement the rounding behavior. Remove related dead, untested code: convertFrom*ExtendedInteger
1 parent 5abec20 commit 45f3263

File tree

3 files changed

+491
-98
lines changed

3 files changed

+491
-98
lines changed

llvm/include/llvm/ADT/APFloat.h

Lines changed: 15 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -460,12 +460,6 @@ class IEEEFloat final {
460460
LLVM_ABI opStatus convertToInteger(MutableArrayRef<integerPart>, unsigned int,
461461
bool, roundingMode, bool *) const;
462462
LLVM_ABI opStatus convertFromAPInt(const APInt &, bool, roundingMode);
463-
LLVM_ABI opStatus convertFromSignExtendedInteger(const integerPart *,
464-
unsigned int, bool,
465-
roundingMode);
466-
LLVM_ABI opStatus convertFromZeroExtendedInteger(const integerPart *,
467-
unsigned int, bool,
468-
roundingMode);
469463
LLVM_ABI Expected<opStatus> convertFromString(StringRef, roundingMode);
470464
LLVM_ABI APInt bitcastToAPInt() const;
471465
LLVM_ABI double convertToDouble() const;
@@ -805,6 +799,16 @@ class DoubleAPFloat final {
805799
unsigned int Width, bool IsSigned,
806800
roundingMode RM, bool *IsExact) const;
807801

802+
// Convert an unsigned integer Src to a floating point number,
803+
// rounding according to RM. The sign of the floating point number is not
804+
// modified.
805+
opStatus convertFromUnsignedParts(const integerPart *Src,
806+
unsigned int SrcCount, roundingMode RM);
807+
808+
// Handle overflow. Sign is preserved. We either become infinity or
809+
// the largest finite number.
810+
opStatus handleOverflow(roundingMode RM);
811+
808812
public:
809813
LLVM_ABI DoubleAPFloat(const fltSemantics &S);
810814
LLVM_ABI DoubleAPFloat(const fltSemantics &S, uninitializedTag);
@@ -860,14 +864,6 @@ class DoubleAPFloat final {
860864
roundingMode RM, bool *IsExact) const;
861865
LLVM_ABI opStatus convertFromAPInt(const APInt &Input, bool IsSigned,
862866
roundingMode RM);
863-
LLVM_ABI opStatus convertFromSignExtendedInteger(const integerPart *Input,
864-
unsigned int InputSize,
865-
bool IsSigned,
866-
roundingMode RM);
867-
LLVM_ABI opStatus convertFromZeroExtendedInteger(const integerPart *Input,
868-
unsigned int InputSize,
869-
bool IsSigned,
870-
roundingMode RM);
871867
LLVM_ABI unsigned int convertToHexString(char *DST, unsigned int HexDigits,
872868
bool UpperCase,
873869
roundingMode RM) const;
@@ -1344,22 +1340,15 @@ class APFloat : public APFloatBase {
13441340
// the precision of the conversion.
13451341
LLVM_ABI opStatus convertToInteger(APSInt &Result, roundingMode RM,
13461342
bool *IsExact) const;
1343+
1344+
// Convert a two's complement integer Input to a floating point number,
1345+
// rounding according to RM. IsSigned is true if the integer is signed,
1346+
// in which case it must be sign-extended.
13471347
opStatus convertFromAPInt(const APInt &Input, bool IsSigned,
13481348
roundingMode RM) {
13491349
APFLOAT_DISPATCH_ON_SEMANTICS(convertFromAPInt(Input, IsSigned, RM));
13501350
}
1351-
opStatus convertFromSignExtendedInteger(const integerPart *Input,
1352-
unsigned int InputSize, bool IsSigned,
1353-
roundingMode RM) {
1354-
APFLOAT_DISPATCH_ON_SEMANTICS(
1355-
convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM));
1356-
}
1357-
opStatus convertFromZeroExtendedInteger(const integerPart *Input,
1358-
unsigned int InputSize, bool IsSigned,
1359-
roundingMode RM) {
1360-
APFLOAT_DISPATCH_ON_SEMANTICS(
1361-
convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM));
1362-
}
1351+
13631352
LLVM_ABI Expected<opStatus> convertFromString(StringRef, roundingMode);
13641353
APInt bitcastToAPInt() const {
13651354
APFLOAT_DISPATCH_ON_SEMANTICS(bitcastToAPInt());

llvm/lib/Support/APFloat.cpp

Lines changed: 149 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -2927,51 +2927,6 @@ APFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
29272927
return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
29282928
}
29292929

2930-
/* Convert a two's complement integer SRC to a floating point number,
2931-
rounding according to ROUNDING_MODE. ISSIGNED is true if the
2932-
integer is signed, in which case it must be sign-extended. */
2933-
APFloat::opStatus
2934-
IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2935-
unsigned int srcCount, bool isSigned,
2936-
roundingMode rounding_mode) {
2937-
opStatus status;
2938-
2939-
if (isSigned &&
2940-
APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2941-
integerPart *copy;
2942-
2943-
/* If we're signed and negative negate a copy. */
2944-
sign = true;
2945-
copy = new integerPart[srcCount];
2946-
APInt::tcAssign(copy, src, srcCount);
2947-
APInt::tcNegate(copy, srcCount);
2948-
status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2949-
delete [] copy;
2950-
} else {
2951-
sign = false;
2952-
status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2953-
}
2954-
2955-
return status;
2956-
}
2957-
2958-
/* FIXME: should this just take a const APInt reference? */
2959-
APFloat::opStatus
2960-
IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2961-
unsigned int width, bool isSigned,
2962-
roundingMode rounding_mode) {
2963-
unsigned int partCount = partCountForBits(width);
2964-
APInt api = APInt(width, ArrayRef(parts, partCount));
2965-
2966-
sign = false;
2967-
if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2968-
sign = true;
2969-
api = -api;
2970-
}
2971-
2972-
return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2973-
}
2974-
29752930
Expected<APFloat::opStatus>
29762931
IEEEFloat::convertFromHexadecimalString(StringRef s,
29772932
roundingMode rounding_mode) {
@@ -5648,36 +5603,158 @@ DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
56485603
return FS;
56495604
}
56505605

5651-
APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
5652-
bool IsSigned,
5653-
roundingMode RM) {
5654-
assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5655-
APFloat Tmp(semPPCDoubleDoubleLegacy);
5656-
auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
5657-
*this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5658-
return Ret;
5659-
}
5606+
// Replace the current value with the result of an overflowing operation.
// The sign currently held by this object is kept; depending on RM the
// magnitude saturates either to infinity or to the largest finite value.
// Always reports opInexact, plus opOverflow when the result is infinite.
APFloat::opStatus DoubleAPFloat::handleOverflow(roundingMode RM) {
  // Sample the sign once, before the value is overwritten below.
  const bool Negative = isNegative();

  // Infinity is produced when RM rounds away from zero for this sign;
  // otherwise the result is clamped to the largest finite magnitude.
  bool BecomesInf = false;
  if (RM == APFloat::rmNearestTiesToEven || RM == APFloat::rmNearestTiesToAway)
    BecomesInf = true;
  else if (RM == APFloat::rmTowardPositive)
    BecomesInf = !Negative;
  else if (RM == APFloat::rmTowardNegative)
    BecomesInf = Negative;
  else if (RM == APFloat::rmTowardZero)
    BecomesInf = false;
  else
    llvm_unreachable("Invalid rounding mode found");

  if (BecomesInf)
    makeInf(/*Neg=*/Negative);
  else
    makeLargest(/*Neg=*/Negative);

  // Clamping to the largest finite value is inexact but not an overflow.
  if (getFirst().isFinite())
    return opInexact;
  return static_cast<opStatus>(opInexact | opOverflow);
}
5635+
5636+
APFloat::opStatus DoubleAPFloat::convertFromUnsignedParts(
5637+
const integerPart *Src, unsigned int SrcCount, roundingMode RM) {
5638+
// Find the most significant bit of the source integer. APInt::tcMSB returns
5639+
// UINT_MAX for a zero value.
5640+
const unsigned SrcMSB = APInt::tcMSB(Src, SrcCount);
5641+
if (SrcMSB == UINT_MAX) {
5642+
// The source integer is 0.
5643+
makeZero(/*Neg=*/false);
5644+
return opOK;
5645+
}
56605646

5661-
APFloat::opStatus
5662-
DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
5663-
unsigned int InputSize,
5664-
bool IsSigned, roundingMode RM) {
5665-
assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5666-
APFloat Tmp(semPPCDoubleDoubleLegacy);
5667-
auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
5668-
*this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5669-
return Ret;
5647+
// Create a minimally-sized APInt to represent the source value.
5648+
const unsigned SrcBitWidth = SrcMSB + 1;
5649+
APSInt SrcInt{APInt{/*numBits=*/SrcBitWidth,
5650+
/*numWords=*/SrcCount, Src},
5651+
/*isUnsigned=*/true};
5652+
5653+
// Stage 1: Initial Approximation.
5654+
// Convert the source integer SrcInt to the Hi part of the DoubleAPFloat.
5655+
// We use round-to-nearest because it minimizes the initial error, which is
5656+
// crucial for the subsequent steps.
5657+
APFloat Hi{getFirst().getSemantics()};
5658+
Hi.convertFromAPInt(SrcInt, /*IsSigned=*/false, rmNearestTiesToEven);
5659+
5660+
// If the first approximation already overflows, the number is too large.
5661+
// NOTE: The underlying semantics are *more* conservative when choosing to
5662+
// overflow because their notion of ULP is much larger. As such, it is always
5663+
// safe to overflow at the DoubleAPFloat level if the APFloat overflows.
5664+
if (!Hi.isFinite())
5665+
return handleOverflow(RM);
5666+
5667+
// Stage 2: Exact Error Calculation.
5668+
// Calculate the exact error of the first approximation: Error = SrcInt - Hi.
5669+
// This is done by converting Hi back to an integer and subtracting it from
5670+
// the original source.
5671+
bool HiAsIntIsExact;
5672+
// Create an integer representation of Hi. Its width is determined by the
5673+
// exponent of Hi, ensuring it's just large enough. This width can exceed
5674+
// SrcBitWidth if the conversion to Hi rounded up to a power of two.
5675+
// This guarantees Hi can be represented accurately when converted back to an integer.
5676+
APSInt HiAsInt{static_cast<uint32_t>(ilogb(Hi) + 1), /*isUnsigned=*/true};
5677+
Hi.convertToInteger(HiAsInt, rmNearestTiesToEven, &HiAsIntIsExact);
5678+
const APInt Error = SrcInt.zext(HiAsInt.getBitWidth()) - HiAsInt;
5679+
5680+
// Stage 3: Error Approximation and Rounding.
5681+
// Convert the integer error into the Lo part of the DoubleAPFloat. This step
5682+
// captures the remainder of the original number. The rounding mode for this
5683+
// conversion (LoRM) may need to be adjusted from the user-requested RM to
5684+
// ensure the final sum (Hi + Lo) rounds correctly.
5685+
roundingMode LoRM = RM;
5686+
// Adjustments are only necessary when the initial approximation Hi was an
5687+
// overestimate, making the Error negative.
5688+
if (Error.isNegative()) {
5689+
if (RM == rmNearestTiesToAway) {
5690+
// For rmNearestTiesToAway, a tie should round away from zero. Since
5691+
// SrcInt is positive, this means rounding toward +infinity.
5692+
// A standard conversion of a negative Error would round ties toward
5693+
// -infinity, causing the final sum Hi + Lo to be smaller. To
5694+
// counteract this, we detect the tie case and override the rounding
5695+
// mode for Lo to rmTowardPositive.
5696+
const unsigned ErrorActiveBits = Error.getSignificantBits() - 1;
5697+
const unsigned LoPrecision = getSecond().getSemantics().precision;
5698+
if (ErrorActiveBits > LoPrecision) {
5699+
const unsigned RoundingBoundary = ErrorActiveBits - LoPrecision;
5700+
// A tie occurs when the bits to be truncated are of the form 100...0.
5701+
// This is detected by checking if the number of trailing zeros is
5702+
// exactly one less than the number of bits being truncated.
5703+
if (Error.countTrailingZeros() == RoundingBoundary - 1)
5704+
LoRM = rmTowardPositive;
5705+
}
5706+
} else if (RM == rmTowardZero) {
5707+
// For rmTowardZero, the final positive result must be truncated (rounded
5708+
// down). When Hi is an overestimate, Error is negative. A standard
5709+
// rmTowardZero conversion of Error would make it *less* negative,
5710+
// effectively rounding the final sum Hi + Lo *up*. To ensure the sum
5711+
// rounds down correctly, we force Lo to round toward -infinity.
5712+
LoRM = rmTowardNegative;
5713+
}
5714+
}
5715+
5716+
APFloat Lo{getSecond().getSemantics()};
5717+
opStatus Status = Lo.convertFromAPInt(Error, /*IsSigned=*/true, LoRM);
5718+
5719+
// Renormalize the pair (Hi, Lo) into a canonical DoubleAPFloat form where the
5720+
// components do not overlap. fastTwoSum performs this operation.
5721+
std::tie(Hi, Lo) = fastTwoSum(Hi, Lo);
5722+
Floats[0] = std::move(Hi);
5723+
Floats[1] = std::move(Lo);
5724+
5725+
// A final check for overflow is needed because fastTwoSum can cause a
5726+
// carry-out from Lo that pushes Hi to infinity.
5727+
if (!getFirst().isFinite())
5728+
return handleOverflow(RM);
5729+
5730+
// The largest DoubleAPFloat must be canonical. Values which are larger are
5731+
// not canonical and are equivalent to overflow.
5732+
if (getFirst().isFiniteNonZero() && Floats[0].isLargest()) {
5733+
DoubleAPFloat Largest{*Semantics};
5734+
Largest.makeLargest(/*Neg=*/false);
5735+
if (compare(Largest) == APFloat::cmpGreaterThan)
5736+
return handleOverflow(RM);
5737+
}
5738+
5739+
// The final status of the operation is determined by the conversion of the
5740+
// error term. If Lo could represent Error exactly, the entire conversion
5741+
// is exact. Otherwise, it's inexact.
5742+
return Status;
56705743
}
56715744

5672-
APFloat::opStatus
5673-
DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
5674-
unsigned int InputSize,
5675-
bool IsSigned, roundingMode RM) {
5676-
assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5677-
APFloat Tmp(semPPCDoubleDoubleLegacy);
5678-
auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
5679-
*this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5680-
return Ret;
5745+
// Convert the two's complement integer Input to a double-double value,
// rounding according to RM. When IsSigned is true and Input is negative, the
// magnitude is converted first and the sign is applied afterwards.
// Returns the status reported by the magnitude conversion.
APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
                                                  bool IsSigned,
                                                  roundingMode RM) {
  const bool NegateInput = IsSigned && Input.isNegative();
  APInt Magnitude = Input;
  roundingMode MagnitudeRM = RM;
  if (NegateInput) {
    Magnitude.negate();
    // The sign is applied after the magnitude has been rounded, so a directed
    // rounding of the final negative value corresponds to the opposite
    // directed rounding of its magnitude. rmTowardZero and the two
    // round-to-nearest modes are symmetric under negation and stay unchanged.
    if (RM == rmTowardNegative)
      MagnitudeRM = rmTowardPositive;
    else if (RM == rmTowardPositive)
      MagnitudeRM = rmTowardNegative;
  }

  // handleOverflow() consults the sign currently held by this object. Start
  // from +0 so that a stale sign from a previous value cannot leak into the
  // overflow result of the (non-negative) magnitude conversion.
  makeZero(/*Neg=*/false);

  const APFloat::opStatus Status = convertFromUnsignedParts(
      Magnitude.getRawData(), Magnitude.getNumWords(), MagnitudeRM);
  if (NegateInput)
    changeSign();
  return Status;
}
56825759

56835760
unsigned int DoubleAPFloat::convertToHexString(char *DST,

0 commit comments

Comments
 (0)