21
21
namespace LIBC_NAMESPACE {
22
22
namespace fputil {
23
23
24
- // A generic class to represent single precision, double precision, and quad
25
- // precision IEEE 754 floating point formats.
26
- // On most platforms, the 'float' type corresponds to single precision floating
27
- // point numbers, the 'double' type corresponds to double precision floating
28
- // point numers, and the 'long double' type corresponds to the quad precision
29
- // floating numbers. On x86 platforms however, the 'long double' type maps to
30
- // an x87 floating point format. This format is an IEEE 754 extension format.
31
- // It is handled as an explicit specialization of this class.
32
- template <typename T> struct FPBits : private FloatProperties <T> {
33
- static_assert (cpp::is_floating_point_v<T>,
34
- " FPBits instantiated with invalid type." );
35
- using typename FloatProperties<T>::StorageType;
36
- using FloatProperties<T>::TOTAL_LEN;
24
+ namespace internal {
37
25
38
- private:
39
- using FloatProperties<T>::EXP_SIG_MASK;
40
-
41
- public:
42
- using FloatProperties<T>::EXP_MASK;
43
- using FloatProperties<T>::EXP_BIAS;
44
- using FloatProperties<T>::EXP_LEN;
45
- using FloatProperties<T>::FRACTION_MASK;
46
- using FloatProperties<T>::FRACTION_LEN;
26
+ // This is a temporary class to unify common methods and properties between
27
+ // FPBits and FPBits<long double>.
28
+ template <FPType fp_type> struct FPBitsCommon : private FPProperties <fp_type> {
29
+ using UP = FPProperties<fp_type>;
30
+ using typename UP::StorageType;
31
+ using UP::TOTAL_LEN;
47
32
48
- private:
49
- using FloatProperties<T>::QUIET_NAN_MASK;
33
+ protected:
34
+ using UP::EXP_SIG_MASK;
35
+ using UP::QUIET_NAN_MASK;
50
36
51
37
public:
52
- using FloatProperties<T>::SIGN_MASK;
38
+ using UP::EXP_BIAS;
39
+ using UP::EXP_LEN;
40
+ using UP::EXP_MASK;
41
+ using UP::EXP_MASK_SHIFT;
42
+ using UP::FP_MASK;
43
+ using UP::FRACTION_LEN;
44
+ using UP::FRACTION_MASK;
45
+ using UP::SIGN_MASK;
53
46
54
47
// Reinterpreting bits as an integer value and interpreting the bits of an
55
48
// integer value as a floating point value is used in tests. So, a convenient
56
49
// type is provided for such reinterpretations.
57
50
StorageType bits;
58
51
52
+ LIBC_INLINE constexpr FPBitsCommon () : bits(0 ) {}
53
+ LIBC_INLINE explicit constexpr FPBitsCommon (StorageType bits) : bits(bits) {}
54
+
59
55
LIBC_INLINE constexpr void set_mantissa (StorageType mantVal) {
60
56
mantVal &= FRACTION_MASK;
61
57
bits &= ~FRACTION_MASK;
@@ -66,16 +62,89 @@ template <typename T> struct FPBits : private FloatProperties<T> {
66
62
return bits & FRACTION_MASK;
67
63
}
68
64
69
- LIBC_INLINE constexpr void set_biased_exponent (StorageType expVal) {
70
- expVal = (expVal << FRACTION_LEN) & EXP_MASK;
65
+ LIBC_INLINE constexpr void set_sign (bool signVal) {
66
+ if (get_sign () != signVal)
67
+ bits ^= SIGN_MASK;
68
+ }
69
+
70
+ LIBC_INLINE constexpr bool get_sign () const {
71
+ return (bits & SIGN_MASK) != 0 ;
72
+ }
73
+
74
+ LIBC_INLINE constexpr void set_biased_exponent (StorageType biased) {
75
+ // clear exponent bits
71
76
bits &= ~EXP_MASK;
72
- bits |= expVal;
77
+ // set exponent bits
78
+ bits |= (biased << EXP_MASK_SHIFT) & EXP_MASK;
73
79
}
74
80
75
81
LIBC_INLINE constexpr uint16_t get_biased_exponent () const {
76
- return uint16_t ((bits & EXP_MASK) >> FRACTION_LEN);
82
+ return uint16_t ((bits & EXP_MASK) >> EXP_MASK_SHIFT);
83
+ }
84
+
85
+ LIBC_INLINE constexpr int get_exponent () const {
86
+ return int (get_biased_exponent ()) - EXP_BIAS;
77
87
}
78
88
89
+ // If the number is subnormal, the exponent is treated as if it were the
90
+ // minimum exponent for a normal number. This is to keep continuity between
91
+ // the normal and subnormal ranges, but it causes problems for functions where
92
+ // values are calculated from the exponent, since just subtracting the bias
93
+ // will give a slightly incorrect result. Additionally, zero has an exponent
94
+ // of zero, and that should actually be treated as zero.
95
+ LIBC_INLINE constexpr int get_explicit_exponent () const {
96
+ const int biased_exp = int (get_biased_exponent ());
97
+ if (is_zero ()) {
98
+ return 0 ;
99
+ } else if (biased_exp == 0 ) {
100
+ return 1 - EXP_BIAS;
101
+ } else {
102
+ return biased_exp - EXP_BIAS;
103
+ }
104
+ }
105
+
106
+ LIBC_INLINE constexpr StorageType uintval () const { return bits & FP_MASK; }
107
+
108
+ LIBC_INLINE constexpr bool is_zero () const {
109
+ return (bits & EXP_SIG_MASK) == 0 ;
110
+ }
111
+ };
112
+
113
+ } // namespace internal
114
+
115
+ // A generic class to represent single precision, double precision, and quad
116
+ // precision IEEE 754 floating point formats.
117
+ // On most platforms, the 'float' type corresponds to single precision floating
118
+ // point numbers, the 'double' type corresponds to double precision floating
119
+ // point numbers, and the 'long double' type corresponds to the quad precision
120
+ // floating numbers. On x86 platforms however, the 'long double' type maps to
121
+ // an x87 floating point format. This format is an IEEE 754 extension format.
122
+ // It is handled as an explicit specialization of this class.
123
+ template <typename T>
124
+ struct FPBits : public internal ::FPBitsCommon<get_fp_type<T>()> {
125
+ static_assert (cpp::is_floating_point_v<T>,
126
+ " FPBits instantiated with invalid type." );
127
+ using UP = internal::FPBitsCommon<get_fp_type<T>()>;
128
+ using StorageType = typename UP::StorageType;
129
+ using UP::bits;
130
+
131
+ private:
132
+ using UP::EXP_SIG_MASK;
133
+ using UP::QUIET_NAN_MASK;
134
+
135
+ public:
136
+ using UP::EXP_BIAS;
137
+ using UP::EXP_LEN;
138
+ using UP::EXP_MASK;
139
+ using UP::EXP_MASK_SHIFT;
140
+ using UP::FRACTION_LEN;
141
+ using UP::FRACTION_MASK;
142
+ using UP::SIGN_MASK;
143
+ using UP::TOTAL_LEN;
144
+
145
+ using UP::get_biased_exponent;
146
+ using UP::is_zero;
147
+
79
148
// The function returns the mantissa with the implicit bit set iff the current
80
149
// value is a valid normal number.
81
150
LIBC_INLINE constexpr StorageType get_explicit_mantissa () {
@@ -85,19 +154,6 @@ template <typename T> struct FPBits : private FloatProperties<T> {
85
154
(FRACTION_MASK & bits);
86
155
}
87
156
88
- LIBC_INLINE constexpr void set_sign (bool signVal) {
89
- bits |= SIGN_MASK;
90
- if (!signVal)
91
- bits -= SIGN_MASK;
92
- }
93
-
94
- LIBC_INLINE constexpr bool get_sign () const {
95
- return (bits & SIGN_MASK) != 0 ;
96
- }
97
-
98
- static_assert (sizeof (T) == sizeof (StorageType),
99
- " Data type and integral representation have different sizes." );
100
-
101
157
static constexpr int MAX_BIASED_EXPONENT = (1 << EXP_LEN) - 1 ;
102
158
static constexpr StorageType MIN_SUBNORMAL = StorageType(1 );
103
159
static constexpr StorageType MAX_SUBNORMAL = FRACTION_MASK;
@@ -109,49 +165,21 @@ template <typename T> struct FPBits : private FloatProperties<T> {
109
165
// type match.
110
166
template <typename XType, cpp::enable_if_t <cpp::is_same_v<T, XType>, int > = 0 >
111
167
LIBC_INLINE constexpr explicit FPBits (XType x)
112
- : bits (cpp::bit_cast<StorageType>(x)) {}
168
+ : UP (cpp::bit_cast<StorageType>(x)) {}
113
169
114
170
template <typename XType,
115
171
cpp::enable_if_t <cpp::is_same_v<XType, StorageType>, int > = 0 >
116
- LIBC_INLINE constexpr explicit FPBits (XType x) : bits(x) {}
117
-
118
- LIBC_INLINE constexpr FPBits () : bits(0 ) {}
172
+ LIBC_INLINE constexpr explicit FPBits (XType x) : UP(x) {}
119
173
120
- LIBC_INLINE constexpr T get_val () const { return cpp::bit_cast<T>(bits); }
174
+ LIBC_INLINE constexpr FPBits () : UP() { }
121
175
122
176
LIBC_INLINE constexpr void set_val (T value) {
123
177
bits = cpp::bit_cast<StorageType>(value);
124
178
}
125
179
126
- LIBC_INLINE constexpr explicit operator T () const { return get_val (); }
127
-
128
- LIBC_INLINE constexpr StorageType uintval () const { return bits; }
129
-
130
- LIBC_INLINE constexpr int get_exponent () const {
131
- return int (get_biased_exponent ()) - EXP_BIAS;
132
- }
133
-
134
- // If the number is subnormal, the exponent is treated as if it were the
135
- // minimum exponent for a normal number. This is to keep continuity between
136
- // the normal and subnormal ranges, but it causes problems for functions where
137
- // values are calculated from the exponent, since just subtracting the bias
138
- // will give a slightly incorrect result. Additionally, zero has an exponent
139
- // of zero, and that should actually be treated as zero.
140
- LIBC_INLINE constexpr int get_explicit_exponent () const {
141
- const int biased_exp = int (get_biased_exponent ());
142
- if (is_zero ()) {
143
- return 0 ;
144
- } else if (biased_exp == 0 ) {
145
- return 1 - EXP_BIAS;
146
- } else {
147
- return biased_exp - EXP_BIAS;
148
- }
149
- }
180
+ LIBC_INLINE constexpr T get_val () const { return cpp::bit_cast<T>(bits); }
150
181
151
- LIBC_INLINE constexpr bool is_zero () const {
152
- // Remove sign bit by shift
153
- return (bits << 1 ) == 0 ;
154
- }
182
+ LIBC_INLINE constexpr explicit operator T () const { return get_val (); }
155
183
156
184
LIBC_INLINE constexpr bool is_inf () const {
157
185
return (bits & EXP_SIG_MASK) == EXP_MASK;
0 commit comments