@@ -26,92 +26,84 @@ static_assert(4 == sizeof(float32_t), "Invalid `float32_t` size.");
2626typedef double float64_t ;
2727static_assert (8 == sizeof (float64_t ), " Invalid `float64_t` size." );
2828
29+ // TODO: this is 100% incorrect: `float128_t` is aliased to `double`, which is only 64 bits (8 bytes) wide.
2930typedef double float128_t ;
3031static_assert (8 == sizeof (float128_t ), " Invalid `float128_t` size." );
3132
3233// A long double can be anything from a 128-bit float (on AArch64/Linux) to a 64-bit double (AArch64 macOS)
3334// to an 80-bit extended-precision float wrapped with padding (x86/x86-64). We do not do a static assert on the size
34- // since there are too many options.
35+ // since there are too many options.
3536
3637// A "native_float80_t" is a native type that is closest to approximating
3738// an x86 80-bit float.
3839// when building against CUDA, default to 64-bit float80s
39- #if !defined(__CUDACC__) && (defined(__x86_64__) || defined(__i386__) || defined(_M_X86))
40+ #if !defined(__CUDACC__) && !defined(_WIN32) && (defined(__x86_64__) || defined(__i386__) || defined(_M_X86))
4041 #if defined(__float80)
4142 typedef __float80 native_float80_t ;
4243 #else
4344 typedef long double native_float80_t ;
4445 #endif
45- static_assert (10 <= sizeof (native_float80_t ), " Invalid `native_float80_t` size." );
46+ static_assert (sizeof (native_float80_t ) >= 10 , " Invalid `native_float80_t` size." );
4647#else
4748 typedef double native_float80_t ;
48- static_assert (8 == sizeof (native_float80_t ), "Invalid `native_float80_t` size.");
49+ static_assert (sizeof (native_float80_t ) == 8 , "Invalid `native_float80_t` size.");
4950#endif
5051
51- static const int kEightyBitsInBytes = 10 ;
52- union union_ld {
53- struct {
54- uint8_t data[kEightyBitsInBytes ];
55- // when building against CUDA, default to 64-bit float80s
56- #if !defined(__CUDACC__) && (defined(__x86_64__) || defined(__i386__) || defined(_M_X86))
57- // We are doing x86 on x86, so we have native x86 FP80s, but they
58- // are not available in raw 80-bit native form.
59- //
60- // To get to the internal FP80 representation, we have to use a
61- // `long double` which is (usually! but not always)
62- // an FP80 padded to a 12 or 16 byte boundary
63- //
64- uint8_t padding[sizeof (native_float80_t ) - kEightyBitsInBytes ];
65- #else
66- // The closest native FP type that we can easily deal with is a 64-bit double
67- // this is less than the size of an FP80, so the data variable above will already
68- // enclose it. No extra padding is needed
69- #endif
70- } lds __attribute__ ((packed));
71- native_float80_t ld;
72- } __attribute__((packed));
73-
74- static void *memset_impl (void *b, int c, std::size_t len) {
75- auto *p = static_cast <int *>(b);
76- for (std::size_t i = 0 ; i < len; ++i) {
77- p[i] = c;
78- }
79- return b;
80- }
81-
82- static void *memcpy_impl (void *dst, const void *src, std::size_t n) {
83- auto *d = static_cast <int *>(dst);
84- const auto *s = static_cast <const int *>(src);
85- for (std::size_t i = 0 ; i < n; ++i) {
86- d[i] = s[i];
87- }
88- return dst;
89- }
90-
9152struct float80_t final {
92- uint8_t data[kEightyBitsInBytes ];
93-
94- inline ~float80_t (void ) = default ;
95- inline float80_t (void ) : data{0 ,} {}
53+ uint8_t data[10 ];
9654
55+ ~float80_t () = default ;
56+ float80_t () = default ;
9757 float80_t (const float80_t &) = default ;
9858 float80_t &operator =(const float80_t &) = default ;
9959
100- inline float80_t (native_float80_t ld) {
101- union_ld ldu;
102- memset_impl (&ldu, 0 , sizeof (ldu)); // zero out ldu to make padding consistent
103- ldu.ld = ld; // assign native value
104- // copy the representation to this object
105- memcpy_impl (&data[0 ], &ldu.lds .data [0 ], sizeof (data));
60+ float80_t (native_float80_t ld) {
61+ if constexpr (sizeof (ld) < sizeof (data)) {
62+ // Native floats are smaller than 80 bits, add padding
63+ memcpy_impl (data, &ld, sizeof (ld));
64+ memset_impl (data + sizeof (ld), 0 , sizeof (data) - sizeof (ld));
65+ } else {
66+ // Native floats are bigger than 80 bits, truncate
67+ memcpy_impl (data, &ld, sizeof (data));
68+ }
10669 }
10770
10871 operator native_float80_t () {
109- union_ld ldu;
110- memset_impl (&ldu, 0 , sizeof (ldu)); // zero out ldu to make padding consistent
111- // copy the internal representation into the union
112- memcpy_impl (&ldu.lds .data [0 ], &data[0 ], sizeof (data));
113- // extract the native backing type from it
114- return ldu.ld ;
72+ native_float80_t nf;
73+ if constexpr (sizeof (nf) < sizeof (data)) {
74+ // Native floats are smaller than 80 bits, truncate
75+ memcpy_impl (&nf, data, sizeof (nf));
76+ } else {
77+ // Native floats are bigger than 80 bits, add padding
78+ memcpy_impl ((unsigned char *)&nf, data, sizeof (data));
79+ memset_impl ((unsigned char *)&nf + sizeof (data), 0 , sizeof (nf) - sizeof (data));
80+ }
81+ return nf;
82+ }
83+
84+ static void *memset_impl (void *b, int c, std::size_t len) {
85+ #if defined(__clang__) || defined(__GNUC__)
86+ return __builtin_memset (b, c, len);
87+ #else
88+ auto *p = static_cast <int *>(b);
89+ for (std::size_t i = 0 ; i < len; ++i) {
90+ p[i] = c;
91+ }
92+ return b;
93+ #endif
94+ }
95+
96+ static void *memcpy_impl (void *dst, const void *src, std::size_t n) {
97+ #if defined(__clang__) || defined(__GNUC__)
98+ return __builtin_memcpy (dst, src, n);
99+ #else
100+ auto *d = static_cast <int *>(dst);
101+ const auto *s = static_cast <const int *>(src);
102+ for (std::size_t i = 0 ; i < n; ++i) {
103+ d[i] = s[i];
104+ }
105+ return dst;
106+ #endif
115107 }
116108} __attribute__((packed));
117109
@@ -147,10 +139,10 @@ union nan80_t {
147139 float80_t d;
148140 struct {
149141 uint64_t payload : 62 ;
150- uint64_t is_quiet_nan : 1 ;
151- uint64_t interger_bit : 1 ;
152- uint64_t exponent : 15 ;
153- uint64_t is_negative : 1 ;
142+ uint64_t is_quiet_nan : 1 ;
143+ uint64_t interger_bit : 1 ;
144+ uint16_t exponent : 15 ;
145+ uint16_t is_negative : 1 ;
154146 } __attribute__ ((packed));
155147} __attribute__((packed));
156148
0 commit comments