Skip to content

Commit 2fad60d

Browse files
oleksiyskononenko, samukweku
authored and committed
Improve ARM64 compatibility (#3447)
With the changes in this PR, datatable builds and passes all the tests on the ARM platform. Since `long double` is identical to `double` on this platform, we had to: (1) use a tolerance of `1e-15` for float tests; (2) improve handling of very small and very large floats when casting/freading, similar to Rdatatable/data.table@ff3e7d4. WIP for #3222
1 parent 6a32a91 commit 2fad60d

File tree

9 files changed

+75
-53
lines changed

9 files changed

+75
-53
lines changed

docs/releases/v1.1.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,8 @@
170170
-[enh] Header detection heuristics has been improved in the case when
171171
some of the column names are missing. [#3363]
172172

173+
-[enh] Improved handling of very small and very large float values. [#3447]
174+
173175
-[fix] :func:`fread()` will no longer fail while reading mostly empty
174176
files. [#3055]
175177

src/core/column/cast_date32.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//------------------------------------------------------------------------------
2-
// Copyright 2021 H2O.ai
2+
// Copyright 2021-2023 H2O.ai
33
//
44
// Permission is hereby granted, free of charge, to any person obtaining a
55
// copy of this software and associated documentation files (the "Software"),
@@ -25,7 +25,6 @@
2525
#include "column/cast.h"
2626
#include "python/date.h"
2727
#include "python/string.h"
28-
#include "read/constants.h" // dt::read::pow10lookup
2928
namespace dt {
3029

3130

src/core/column/cast_string.cc

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//------------------------------------------------------------------------------
2-
// Copyright 2020-2021 H2O.ai
2+
// Copyright 2020-2023 H2O.ai
33
//
44
// Permission is hereby granted, free of charge, to any person obtaining a
55
// copy of this software and associated documentation files (the "Software"),
@@ -192,12 +192,23 @@ static bool parse_double(const char* ch, const char* end, double* out) {
192192
}
193193
e += Eneg? -exp : exp;
194194
}
195-
e += 350; // lookup table is arranged from -350 (0) to +350 (700)
196-
if (e < 0 || e > 700 || ch != end) return false;
197195

198-
double r = static_cast<double>(static_cast<long double>(mantissa) *
199-
dt::read::pow10lookup[e]);
200-
*out = negative? -r : r;
196+
if (e < -350 || e > 350 || ch != end) return false;
197+
auto r = static_cast<long double>(mantissa);
198+
199+
// For very small and very large floats do a separate lookup
200+
// for extra exponent, i.e. anything above 300 or below -300.
201+
// Note that the lookup array indices go from 0 (e == -300)
202+
// to 600 (e == 300). The approach is based on
203+
// https://github.com/Rdatatable/data.table/pull/4165
204+
if (e < -300 || e > 300) {
205+
auto extra = static_cast<int_fast8_t>(e - copysign(300, e));
206+
r *= dt::read::pow10lookup[extra + 300];
207+
e -= extra;
208+
}
209+
r *= dt::read::pow10lookup[e + 300];
210+
*out = static_cast<double>(negative? -r : r);
211+
201212
return true;
202213
}
203214

src/core/read/constants.cc

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
// License, v. 2.0. If a copy of the MPL was not distributed with this
44
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
55
//
6-
// © H2O.ai 2018
6+
// © H2O.ai 2018-2023
77
//------------------------------------------------------------------------------
88
#include <limits>
99
#include "read/constants.h"
@@ -66,18 +66,7 @@ const uint8_t allowedseps[128] = {
6666
};
6767

6868

69-
#if DT_OS_WINDOWS
70-
#define LDBL(value) std::numeric_limits<double>::max()
71-
#else
72-
#define LDBL(value) value
73-
#endif
74-
75-
const long double pow10lookup[701] = {
76-
1.0E-350L, 1.0E-349L, 1.0E-348L, 1.0E-347L, 1.0E-346L, 1.0E-345L, 1.0E-344L, 1.0E-343L, 1.0E-342L, 1.0E-341L,
77-
1.0E-340L, 1.0E-339L, 1.0E-338L, 1.0E-337L, 1.0E-336L, 1.0E-335L, 1.0E-334L, 1.0E-333L, 1.0E-332L, 1.0E-331L,
78-
1.0E-330L, 1.0E-329L, 1.0E-328L, 1.0E-327L, 1.0E-326L, 1.0E-325L, 1.0E-324L, 1.0E-323L, 1.0E-322L, 1.0E-321L,
79-
1.0E-320L, 1.0E-319L, 1.0E-318L, 1.0E-317L, 1.0E-316L, 1.0E-315L, 1.0E-314L, 1.0E-313L, 1.0E-312L, 1.0E-311L,
80-
1.0E-310L, 1.0E-309L, 1.0E-308L, 1.0E-307L, 1.0E-306L, 1.0E-305L, 1.0E-304L, 1.0E-303L, 1.0E-302L, 1.0E-301L,
69+
const long double pow10lookup[601] = {
8170
1.0E-300L, 1.0E-299L, 1.0E-298L, 1.0E-297L, 1.0E-296L, 1.0E-295L, 1.0E-294L, 1.0E-293L, 1.0E-292L, 1.0E-291L,
8271
1.0E-290L, 1.0E-289L, 1.0E-288L, 1.0E-287L, 1.0E-286L, 1.0E-285L, 1.0E-284L, 1.0E-283L, 1.0E-282L, 1.0E-281L,
8372
1.0E-280L, 1.0E-279L, 1.0E-278L, 1.0E-277L, 1.0E-276L, 1.0E-275L, 1.0E-274L, 1.0E-273L, 1.0E-272L, 1.0E-271L,
@@ -138,13 +127,8 @@ const long double pow10lookup[701] = {
138127
1.0E+270L, 1.0E+271L, 1.0E+272L, 1.0E+273L, 1.0E+274L, 1.0E+275L, 1.0E+276L, 1.0E+277L, 1.0E+278L, 1.0E+279L,
139128
1.0E+280L, 1.0E+281L, 1.0E+282L, 1.0E+283L, 1.0E+284L, 1.0E+285L, 1.0E+286L, 1.0E+287L, 1.0E+288L, 1.0E+289L,
140129
1.0E+290L, 1.0E+291L, 1.0E+292L, 1.0E+293L, 1.0E+294L, 1.0E+295L, 1.0E+296L, 1.0E+297L, 1.0E+298L, 1.0E+299L,
141-
1.0E+300L, 1.0E+301L, 1.0E+302L, 1.0E+303L, 1.0E+304L, 1.0E+305L, 1.0E+306L, 1.0E+307L, 1.0E+308L, LDBL(1.0E+309L),
142-
LDBL(1.0E+310L), LDBL(1.0E+311L), LDBL(1.0E+312L), LDBL(1.0E+313L), LDBL(1.0E+314L), LDBL(1.0E+315L), LDBL(1.0E+316L), LDBL(1.0E+317L), LDBL(1.0E+318L), LDBL(1.0E+319L),
143-
LDBL(1.0E+320L), LDBL(1.0E+321L), LDBL(1.0E+322L), LDBL(1.0E+323L), LDBL(1.0E+324L), LDBL(1.0E+325L), LDBL(1.0E+326L), LDBL(1.0E+327L), LDBL(1.0E+328L), LDBL(1.0E+329L),
144-
LDBL(1.0E+330L), LDBL(1.0E+331L), LDBL(1.0E+332L), LDBL(1.0E+333L), LDBL(1.0E+334L), LDBL(1.0E+335L), LDBL(1.0E+336L), LDBL(1.0E+337L), LDBL(1.0E+338L), LDBL(1.0E+339L),
145-
LDBL(1.0E+340L), LDBL(1.0E+341L), LDBL(1.0E+342L), LDBL(1.0E+343L), LDBL(1.0E+344L), LDBL(1.0E+345L), LDBL(1.0E+346L), LDBL(1.0E+347L), LDBL(1.0E+348L), LDBL(1.0E+349L),
146-
LDBL(1.0E+350L)
130+
1.0E+300L
147131
};
148132

149133

150-
}} // namespace dt::read::
134+
}} // namespace dt::read

src/core/read/constants.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ namespace read {
1313

1414
extern const uint8_t hexdigits[256];
1515
extern const uint8_t allowedseps[128];
16-
extern const long double pow10lookup[701];
16+
extern const long double pow10lookup[601];
1717

1818
}} // namespace dt::read::
1919
#endif

src/core/read/parsers/parse_float.cc

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//------------------------------------------------------------------------------
2-
// Copyright 2018-2021 H2O.ai
2+
// Copyright 2018-2023 H2O.ai
33
//
44
// Permission is hereby granted, free of charge, to any person obtaining a
55
// copy of this software and associated documentation files (the "Software"),
@@ -131,7 +131,7 @@ static void parse_float64_simple(const ParseContext& ctx) {
131131
const char* ch = ctx.ch;
132132

133133
bool neg = 0, Eneg = 0;
134-
double r;
134+
long double r;
135135

136136
if (ch < ctx.eof) {
137137
ch += (neg = *ch=='-') + (*ch=='+');
@@ -227,11 +227,23 @@ static void parse_float64_simple(const ParseContext& ctx) {
227227
}
228228
e += Eneg? -exp : exp;
229229
}
230-
e += 350; // lookup table is arranged from -350 (0) to +350 (700)
231-
if (e < 0 || e > 700) goto fail;
232230

233-
r = static_cast<double>(static_cast<long double>(acc) * dt::read::pow10lookup[e]);
234-
ctx.target->float64 = neg? -r : r;
231+
if (e < -350 || e > 350) goto fail;
232+
r = static_cast<long double>(acc);
233+
234+
// For very small and very large floats do a separate lookup
235+
// for extra exponent, i.e. anything above 300 or below -300.
236+
// Note that the lookup array indices go from 0 (e == -300)
237+
// to 600 (e == 300). The approach is based on
238+
// https://github.com/Rdatatable/data.table/pull/4165
239+
if (e < -300 || e > 300) {
240+
auto extra = static_cast<int_fast8_t>(e - copysign(300, e));
241+
r *= dt::read::pow10lookup[extra + 300];
242+
e -= extra;
243+
}
244+
r *= dt::read::pow10lookup[e + 300];
245+
ctx.target->float64 = static_cast<double>(neg? -r : r);
246+
235247
ctx.ch = ch;
236248
return;
237249

tests/conftest.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8 -*-
33
#-------------------------------------------------------------------------------
4-
# Copyright 2018-2020 H2O.ai
4+
# Copyright 2018-2023 H2O.ai
55
#
66
# Permission is hereby granted, free of charge, to any person obtaining a
77
# copy of this software and associated documentation files (the "Software"),
@@ -47,6 +47,11 @@ def is_ppc64():
4747
return platform.system() == "Linux" and "ppc64le" in platform_hardware
4848

4949

50+
def is_arm():
51+
"""Helper function to determine ARM platform"""
52+
return platform.processor() == "arm"
53+
54+
5055
@pytest.fixture(scope="session")
5156
def noppc64():
5257
""" Skip the test if running in PowerPC64 """
@@ -83,10 +88,11 @@ def tol():
8388
long double type, resulting in a loss of precision when fread converts
8489
double literals into double numbers.
8590
"""
86-
platform_tols = {"Windows": 1e-15, "PowerPC64": 1e-16}
87-
platform_system = "PowerPC64" if is_ppc64() else platform.system()
88-
89-
return platform_tols.get(platform_system, 0)
91+
tols = {"Windows": 1e-15, "PowerPC64": 1e-16, "ARM": 1e-15}
92+
p = "PowerPC64" if is_ppc64() else \
93+
"ARM" if is_arm() else \
94+
platform.system()
95+
return tols.get(p, 0)
9096

9197

9298
@pytest.fixture(scope="session")

tests/fread/test-fread-small.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8 -*-
33
#-------------------------------------------------------------------------------
4-
# Copyright 2018-2021 H2O.ai
4+
# Copyright 2018-2023 H2O.ai
55
#
66
# Permission is hereby granted, free of charge, to any person obtaining a
77
# copy of this software and associated documentation files (the "Software"),
@@ -163,15 +163,23 @@ def test_float_hex_invalid():
163163

164164

165165
def test_float_decimal0(tol):
166-
assert list_equals(dt.fread("1.3485701e-303\n").to_list(),
167-
[[1.3485701e-303]],
168-
rel_tol = tol)
169-
assert list_equals(dt.fread("1.46761e-313\n").to_list(),
170-
[[1.46761e-313]],
171-
rel_tol = tol)
172-
assert (dt.fread("A\n1.23456789123456789123456999\n")[0, 0] ==
173-
1.23456789123456789123456999)
174-
166+
src_str = ["1.23456789123456789123456999",
167+
"1.3485701e+303",
168+
"1.3485701e-303",
169+
"1.46761e+343",
170+
"1.46761e-343",
171+
]
172+
173+
src_float = [1.23456789123456789123456999,
174+
1.3485701e+303,
175+
1.3485701e-303,
176+
1.46761e+343,
177+
1.46761e-343,
178+
]
179+
180+
DT = dt.fread(text="\n".join(src_str))
181+
assert list_equals(DT.to_list(), [src_float], rel_tol = tol)
182+
assert DT[0, 0] == src_float[0]
175183

176184

177185
def test_float_precision():

tests/munging/test-cast.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8 -*-
33
#-------------------------------------------------------------------------------
4-
# Copyright 2018-2021 H2O.ai
4+
# Copyright 2018-2023 H2O.ai
55
#
66
# Permission is hereby granted, free of charge, to any person obtaining a
77
# copy of this software and associated documentation files (the "Software"),
@@ -186,9 +186,9 @@ def test_cast_double_to_float():
186186

187187
@pytest.mark.parametrize("target_stype", ltype.real.stypes)
188188
def test_cast_str_to_double(target_stype):
189-
DT = dt.Frame(A=["2.45", "-3.333", "0.13e+29", "boo", None, "-4e-4"])
189+
DT = dt.Frame(A=["2.45", "-3.333", "0.13e+29", "boo", None, "-4e-4", "3.14e+323"])
190190
DT["A"] = target_stype
191-
assert_equals(DT, dt.Frame(A=[2.45, -3.333, 1.3e28, None, None, -0.0004],
191+
assert_equals(DT, dt.Frame(A=[2.45, -3.333, 1.3e28, None, None, -0.0004, 3.14e+323],
192192
stype=target_stype))
193193

194194

0 commit comments

Comments
 (0)