Skip to content

Commit e2672aa

Browse files
committed
feat: add ToString
1 parent 506d01d commit e2672aa

File tree

2 files changed

+323
-5
lines changed

2 files changed

+323
-5
lines changed

src/iceberg/expression/decimal.cc

Lines changed: 72 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,10 @@ static Status BuildFromArray(Decimal* result, const uint32_t* array, int64_t len
188188
int64_t next_index = length - 1;
189189
for (size_t i = 0; i < 2 && next_index >= 0; i++) {
190190
uint64_t lower_bits = array[next_index--];
191-
result_array[i] = (next_index < 0)
192-
? lower_bits
193-
: (static_cast<uint64_t>(lower_bits) << 32) | lower_bits;
191+
result_array[i] =
192+
(next_index < 0)
193+
? lower_bits
194+
: (static_cast<uint64_t>(array[next_index--]) << 32) | lower_bits;
194195
}
195196

196197
*result = Decimal(result_array[1], result_array[0]);
@@ -654,7 +655,6 @@ static void AppendLittleEndianArrayToString(const std::array<uint64_t, 2>& array
654655
size_t num_segments = 0;
655656
uint64_t* most_significant_elem = &copy[most_significant_elem_idx];
656657

657-
std::cout << copy[1] << " " << copy[0] << std::endl;
658658
do {
659659
// Compute remainder = copy % 1e9 and copy = copy / 1e9.
660660
uint32_t remainder = 0;
@@ -690,6 +690,71 @@ static void AppendLittleEndianArrayToString(const std::array<uint64_t, 2>& array
690690
out->append(oss.str());
691691
}
692692

693+
/// \brief Rewrite the digits of an unscaled integer in place so they reflect `scale`.
///
/// One of three layouts is produced:
///   - scientific notation (e.g. "1.23E+4") when `scale` is negative or the adjusted
///     exponent is below -6,
///   - a decimal point inserted inside the digits (e.g. "12.3") when there are more
///     digits than `scale`,
///   - a leading "0." followed by zero padding (e.g. "0.0123") otherwise.
/// A `scale` of 0 leaves the string untouched.
///
/// \param[in,out] str Non-null, non-empty text: an optional leading '-' followed by
///   the digits of the unscaled value.
/// \param scale Number of digits that belong after the decimal point; may be negative.
static void AdjustIntegerStringWithScale(std::string* str, int32_t scale) {
  // Check the preconditions before the scale == 0 shortcut so that a null or empty
  // input is caught in debug builds regardless of the requested scale.
  assert(str != nullptr);
  assert(!str->empty());
  if (scale == 0) {
    return;
  }

  const bool is_negative = str->front() == '-';
  const auto is_negative_offset = static_cast<int32_t>(is_negative);
  const auto len = static_cast<int32_t>(str->size());
  const int32_t num_digits = len - is_negative_offset;
  const int32_t adjusted_exponent = num_digits - 1 - scale;
  // Index of the first digit (skips the sign character when present).
  const auto first_digit = static_cast<size_t>(is_negative_offset);

  // Note that the -6 is taken from the Java BigDecimal documentation.
  if (scale < 0 || adjusted_exponent < -6) {
    // Example 1:
    // Precondition: *str = "123", is_negative_offset = 0, num_digits = 3, scale = -2,
    //               adjusted_exponent = 4
    // After inserting decimal point: *str = "1.23"
    // After appending exponent: *str = "1.23E+4"
    // Example 2:
    // Precondition: *str = "-123", is_negative_offset = 1, num_digits = 3, scale = 9,
    //               adjusted_exponent = -7
    // After inserting decimal point: *str = "-1.23"
    // After appending exponent: *str = "-1.23E-7"
    // Example 3:
    // Precondition: *str = "0", is_negative_offset = 0, num_digits = 1, scale = -1,
    //               adjusted_exponent = 1
    // After inserting decimal point: *str = "0" // Not inserted
    // After appending exponent: *str = "0E+1"
    if (num_digits > 1) {
      str->insert(first_digit + 1, 1, '.');
    }
    str->push_back('E');
    if (adjusted_exponent >= 0) {
      // std::to_string only emits a sign for negative values, so add '+' explicitly.
      str->push_back('+');
    }
    str->append(std::to_string(adjusted_exponent));
    return;
  }

  if (num_digits > scale) {
    // Example 1:
    // Precondition: *str = "123", len = num_digits = 3, scale = 1, n = 2
    // After inserting decimal point: *str = "12.3"
    // Example 2:
    // Precondition: *str = "-123", len = 4, num_digits = 3, scale = 1, n = 3
    // After inserting decimal point: *str = "-12.3"
    const auto n = static_cast<size_t>(len - scale);
    str->insert(n, 1, '.');
    return;
  }

  // Example 1:
  // Precondition: *str = "123", is_negative_offset = 0, num_digits = 3, scale = 4
  // After insert: *str = "000123"
  // After setting decimal point: *str = "0.0123"
  // Example 2:
  // Precondition: *str = "-123", is_negative_offset = 1, num_digits = 3, scale = 4
  // After insert: *str = "-000123"
  // After setting decimal point: *str = "-0.0123"
  //
  // num_digits <= scale here, so the padding count is always at least 2: one zero
  // for the integer part and one placeholder that is overwritten with '.'.
  const auto padding = static_cast<size_t>(scale - num_digits + 2);
  str->insert(first_digit, padding, '0');
  str->at(first_digit + 1) = '.';
}
757+
693758
} // namespace
694759

695760
Result<std::string> Decimal::ToString(int32_t scale) const {
@@ -698,7 +763,9 @@ Result<std::string> Decimal::ToString(int32_t scale) const {
698763
"Decimal::ToString: scale must be in the range [-{}, {}], was {}", kMaxScale,
699764
kMaxScale, scale);
700765
}
701-
return NotImplemented("Decimal::ToString is not implemented yet");
766+
std::string str(ToIntegerString());
767+
AdjustIntegerStringWithScale(&str, scale);
768+
return str;
702769
}
703770

704771
std::string Decimal::ToIntegerString() const {

test/decimal_test.cc

Lines changed: 251 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,11 @@
1818
*/
1919
#include "iceberg/expression/decimal.h"
2020

21+
#include <array>
22+
#include <cstdint>
23+
2124
#include <gtest/gtest.h>
25+
#include <sys/types.h>
2226

2327
#include "gmock/gmock.h"
2428
#include "matchers.h"
@@ -164,4 +168,251 @@ TEST(DecimalTest, LargeValues) {
164168
}
165169
}
166170

171+
// Formats a Decimal with ToString and parses the text back with FromString,
// expecting the original value, for every (high, low) pair drawn from a set of
// boundary 64-bit words and for a few scales.
TEST(DecimalTest, TestStringRoundTrip) {
  static constexpr std::array<uint64_t, 11> kWords = {
      0,
      1,
      999,
      1000,
      std::numeric_limits<int32_t>::max(),
      (1ull << 31),
      std::numeric_limits<uint32_t>::max(),
      (1ull << 32),
      std::numeric_limits<int64_t>::max(),
      (1ull << 63),
      std::numeric_limits<uint64_t>::max(),
  };
  static constexpr std::array<int32_t, 3> kScaleChoices = {0, 1, 10};

  for (const uint64_t high_word : kWords) {
    for (const uint64_t low_word : kWords) {
      const Decimal original(high_word, low_word);

      for (const int32_t scale : kScaleChoices) {
        // Decimal -> text
        auto text = original.ToString(scale);
        ASSERT_THAT(text, IsOk())
            << "Failed to convert Decimal to string: " << original.ToIntegerString()
            << ", scale: " << scale;

        // text -> Decimal
        auto parsed = Decimal::FromString(text.value());
        ASSERT_THAT(parsed, IsOk())
            << "Failed to convert string back to Decimal: " << text.value();

        EXPECT_EQ(original, parsed.value())
            << "Round trip failed for value: " << original.ToIntegerString()
            << ", scale: " << scale;
      }
    }
  }
}
207+
208+
// Parses strings at the edge of the supported range (38 significant digits,
// exponents down to -38) and hands each one, together with the expected value,
// precision and scale, to AssertDecimalFromString.
TEST(DecimalTest, FromStringLimits) {
  // "9..9" (38 times)
  const auto dec38times9pos = Decimal(5421010862427522170ULL, 687399551400673279ULL);
  // "-9..9" (38 times)
  const auto dec38times9neg = Decimal(13025733211282029445ULL, 17759344522308878337ULL);

  struct LimitCase {
    const char* text;
    Decimal expected;
    int32_t precision;
    int32_t scale;
  };

  const LimitCase cases[] = {
      {"1e37", Decimal(542101086242752217ULL, 68739955140067328ULL), 38, 0},
      {"-1e37", Decimal(17904642987466799398ULL, 18378004118569484288ULL), 38, 0},
      {"9.87e37", Decimal(5350537721215964381ULL, 15251391175463010304ULL), 38, 0},
      {"-9.87e37", Decimal(13096206352493587234ULL, 3195352898246541312ULL), 38, 0},
      {"12345678901234567890123456789012345678",
       Decimal(669260594276348691ULL, 14143994781733811022ULL), 38, 0},
      {"-12345678901234567890123456789012345678",
       Decimal(17777483479433202924ULL, 4302749291975740594ULL), 38, 0},

      {"99999999999999999999999999999999999999", dec38times9pos, 38, 0},
      {"-99999999999999999999999999999999999999", dec38times9neg, 38, 0},
      {"9.9999999999999999999999999999999999999e37", dec38times9pos, 38, 0},
      {"-9.9999999999999999999999999999999999999e37", dec38times9neg, 38, 0},

      // No exponent, many fractional digits
      {"9.9999999999999999999999999999999999999", dec38times9pos, 38, 37},
      {"-9.9999999999999999999999999999999999999", dec38times9neg, 38, 37},
      {"0.99999999999999999999999999999999999999", dec38times9pos, 38, 38},
      {"-0.99999999999999999999999999999999999999", dec38times9neg, 38, 38},

      // Negative exponent
      {"1e-38", Decimal(0, 1), 1, 38},
      {"-1e-38", Decimal(18446744073709551615ULL, 18446744073709551615ULL), 1, 38},
      {"9.99e-36", Decimal(0, 999), 3, 38},
      {"-9.99e-36", Decimal(18446744073709551615ULL, 18446744073709550617ULL), 3, 38},
      {"987e-38", Decimal(0, 987), 3, 38},
      {"-987e-38", Decimal(18446744073709551615ULL, 18446744073709550629ULL), 3, 38},
      {"99999999999999999999999999999999999999e-37", dec38times9pos, 38, 37},
      {"-99999999999999999999999999999999999999e-37", dec38times9neg, 38, 37},
      {"99999999999999999999999999999999999999e-38", dec38times9pos, 38, 38},
      {"-99999999999999999999999999999999999999e-38", dec38times9neg, 38, 38},
  };

  for (const auto& c : cases) {
    AssertDecimalFromString(c.text, c.expected, c.precision, c.scale);
  }
}
267+
268+
// Verifies that Decimal::FromString rejects bad input with kInvalidArgument and
// the expected message for three groups: the empty string, syntactically
// malformed text, and exponents outside the supported scale range.
TEST(DecimalTest, FromStringInvalid) {
  // Empty string
  auto empty_result = Decimal::FromString("");
  ASSERT_THAT(empty_result, IsError(ErrorKind::kInvalidArgument));
  ASSERT_THAT(empty_result,
              HasErrorMessage("Decimal::FromString: empty string is not a valid Decimal"));

  // Malformed text
  const std::vector<std::string> malformed_inputs{
      "-",           "0.0.0",       "0-13-32", "a",    "-23092.235-",
      "-+23092.235", "+-23092.235", "00a",     "1e1a", "0.00123D/3",
      "1.23eA8",     "1.23E+3A",    "-1.23E--5", "1.2345E+++07"};
  for (const auto& input : malformed_inputs) {
    auto parsed = Decimal::FromString(input);
    ASSERT_THAT(parsed, IsError(ErrorKind::kInvalidArgument));
    ASSERT_THAT(parsed, HasErrorMessage("Decimal::FromString: invalid decimal string"));
  }

  // Exponent too large for the supported scale range
  const std::vector<std::string> out_of_range_inputs{"1e39",  "-1e39",  "9e39",
                                                     "-9e39", "9.9e40", "-9.9e40"};
  for (const auto& input : out_of_range_inputs) {
    auto parsed = Decimal::FromString(input);
    ASSERT_THAT(parsed, IsError(ErrorKind::kInvalidArgument));
    ASSERT_THAT(parsed,
                HasErrorMessage("Decimal::FromString: scale must be in the range"));
  }
}
291+
292+
// Divides a negative Decimal built from a string by the integer 3 and compares
// the quotient with the expected Decimal.
TEST(DecimalTest, Division) {
  const std::string dividend_text("-23923094039234029");
  const Decimal dividend(dividend_text);
  const Decimal expected_quotient("-7974364679744676");

  const Decimal quotient(dividend / 3);

  ASSERT_EQ(expected_quotient, quotient);
}
299+
300+
// Table-driven check of Decimal::ToString: each row gives a 64-bit value, a scale
// and the exact text expected. Rows cover plain output, an inserted decimal point,
// zero padding after "0." and scientific notation, for positive and negative values.
TEST(DecimalTest, ToString) {
  struct ToStringCase {
    int64_t test_value;
    int32_t scale;
    const char* expected_string;
  };

  // Each (value, scale) pair appears exactly once.
  for (const auto& t : std::vector<ToStringCase>{
           {.test_value = 0, .scale = -1, .expected_string = "0E+1"},
           {.test_value = 0, .scale = 0, .expected_string = "0"},
           {.test_value = 0, .scale = 1, .expected_string = "0.0"},
           {.test_value = 0, .scale = 6, .expected_string = "0.000000"},
           {.test_value = 2, .scale = 7, .expected_string = "2E-7"},
           {.test_value = 2, .scale = -1, .expected_string = "2E+1"},
           {.test_value = 2, .scale = 0, .expected_string = "2"},
           {.test_value = 2, .scale = 1, .expected_string = "0.2"},
           {.test_value = 2, .scale = 6, .expected_string = "0.000002"},
           {.test_value = -2, .scale = 7, .expected_string = "-2E-7"},
           {.test_value = -2, .scale = -1, .expected_string = "-2E+1"},
           {.test_value = -2, .scale = 0, .expected_string = "-2"},
           {.test_value = -2, .scale = 1, .expected_string = "-0.2"},
           {.test_value = -2, .scale = 6, .expected_string = "-0.000002"},
           {.test_value = 123, .scale = -3, .expected_string = "1.23E+5"},
           {.test_value = 123, .scale = -1, .expected_string = "1.23E+3"},
           {.test_value = 123, .scale = 1, .expected_string = "12.3"},
           {.test_value = 123, .scale = 0, .expected_string = "123"},
           {.test_value = 123, .scale = 5, .expected_string = "0.00123"},
           {.test_value = 123, .scale = 8, .expected_string = "0.00000123"},
           {.test_value = 123, .scale = 9, .expected_string = "1.23E-7"},
           {.test_value = 123, .scale = 10, .expected_string = "1.23E-8"},
           {.test_value = -123, .scale = -3, .expected_string = "-1.23E+5"},
           {.test_value = -123, .scale = -1, .expected_string = "-1.23E+3"},
           {.test_value = -123, .scale = 1, .expected_string = "-12.3"},
           {.test_value = -123, .scale = 0, .expected_string = "-123"},
           {.test_value = -123, .scale = 5, .expected_string = "-0.00123"},
           {.test_value = -123, .scale = 8, .expected_string = "-0.00000123"},
           {.test_value = -123, .scale = 9, .expected_string = "-1.23E-7"},
           {.test_value = -123, .scale = 10, .expected_string = "-1.23E-8"},
           {.test_value = 1000000000, .scale = -3, .expected_string = "1.000000000E+12"},
           {.test_value = 1000000000, .scale = -1, .expected_string = "1.000000000E+10"},
           {.test_value = 1000000000, .scale = 0, .expected_string = "1000000000"},
           {.test_value = 1000000000, .scale = 1, .expected_string = "100000000.0"},
           {.test_value = 1000000000, .scale = 5, .expected_string = "10000.00000"},
           {.test_value = 1000000000,
            .scale = 15,
            .expected_string = "0.000001000000000"},
           {.test_value = 1000000000, .scale = 16, .expected_string = "1.000000000E-7"},
           {.test_value = 1000000000, .scale = 17, .expected_string = "1.000000000E-8"},
           {.test_value = -1000000000,
            .scale = -3,
            .expected_string = "-1.000000000E+12"},
           {.test_value = -1000000000,
            .scale = -1,
            .expected_string = "-1.000000000E+10"},
           {.test_value = -1000000000, .scale = 0, .expected_string = "-1000000000"},
           {.test_value = -1000000000, .scale = 1, .expected_string = "-100000000.0"},
           {.test_value = -1000000000, .scale = 5, .expected_string = "-10000.00000"},
           {.test_value = -1000000000,
            .scale = 15,
            .expected_string = "-0.000001000000000"},
           {.test_value = -1000000000, .scale = 16, .expected_string = "-1.000000000E-7"},
           {.test_value = -1000000000, .scale = 17, .expected_string = "-1.000000000E-8"},
           {.test_value = 1234567890123456789LL,
            .scale = -3,
            .expected_string = "1.234567890123456789E+21"},
           {.test_value = 1234567890123456789LL,
            .scale = -1,
            .expected_string = "1.234567890123456789E+19"},
           {.test_value = 1234567890123456789LL,
            .scale = 0,
            .expected_string = "1234567890123456789"},
           {.test_value = 1234567890123456789LL,
            .scale = 1,
            .expected_string = "123456789012345678.9"},
           {.test_value = 1234567890123456789LL,
            .scale = 5,
            .expected_string = "12345678901234.56789"},
           {.test_value = 1234567890123456789LL,
            .scale = 24,
            .expected_string = "0.000001234567890123456789"},
           {.test_value = 1234567890123456789LL,
            .scale = 25,
            .expected_string = "1.234567890123456789E-7"},
           {.test_value = -1234567890123456789LL,
            .scale = -3,
            .expected_string = "-1.234567890123456789E+21"},
           {.test_value = -1234567890123456789LL,
            .scale = -1,
            .expected_string = "-1.234567890123456789E+19"},
           {.test_value = -1234567890123456789LL,
            .scale = 0,
            .expected_string = "-1234567890123456789"},
           {.test_value = -1234567890123456789LL,
            .scale = 1,
            .expected_string = "-123456789012345678.9"},
           {.test_value = -1234567890123456789LL,
            .scale = 5,
            .expected_string = "-12345678901234.56789"},
           {.test_value = -1234567890123456789LL,
            .scale = 24,
            .expected_string = "-0.000001234567890123456789"},
           {.test_value = -1234567890123456789LL,
            .scale = 25,
            .expected_string = "-1.234567890123456789E-7"},
       }) {
    const Decimal value(t.test_value);
    auto result = value.ToString(t.scale);
    ASSERT_THAT(result, IsOk())
        << "Failed to convert Decimal to string: " << value.ToIntegerString()
        << ", scale: " << t.scale;

    EXPECT_EQ(result.value(), t.expected_string)
        << "Expected: " << t.expected_string << ", but got: " << result.value();
  }
}
417+
167418
} // namespace iceberg

0 commit comments

Comments
 (0)