feat: add ToString

zhjwpku · zhjwpku · commit e2672aa6b336 · 2025-08-20T00:51:35.000+08:00
diff --git a/src/iceberg/expression/decimal.cc b/src/iceberg/expression/decimal.cc
@@ -188,9 +188,10 @@ static Status BuildFromArray(Decimal* result, const uint32_t* array, int64_t len
   int64_t next_index = length - 1;
   for (size_t i = 0; i < 2 && next_index >= 0; i++) {
     uint64_t lower_bits = array[next_index--];
-    result_array[i] = (next_index < 0)
-                          ? lower_bits
-                          : (static_cast<uint64_t>(lower_bits) << 32) | lower_bits;
+    result_array[i] =
+        (next_index < 0)
+            ? lower_bits
+            : (static_cast<uint64_t>(array[next_index--]) << 32) | lower_bits;
   }
 
   *result = Decimal(result_array[1], result_array[0]);
@@ -654,7 +655,6 @@ static void AppendLittleEndianArrayToString(const std::array<uint64_t, 2>& array
   size_t num_segments = 0;
   uint64_t* most_significant_elem = &copy[most_significant_elem_idx];
 
-  std::cout << copy[1] << " " << copy[0] << std::endl;
   do {
     // Compute remainder = copy % 1e9 and copy = copy / 1e9.
     uint32_t remainder = 0;
@@ -690,6 +690,71 @@ static void AppendLittleEndianArrayToString(const std::array<uint64_t, 2>& array
   out->append(oss.str());
 }
 
+static void AdjustIntegerStringWithScale(std::string* str, int32_t scale) {
+  if (scale == 0) {
+    return;  // Scale 0: the integer string is already the final form.
+  }
+  assert(str != nullptr);
+  assert(!str->empty());
+  const bool is_negative = str->front() == '-';
+  const auto is_negative_offset = static_cast<int32_t>(is_negative);
+  const auto len = static_cast<int32_t>(str->size());
+  const int32_t num_digits = len - is_negative_offset;
+  const int32_t adjusted_exponent = num_digits - 1 - scale;
+
+  // Scientific notation if scale < 0 or exponent < -6 (Java BigDecimal.toString rule).
+  if (scale < 0 || adjusted_exponent < -6) {
+    // Example 1 (negative scale):
+    // Precondition: *str = "123", is_negative_offset = 0, num_digits = 3, scale = -2,
+    //               adjusted_exponent = 4
+    // After inserting decimal point: *str = "1.23"
+    // After appending exponent: *str = "1.23E+4"
+    // Example 2 (positive scale, adjusted exponent below -6):
+    // Precondition: *str = "-123", is_negative_offset = 1, num_digits = 3, scale = 9,
+    //               adjusted_exponent = -7
+    // After inserting decimal point: *str = "-1.23"
+    // After appending exponent: *str = "-1.23E-7"
+    // Example 3 (single digit, so no decimal point is inserted):
+    // Precondition: *str = "0", is_negative_offset = 0, num_digits = 1, scale = -1,
+    //               adjusted_exponent = 1
+    // After inserting decimal point: *str = "0" // Not inserted
+    // After appending exponent: *str = "0E+1"
+    if (num_digits > 1) {
+      str->insert(str->begin() + 1 + is_negative_offset, '.');
+    }
+    str->push_back('E');
+    if (adjusted_exponent >= 0) {
+      str->push_back('+');
+    }
+    // A negative exponent gets its '-' from std::to_string; only '+' is added by hand.
+    str->append(std::to_string(adjusted_exponent));
+    return;
+  }
+
+  if (num_digits > scale) {
+    const auto n = static_cast<size_t>(len - scale);
+    // The decimal point goes `scale` characters before the end. Example 1:
+    // Precondition: *str = "123", len = num_digits = 3, scale = 1, n = 2
+    // After inserting decimal point: *str = "12.3"
+    // Example 2:
+    // Precondition: *str = "-123", len = 4, num_digits = 3, scale = 1, n = 3
+    // After inserting decimal point: *str = "-12.3"
+    str->insert(str->begin() + n, '.');
+    return;
+  }
+
+  // Here num_digits <= scale: prepend "0." plus any padding zeros. Example 1:
+  // Precondition: *str = "123", is_negative_offset = 0, num_digits = 3, scale = 4
+  // After inserting zeros: *str = "000123"
+  // After setting decimal point: *str = "0.0123"
+  // Example 2:
+  // Precondition: *str = "-123", is_negative_offset = 1, num_digits = 3, scale = 4
+  // After inserting zeros: *str = "-000123"
+  // After setting decimal point: *str = "-0.0123"
+  str->insert(is_negative_offset, scale - num_digits + 2, '0');
+  str->at(is_negative_offset + 1) = '.';  // Overwrites the second inserted zero.
+}
+
 }  // namespace
 
 Result<std::string> Decimal::ToString(int32_t scale) const {
@@ -698,7 +763,9 @@ Result<std::string> Decimal::ToString(int32_t scale) const {
         "Decimal::ToString: scale must be in the range [-{}, {}], was {}", kMaxScale,
         kMaxScale, scale);
   }
-  return NotImplemented("Decimal::ToString is not implemented yet");
+  std::string str(ToIntegerString());
+  AdjustIntegerStringWithScale(&str, scale);
+  return str;
 }
 
 std::string Decimal::ToIntegerString() const {
diff --git a/test/decimal_test.cc b/test/decimal_test.cc
@@ -18,7 +18,11 @@
  */
 #include "iceberg/expression/decimal.h"
 
+#include <array>
+#include <cstdint>
+
 #include <gtest/gtest.h>
+#include <sys/types.h>
 
 #include "gmock/gmock.h"
 #include "matchers.h"
@@ -164,4 +168,251 @@ TEST(DecimalTest, LargeValues) {
   }
 }
 
+TEST(DecimalTest, TestStringRoundTrip) {
+  static constexpr std::array<uint64_t, 11> kTestBits = {
+      0,
+      1,
+      999,
+      1000,
+      std::numeric_limits<int32_t>::max(),
+      (1ull << 31),
+      std::numeric_limits<uint32_t>::max(),
+      (1ull << 32),
+      std::numeric_limits<int64_t>::max(),
+      (1ull << 63),
+      std::numeric_limits<uint64_t>::max(),
+  };
+  static constexpr std::array<int32_t, 3> kScales = {0, 1, 10};
+  for (uint64_t high : kTestBits) {
+    for (uint64_t low : kTestBits) {
+      Decimal value(high, low);
+      for (int32_t scale : kScales) {
+        auto result = value.ToString(scale);
+
+        ASSERT_THAT(result, IsOk())
+            << "Failed to convert Decimal to string: " << value.ToIntegerString()
+            << ", scale: " << scale;
+
+        auto round_trip = Decimal::FromString(result.value());
+        ASSERT_THAT(round_trip, IsOk())
+            << "Failed to convert string back to Decimal: " << result.value();
+
+        EXPECT_EQ(value, round_trip.value())
+            << "Round trip failed for value: " << value.ToIntegerString()
+            << ", scale: " << scale;
+      }
+    }
+  }
+}
+
+TEST(DecimalTest, FromStringLimits) {
+  AssertDecimalFromString("1e37", Decimal(542101086242752217ULL, 68739955140067328ULL),
+                          38, 0);
+
+  AssertDecimalFromString(
+      "-1e37", Decimal(17904642987466799398ULL, 18378004118569484288ULL), 38, 0);
+  AssertDecimalFromString(
+      "9.87e37", Decimal(5350537721215964381ULL, 15251391175463010304ULL), 38, 0);
+  AssertDecimalFromString(
+      "-9.87e37", Decimal(13096206352493587234ULL, 3195352898246541312ULL), 38, 0);
+  AssertDecimalFromString("12345678901234567890123456789012345678",
+                          Decimal(669260594276348691ULL, 14143994781733811022ULL), 38, 0);
+  AssertDecimalFromString("-12345678901234567890123456789012345678",
+                          Decimal(17777483479433202924ULL, 4302749291975740594ULL), 38,
+                          0);
+
+  // "9..9" (38 times)
+  const auto dec38times9pos = Decimal(5421010862427522170ULL, 687399551400673279ULL);
+  // "-9..9" (38 times)
+  const auto dec38times9neg = Decimal(13025733211282029445ULL, 17759344522308878337ULL);
+
+  AssertDecimalFromString("99999999999999999999999999999999999999", dec38times9pos, 38,
+                          0);
+  AssertDecimalFromString("-99999999999999999999999999999999999999", dec38times9neg, 38,
+                          0);
+  AssertDecimalFromString("9.9999999999999999999999999999999999999e37", dec38times9pos,
+                          38, 0);
+  AssertDecimalFromString("-9.9999999999999999999999999999999999999e37", dec38times9neg,
+                          38, 0);
+
+  // No exponent, many fractional digits
+  AssertDecimalFromString("9.9999999999999999999999999999999999999", dec38times9pos, 38,
+                          37);
+  AssertDecimalFromString("-9.9999999999999999999999999999999999999", dec38times9neg, 38,
+                          37);
+  AssertDecimalFromString("0.99999999999999999999999999999999999999", dec38times9pos, 38,
+                          38);
+  AssertDecimalFromString("-0.99999999999999999999999999999999999999", dec38times9neg, 38,
+                          38);
+
+  // Negative exponent
+  AssertDecimalFromString("1e-38", Decimal(0, 1), 1, 38);
+  AssertDecimalFromString(
+      "-1e-38", Decimal(18446744073709551615ULL, 18446744073709551615ULL), 1, 38);
+  AssertDecimalFromString("9.99e-36", Decimal(0, 999), 3, 38);
+  AssertDecimalFromString(
+      "-9.99e-36", Decimal(18446744073709551615ULL, 18446744073709550617ULL), 3, 38);
+  AssertDecimalFromString("987e-38", Decimal(0, 987), 3, 38);
+  AssertDecimalFromString(
+      "-987e-38", Decimal(18446744073709551615ULL, 18446744073709550629ULL), 3, 38);
+  AssertDecimalFromString("99999999999999999999999999999999999999e-37", dec38times9pos,
+                          38, 37);
+  AssertDecimalFromString("-99999999999999999999999999999999999999e-37", dec38times9neg,
+                          38, 37);
+  AssertDecimalFromString("99999999999999999999999999999999999999e-38", dec38times9pos,
+                          38, 38);
+  AssertDecimalFromString("-99999999999999999999999999999999999999e-38", dec38times9neg,
+                          38, 38);
+}
+
+TEST(DecimalTest, FromStringInvalid) {
+  // Empty string
+  auto result = Decimal::FromString("");
+  ASSERT_THAT(result, IsError(ErrorKind::kInvalidArgument));
+  ASSERT_THAT(result, HasErrorMessage(
+                          "Decimal::FromString: empty string is not a valid Decimal"));
+  for (const auto& invalid_string :
+       std::vector<std::string>{"-", "0.0.0", "0-13-32", "a", "-23092.235-",
+                                "-+23092.235", "+-23092.235", "00a", "1e1a", "0.00123D/3",
+                                "1.23eA8", "1.23E+3A", "-1.23E--5", "1.2345E+++07"}) {
+    auto result = Decimal::FromString(invalid_string);
+    ASSERT_THAT(result, IsError(ErrorKind::kInvalidArgument));
+    ASSERT_THAT(result, HasErrorMessage("Decimal::FromString: invalid decimal string"));
+  }
+
+  for (const auto& invalid_string :
+       std::vector<std::string>{"1e39", "-1e39", "9e39", "-9e39", "9.9e40", "-9.9e40"}) {
+    auto result = Decimal::FromString(invalid_string);
+    ASSERT_THAT(result, IsError(ErrorKind::kInvalidArgument));
+    ASSERT_THAT(result,
+                HasErrorMessage("Decimal::FromString: scale must be in the range"));
+  }
+}
+
+TEST(DecimalTest, Division) {
+  const std::string expected_string_value("-23923094039234029");
+  const Decimal value(expected_string_value);
+  const Decimal result(value / 3);
+  const Decimal expected_value("-7974364679744676");
+  ASSERT_EQ(expected_value, result);
+}
+
+TEST(DecimalTest, ToString) {
+  // Table-driven: each case is (unscaled value, scale) -> expected ToString output.
+  struct ToStringCase {
+    int64_t test_value;
+    int32_t scale;
+    const char* expected_string;
+  };
+
+  for (const auto& t : std::vector<ToStringCase>{
+           {.test_value = 0, .scale = -1, .expected_string = "0E+1"},
+           {.test_value = 0, .scale = 0, .expected_string = "0"},
+           {.test_value = 0, .scale = 1, .expected_string = "0.0"},
+           {.test_value = 0, .scale = 6, .expected_string = "0.000000"},
+           {.test_value = 0, .scale = 7, .expected_string = "0E-7"},
+           {.test_value = 2, .scale = 7, .expected_string = "2E-7"},
+           {.test_value = 2, .scale = -1, .expected_string = "2E+1"},
+           {.test_value = 2, .scale = 0, .expected_string = "2"},
+           {.test_value = 2, .scale = 1, .expected_string = "0.2"},
+           {.test_value = 2, .scale = 6, .expected_string = "0.000002"},
+           {.test_value = -2, .scale = 7, .expected_string = "-2E-7"},
+           {.test_value = -2, .scale = -1, .expected_string = "-2E+1"},
+           {.test_value = -2, .scale = 0, .expected_string = "-2"},
+           {.test_value = -2, .scale = 1, .expected_string = "-0.2"},
+           {.test_value = -2, .scale = 6, .expected_string = "-0.000002"},
+           {.test_value = 123, .scale = -3, .expected_string = "1.23E+5"},
+           {.test_value = 123, .scale = -1, .expected_string = "1.23E+3"},
+           {.test_value = 123, .scale = 1, .expected_string = "12.3"},
+           {.test_value = 123, .scale = 0, .expected_string = "123"},
+           {.test_value = 123, .scale = 5, .expected_string = "0.00123"},
+           {.test_value = 123, .scale = 8, .expected_string = "0.00000123"},
+           {.test_value = 123, .scale = 9, .expected_string = "1.23E-7"},
+           {.test_value = 123, .scale = 10, .expected_string = "1.23E-8"},
+           {.test_value = -123, .scale = -3, .expected_string = "-1.23E+5"},
+           {.test_value = -123, .scale = -1, .expected_string = "-1.23E+3"},
+           {.test_value = -123, .scale = 1, .expected_string = "-12.3"},
+           {.test_value = -123, .scale = 0, .expected_string = "-123"},
+           {.test_value = -123, .scale = 5, .expected_string = "-0.00123"},
+           {.test_value = -123, .scale = 8, .expected_string = "-0.00000123"},
+           {.test_value = -123, .scale = 9, .expected_string = "-1.23E-7"},
+           {.test_value = -123, .scale = 10, .expected_string = "-1.23E-8"},
+           {.test_value = 1000000000, .scale = -3, .expected_string = "1.000000000E+12"},
+           {.test_value = 1000000000, .scale = -1, .expected_string = "1.000000000E+10"},
+           {.test_value = 1000000000, .scale = 0, .expected_string = "1000000000"},
+           {.test_value = 1000000000, .scale = 1, .expected_string = "100000000.0"},
+           {.test_value = 1000000000, .scale = 5, .expected_string = "10000.00000"},
+           {.test_value = 1000000000,
+            .scale = 15,
+            .expected_string = "0.000001000000000"},
+           {.test_value = 1000000000, .scale = 16, .expected_string = "1.000000000E-7"},
+           {.test_value = 1000000000, .scale = 17, .expected_string = "1.000000000E-8"},
+           {.test_value = -1000000000,
+            .scale = -3,
+            .expected_string = "-1.000000000E+12"},
+           {.test_value = -1000000000,
+            .scale = -1,
+            .expected_string = "-1.000000000E+10"},
+           {.test_value = -1000000000, .scale = 0, .expected_string = "-1000000000"},
+           {.test_value = -1000000000, .scale = 1, .expected_string = "-100000000.0"},
+           {.test_value = -1000000000, .scale = 5, .expected_string = "-10000.00000"},
+           {.test_value = -1000000000,
+            .scale = 15,
+            .expected_string = "-0.000001000000000"},
+           {.test_value = -1000000000, .scale = 16, .expected_string = "-1.000000000E-7"},
+           {.test_value = -1000000000, .scale = 17, .expected_string = "-1.000000000E-8"},
+           {.test_value = 1234567890123456789LL,
+            .scale = -3,
+            .expected_string = "1.234567890123456789E+21"},
+           {.test_value = 1234567890123456789LL,
+            .scale = -1,
+            .expected_string = "1.234567890123456789E+19"},
+           {.test_value = 1234567890123456789LL,
+            .scale = 0,
+            .expected_string = "1234567890123456789"},
+           {.test_value = 1234567890123456789LL,
+            .scale = 1,
+            .expected_string = "123456789012345678.9"},
+           {.test_value = 1234567890123456789LL,
+            .scale = 5,
+            .expected_string = "12345678901234.56789"},
+           {.test_value = 1234567890123456789LL,
+            .scale = 24,
+            .expected_string = "0.000001234567890123456789"},
+           {.test_value = 1234567890123456789LL,
+            .scale = 25,
+            .expected_string = "1.234567890123456789E-7"},
+           {.test_value = -1234567890123456789LL,
+            .scale = -3,
+            .expected_string = "-1.234567890123456789E+21"},
+           {.test_value = -1234567890123456789LL,
+            .scale = -1,
+            .expected_string = "-1.234567890123456789E+19"},
+           {.test_value = -1234567890123456789LL,
+            .scale = 0,
+            .expected_string = "-1234567890123456789"},
+           {.test_value = -1234567890123456789LL,
+            .scale = 1,
+            .expected_string = "-123456789012345678.9"},
+           {.test_value = -1234567890123456789LL,
+            .scale = 5,
+            .expected_string = "-12345678901234.56789"},
+           {.test_value = -1234567890123456789LL,
+            .scale = 24,
+            .expected_string = "-0.000001234567890123456789"},
+           {.test_value = -1234567890123456789LL,
+            .scale = 25,
+            .expected_string = "-1.234567890123456789E-7"},
+       }) {
+    const Decimal value(t.test_value);
+    auto result = value.ToString(t.scale);
+    ASSERT_THAT(result, IsOk())
+        << "Failed to convert Decimal to string: " << value.ToIntegerString()
+        << ", scale: " << t.scale;
+
+    EXPECT_EQ(result.value(), t.expected_string)
+        << "Expected: " << t.expected_string << ", but got: " << result.value();
+  }
+}
+
 }  // namespace iceberg