Skip to content

Commit 506d01d

Browse files
committed
feat: add ToIntegerString
1 parent e2b1b97 commit 506d01d

File tree

2 files changed

+80
-7
lines changed

2 files changed

+80
-7
lines changed

src/iceberg/expression/decimal.cc

Lines changed: 66 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,18 @@
1919

2020
#include "iceberg/expression/decimal.h"
2121

22+
#include <algorithm>
2223
#include <array>
2324
#include <bit>
2425
#include <cassert>
2526
#include <charconv>
2627
#include <cstddef>
2728
#include <cstdint>
2829
#include <format>
30+
#include <iomanip>
2931
#include <iostream>
3032
#include <limits>
33+
#include <sstream>
3134
#include <string>
3235
#include <string_view>
3336

@@ -622,9 +625,69 @@ static inline void ShiftAndAdd(std::string_view input, std::array<uint64_t, 2>&
622625
}
623626
}
624627

628+
// Returns a mask for the bit_index lower order bits.
// Only valid for bit_index in the range [0, 64).
constexpr uint64_t LeastSignificantBitMask(int64_t bit_index) {
  // Shifting a 64-bit value by a negative amount or by >= 64 is undefined behaviour.
  assert(bit_index >= 0 && bit_index < 64);
  return (static_cast<uint64_t>(1) << bit_index) - 1;
}

// Appends the base-10 representation of an unsigned 128-bit integer to `*out`.
//
// `array` holds the value as two 64-bit limbs in little-endian order: array[0] is the
// least significant limb and array[1] the most significant one. The value is treated
// as unsigned; no sign is emitted. Zero is rendered as "0". Existing content of `*out`
// is preserved and the digits are appended after it.
static void AppendLittleEndianArrayToString(const std::array<uint64_t, 2>& array,
                                            std::string* out) {
  // Working copy that is repeatedly divided by 1e9 in place.
  std::array<uint64_t, 2> limbs = array;

  // Number of low-order limbs that still need processing, i.e. the index of the most
  // significant non-zero limb plus one.
  size_t live_limbs = limbs.size();
  while (live_limbs > 0 && limbs[live_limbs - 1] == 0) {
    --live_limbs;
  }
  if (live_limbs == 0) {
    out->push_back('0');
    return;
  }

  constexpr uint32_t k1e9 = 1000000000U;
  constexpr size_t kNumBits = 128;
  constexpr size_t kDigitsPerSegment = 9;
  // Segments will contain the array split into groups that map to decimal digits, in
  // little endian order. Each segment will hold at most 9 decimal digits. For example, if
  // the input represents 9876543210123456789, then segments will be [123456789,
  // 876543210, 9].
  // The max number of segments needed = ceil(kNumBits * log(2) / log(1e9))
  // = ceil(kNumBits / 29.897352854) <= ceil(kNumBits / 29).
  std::array<uint32_t, (kNumBits + 28) / 29> segments{};
  size_t num_segments = 0;

  while (live_limbs > 0) {
    // Compute remainder = limbs % 1e9 and limbs = limbs / 1e9, walking from the most
    // significant live limb down to limb 0. Indices are used instead of pointers so
    // that nothing is ever decremented past the start of the array.
    uint32_t remainder = 0;
    for (size_t i = live_limbs; i-- > 0;) {
      uint64_t& limb = limbs[i];
      // Long division on 32-bit halves: remainder < 1e9 < 2^30, so
      // (remainder << 32) | half fits in 64 bits and each quotient fits in 32 bits.
      const auto hi = static_cast<uint32_t>(limb >> 32);
      const auto lo = static_cast<uint32_t>(limb & LeastSignificantBitMask(32));

      const uint64_t dividend_hi = (static_cast<uint64_t>(remainder) << 32) | hi;
      const uint64_t quotient_hi = dividend_hi / k1e9;
      remainder = static_cast<uint32_t>(dividend_hi % k1e9);

      const uint64_t dividend_lo = (static_cast<uint64_t>(remainder) << 32) | lo;
      const uint64_t quotient_lo = dividend_lo / k1e9;
      remainder = static_cast<uint32_t>(dividend_lo % k1e9);

      limb = (quotient_hi << 32) | quotient_lo;
    }
    segments[num_segments++] = remainder;

    // Drop limbs that the division has reduced to zero; the loop ends once the whole
    // quotient is zero.
    while (live_limbs > 0 && limbs[live_limbs - 1] == 0) {
      --live_limbs;
    }
  }

  // Emit segments from most to least significant. std::to_chars is used instead of a
  // stream so the output is locale-independent and needs no intermediate allocation.
  std::array<char, kDigitsPerSegment> digits{};
  out->reserve(out->size() + num_segments * kDigitsPerSegment);
  for (size_t i = num_segments; i-- > 0;) {
    // Every segment is < 1e9, so it always fits in the 9-character buffer.
    const char* end =
        std::to_chars(digits.data(), digits.data() + digits.size(), segments[i]).ptr;
    const auto length = static_cast<size_t>(end - digits.data());
    // The leading segment is written as-is; the others are left-padded with zeros to
    // exactly 9 digits, e.g. 123 becomes "000000123".
    if (i + 1 != num_segments) {
      out->append(kDigitsPerSegment - length, '0');
    }
    out->append(digits.data(), length);
  }
}
629692

630693
} // namespace
@@ -689,8 +752,6 @@ Result<Decimal> Decimal::FromString(std::string_view str, int32_t* precision,
689752
result.Negate();
690753
}
691754

692-
std::cout << result_array[1] << " " << result_array[0] << std::endl;
693-
694755
if (parsed_scale < 0) {
695756
// For the scale to 0, to avoid negative scales (due to compatibility issues with
696757
// external systems such as databases)
@@ -798,13 +859,11 @@ ICEBERG_EXPORT Decimal operator*(const Decimal& lhs, const Decimal& rhs) {
798859
}
799860

800861
ICEBERG_EXPORT Decimal operator/(const Decimal& lhs, const Decimal& rhs) {
  // Divide() produces a (quotient, remainder) pair wrapped in a result object;
  // value() unwraps it and only the quotient is returned here.
  auto quotient_and_remainder = lhs.Divide(rhs).value();
  return quotient_and_remainder.first;
}
804864

805865
ICEBERG_EXPORT Decimal operator%(const Decimal& lhs, const Decimal& rhs) {
  // Divide() produces a (quotient, remainder) pair wrapped in a result object;
  // value() unwraps it and only the remainder is returned here.
  auto quotient_and_remainder = lhs.Divide(rhs).value();
  return quotient_and_remainder.second;
}
809868

810869
ICEBERG_EXPORT bool operator<(const Decimal& lhs, const Decimal& rhs) {

test/decimal_test.cc

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,4 +150,18 @@ TEST(DecimalTest, SmallValues) {
150150
}
151151
}
152152

153+
TEST(DecimalTest, LargeValues) {
  // Each literal is parsed into a Decimal and printed back with ToIntegerString();
  // the printed text must match the input exactly.
  const std::array<std::string, 4> inputs = {
      "99999999999999999999999999999999999999",
      "-99999999999999999999999999999999999999",
      "170141183460469231731687303715884105727",   // maximum positive value
      "-170141183460469231731687303715884105728",  // minimum negative value
  };

  for (size_t i = 0; i < inputs.size(); ++i) {
    const std::string& expected = inputs[i];
    const Decimal parsed(expected);
    const std::string actual = parsed.ToIntegerString();
    EXPECT_EQ(actual, expected) << "Expected: " << expected << ", but got: " << actual;
  }
}
166+
153167
} // namespace iceberg

0 commit comments

Comments
 (0)