Skip to content

Commit f4d4f5d

Browse files
committed
feat: add truncate_utils.h
1 parent 080b10a commit f4d4f5d

File tree

2 files changed

+20
-4
lines changed

2 files changed

+20
-4
lines changed

src/iceberg/transform_function.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,11 +144,11 @@ Result<Literal> TruncateTransform::Transform(const Literal& literal) {
144144
switch (source_type()->type_id()) {
145145
case TypeId::kInt: {
146146
auto value = std::get<int32_t>(literal.value());
147-
return Literal::Int(TruncateUtils::TruncateInt(value, width_));
147+
return Literal::Int(TruncateUtils::TruncateInteger(value, width_));
148148
}
149149
case TypeId::kLong: {
150150
auto value = std::get<int64_t>(literal.value());
151-
return Literal::Long(TruncateUtils::TruncateLong(value, width_));
151+
return Literal::Long(TruncateUtils::TruncateInteger(value, width_));
152152
}
153153
case TypeId::kDecimal: {
154154
// TODO(zhjwpku): Handle decimal truncation logic here

src/iceberg/util/truncate_utils.h

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,22 @@
1919

2020
#pragma once
2121

22-
#include <cstdint>
2322
#include <string>
23+
#include <utility>
2424

2525
#include "iceberg/iceberg_export.h"
2626

2727
namespace iceberg {
2828

2929
ICEBERG_EXPORT class TruncateUtils {
3030
public:
31+
/// \brief Truncate a UTF-8 string to a specified number of code points.
32+
///
33+
/// \param source The input string to truncate.
34+
/// \param L The maximum number of code points allowed in the output string.
35+
/// \return A valid UTF-8 string truncated to L code points.
36+
/// If the input string is already valid and has at most L code points, it is
37+
/// returned unchanged.
3138
static std::string TruncateUTF8(std::string&& source, size_t L) {
3239
size_t code_point_count = 0;
3340
size_t safe_point = 0;
@@ -36,7 +43,7 @@ ICEBERG_EXPORT class TruncateUtils {
3643
// Start of a new UTF-8 code point
3744
if ((source[i] & 0xC0) != 0x80) {
3845
code_point_count++;
39-
if (code_point_count > L) {
46+
if (code_point_count > static_cast<size_t>(L)) {
4047
safe_point = i;
4148
break;
4249
}
@@ -50,6 +57,15 @@ ICEBERG_EXPORT class TruncateUtils {
5057

5158
return std::move(source);
5259
}
60+
61+
/// \brief Truncate a signed integer v (int32_t or int64_t) down to a multiple of W.
///
/// Computes v - r, where r is the non-negative remainder of v divided by W
/// (0 <= r < W). The result is therefore rounded toward negative infinity, e.g.
/// TruncateInteger(-1, 10) == -10 and TruncateInteger(11, 10) == 10.
///
/// \param v The value to truncate.
/// \param W The truncation width. Must be non-zero; a zero width divides by zero.
/// \return The largest multiple of W that is less than or equal to v.
template <typename T>
static inline T TruncateInteger(T v, size_t W) {
  // The remainder must be taken in *signed* arithmetic. Evaluating `v % W`
  // directly would convert a negative v to an unsigned value (because W is
  // size_t) and yield a remainder of the wrapped value instead of v itself.
  const long long width = static_cast<long long>(W);
  long long remainder = static_cast<long long>(v) % width;
  if (remainder < 0) {
    // C++ `%` truncates toward zero, so shift negative remainders into [0, W).
    remainder += width;
  }
  // Subtract in unsigned arithmetic so that wrap-around close to the minimum
  // representable value is well-defined rather than signed-overflow UB.
  return static_cast<T>(static_cast<unsigned long long>(v) -
                        static_cast<unsigned long long>(remainder));
}
5369
};
5470

5571
} // namespace iceberg

0 commit comments

Comments
 (0)