Skip to content

Commit c3bbd2b

Browse files
committed
fix: comment reviews
1 parent b4f20f7 commit c3bbd2b

File tree

4 files changed

+40
-82
lines changed

4 files changed

+40
-82
lines changed

src/iceberg/transform.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,8 +172,6 @@ class ICEBERG_EXPORT TransformFunction {
172172
public:
173173
virtual ~TransformFunction() = default;
174174
TransformFunction(TransformType transform_type, std::shared_ptr<Type> source_type);
175-
/// \brief Transform an input array to a new array
176-
virtual Result<ArrowArray> Transform(const ArrowArray& data) = 0;
177175
/// \brief Transform an input Literal to a new Literal
178176
virtual Result<std::optional<Literal>> Transform(const Literal& literal) = 0;
179177
/// \brief Get the transform type

src/iceberg/transform_function.cc

Lines changed: 10 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,6 @@ namespace iceberg {
3232
IdentityTransform::IdentityTransform(std::shared_ptr<Type> const& source_type)
3333
: TransformFunction(TransformType::kIdentity, source_type) {}
3434

35-
Result<ArrowArray> IdentityTransform::Transform(const ArrowArray& input) {
36-
return NotImplemented("IdentityTransform::Transform");
37-
}
38-
3935
Result<std::optional<Literal>> IdentityTransform::Transform(const Literal& literal) {
4036
return literal;
4137
}
@@ -57,10 +53,6 @@ BucketTransform::BucketTransform(std::shared_ptr<Type> const& source_type,
5753
int32_t num_buckets)
5854
: TransformFunction(TransformType::kBucket, source_type), num_buckets_(num_buckets) {}
5955

60-
Result<ArrowArray> BucketTransform::Transform(const ArrowArray& input) {
61-
return NotImplemented("BucketTransform::Transform");
62-
}
63-
6456
Result<std::optional<Literal>> BucketTransform::Transform(const Literal& literal) {
6557
assert(literal.type() == source_type());
6658
if (literal.IsBelowMin() || literal.IsAboveMax()) {
@@ -136,10 +128,6 @@ TruncateTransform::TruncateTransform(std::shared_ptr<Type> const& source_type,
136128
int32_t width)
137129
: TransformFunction(TransformType::kTruncate, source_type), width_(width) {}
138130

139-
Result<ArrowArray> TruncateTransform::Transform(const ArrowArray& input) {
140-
return NotImplemented("TruncateTransform::Transform");
141-
}
142-
143131
Result<std::optional<Literal>> TruncateTransform::Transform(const Literal& literal) {
144132
assert(literal.type() == source_type());
145133
if (literal.IsBelowMin() || literal.IsAboveMax()) {
@@ -151,11 +139,11 @@ Result<std::optional<Literal>> TruncateTransform::Transform(const Literal& liter
151139
switch (source_type()->type_id()) {
152140
case TypeId::kInt: {
153141
auto value = std::get<int32_t>(literal.value());
154-
return Literal::Int(value % width_);
142+
return Literal::Int(value - (((value % width_) + width_) % width_));
155143
}
156144
case TypeId::kLong: {
157145
auto value = std::get<int64_t>(literal.value());
158-
return Literal::Long(value % width_);
146+
return Literal::Long(value - (((value % width_) + width_) % width_));
159147
}
160148
case TypeId::kDecimal: {
161149
// TODO(zhjwpku): Handle decimal truncation logic here
@@ -164,8 +152,15 @@ Result<std::optional<Literal>> TruncateTransform::Transform(const Literal& liter
164152
case TypeId::kString: {
165153
auto value = std::get<std::string>(literal.value());
166154
if (value.size() > static_cast<size_t>(width_)) {
167-
value.resize(width_);
155+
size_t safe_point = width_;
156+
while (safe_point > 0 && (value[safe_point] & 0xC0) == 0x80) {
157+
// Find the last valid UTF-8 character boundary before or at width_
158+
safe_point--;
159+
}
160+
// Resize the string to the safe point
161+
value.resize(safe_point);
168162
}
163+
169164
return Literal::String(value);
170165
}
171166
case TypeId::kBinary: {
@@ -209,10 +204,6 @@ Result<std::unique_ptr<TransformFunction>> TruncateTransform::Make(
209204
YearTransform::YearTransform(std::shared_ptr<Type> const& source_type)
210205
: TransformFunction(TransformType::kTruncate, source_type) {}
211206

212-
Result<ArrowArray> YearTransform::Transform(const ArrowArray& input) {
213-
return NotImplemented("YearTransform::Transform");
214-
}
215-
216207
Result<std::optional<Literal>> YearTransform::Transform(const Literal& literal) {
217208
assert(literal.type() == source_type());
218209
if (literal.IsBelowMin() || literal.IsAboveMax()) {
@@ -265,10 +256,6 @@ Result<std::unique_ptr<TransformFunction>> YearTransform::Make(
265256
MonthTransform::MonthTransform(std::shared_ptr<Type> const& source_type)
266257
: TransformFunction(TransformType::kMonth, source_type) {}
267258

268-
Result<ArrowArray> MonthTransform::Transform(const ArrowArray& input) {
269-
return NotImplemented("MonthTransform::Transform");
270-
}
271-
272259
Result<std::optional<Literal>> MonthTransform::Transform(const Literal& literal) {
273260
assert(literal.type() == source_type());
274261
if (literal.IsBelowMin() || literal.IsAboveMax()) {
@@ -333,10 +320,6 @@ Result<std::unique_ptr<TransformFunction>> MonthTransform::Make(
333320
DayTransform::DayTransform(std::shared_ptr<Type> const& source_type)
334321
: TransformFunction(TransformType::kDay, source_type) {}
335322

336-
Result<ArrowArray> DayTransform::Transform(const ArrowArray& input) {
337-
return NotImplemented("DayTransform::Transform");
338-
}
339-
340323
Result<std::optional<Literal>> DayTransform::Transform(const Literal& literal) {
341324
assert(literal.type() == source_type());
342325
if (literal.IsBelowMin() || literal.IsAboveMax()) {
@@ -388,10 +371,6 @@ Result<std::unique_ptr<TransformFunction>> DayTransform::Make(
388371
HourTransform::HourTransform(std::shared_ptr<Type> const& source_type)
389372
: TransformFunction(TransformType::kHour, source_type) {}
390373

391-
Result<ArrowArray> HourTransform::Transform(const ArrowArray& input) {
392-
return NotImplemented("HourTransform::Transform");
393-
}
394-
395374
Result<std::optional<Literal>> HourTransform::Transform(const Literal& literal) {
396375
assert(literal.type() == source_type());
397376
if (literal.IsBelowMin() || literal.IsAboveMax()) {
@@ -441,10 +420,6 @@ Result<std::unique_ptr<TransformFunction>> HourTransform::Make(
441420
VoidTransform::VoidTransform(std::shared_ptr<Type> const& source_type)
442421
: TransformFunction(TransformType::kVoid, source_type) {}
443422

444-
Result<ArrowArray> VoidTransform::Transform(const ArrowArray& input) {
445-
return NotImplemented("VoidTransform::Transform");
446-
}
447-
448423
Result<std::optional<Literal>> VoidTransform::Transform(const Literal& literal) {
449424
return std::nullopt;
450425
}

src/iceberg/transform_function.h

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,6 @@ class IdentityTransform : public TransformFunction {
3030
/// \param source_type Type of the input data.
3131
explicit IdentityTransform(std::shared_ptr<Type> const& source_type);
3232

33-
/// \brief Returns the input array without modification.
34-
Result<ArrowArray> Transform(const ArrowArray& input) override;
35-
3633
/// \brief Returns the same Literal as the input.
3734
Result<std::optional<Literal>> Transform(const Literal& literal) override;
3835

@@ -53,9 +50,6 @@ class BucketTransform : public TransformFunction {
5350
/// \param num_buckets Number of buckets to hash into.
5451
BucketTransform(std::shared_ptr<Type> const& source_type, int32_t num_buckets);
5552

56-
/// \brief Applies the bucket hash function to the input array.
57-
Result<ArrowArray> Transform(const ArrowArray& input) override;
58-
5953
/// \brief Applies the bucket hash function to the input Literal.
6054
Result<std::optional<Literal>> Transform(const Literal& literal) override;
6155

@@ -80,9 +74,6 @@ class TruncateTransform : public TransformFunction {
8074
/// \param width The width to truncate to (e.g., for strings or numbers).
8175
TruncateTransform(std::shared_ptr<Type> const& source_type, int32_t width);
8276

83-
/// \brief Truncates values in the input array to the specified width.
84-
Result<ArrowArray> Transform(const ArrowArray& input) override;
85-
8677
/// \brief Truncates the input Literal to the specified width.
8778
Result<std::optional<Literal>> Transform(const Literal& literal) override;
8879

@@ -106,10 +97,6 @@ class YearTransform : public TransformFunction {
10697
/// \param source_type Must be a timestamp type.
10798
explicit YearTransform(std::shared_ptr<Type> const& source_type);
10899

109-
/// \brief Extracts the year from each date timestamp in the input array, as years from
110-
/// 1970.
111-
Result<ArrowArray> Transform(const ArrowArray& input) override;
112-
113100
/// \brief Extract a date or timestamp year, as years from 1970.
114101
Result<std::optional<Literal>> Transform(const Literal& literal) override;
115102

@@ -129,10 +116,6 @@ class MonthTransform : public TransformFunction {
129116
/// \param source_type Must be a timestamp type.
130117
explicit MonthTransform(std::shared_ptr<Type> const& source_type);
131118

132-
/// \brief Extracts the month from each date or timestamp in the input array, as months
133-
/// from 1970-01-01.
134-
Result<ArrowArray> Transform(const ArrowArray& input) override;
135-
136119
/// \brief Extract a date or timestamp month, as months from 1970-01-01.
137120
Result<std::optional<Literal>> Transform(const Literal& literal) override;
138121

@@ -152,10 +135,6 @@ class DayTransform : public TransformFunction {
152135
/// \param source_type Must be a timestamp type.
153136
explicit DayTransform(std::shared_ptr<Type> const& source_type);
154137

155-
/// \brief Extracts the day from each date or timestamp in the input array, as days from
156-
/// 1970-01-01.
157-
Result<ArrowArray> Transform(const ArrowArray& input) override;
158-
159138
/// \brief Extract a date or timestamp day, as days from 1970-01-01.
160139
Result<std::optional<Literal>> Transform(const Literal& literal) override;
161140

@@ -175,10 +154,6 @@ class HourTransform : public TransformFunction {
175154
/// \param source_type Must be a timestamp type.
176155
explicit HourTransform(std::shared_ptr<Type> const& source_type);
177156

178-
/// \brief Extracts the hour from each timestamp in the input array, as hours from
179-
/// 1970-01-01 00:00:00.
180-
Result<ArrowArray> Transform(const ArrowArray& input) override;
181-
182157
/// \brief Extract a timestamp hour, as hours from 1970-01-01 00:00:00.
183158
Result<std::optional<Literal>> Transform(const Literal& literal) override;
184159

@@ -198,9 +173,6 @@ class VoidTransform : public TransformFunction {
198173
/// \param source_type Input type (ignored).
199174
explicit VoidTransform(std::shared_ptr<Type> const& source_type);
200175

201-
/// \brief Returns an all-null array of the same length as the input.
202-
Result<ArrowArray> Transform(const ArrowArray& input) override;
203-
204176
/// \brief Returns a null literal.
205177
Result<std::optional<Literal>> Transform(const Literal& literal) override;
206178

test/transform_test.cc

Lines changed: 30 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,6 @@ TEST(TransformTest, Transform) {
4040
auto source_type = iceberg::string();
4141
auto identity_transform = transform->Bind(source_type);
4242
ASSERT_TRUE(identity_transform);
43-
44-
ArrowArray arrow_array;
45-
auto result = identity_transform.value()->Transform(arrow_array);
46-
ASSERT_FALSE(result);
47-
EXPECT_EQ(ErrorKind::kNotImplemented, result.error().kind);
48-
EXPECT_EQ("IdentityTransform::Transform", result.error().message);
4943
}
5044

5145
TEST(TransformFunctionTest, CreateBucketTransform) {
@@ -193,7 +187,7 @@ TEST(TransformResultTypeTest, NegativeCases) {
193187
}
194188
}
195189

196-
TEST(TransformFunctionTransformTest, IdentityTransform) {
190+
TEST(TransformLiteralTest, IdentityTransform) {
197191
struct Case {
198192
std::shared_ptr<Type> source_type;
199193
Literal source;
@@ -247,7 +241,7 @@ TEST(TransformFunctionTransformTest, IdentityTransform) {
247241
}
248242
}
249243

250-
TEST(TransformFunctionTransformTest, BucketTransform) {
244+
TEST(TransformLiteralTest, BucketTransform) {
251245
constexpr int32_t num_buckets = 4;
252246
auto transform = Transform::Bucket(num_buckets);
253247

@@ -290,29 +284,47 @@ TEST(TransformFunctionTransformTest, BucketTransform) {
290284
}
291285
}
292286

293-
TEST(TransformFunctionTransformTest, TruncateTransform) {
294-
constexpr int32_t width = 5;
295-
auto transform = Transform::Truncate(width);
296-
287+
TEST(TransformLiteralTest, TruncateTransform) {
297288
struct Case {
298289
std::shared_ptr<Type> source_type;
290+
int32_t width;
299291
Literal source;
300292
Literal expected;
301293
};
302294

303295
const std::vector<Case> cases = {
304296
{.source_type = iceberg::int32(),
297+
.width = 5,
305298
.source = Literal::Int(123456),
306-
.expected = Literal::Int(1)},
299+
.expected = Literal::Int(123455)},
307300
{.source_type = iceberg::string(),
301+
.width = 5,
308302
.source = Literal::String("Hello, World!"),
309303
.expected = Literal::String("Hello")},
304+
{.source_type = iceberg::string(),
305+
.width = 5,
306+
.source = Literal::String("😜🧐🤔🤪🥳"),
307+
// Truncate to 5 bytes, the safe point should be four bytes which fits the first
308+
// emoji
309+
.expected = Literal::String("😜")},
310+
{.source_type = iceberg::string(),
311+
.width = 8,
312+
.source = Literal::String("😜🧐🤔🤪🥳"),
313+
// Truncate to 8 bytes, fits the first two emojis
314+
.expected = Literal::String("😜🧐")},
315+
{.source_type = iceberg::string(),
316+
.width = 3,
317+
.source = Literal::String("😜🧐🤔🤪🥳"),
318+
// Truncate to 3 bytes, the saft point will be 0 bytes
319+
.expected = Literal::String("")},
310320
{.source_type = iceberg::binary(),
321+
.width = 5,
311322
.source = Literal::Binary({0x01, 0x02, 0x03, 0x04, 0x05, 0x06}),
312323
.expected = Literal::Binary({0x01, 0x02, 0x03, 0x04, 0x05})},
313324
};
314325

315326
for (const auto& c : cases) {
327+
auto transform = Transform::Truncate(c.width);
316328
auto transformPtr = transform->Bind(c.source_type);
317329
ASSERT_TRUE(transformPtr.has_value()) << "Failed to bind truncate transform";
318330
auto result = transformPtr.value()->Transform(c.source);
@@ -324,7 +336,7 @@ TEST(TransformFunctionTransformTest, TruncateTransform) {
324336
}
325337
}
326338

327-
TEST(TransformFunctionTransformTest, YearTransform) {
339+
TEST(TransformLiteralTest, YearTransform) {
328340
auto transform = Transform::Year();
329341

330342
struct Case {
@@ -335,6 +347,7 @@ TEST(TransformFunctionTransformTest, YearTransform) {
335347

336348
const std::vector<Case> cases = {
337349
{.source_type = iceberg::timestamp(),
350+
// 2021-06-01T11:43:20Z
338351
.source = Literal::Timestamp(1622547800000000),
339352
.expected = Literal::Int(2021)},
340353
{.source_type = iceberg::timestamp_tz(),
@@ -357,7 +370,7 @@ TEST(TransformFunctionTransformTest, YearTransform) {
357370
}
358371
}
359372

360-
TEST(TransformFunctionTransformTest, MonthTransform) {
373+
TEST(TransformLiteralTest, MonthTransform) {
361374
auto transform = Transform::Month();
362375

363376
struct Case {
@@ -423,7 +436,7 @@ TEST(TransformFunctionTransformTest, DayTransform) {
423436
}
424437
}
425438

426-
TEST(TransformFunctionTransformTest, HourTransform) {
439+
TEST(TransformLiteralTest, HourTransform) {
427440
auto transform = Transform::Hour();
428441

429442
struct Case {
@@ -453,7 +466,7 @@ TEST(TransformFunctionTransformTest, HourTransform) {
453466
}
454467
}
455468

456-
TEST(TransformFunctionTransformTest, VoidTransform) {
469+
TEST(TransformLiteralTest, VoidTransform) {
457470
auto transform = Transform::Void();
458471

459472
struct Case {

0 commit comments

Comments
 (0)