Skip to content

Commit cf9ddc1

Browse files
committed
feat: transform human string for literal
1 parent 472002e commit cf9ddc1

File tree

12 files changed

+504
-11
lines changed

12 files changed

+504
-11
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ set(ICEBERG_SOURCES
9595
util/snapshot_util.cc
9696
util/temporal_util.cc
9797
util/timepoint.cc
98+
util/transform_util.cc
9899
util/truncate_util.cc
99100
util/type_util.cc
100101
util/uuid.cc)

src/iceberg/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ iceberg_sources = files(
100100
'transaction.cc',
101101
'transform.cc',
102102
'transform_function.cc',
103+
'transform_util.cc',
103104
'type.cc',
104105
'update/pending_update.cc',
105106
'update/update_partition_spec.cc',

src/iceberg/test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ add_iceberg_test(schema_test
6464
schema_util_test.cc
6565
sort_field_test.cc
6666
sort_order_test.cc
67+
transform_human_string_test.cc
6768
transform_test.cc
6869
type_test.cc)
6970

src/iceberg/test/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ iceberg_tests = {
4040
'schema_util_test.cc',
4141
'sort_field_test.cc',
4242
'sort_order_test.cc',
43+
'transform_human_string_test.cc',
4344
'transform_test.cc',
4445
'type_test.cc',
4546
),
Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include <memory>
21+
#include <string>
22+
23+
#include <gmock/gmock.h>
24+
#include <gtest/gtest.h>
25+
26+
#include "iceberg/expression/literal.h"
27+
#include "iceberg/test/matchers.h"
28+
#include "iceberg/transform.h"
29+
30+
namespace iceberg {
31+
32+
struct HumanStringTestParam {
33+
std::string test_name;
34+
std::shared_ptr<Type> source_type;
35+
Literal literal;
36+
std::vector<std::string> expecteds;
37+
};
38+
39+
/// \brief Fixture for the identity transform; literals are formatted as given.
class IdentityTest : public ::testing::TestWithParam<HumanStringTestParam> {
 protected:
  /// Transforms under test; each case supplies one expected string per entry.
  std::vector<std::shared_ptr<Transform>> transforms_{Transform::Identity()};
};
43+
44+
// Verifies that the identity transform renders each literal as the expected string.
TEST_P(IdentityTest, ToHumanString) {
  const auto& param = GetParam();
  // Every transform needs its own expectation; stop here rather than read past
  // the end of `expecteds` when a case is misconfigured.
  ASSERT_EQ(param.expecteds.size(), transforms_.size());

  // Walk both sequences in step; avoids the signed/unsigned index comparison.
  auto expected = param.expecteds.begin();
  for (const auto& transform : transforms_) {
    EXPECT_THAT(transform->ToHumanString(param.literal),
                HasValue(::testing::Eq(*expected)));
    ++expected;
  }
}
51+
52+
INSTANTIATE_TEST_SUITE_P(
53+
IdentityTestCases, IdentityTest,
54+
::testing::Values(
55+
HumanStringTestParam{.test_name = "Null",
56+
.literal = Literal::Null(std::make_shared<IntType>()),
57+
.expecteds{"null"}},
58+
HumanStringTestParam{.test_name = "Binary",
59+
.literal = Literal::Binary(std::vector<uint8_t>{1, 2, 3}),
60+
.expecteds{"AQID"}},
61+
HumanStringTestParam{.test_name = "Fixed",
62+
.literal = Literal::Fixed(std::vector<uint8_t>{1, 2, 3}),
63+
.expecteds{"AQID"}},
64+
HumanStringTestParam{.test_name = "Date",
65+
.literal = Literal::Date(17501),
66+
.expecteds{"2017-12-01"}},
67+
HumanStringTestParam{.test_name = "Time",
68+
.literal = Literal::Time(36775038194),
69+
.expecteds{"10:12:55.038194"}},
70+
HumanStringTestParam{.test_name = "TimestampWithZone",
71+
.literal = Literal::TimestampTz(1512151975038194),
72+
.expecteds{"2017-12-01T18:12:55.038194+00:00"}},
73+
HumanStringTestParam{.test_name = "TimestampWithoutZone",
74+
.literal = Literal::Timestamp(1512123175038194),
75+
.expecteds{"2017-12-01T10:12:55.038194"}},
76+
HumanStringTestParam{.test_name = "Long",
77+
.literal = Literal::Long(-1234567890000L),
78+
.expecteds{"-1234567890000"}},
79+
HumanStringTestParam{.test_name = "String",
80+
.literal = Literal::String("a/b/c=d"),
81+
.expecteds{"a/b/c=d"}}),
82+
[](const ::testing::TestParamInfo<HumanStringTestParam>& info) {
83+
return info.param.test_name;
84+
});
85+
86+
/// \brief Fixture for the temporal transforms that the date test applies.
class DateTest : public ::testing::TestWithParam<HumanStringTestParam> {
 protected:
  /// Transforms under test, in the order the cases list their expected strings:
  /// year, month, day.
  std::vector<std::shared_ptr<Transform>> transforms_{
      Transform::Year(),
      Transform::Month(),
      Transform::Day(),
  };
};
91+
92+
// Binds each transform to DateType, applies it to the case's literal and checks
// the human-readable form of the transformed result.
TEST_P(DateTest, ToHumanString) {
  const auto& param = GetParam();
  // Every transform needs its own expectation; stop here rather than read past
  // the end of `expecteds` when a case is misconfigured.
  ASSERT_EQ(param.expecteds.size(), transforms_.size());

  auto expected = param.expecteds.begin();
  for (const auto& transform : transforms_) {
    ICEBERG_UNWRAP_OR_FAIL(auto trans_func,
                           transform->Bind(std::make_shared<DateType>()));
    ICEBERG_UNWRAP_OR_FAIL(auto literal, trans_func->Transform(param.literal));
    EXPECT_THAT(transform->ToHumanString(literal),
                HasValue(::testing::Eq(*expected)));
    ++expected;
  }
}
103+
104+
INSTANTIATE_TEST_SUITE_P(
105+
DateTestCases, DateTest,
106+
::testing::Values(
107+
HumanStringTestParam{.test_name = "Date",
108+
.literal = Literal::Date(17501),
109+
.expecteds = {"2017", "2017-12", "2017-12-01"}},
110+
HumanStringTestParam{.test_name = "NegativeDate",
111+
.literal = Literal::Date(-2),
112+
.expecteds = {"1969", "1969-12", "1969-12-30"}},
113+
HumanStringTestParam{.test_name = "DateLowerBound",
114+
.literal = Literal::Date(0),
115+
.expecteds = {"1970", "1970-01", "1970-01-01"}},
116+
HumanStringTestParam{.test_name = "NegativeDateLowerBound",
117+
.literal = Literal::Date(-365),
118+
.expecteds = {"1969", "1969-01", "1969-01-01"}},
119+
HumanStringTestParam{.test_name = "NegativeDateUpperBound",
120+
.literal = Literal::Date(-1),
121+
.expecteds = {"1969", "1969-12", "1969-12-31"}},
122+
HumanStringTestParam{.test_name = "Null",
123+
.literal = Literal::Null(std::make_shared<DateType>()),
124+
.expecteds = {"null", "null", "null"}}),
125+
[](const ::testing::TestParamInfo<HumanStringTestParam>& info) {
126+
return info.param.test_name;
127+
});
128+
129+
/// \brief Fixture for the temporal transforms that the timestamp test applies.
class TimestampTest : public ::testing::TestWithParam<HumanStringTestParam> {
 protected:
  /// Transforms under test, in the order the cases list their expected strings:
  /// year, month, day, hour.
  std::vector<std::shared_ptr<Transform>> transforms_{
      Transform::Year(),
      Transform::Month(),
      Transform::Day(),
      Transform::Hour(),
  };
};
134+
135+
// Binds each transform to the case's source type, applies it to the literal and
// checks the human-readable form of the transformed result.
TEST_P(TimestampTest, ToHumanString) {
  const auto& param = GetParam();
  // Every transform needs its own expectation; stop here rather than read past
  // the end of `expecteds` when a case is misconfigured.
  ASSERT_EQ(param.expecteds.size(), transforms_.size());

  auto expected = param.expecteds.begin();
  for (const auto& transform : transforms_) {
    ICEBERG_UNWRAP_OR_FAIL(auto trans_func, transform->Bind(param.source_type));
    ICEBERG_UNWRAP_OR_FAIL(auto literal, trans_func->Transform(param.literal));
    EXPECT_THAT(transform->ToHumanString(literal),
                HasValue(::testing::Eq(*expected)));
    ++expected;
  }
}
144+
145+
INSTANTIATE_TEST_SUITE_P(
146+
TimestampTestCases, TimestampTest,
147+
::testing::Values(
148+
HumanStringTestParam{
149+
.test_name = "Timestamp",
150+
.source_type = std::make_shared<TimestampType>(),
151+
.literal = Literal::Timestamp(1512123175038194),
152+
.expecteds = {"2017", "2017-12", "2017-12-01", "2017-12-01-10"}},
153+
HumanStringTestParam{
154+
.test_name = "NegativeTimestamp",
155+
.source_type = std::make_shared<TimestampType>(),
156+
.literal = Literal::Timestamp(-136024961806),
157+
.expecteds = {"1969", "1969-12", "1969-12-30", "1969-12-30-10"}},
158+
HumanStringTestParam{
159+
.test_name = "TimestampLowerBound",
160+
.source_type = std::make_shared<TimestampType>(),
161+
.literal = Literal::Timestamp(0),
162+
.expecteds = {"1970", "1970-01", "1970-01-01", "1970-01-01-00"}},
163+
HumanStringTestParam{
164+
.test_name = "NegativeTimestampLowerBound",
165+
.source_type = std::make_shared<TimestampType>(),
166+
.literal = Literal::Timestamp(-172800000000),
167+
.expecteds = {"1969", "1969-12", "1969-12-30", "1969-12-30-00"},
168+
},
169+
HumanStringTestParam{
170+
.test_name = "NegativeTimestampUpperBound",
171+
.source_type = std::make_shared<TimestampType>(),
172+
.literal = Literal::Timestamp(-1),
173+
.expecteds = {"1969", "1969-12", "1969-12-31", "1969-12-31-23"}},
174+
HumanStringTestParam{
175+
.test_name = "TimestampTz",
176+
.source_type = std::make_shared<TimestampTzType>(),
177+
.literal = Literal::TimestampTz(1512151975038194),
178+
.expecteds = {"2017", "2017-12", "2017-12-01", "2017-12-01-18"}},
179+
HumanStringTestParam{.test_name = "Null",
180+
.source_type = std::make_shared<TimestampType>(),
181+
.literal = Literal::Null(std::make_shared<TimestampType>()),
182+
.expecteds = {"null", "null", "null", "null"}}),
183+
[](const ::testing::TestParamInfo<HumanStringTestParam>& info) {
184+
return info.param.test_name;
185+
});
186+
187+
} // namespace iceberg

src/iceberg/transform.cc

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "iceberg/util/checked_cast.h"
3232
#include "iceberg/util/macros.h"
3333
#include "iceberg/util/projection_util_internal.h"
34+
#include "iceberg/util/transform_util.h"
3435

3536
namespace iceberg {
3637
namespace {
@@ -366,6 +367,50 @@ Result<std::unique_ptr<UnboundPredicate>> Transform::ProjectStrict(
366367
std::unreachable();
367368
}
368369

370+
// Renders an already-transformed literal as a human-readable string.
//
// Null literals yield "null". The year/month/day/hour transforms format the
// int32 held by the literal; every other transform formats according to the
// literal's own type.
Result<std::string> Transform::ToHumanString(const Literal& value) {
  if (value.IsNull()) {
    return "null";
  }

  // NOTE(review): std::get presumably throws when the literal holds a different
  // alternative than the one requested -- confirm callers only pass literals
  // produced by the matching transform.
  switch (transform_type_) {
    case TransformType::kYear:
      return TransformUtil::HumanYear(std::get<int32_t>(value.value()));
    case TransformType::kMonth:
      return TransformUtil::HumanMonth(std::get<int32_t>(value.value()));
    case TransformType::kDay:
      return TransformUtil::HumanDay(std::get<int32_t>(value.value()));
    case TransformType::kHour:
      return TransformUtil::HumanHour(std::get<int32_t>(value.value()));
    default:
      break;  // Not a temporal transform: fall back to type-driven formatting.
  }

  switch (value.type()->type_id()) {
    case TypeId::kDate:
      return TransformUtil::HumanDay(std::get<int32_t>(value.value()));
    case TypeId::kTime:
      return TransformUtil::HumanTime(std::get<int64_t>(value.value()));
    case TypeId::kTimestamp:
      return TransformUtil::HumanTimestamp(std::get<int64_t>(value.value()));
    case TypeId::kTimestampTz:
      return TransformUtil::HumanTimestampWithZone(std::get<int64_t>(value.value()));
    case TypeId::kFixed:
    case TypeId::kBinary: {
      // Raw bytes are handed to the Base64 encoder as a character range.
      const auto& bytes = std::get<std::vector<uint8_t>>(value.value());
      return TransformUtil::Base64Encode(
          {reinterpret_cast<const char*>(bytes.data()), bytes.size()});
    }
    case TypeId::kDecimal: {
      // The scale lives on the type, not on the stored decimal value.
      const auto& decimal_type = internal::checked_cast<DecimalType&>(*value.type());
      return std::get<::iceberg::Decimal>(value.value()).ToString(decimal_type.scale());
    }
    case TypeId::kString:
      return std::get<std::string>(value.value());
    default:
      return value.ToString();
  }
}
413+
369414
bool TransformFunction::Equals(const TransformFunction& other) const {
370415
return transform_type_ == other.transform_type_ && *source_type_ == *other.source_type_;
371416
}

src/iceberg/transform.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,12 @@ class ICEBERG_EXPORT Transform : public util::Formattable {
194194
Result<std::unique_ptr<UnboundPredicate>> ProjectStrict(
195195
std::string_view name, const std::shared_ptr<BoundPredicate>& predicate);
196196

197+
/// \brief Returns a human-readable String representation of a transformed value.
198+
///
199+
/// \param value The already-transformed literal value to format.
200+
/// \return A human-readable string representation of the value.
201+
Result<std::string> ToHumanString(const Literal& value);
202+
197203
/// \brief Returns a string representation of this transform (e.g., "bucket[16]").
198204
std::string ToString() const override;
199205

src/iceberg/util/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ install_headers(
3636
'string_util.h',
3737
'temporal_util.h',
3838
'timepoint.h',
39+
'transform_util.h',
3940
'truncate_util.h',
4041
'type_util.h',
4142
'uuid.h',

src/iceberg/util/timepoint.cc

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,6 @@
2020
#include "iceberg/util/timepoint.h"
2121

2222
#include <chrono>
23-
#include <iomanip>
24-
#include <sstream>
2523

2624
namespace iceberg {
2725

@@ -46,18 +44,35 @@ int64_t UnixNsFromTimePointNs(TimePointNs time_point_ns) {
4644
}
4745

4846
// Formats a millisecond time point as date and time of day joined by 'T'.
std::string FormatTimePointMs(TimePointMs time_point_ms) {
  // %F is the date (YYYY-MM-DD) and %T the time of day (HH:MM:SS...).
  std::string iso_text = std::format("{:%FT%T}", time_point_ms);
  return iso_text;
}
49+
50+
// Formats a Unix timestamp in microseconds as YYYY-MM-DDTHH:MM:SS[.fff[fff]].
//
// The fraction is omitted when it is zero, written with 3 digits when the value
// is a whole number of milliseconds, and with 6 digits otherwise.
std::string FormatUnixMicro(int64_t unix_micro) {
  // Split with floor semantics. '/' and '%' truncate toward zero, which for
  // pre-epoch values would select the following second and leave a negative
  // fraction (e.g. -1 would print as "1970-01-01T00:00:00.-00001").
  int64_t seconds = unix_micro / kMicrosPerSecond;
  int64_t micros = unix_micro % kMicrosPerSecond;
  if (micros < 0) {
    micros += kMicrosPerSecond;
    --seconds;
  }

  const auto tp = std::chrono::time_point<std::chrono::system_clock, std::chrono::seconds>{
      std::chrono::seconds(seconds)};

  if (micros == 0) {
    return std::format("{:%FT%T}", tp);
  }
  if (micros % kMicrosPerMillis == 0) {
    return std::format("{:%FT%T}.{:03d}", tp, micros / kMicrosPerMillis);
  }
  return std::format("{:%FT%T}.{:06d}", tp, micros);
}
5563

56-
// Add milliseconds
57-
auto ms = unix_ms % 1000;
58-
oss << "." << std::setfill('0') << std::setw(3) << ms << " UTC";
64+
// Formats a Unix timestamp in microseconds as
// YYYY-MM-DDTHH:MM:SS[.fff[fff]]+00:00.
//
// The fraction is omitted when it is zero, written with 3 digits when the value
// is a whole number of milliseconds, and with 6 digits otherwise.
std::string FormatUnixMicroTz(int64_t unix_micro) {
  // Split with floor semantics. '/' and '%' truncate toward zero, which for
  // pre-epoch values would select the following second and leave a negative
  // fraction in the output.
  int64_t seconds = unix_micro / kMicrosPerSecond;
  int64_t micros = unix_micro % kMicrosPerSecond;
  if (micros < 0) {
    micros += kMicrosPerSecond;
    --seconds;
  }

  const auto tp = std::chrono::time_point<std::chrono::system_clock, std::chrono::seconds>{
      std::chrono::seconds(seconds)};

  if (micros == 0) {
    return std::format("{:%FT%T}+00:00", tp);
  }
  if (micros % kMicrosPerMillis == 0) {
    return std::format("{:%FT%T}.{:03d}+00:00", tp, micros / kMicrosPerMillis);
  }
  return std::format("{:%FT%T}.{:06d}+00:00", tp, micros);
}
6277

6378
} // namespace iceberg

src/iceberg/util/timepoint.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ using TimePointMs =
3434
using TimePointNs =
3535
std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds>;
3636

37+
/// Unit conversion factors for millisecond and microsecond timestamps.
/// Declared `inline` so every translation unit including this header shares a
/// single definition.
inline constexpr int64_t kMillisPerSecond = 1000;
inline constexpr int64_t kMicrosPerMillis = 1000;
inline constexpr int64_t kMicrosPerSecond = kMillisPerSecond * kMicrosPerMillis;
40+
3741
/// \brief Returns a TimePointMs from a Unix timestamp in milliseconds
3842
ICEBERG_EXPORT Result<TimePointMs> TimePointMsFromUnixMs(int64_t unix_ms);
3943

@@ -49,4 +53,12 @@ ICEBERG_EXPORT int64_t UnixNsFromTimePointNs(TimePointNs time_point_ns);
4953
/// \brief Returns a human-readable string representation of a TimePointMs
5054
ICEBERG_EXPORT std::string FormatTimePointMs(TimePointMs time_point_ms);
5155

56+
/// \brief Returns a human-readable string representation of a Unix timestamp in
57+
/// microseconds
58+
ICEBERG_EXPORT std::string FormatUnixMicro(int64_t unix_micro);
59+
60+
/// \brief Returns a human-readable string representation of a Unix timestamp in
61+
/// microseconds with time zone
62+
ICEBERG_EXPORT std::string FormatUnixMicroTz(int64_t unix_micro);
63+
5264
} // namespace iceberg

0 commit comments

Comments
 (0)