Skip to content

Commit b45e466

Browse files
committed
feat: transform function
1 parent d05a9b2 commit b45e466

File tree

11 files changed

+676
-64
lines changed

11 files changed

+676
-64
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ set(ICEBERG_SOURCES
2626
partition_field.cc
2727
partition_spec.cc
2828
transform.cc
29+
transform/transform_function.cc
30+
transform/transform_spec.cc
2931
type.cc)
3032

3133
set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS)

src/iceberg/transform.cc

Lines changed: 58 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -21,55 +21,80 @@
2121

2222
#include <format>
2323

24-
namespace iceberg {
24+
#include "iceberg/transform/transform_function.h"
25+
#include "iceberg/transform/transform_spec.h"
26+
#include "iceberg/type.h"
2527

28+
namespace iceberg {
2629
namespace {
27-
/// \brief Get the relative transform name
28-
constexpr std::string_view ToString(TransformType type) {
29-
switch (type) {
30-
case TransformType::kUnknown:
31-
return "unknown";
32-
case TransformType::kIdentity:
33-
return "identity";
34-
case TransformType::kBucket:
35-
return "bucket";
36-
case TransformType::kTruncate:
37-
return "truncate";
38-
case TransformType::kYear:
39-
return "year";
40-
case TransformType::kMonth:
41-
return "month";
42-
case TransformType::kDay:
43-
return "day";
44-
case TransformType::kHour:
45-
return "hour";
46-
case TransformType::kVoid:
47-
return "void";
48-
default:
49-
return "invalid";
30+
31+
/// \brief Extract the single int32 parameter carried by a transform spec.
///
/// Called for transforms that take one integer argument (see the kBucket and
/// kTruncate cases in TransformFunction::Make).
///
/// NOTE(review): assumes the elements of `TransformSpec::params` are a
/// std::variant that can hold int32_t, as the previous std::get<int32_t> call
/// implied - confirm against transform_spec.h.
///
/// \param transform_spec Spec whose `params` list is inspected.
/// \return The parameter value, or a kInvalidArgument error when the spec does
/// not hold exactly one parameter or that parameter is not an int32.
expected<int32_t, Error> GetInt32FromParam(TransformSpec const& transform_spec) {
  auto const& params = transform_spec.params;
  if (params.empty()) {
    return unexpected<Error>(
        {.kind = ErrorKind::kInvalidArgument,
         .message = "Transform requires 1 parameter, but none were provided."});
  }
  if (params.size() != 1U) {
    return unexpected<Error>(
        {.kind = ErrorKind::kInvalidArgument,
         .message = std::format("Transform expects exactly 1 parameter, but got {}.",
                                params.size())});
  }
  // std::get would throw std::bad_variant_access on a type mismatch; this
  // function reports failures through `expected`, so probe with get_if instead.
  auto const* value = std::get_if<int32_t>(&params[0]);
  if (value == nullptr) {
    return unexpected<Error>(
        {.kind = ErrorKind::kInvalidArgument,
         .message = "Transform parameter must be a 32-bit integer."});
  }
  return *value;
}
46+
5247
} // namespace
5348

54-
TransformFunction::TransformFunction(TransformType type) : transform_type_(type) {}
49+
TransformFunction::TransformFunction(TransformType transform_type,
50+
std::shared_ptr<Type> source_type)
51+
: transform_type_(transform_type), source_type_(std::move(source_type)) {}
5552

5653
/// \brief Return the transform kind recorded at construction.
TransformType TransformFunction::transform_type() const {
  return transform_type_;
}
5754

55+
std::shared_ptr<Type> const& TransformFunction::source_type() const {
56+
return source_type_;
57+
}
58+
5859
/// \brief Render this transform as the name of its transform type.
std::string TransformFunction::ToString() const {
  const std::string_view type_name = iceberg::ToString(transform_type_);
  return std::format("{}", type_name);
}
6162

6263
bool TransformFunction::Equals(const TransformFunction& other) const {
63-
return transform_type_ == other.transform_type_;
64+
return transform_type_ == other.transform_type_ && *source_type_ == *other.source_type_;
6465
}
6566

66-
IdentityTransformFunction::IdentityTransformFunction()
67-
: TransformFunction(TransformType::kIdentity) {}
68-
69-
expected<ArrowArray, Error> IdentityTransformFunction::Transform(
70-
const ArrowArray& input) {
71-
return unexpected<Error>({.kind = ErrorKind::kNotSupported,
72-
.message = "IdentityTransformFunction::Transform"});
67+
/// \brief Create the concrete TransformFunction described by a TransformSpec.
///
/// Bucket and truncate transforms read their single int32 argument through
/// GetInt32FromParam and forward its error if the argument is unavailable.
/// Every other listed transform is built from the source type alone.
///
/// \param spec Transform type, source type and parameters to build from.
/// \return The new transform function, or a kInvalidArgument error for a
/// transform type that has no case here.
expected<std::unique_ptr<TransformFunction>, Error> TransformFunction::Make(
    const TransformSpec& spec) {
  const auto& source = spec.source_type;
  switch (spec.transform_type) {
    case TransformType::kIdentity: {
      return std::make_unique<IdentityTransform>(source);
    }
    case TransformType::kBucket: {
      auto bucket_count = GetInt32FromParam(spec);
      if (!bucket_count.has_value()) {
        return unexpected(bucket_count.error());
      }
      return std::make_unique<BucketTransform>(source, bucket_count.value());
    }
    case TransformType::kTruncate: {
      auto truncate_width = GetInt32FromParam(spec);
      if (!truncate_width.has_value()) {
        return unexpected(truncate_width.error());
      }
      return std::make_unique<TruncateTransform>(source, truncate_width.value());
    }
    case TransformType::kYear: {
      return std::make_unique<YearTransform>(source);
    }
    case TransformType::kMonth: {
      return std::make_unique<MonthTransform>(source);
    }
    case TransformType::kDay: {
      return std::make_unique<DayTransform>(source);
    }
    case TransformType::kHour: {
      return std::make_unique<HourTransform>(source);
    }
    case TransformType::kVoid: {
      return std::make_unique<VoidTransform>(source);
    }
    default: {
      // Reached for kUnknown and for any value outside the enumerators above.
      return unexpected<Error>(
          Error{.kind = ErrorKind::kInvalidArgument,
                .message = std::format("Unsupported or invalid transform type: {}",
                                       iceberg::ToString(spec.transform_type))});
    }
  }
}
7499

75100
} // namespace iceberg

src/iceberg/transform.h

Lines changed: 47 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -56,14 +56,42 @@ enum class TransformType {
5656
kVoid,
5757
};
5858

59+
/// \brief Get the relative transform name
60+
constexpr std::string_view ToString(TransformType type) {
61+
switch (type) {
62+
case TransformType::kUnknown:
63+
return "unknown";
64+
case TransformType::kIdentity:
65+
return "identity";
66+
case TransformType::kBucket:
67+
return "bucket";
68+
case TransformType::kTruncate:
69+
return "truncate";
70+
case TransformType::kYear:
71+
return "year";
72+
case TransformType::kMonth:
73+
return "month";
74+
case TransformType::kDay:
75+
return "day";
76+
case TransformType::kHour:
77+
return "hour";
78+
case TransformType::kVoid:
79+
return "void";
80+
}
81+
}
82+
5983
/// \brief A transform function used for partitioning.
6084
class ICEBERG_EXPORT TransformFunction : public util::Formattable {
6185
public:
62-
explicit TransformFunction(TransformType type);
86+
TransformFunction(TransformType transform_type, std::shared_ptr<Type> source_type);
6387
/// \brief Transform an input array to a new array
6488
virtual expected<ArrowArray, Error> Transform(const ArrowArray& data) = 0;
6589
/// \brief Get the transform type
66-
virtual TransformType transform_type() const;
90+
TransformType transform_type() const;
91+
/// \brief Get the source type of transform function
92+
std::shared_ptr<Type> const& source_type() const;
93+
/// \brief Get the result type of transform function
94+
virtual expected<std::shared_ptr<Type>, Error> ResultType() const = 0;
6795

6896
std::string ToString() const override;
6997

@@ -75,18 +103,28 @@ class ICEBERG_EXPORT TransformFunction : public util::Formattable {
75103
return !(lhs == rhs);
76104
}
77105

106+
/// \brief Make a TransformFunction instance based on the given TransformSpec.
107+
///
108+
/// This method examines the transform type and associated parameters within the
109+
/// provided TransformSpec, and returns a corresponding implementation of
110+
/// TransformFunction.
111+
///
112+
/// The function may fail if the specified transform type is not recognized or
113+
/// supported.
114+
///
115+
/// \param spec The TransformSpec that contains the transform type and the associated
116+
/// parameters.
117+
/// \return An expected result containing a unique pointer to the
118+
/// corresponding TransformFunction implementation, or an error if the creation fails.
119+
static expected<std::unique_ptr<TransformFunction>, Error> Make(
120+
const TransformSpec& spec);
121+
78122
private:
79123
/// \brief Compare two transform functions for equality.
80124
[[nodiscard]] virtual bool Equals(const TransformFunction& other) const;
81125

82126
TransformType transform_type_;
83-
};
84-
85-
class IdentityTransformFunction : public TransformFunction {
86-
public:
87-
IdentityTransformFunction();
88-
/// \brief Transform will take an input array and transform it into a new array.
89-
expected<ArrowArray, Error> Transform(const ArrowArray& input) override;
127+
std::shared_ptr<Type> source_type_;
90128
};
91129

92130
} // namespace iceberg
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/transform/transform_function.h"
21+
22+
#include <format>
23+
24+
#include <iceberg/type.h>
25+
26+
namespace iceberg {
27+
28+
/// \brief Construct an identity transform over the given source type.
IdentityTransform::IdentityTransform(std::shared_ptr<Type> const& source_type)
    : TransformFunction{TransformType::kIdentity, source_type} {}

/// \brief Not implemented yet; always returns a kNotImplemented error.
expected<ArrowArray, Error> IdentityTransform::Transform(const ArrowArray& input) {
  return unexpected<Error>(Error{.kind = ErrorKind::kNotImplemented,
                                 .message = "IdentityTransform::Transform"});
}

/// \brief Report the result type of the identity transform.
///
/// \return The source type itself when it is non-null and primitive; otherwise
/// a kNotSupported error naming the rejected type ("null" for a null pointer).
expected<std::shared_ptr<Type>, Error> IdentityTransform::ResultType() const {
  auto src_type = source_type();
  if (src_type && src_type->is_primitive()) {
    return src_type;
  }
  return unexpected(Error{
      .kind = ErrorKind::kNotSupported,
      .message = std::format("{} is not a valid input type for identity transform",
                             src_type ? src_type->ToString() : "null")});
}
46+
47+
/// \brief Construct a bucket transform.
///
/// \param source_type Type of the input the transform is applied to.
/// \param num_buckets Bucket count, stored as given.
BucketTransform::BucketTransform(std::shared_ptr<Type> const& source_type,
                                 int32_t num_buckets)
    : TransformFunction{TransformType::kBucket, source_type},
      num_buckets_{num_buckets} {}

/// \brief Not implemented yet; always returns a kNotImplemented error.
expected<ArrowArray, Error> BucketTransform::Transform(const ArrowArray& input) {
  return unexpected<Error>(Error{.kind = ErrorKind::kNotImplemented,
                                 .message = "BucketTransform::Transform"});
}

/// \brief Not implemented yet; always returns a kNotImplemented error.
expected<std::shared_ptr<Type>, Error> BucketTransform::ResultType() const {
  return unexpected<Error>(Error{.kind = ErrorKind::kNotImplemented,
                                 .message = "BucketTransform::result_type"});
}

/// \brief Render as `<transform name>[<num_buckets>]`.
std::string BucketTransform::ToString() const {
  const auto type_name = iceberg::ToString(transform_type());
  return std::format("{}[{}]", type_name, num_buckets_);
}
64+
65+
/// \brief Construct a truncate transform.
///
/// \param source_type Type of the input the transform is applied to.
/// \param width Truncation width, stored as given.
TruncateTransform::TruncateTransform(std::shared_ptr<Type> const& source_type,
                                     int32_t width)
    : TransformFunction{TransformType::kTruncate, source_type}, width_{width} {}

/// \brief Not implemented yet; always returns a kNotImplemented error.
expected<ArrowArray, Error> TruncateTransform::Transform(const ArrowArray& input) {
  return unexpected<Error>(Error{.kind = ErrorKind::kNotImplemented,
                                 .message = "TruncateTransform::Transform"});
}

/// \brief Not implemented yet; always returns a kNotImplemented error.
expected<std::shared_ptr<Type>, Error> TruncateTransform::ResultType() const {
  return unexpected<Error>(Error{.kind = ErrorKind::kNotImplemented,
                                 .message = "TruncateTransform::result_type"});
}

/// \brief Render as `<transform name>[<width>]`.
std::string TruncateTransform::ToString() const {
  const auto type_name = iceberg::ToString(transform_type());
  return std::format("{}[{}]", type_name, width_);
}
82+
83+
YearTransform::YearTransform(std::shared_ptr<Type> const& source_type)
84+
: TransformFunction(TransformType::kTruncate, source_type) {}
85+
86+
expected<ArrowArray, Error> YearTransform::Transform(const ArrowArray& input) {
87+
return unexpected<Error>(
88+
{.kind = ErrorKind::kNotImplemented, .message = "YearTransform::Transform"});
89+
}
90+
91+
expected<std::shared_ptr<Type>, Error> YearTransform::ResultType() const {
92+
return unexpected<Error>(
93+
{.kind = ErrorKind::kNotImplemented, .message = "YearTransform::result_type"});
94+
}
95+
96+
MonthTransform::MonthTransform(std::shared_ptr<Type> const& source_type)
97+
: TransformFunction(TransformType::kMonth, source_type) {}
98+
99+
expected<ArrowArray, Error> MonthTransform::Transform(const ArrowArray& input) {
100+
return unexpected<Error>(
101+
{.kind = ErrorKind::kNotImplemented, .message = "MonthTransform::Transform"});
102+
}
103+
104+
expected<std::shared_ptr<Type>, Error> MonthTransform::ResultType() const {
105+
return unexpected<Error>(
106+
{.kind = ErrorKind::kNotImplemented, .message = "MonthTransform::result_type"});
107+
}
108+
109+
DayTransform::DayTransform(std::shared_ptr<Type> const& source_type)
110+
: TransformFunction(TransformType::kDay, source_type) {}
111+
112+
expected<ArrowArray, Error> DayTransform::Transform(const ArrowArray& input) {
113+
return unexpected<Error>(
114+
{.kind = ErrorKind::kNotImplemented, .message = "DayTransform::Transform"});
115+
}
116+
117+
expected<std::shared_ptr<Type>, Error> DayTransform::ResultType() const {
118+
return unexpected<Error>(
119+
{.kind = ErrorKind::kNotImplemented, .message = "DayTransform::result_type"});
120+
}
121+
122+
HourTransform::HourTransform(std::shared_ptr<Type> const& source_type)
123+
: TransformFunction(TransformType::kHour, source_type) {}
124+
125+
expected<ArrowArray, Error> HourTransform::Transform(const ArrowArray& input) {
126+
return unexpected<Error>(
127+
{.kind = ErrorKind::kNotImplemented, .message = "HourTransform::Transform"});
128+
}
129+
130+
expected<std::shared_ptr<Type>, Error> HourTransform::ResultType() const {
131+
return unexpected<Error>(
132+
{.kind = ErrorKind::kNotImplemented, .message = "HourTransform::result_type"});
133+
}
134+
135+
VoidTransform::VoidTransform(std::shared_ptr<Type> const& source_type)
136+
: TransformFunction(TransformType::kVoid, source_type) {}
137+
138+
expected<ArrowArray, Error> VoidTransform::Transform(const ArrowArray& input) {
139+
return unexpected<Error>(
140+
{.kind = ErrorKind::kNotImplemented, .message = "VoidTransform::Transform"});
141+
}
142+
143+
expected<std::shared_ptr<Type>, Error> VoidTransform::ResultType() const {
144+
return unexpected<Error>(
145+
{.kind = ErrorKind::kNotImplemented, .message = "VoidTransform::result_type"});
146+
}
147+
148+
} // namespace iceberg

0 commit comments

Comments
 (0)