-
Notifications
You must be signed in to change notification settings - Fork 70
feat: transform function #61
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
795c1d3
392a1d1
fed1d56
350e5eb
5a46de2
9f21cb4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,66 +20,206 @@ | |
| #include "iceberg/transform.h" | ||
|
|
||
| #include <format> | ||
| #include <regex> | ||
|
|
||
| namespace iceberg { | ||
| #include "iceberg/transform_function.h" | ||
| #include "iceberg/type.h" | ||
|
|
||
| namespace iceberg { | ||
| namespace { | ||
| /// \brief Get the relative transform name | ||
| constexpr std::string_view ToString(TransformType type) { | ||
| constexpr std::string_view kUnknownName = "unknown"; | ||
| constexpr std::string_view kIdentityName = "identity"; | ||
| constexpr std::string_view kBucketName = "bucket"; | ||
| constexpr std::string_view kTruncateName = "truncate"; | ||
| constexpr std::string_view kYearName = "year"; | ||
| constexpr std::string_view kMonthName = "month"; | ||
| constexpr std::string_view kDayName = "day"; | ||
| constexpr std::string_view kHourName = "hour"; | ||
| constexpr std::string_view kVoidName = "void"; | ||
| } // namespace | ||
|
|
||
| constexpr std::string_view TransformTypeToString(TransformType type) { | ||
| switch (type) { | ||
| case TransformType::kUnknown: | ||
| return "unknown"; | ||
| return kUnknownName; | ||
| case TransformType::kIdentity: | ||
| return "identity"; | ||
| return kIdentityName; | ||
| case TransformType::kBucket: | ||
| return "bucket"; | ||
| return kBucketName; | ||
| case TransformType::kTruncate: | ||
| return "truncate"; | ||
| return kTruncateName; | ||
| case TransformType::kYear: | ||
| return "year"; | ||
| return kYearName; | ||
| case TransformType::kMonth: | ||
| return "month"; | ||
| return kMonthName; | ||
| case TransformType::kDay: | ||
| return "day"; | ||
| return kDayName; | ||
| case TransformType::kHour: | ||
| return "hour"; | ||
| return kHourName; | ||
| case TransformType::kVoid: | ||
| return "void"; | ||
| default: | ||
| return "invalid"; | ||
| return kVoidName; | ||
| } | ||
| } | ||
| } // namespace | ||
|
|
||
| TransformFunction::TransformFunction(TransformType type) : transform_type_(type) {} | ||
| std::shared_ptr<Transform> Transform::Identity() { | ||
| static auto instance = | ||
| std::shared_ptr<Transform>(new Transform(TransformType::kIdentity)); | ||
| return instance; | ||
| } | ||
|
|
||
| std::shared_ptr<Transform> Transform::Year() { | ||
| static auto instance = std::shared_ptr<Transform>(new Transform(TransformType::kYear)); | ||
| return instance; | ||
| } | ||
|
|
||
| TransformType TransformFunction::transform_type() const { return transform_type_; } | ||
| std::shared_ptr<Transform> Transform::Month() { | ||
| static auto instance = std::shared_ptr<Transform>(new Transform(TransformType::kMonth)); | ||
| return instance; | ||
| } | ||
|
|
||
| std::shared_ptr<Transform> Transform::Day() { | ||
| static auto instance = std::shared_ptr<Transform>(new Transform(TransformType::kDay)); | ||
| return instance; | ||
| } | ||
|
|
||
| std::shared_ptr<Transform> Transform::Hour() { | ||
| static auto instance = std::shared_ptr<Transform>(new Transform(TransformType::kHour)); | ||
| return instance; | ||
| } | ||
|
|
||
| std::shared_ptr<Transform> Transform::Void() { | ||
| static auto instance = std::shared_ptr<Transform>(new Transform(TransformType::kVoid)); | ||
| return instance; | ||
| } | ||
|
|
||
| std::shared_ptr<Transform> Transform::Bucket(int32_t num_buckets) { | ||
| return std::shared_ptr<Transform>(new Transform(TransformType::kBucket, num_buckets)); | ||
| } | ||
|
|
||
| std::shared_ptr<Transform> Transform::Truncate(int32_t width) { | ||
| return std::shared_ptr<Transform>(new Transform(TransformType::kTruncate, width)); | ||
| } | ||
|
|
||
| Transform::Transform(TransformType transform_type) : transform_type_(transform_type) {} | ||
|
|
||
| Transform::Transform(TransformType transform_type, int32_t param) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Throw for non-parameterized transform_type?
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If we want to avoid invalid inputs, perhaps we should define separate static functions to create each transform type?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, currently I have only added Transform::Identity; I will add the others as well. |
||
| : transform_type_(transform_type), param_(param) {} | ||
|
|
||
| TransformType Transform::transform_type() const { return transform_type_; } | ||
|
|
||
| Result<std::unique_ptr<TransformFunction>> Transform::Bind( | ||
| const std::shared_ptr<Type>& source_type) const { | ||
| auto type_str = TransformTypeToString(transform_type_); | ||
|
|
||
| switch (transform_type_) { | ||
| case TransformType::kIdentity: | ||
| return std::make_unique<IdentityTransform>(source_type); | ||
|
|
||
| case TransformType::kBucket: { | ||
| if (auto param = std::get_if<int32_t>(¶m_)) { | ||
| return std::make_unique<BucketTransform>(source_type, *param); | ||
| } | ||
| return unexpected<Error>({ | ||
| .kind = ErrorKind::kInvalidArgument, | ||
| .message = std::format( | ||
| "Bucket requires int32 param, none found in transform '{}'", type_str), | ||
| }); | ||
| } | ||
|
|
||
| std::string TransformFunction::ToString() const { | ||
| return std::format("{}", iceberg::ToString(transform_type_)); | ||
| case TransformType::kTruncate: { | ||
| if (auto param = std::get_if<int32_t>(¶m_)) { | ||
| return std::make_unique<TruncateTransform>(source_type, *param); | ||
| } | ||
| return unexpected<Error>({ | ||
| .kind = ErrorKind::kInvalidArgument, | ||
| .message = std::format( | ||
| "Truncate requires int32 param, none found in transform '{}'", type_str), | ||
| }); | ||
| } | ||
|
|
||
| case TransformType::kYear: | ||
| return std::make_unique<YearTransform>(source_type); | ||
| case TransformType::kMonth: | ||
| return std::make_unique<MonthTransform>(source_type); | ||
| case TransformType::kDay: | ||
| return std::make_unique<DayTransform>(source_type); | ||
| case TransformType::kHour: | ||
| return std::make_unique<HourTransform>(source_type); | ||
| case TransformType::kVoid: | ||
| return std::make_unique<VoidTransform>(source_type); | ||
|
|
||
| default: | ||
| return unexpected<Error>({ | ||
| .kind = ErrorKind::kNotSupported, | ||
| .message = std::format("Unsupported transform type: '{}'", type_str), | ||
| }); | ||
| } | ||
| } | ||
|
|
||
| bool TransformFunction::Equals(const TransformFunction& other) const { | ||
| return transform_type_ == other.transform_type_; | ||
| return transform_type_ == other.transform_type_ && *source_type_ == *other.source_type_; | ||
| } | ||
|
|
||
| std::string Transform::ToString() const { | ||
| switch (transform_type_) { | ||
| case TransformType::kIdentity: | ||
| case TransformType::kYear: | ||
| case TransformType::kMonth: | ||
| case TransformType::kDay: | ||
| case TransformType::kHour: | ||
| case TransformType::kVoid: | ||
| case TransformType::kUnknown: | ||
| return std::format("{}", TransformTypeToString(transform_type_)); | ||
| case TransformType::kBucket: | ||
| case TransformType::kTruncate: | ||
| return std::format("{}[{}]", TransformTypeToString(transform_type_), | ||
| std::get<int32_t>(param_)); | ||
| } | ||
| } | ||
|
|
||
| IdentityTransformFunction::IdentityTransformFunction() | ||
| : TransformFunction(TransformType::kIdentity) {} | ||
| TransformFunction::TransformFunction(TransformType transform_type, | ||
| std::shared_ptr<Type> source_type) | ||
| : transform_type_(transform_type), source_type_(std::move(source_type)) {} | ||
|
|
||
| expected<ArrowArray, Error> IdentityTransformFunction::Transform( | ||
| const ArrowArray& input) { | ||
| return unexpected<Error>({.kind = ErrorKind::kNotSupported, | ||
| .message = "IdentityTransformFunction::Transform"}); | ||
| TransformType TransformFunction::transform_type() const { return transform_type_; } | ||
|
|
||
| std::shared_ptr<Type> const& TransformFunction::source_type() const { | ||
gty404 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| return source_type_; | ||
| } | ||
|
|
||
| bool Transform::Equals(const Transform& other) const { | ||
| return transform_type_ == other.transform_type_ && param_ == other.param_; | ||
| } | ||
|
|
||
| expected<std::unique_ptr<TransformFunction>, Error> TransformFunctionFromString( | ||
| std::string_view str) { | ||
| if (str == "identity") { | ||
| return std::make_unique<IdentityTransformFunction>(); | ||
| Result<std::shared_ptr<Transform>> TransformFromString(std::string_view transform_str) { | ||
| if (transform_str == kIdentityName) return Transform::Identity(); | ||
| if (transform_str == kYearName) return Transform::Year(); | ||
| if (transform_str == kMonthName) return Transform::Month(); | ||
| if (transform_str == kDayName) return Transform::Day(); | ||
| if (transform_str == kHourName) return Transform::Hour(); | ||
| if (transform_str == kVoidName) return Transform::Void(); | ||
|
|
||
| // Match bucket[16] or truncate[4] | ||
| static const std::regex param_regex( | ||
| std::format(R"(({}|{})\[(\d+)\])", kBucketName, kTruncateName)); | ||
| std::string str(transform_str); | ||
| std::smatch match; | ||
| if (std::regex_match(str, match, param_regex)) { | ||
| const std::string type_str = match[1]; | ||
| const int32_t param = std::stoi(match[2]); | ||
|
|
||
| if (type_str == kBucketName) { | ||
| return Transform::Bucket(param); | ||
| } | ||
| if (type_str == kTruncateName) { | ||
| return Transform::Truncate(param); | ||
| } | ||
| } | ||
| return unexpected<Error>( | ||
| {.kind = ErrorKind::kInvalidArgument, | ||
| .message = "Invalid TransformFunction string: " + std::string(str)}); | ||
|
|
||
| return unexpected<Error>({ | ||
| .kind = ErrorKind::kInvalidArgument, | ||
| .message = std::format("Invalid Transform string: {}", transform_str), | ||
| }); | ||
| } | ||
|
|
||
| } // namespace iceberg | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Throw for parameterized transform_type?