|
20 | 20 | #include "iceberg/transform.h" |
21 | 21 |
|
#include <charconv>
#include <format>
#include <regex>
23 | 24 |
|
24 | | -namespace iceberg { |
| 25 | +#include "iceberg/transform_function.h" |
| 26 | +#include "iceberg/type.h" |
25 | 27 |
|
| 28 | +namespace iceberg { |
namespace {
// Textual names of the transform types. The conversion and parsing routines in
// this file compare against and return these constants.
constexpr std::string_view kUnknownName{"unknown"};
constexpr std::string_view kIdentityName{"identity"};
constexpr std::string_view kBucketName{"bucket"};
constexpr std::string_view kTruncateName{"truncate"};
constexpr std::string_view kYearName{"year"};
constexpr std::string_view kMonthName{"month"};
constexpr std::string_view kDayName{"day"};
constexpr std::string_view kHourName{"hour"};
constexpr std::string_view kVoidName{"void"};
}  // namespace
| 40 | + |
| 41 | +constexpr std::string_view TransformTypeToString(TransformType type) { |
29 | 42 | switch (type) { |
30 | 43 | case TransformType::kUnknown: |
31 | | - return "unknown"; |
| 44 | + return kUnknownName; |
32 | 45 | case TransformType::kIdentity: |
33 | | - return "identity"; |
| 46 | + return kIdentityName; |
34 | 47 | case TransformType::kBucket: |
35 | | - return "bucket"; |
| 48 | + return kBucketName; |
36 | 49 | case TransformType::kTruncate: |
37 | | - return "truncate"; |
| 50 | + return kTruncateName; |
38 | 51 | case TransformType::kYear: |
39 | | - return "year"; |
| 52 | + return kYearName; |
40 | 53 | case TransformType::kMonth: |
41 | | - return "month"; |
| 54 | + return kMonthName; |
42 | 55 | case TransformType::kDay: |
43 | | - return "day"; |
| 56 | + return kDayName; |
44 | 57 | case TransformType::kHour: |
45 | | - return "hour"; |
| 58 | + return kHourName; |
46 | 59 | case TransformType::kVoid: |
47 | | - return "void"; |
48 | | - default: |
49 | | - return "invalid"; |
| 60 | + return kVoidName; |
50 | 61 | } |
51 | 62 | } |
52 | | -} // namespace |
53 | 63 |
|
54 | | -TransformFunction::TransformFunction(TransformType type) : transform_type_(type) {} |
| 64 | +std::shared_ptr<Transform> Transform::Identity() { |
| 65 | + static auto instance = |
| 66 | + std::shared_ptr<Transform>(new Transform(TransformType::kIdentity)); |
| 67 | + return instance; |
| 68 | +} |
| 69 | + |
| 70 | +std::shared_ptr<Transform> Transform::Year() { |
| 71 | + static auto instance = std::shared_ptr<Transform>(new Transform(TransformType::kYear)); |
| 72 | + return instance; |
| 73 | +} |
55 | 74 |
|
56 | | -TransformType TransformFunction::transform_type() const { return transform_type_; } |
| 75 | +std::shared_ptr<Transform> Transform::Month() { |
| 76 | + static auto instance = std::shared_ptr<Transform>(new Transform(TransformType::kMonth)); |
| 77 | + return instance; |
| 78 | +} |
| 79 | + |
| 80 | +std::shared_ptr<Transform> Transform::Day() { |
| 81 | + static auto instance = std::shared_ptr<Transform>(new Transform(TransformType::kDay)); |
| 82 | + return instance; |
| 83 | +} |
| 84 | + |
| 85 | +std::shared_ptr<Transform> Transform::Hour() { |
| 86 | + static auto instance = std::shared_ptr<Transform>(new Transform(TransformType::kHour)); |
| 87 | + return instance; |
| 88 | +} |
| 89 | + |
| 90 | +std::shared_ptr<Transform> Transform::Void() { |
| 91 | + static auto instance = std::shared_ptr<Transform>(new Transform(TransformType::kVoid)); |
| 92 | + return instance; |
| 93 | +} |
| 94 | + |
| 95 | +std::shared_ptr<Transform> Transform::Bucket(int32_t num_buckets) { |
| 96 | + return std::shared_ptr<Transform>(new Transform(TransformType::kBucket, num_buckets)); |
| 97 | +} |
| 98 | + |
| 99 | +std::shared_ptr<Transform> Transform::Truncate(int32_t width) { |
| 100 | + return std::shared_ptr<Transform>(new Transform(TransformType::kTruncate, width)); |
| 101 | +} |
| 102 | + |
| 103 | +Transform::Transform(TransformType transform_type) : transform_type_(transform_type) {} |
| 104 | + |
| 105 | +Transform::Transform(TransformType transform_type, int32_t param) |
| 106 | + : transform_type_(transform_type), param_(param) {} |
| 107 | + |
| 108 | +TransformType Transform::transform_type() const { return transform_type_; } |
| 109 | + |
/// \brief Create the TransformFunction for this transform, bound to a source type.
///
/// \param source_type Type forwarded to the constructed transform function.
/// \return The bound function on success. A kInvalidArgument error when a
/// bucket or truncate transform holds no int32 parameter. A kNotSupported error
/// for any transform type without a case below (e.g. kUnknown).
expected<std::unique_ptr<TransformFunction>, Error> Transform::Bind(
    const std::shared_ptr<Type>& source_type) const {
  // Only used in the error messages below.
  auto type_str = TransformTypeToString(transform_type_);

  switch (transform_type_) {
    case TransformType::kIdentity:
      return std::make_unique<IdentityTransform>(source_type);

    case TransformType::kBucket: {
      // get_if returns nullptr rather than throwing when no int32_t is stored.
      if (auto param = std::get_if<int32_t>(&param_)) {
        return std::make_unique<BucketTransform>(source_type, *param);
      }
      return unexpected<Error>({
          .kind = ErrorKind::kInvalidArgument,
          .message = std::format(
              "Bucket requires int32 param, none found in transform '{}'", type_str),
      });
    }

    case TransformType::kTruncate: {
      if (auto param = std::get_if<int32_t>(&param_)) {
        return std::make_unique<TruncateTransform>(source_type, *param);
      }
      return unexpected<Error>({
          .kind = ErrorKind::kInvalidArgument,
          .message = std::format(
              "Truncate requires int32 param, none found in transform '{}'", type_str),
      });
    }

    case TransformType::kYear:
      return std::make_unique<YearTransform>(source_type);
    case TransformType::kMonth:
      return std::make_unique<MonthTransform>(source_type);
    case TransformType::kDay:
      return std::make_unique<DayTransform>(source_type);
    case TransformType::kHour:
      return std::make_unique<HourTransform>(source_type);
    case TransformType::kVoid:
      return std::make_unique<VoidTransform>(source_type);

    default:
      return unexpected<Error>({
          .kind = ErrorKind::kNotSupported,
          .message = std::format("Unsupported transform type: '{}'", type_str),
      });
  }
}
61 | 158 |
|
62 | 159 | bool TransformFunction::Equals(const TransformFunction& other) const { |
63 | | - return transform_type_ == other.transform_type_; |
| 160 | + return transform_type_ == other.transform_type_ && *source_type_ == *other.source_type_; |
| 161 | +} |
| 162 | + |
/// \brief Render this transform as text.
///
/// Transforms without a parameter produce just their name (e.g. "identity").
/// Bucket and truncate produce "name[param]" (e.g. "bucket[16]").
///
/// \return The textual form, or "invalid" when transform_type_ holds a value
/// that none of the case labels below covers.
std::string Transform::ToString() const {
  switch (transform_type_) {
    case TransformType::kIdentity:
    case TransformType::kYear:
    case TransformType::kMonth:
    case TransformType::kDay:
    case TransformType::kHour:
    case TransformType::kVoid:
    case TransformType::kUnknown:
      return std::format("{}", TransformTypeToString(transform_type_));
    case TransformType::kBucket:
    case TransformType::kTruncate:
      // NOTE(review): assumes param_ is a std::variant; std::get then throws
      // std::bad_variant_access if no int32_t is stored. The Bucket/Truncate
      // factories in this file always store one.
      return std::format("{}[{}]", TransformTypeToString(transform_type_),
                         std::get<int32_t>(param_));
  }
  // Only reachable for a value outside the handled enumerators. Without this
  // return, control would flow off the end of a non-void function, which is
  // undefined behaviour.
  return "invalid";
}
65 | 179 |
|
66 | | -IdentityTransformFunction::IdentityTransformFunction() |
67 | | - : TransformFunction(TransformType::kIdentity) {} |
| 180 | +TransformFunction::TransformFunction(TransformType transform_type, |
| 181 | + std::shared_ptr<Type> source_type) |
| 182 | + : transform_type_(transform_type), source_type_(std::move(source_type)) {} |
68 | 183 |
|
69 | | -expected<ArrowArray, Error> IdentityTransformFunction::Transform( |
70 | | - const ArrowArray& input) { |
71 | | - return unexpected<Error>({.kind = ErrorKind::kNotSupported, |
72 | | - .message = "IdentityTransformFunction::Transform"}); |
| 184 | +TransformType TransformFunction::transform_type() const { return transform_type_; } |
| 185 | + |
| 186 | +std::shared_ptr<Type> const& TransformFunction::source_type() const { |
| 187 | + return source_type_; |
| 188 | +} |
| 189 | + |
| 190 | +bool Transform::Equals(const Transform& other) const { |
| 191 | + return transform_type_ == other.transform_type_ && param_ == other.param_; |
73 | 192 | } |
74 | 193 |
|
75 | | -expected<std::unique_ptr<TransformFunction>, Error> TransformFunctionFromString( |
76 | | - std::string_view str) { |
77 | | - if (str == "identity") { |
78 | | - return std::make_unique<IdentityTransformFunction>(); |
/// \brief Parse the textual form of a transform.
///
/// Accepts the parameterless names ("identity", "year", "month", "day", "hour",
/// "void") and the parameterized forms "bucket[N]" and "truncate[N]", where N
/// is a run of decimal digits.
///
/// \param transform_str Text to parse.
/// \return The parsed transform, or a kInvalidArgument error when the text
/// matches none of the forms above or N does not fit in an int32.
Result<std::shared_ptr<Transform>> TransformFromString(std::string_view transform_str) {
  if (transform_str == kIdentityName) return Transform::Identity();
  if (transform_str == kYearName) return Transform::Year();
  if (transform_str == kMonthName) return Transform::Month();
  if (transform_str == kDayName) return Transform::Day();
  if (transform_str == kHourName) return Transform::Hour();
  if (transform_str == kVoidName) return Transform::Void();

  // Match bucket[16] or truncate[4]
  static const std::regex param_regex(
      std::format(R"(({}|{})\[(\d+)\])", kBucketName, kTruncateName));

  // std::cmatch works on `const char*`. string_view iterators are not
  // guaranteed to be raw pointers, so build the range from data()/size().
  const char* const first = transform_str.data();
  const char* const last = first + transform_str.size();
  std::cmatch match;
  if (std::regex_match(first, last, match, param_regex)) {
    // The regex guarantees digits only, so the sole failure mode is a value too
    // large for int32. from_chars reports it through `ec` instead of throwing,
    // which keeps this Result-returning function exception-free on bad input.
    int32_t param = 0;
    const auto [parsed_end, ec] =
        std::from_chars(match[2].first, match[2].second, param);
    if (ec != std::errc{} || parsed_end != match[2].second) {
      return unexpected<Error>({
          .kind = ErrorKind::kInvalidArgument,
          .message = std::format("Transform parameter does not fit in int32: {}",
                                 transform_str),
      });
    }

    const std::string_view type_name(match[1].first,
                                     static_cast<size_t>(match[1].length()));
    if (type_name == kBucketName) {
      return Transform::Bucket(param);
    }
    if (type_name == kTruncateName) {
      return Transform::Truncate(param);
    }
  }

  return unexpected<Error>({
      .kind = ErrorKind::kInvalidArgument,
      .message = std::format("Invalid Transform string: {}", transform_str),
  });
}
84 | 223 |
|
85 | 224 | } // namespace iceberg |
0 commit comments