Skip to content

Commit db44f75

Browse files
authored
feat: implement transform ResultType (#132)
1 parent 5bffdf6 commit db44f75

File tree

4 files changed

+272
-21
lines changed

4 files changed

+272
-21
lines changed

src/iceberg/transform.cc

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -115,34 +115,34 @@ Result<std::unique_ptr<TransformFunction>> Transform::Bind(
115115

116116
switch (transform_type_) {
117117
case TransformType::kIdentity:
118-
return std::make_unique<IdentityTransform>(source_type);
118+
return IdentityTransform::Make(source_type);
119119

120120
case TransformType::kBucket: {
121121
if (auto param = std::get_if<int32_t>(&param_)) {
122-
return std::make_unique<BucketTransform>(source_type, *param);
122+
return BucketTransform::Make(source_type, *param);
123123
}
124124
return InvalidArgument("Bucket requires int32 param, none found in transform '{}'",
125125
type_str);
126126
}
127127

128128
case TransformType::kTruncate: {
129129
if (auto param = std::get_if<int32_t>(&param_)) {
130-
return std::make_unique<TruncateTransform>(source_type, *param);
130+
return TruncateTransform::Make(source_type, *param);
131131
}
132132
return InvalidArgument(
133133
"Truncate requires int32 param, none found in transform '{}'", type_str);
134134
}
135135

136136
case TransformType::kYear:
137-
return std::make_unique<YearTransform>(source_type);
137+
return YearTransform::Make(source_type);
138138
case TransformType::kMonth:
139-
return std::make_unique<MonthTransform>(source_type);
139+
return MonthTransform::Make(source_type);
140140
case TransformType::kDay:
141-
return std::make_unique<DayTransform>(source_type);
141+
return DayTransform::Make(source_type);
142142
case TransformType::kHour:
143-
return std::make_unique<HourTransform>(source_type);
143+
return HourTransform::Make(source_type);
144144
case TransformType::kVoid:
145-
return std::make_unique<VoidTransform>(source_type);
145+
return VoidTransform::Make(source_type);
146146

147147
default:
148148
return NotSupported("Unsupported transform type: '{}'", type_str);

src/iceberg/transform_function.cc

Lines changed: 138 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,16 @@ Result<ArrowArray> IdentityTransform::Transform(const ArrowArray& input) {
3131
}
3232

3333
Result<std::shared_ptr<Type>> IdentityTransform::ResultType() const {
34-
auto src_type = source_type();
35-
if (!src_type || !src_type->is_primitive()) {
34+
return source_type();
35+
}
36+
37+
Result<std::unique_ptr<TransformFunction>> IdentityTransform::Make(
38+
std::shared_ptr<Type> const& source_type) {
39+
if (!source_type || !source_type->is_primitive()) {
3640
return NotSupported("{} is not a valid input type for identity transform",
37-
src_type ? src_type->ToString() : "null");
41+
source_type ? source_type->ToString() : "null");
3842
}
39-
return src_type;
43+
return std::make_unique<IdentityTransform>(source_type);
4044
}
4145

4246
BucketTransform::BucketTransform(std::shared_ptr<Type> const& source_type,
@@ -48,7 +52,35 @@ Result<ArrowArray> BucketTransform::Transform(const ArrowArray& input) {
4852
}
4953

5054
Result<std::shared_ptr<Type>> BucketTransform::ResultType() const {
51-
return NotImplemented("BucketTransform::result_type");
55+
return iceberg::int32();
56+
}
57+
58+
Result<std::unique_ptr<TransformFunction>> BucketTransform::Make(
59+
std::shared_ptr<Type> const& source_type, int32_t num_buckets) {
60+
if (!source_type) {
61+
return NotSupported("null is not a valid input type for bucket transform");
62+
}
63+
switch (source_type->type_id()) {
64+
case TypeId::kInt:
65+
case TypeId::kLong:
66+
case TypeId::kDecimal:
67+
case TypeId::kDate:
68+
case TypeId::kTime:
69+
case TypeId::kTimestamp:
70+
case TypeId::kTimestampTz:
71+
case TypeId::kString:
72+
case TypeId::kUuid:
73+
case TypeId::kFixed:
74+
case TypeId::kBinary:
75+
break;
76+
default:
77+
return NotSupported("{} is not a valid input type for bucket transform",
78+
source_type->ToString());
79+
}
80+
if (num_buckets <= 0) {
81+
return InvalidArgument("Number of buckets must be positive, got {}", num_buckets);
82+
}
83+
return std::make_unique<BucketTransform>(source_type, num_buckets);
5284
}
5385

5486
TruncateTransform::TruncateTransform(std::shared_ptr<Type> const& source_type,
@@ -60,7 +92,29 @@ Result<ArrowArray> TruncateTransform::Transform(const ArrowArray& input) {
6092
}
6193

6294
Result<std::shared_ptr<Type>> TruncateTransform::ResultType() const {
63-
return NotImplemented("TruncateTransform::result_type");
95+
return source_type();
96+
}
97+
98+
Result<std::unique_ptr<TransformFunction>> TruncateTransform::Make(
99+
std::shared_ptr<Type> const& source_type, int32_t width) {
100+
if (!source_type) {
101+
return NotSupported("null is not a valid input type for truncate transform");
102+
}
103+
switch (source_type->type_id()) {
104+
case TypeId::kInt:
105+
case TypeId::kLong:
106+
case TypeId::kDecimal:
107+
case TypeId::kString:
108+
case TypeId::kBinary:
109+
break;
110+
default:
111+
return NotSupported("{} is not a valid input type for truncate transform",
112+
source_type->ToString());
113+
}
114+
if (width <= 0) {
115+
return InvalidArgument("Width must be positive, got {}", width);
116+
}
117+
return std::make_unique<TruncateTransform>(source_type, width);
64118
}
65119

66120
YearTransform::YearTransform(std::shared_ptr<Type> const& source_type)
@@ -71,7 +125,24 @@ Result<ArrowArray> YearTransform::Transform(const ArrowArray& input) {
71125
}
72126

73127
Result<std::shared_ptr<Type>> YearTransform::ResultType() const {
74-
return NotImplemented("YearTransform::result_type");
128+
return iceberg::int32();
129+
}
130+
131+
Result<std::unique_ptr<TransformFunction>> YearTransform::Make(
132+
std::shared_ptr<Type> const& source_type) {
133+
if (!source_type) {
134+
return NotSupported("null is not a valid input type for year transform");
135+
}
136+
switch (source_type->type_id()) {
137+
case TypeId::kDate:
138+
case TypeId::kTimestamp:
139+
case TypeId::kTimestampTz:
140+
break;
141+
default:
142+
return NotSupported("{} is not a valid input type for year transform",
143+
source_type->ToString());
144+
}
145+
return std::make_unique<YearTransform>(source_type);
75146
}
76147

77148
MonthTransform::MonthTransform(std::shared_ptr<Type> const& source_type)
@@ -82,7 +153,24 @@ Result<ArrowArray> MonthTransform::Transform(const ArrowArray& input) {
82153
}
83154

84155
Result<std::shared_ptr<Type>> MonthTransform::ResultType() const {
85-
return NotImplemented("MonthTransform::result_type");
156+
return iceberg::int32();
157+
}
158+
159+
Result<std::unique_ptr<TransformFunction>> MonthTransform::Make(
160+
std::shared_ptr<Type> const& source_type) {
161+
if (!source_type) {
162+
return NotSupported("null is not a valid input type for month transform");
163+
}
164+
switch (source_type->type_id()) {
165+
case TypeId::kDate:
166+
case TypeId::kTimestamp:
167+
case TypeId::kTimestampTz:
168+
break;
169+
default:
170+
return NotSupported("{} is not a valid input type for month transform",
171+
source_type->ToString());
172+
}
173+
return std::make_unique<MonthTransform>(source_type);
86174
}
87175

88176
DayTransform::DayTransform(std::shared_ptr<Type> const& source_type)
@@ -92,8 +180,23 @@ Result<ArrowArray> DayTransform::Transform(const ArrowArray& input) {
92180
return NotImplemented("DayTransform::Transform");
93181
}
94182

95-
Result<std::shared_ptr<Type>> DayTransform::ResultType() const {
96-
return NotImplemented("DayTransform::result_type");
183+
Result<std::shared_ptr<Type>> DayTransform::ResultType() const { return iceberg::date(); }
184+
185+
Result<std::unique_ptr<TransformFunction>> DayTransform::Make(
186+
std::shared_ptr<Type> const& source_type) {
187+
if (!source_type) {
188+
return NotSupported("null is not a valid input type for day transform");
189+
}
190+
switch (source_type->type_id()) {
191+
case TypeId::kDate:
192+
case TypeId::kTimestamp:
193+
case TypeId::kTimestampTz:
194+
break;
195+
default:
196+
return NotSupported("{} is not a valid input type for day transform",
197+
source_type->ToString());
198+
}
199+
return std::make_unique<DayTransform>(source_type);
97200
}
98201

99202
HourTransform::HourTransform(std::shared_ptr<Type> const& source_type)
@@ -104,7 +207,23 @@ Result<ArrowArray> HourTransform::Transform(const ArrowArray& input) {
104207
}
105208

106209
Result<std::shared_ptr<Type>> HourTransform::ResultType() const {
107-
return NotImplemented("HourTransform::result_type");
210+
return iceberg::int32();
211+
}
212+
213+
Result<std::unique_ptr<TransformFunction>> HourTransform::Make(
214+
std::shared_ptr<Type> const& source_type) {
215+
if (!source_type) {
216+
return NotSupported("null is not a valid input type for hour transform");
217+
}
218+
switch (source_type->type_id()) {
219+
case TypeId::kTimestamp:
220+
case TypeId::kTimestampTz:
221+
break;
222+
default:
223+
return NotSupported("{} is not a valid input type for hour transform",
224+
source_type->ToString());
225+
}
226+
return std::make_unique<HourTransform>(source_type);
108227
}
109228

110229
VoidTransform::VoidTransform(std::shared_ptr<Type> const& source_type)
@@ -114,8 +233,14 @@ Result<ArrowArray> VoidTransform::Transform(const ArrowArray& input) {
114233
return NotImplemented("VoidTransform::Transform");
115234
}
116235

117-
Result<std::shared_ptr<Type>> VoidTransform::ResultType() const {
118-
return NotImplemented("VoidTransform::result_type");
236+
Result<std::shared_ptr<Type>> VoidTransform::ResultType() const { return source_type(); }
237+
238+
Result<std::unique_ptr<TransformFunction>> VoidTransform::Make(
239+
std::shared_ptr<Type> const& source_type) {
240+
if (!source_type) {
241+
return NotSupported("null is not a valid input type for void transform");
242+
}
243+
return std::make_unique<VoidTransform>(source_type);
119244
}
120245

121246
} // namespace iceberg

src/iceberg/transform_function.h

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,12 @@ class IdentityTransform : public TransformFunction {
3535

3636
/// \brief Returns the same type as the source type.
3737
Result<std::shared_ptr<Type>> ResultType() const override;
38+
39+
/// \brief Create an IdentityTransform.
40+
/// \param source_type Type of the input data.
41+
/// \return A Result containing the IdentityTransform or an error.
42+
static Result<std::unique_ptr<TransformFunction>> Make(
43+
std::shared_ptr<Type> const& source_type);
3844
};
3945

4046
/// \brief Bucket transform that hashes input values into N buckets.
@@ -50,6 +56,13 @@ class BucketTransform : public TransformFunction {
5056
/// \brief Returns INT32 as the output type.
5157
Result<std::shared_ptr<Type>> ResultType() const override;
5258

59+
/// \brief Create a BucketTransform.
60+
/// \param source_type Type of the input data.
61+
/// \param num_buckets Number of buckets to hash into.
62+
/// \return A Result containing the BucketTransform or an error.
63+
static Result<std::unique_ptr<TransformFunction>> Make(
64+
std::shared_ptr<Type> const& source_type, int32_t num_buckets);
65+
5366
private:
5467
int32_t num_buckets_;
5568
};
@@ -67,6 +80,13 @@ class TruncateTransform : public TransformFunction {
6780
/// \brief Returns the same type as source_type.
6881
Result<std::shared_ptr<Type>> ResultType() const override;
6982

83+
/// \brief Create a TruncateTransform.
84+
/// \param source_type Type of the input data.
85+
/// \param width The width to truncate to.
86+
/// \return A Result containing the TruncateTransform or an error.
87+
static Result<std::unique_ptr<TransformFunction>> Make(
88+
std::shared_ptr<Type> const& source_type, int32_t width);
89+
7090
private:
7191
int32_t width_;
7292
};
@@ -82,6 +102,12 @@ class YearTransform : public TransformFunction {
82102

83103
/// \brief Returns INT32 as the output type.
84104
Result<std::shared_ptr<Type>> ResultType() const override;
105+
106+
/// \brief Create a YearTransform.
107+
/// \param source_type Type of the input data.
108+
/// \return A Result containing the YearTransform or an error.
109+
static Result<std::unique_ptr<TransformFunction>> Make(
110+
std::shared_ptr<Type> const& source_type);
85111
};
86112

87113
/// \brief Month transform that extracts the month component from date and timestamp inputs.
@@ -95,6 +121,12 @@ class MonthTransform : public TransformFunction {
95121

96122
/// \brief Returns INT32 as the output type.
97123
Result<std::shared_ptr<Type>> ResultType() const override;
124+
125+
/// \brief Create a MonthTransform.
126+
/// \param source_type Type of the input data.
127+
/// \return A Result containing the MonthTransform or an error.
128+
static Result<std::unique_ptr<TransformFunction>> Make(
129+
std::shared_ptr<Type> const& source_type);
98130
};
99131

100132
/// \brief Day transform that extracts the day (as a date) from date and timestamp inputs.
@@ -108,6 +140,12 @@ class DayTransform : public TransformFunction {
108140

109141
/// \brief Returns INT32 as the output type.
110142
Result<std::shared_ptr<Type>> ResultType() const override;
143+
144+
/// \brief Create a DayTransform.
145+
/// \param source_type Type of the input data.
146+
/// \return A Result containing the DayTransform or an error.
147+
static Result<std::unique_ptr<TransformFunction>> Make(
148+
std::shared_ptr<Type> const& source_type);
111149
};
112150

113151
/// \brief Hour transform that extracts the hour component from timestamp inputs.
@@ -121,6 +159,12 @@ class HourTransform : public TransformFunction {
121159

122160
/// \brief Returns INT32 as the output type.
123161
Result<std::shared_ptr<Type>> ResultType() const override;
162+
163+
/// \brief Create a HourTransform.
164+
/// \param source_type Type of the input data.
165+
/// \return A Result containing the HourTransform or an error.
166+
static Result<std::unique_ptr<TransformFunction>> Make(
167+
std::shared_ptr<Type> const& source_type);
124168
};
125169

126170
/// \brief Void transform that discards the input and always returns null.
@@ -134,6 +178,12 @@ class VoidTransform : public TransformFunction {
134178

135179
/// \brief Returns the same type as the source type.
136180
Result<std::shared_ptr<Type>> ResultType() const override;
181+
182+
/// \brief Create a VoidTransform.
183+
/// \param source_type Input type (ignored).
184+
/// \return A Result containing the VoidTransform or an error.
185+
static Result<std::unique_ptr<TransformFunction>> Make(
186+
std::shared_ptr<Type> const& source_type);
137187
};
138188

139189
} // namespace iceberg

0 commit comments

Comments
 (0)