Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions src/iceberg/transform.cc
Original file line number Diff line number Diff line change
Expand Up @@ -115,34 +115,34 @@ Result<std::unique_ptr<TransformFunction>> Transform::Bind(

switch (transform_type_) {
case TransformType::kIdentity:
return std::make_unique<IdentityTransform>(source_type);
return IdentityTransform::Make(source_type);

case TransformType::kBucket: {
if (auto param = std::get_if<int32_t>(&param_)) {
return std::make_unique<BucketTransform>(source_type, *param);
return BucketTransform::Make(source_type, *param);
}
return InvalidArgument("Bucket requires int32 param, none found in transform '{}'",
type_str);
}

case TransformType::kTruncate: {
if (auto param = std::get_if<int32_t>(&param_)) {
return std::make_unique<TruncateTransform>(source_type, *param);
return TruncateTransform::Make(source_type, *param);
}
return InvalidArgument(
"Truncate requires int32 param, none found in transform '{}'", type_str);
}

case TransformType::kYear:
return std::make_unique<YearTransform>(source_type);
return YearTransform::Make(source_type);
case TransformType::kMonth:
return std::make_unique<MonthTransform>(source_type);
return MonthTransform::Make(source_type);
case TransformType::kDay:
return std::make_unique<DayTransform>(source_type);
return DayTransform::Make(source_type);
case TransformType::kHour:
return std::make_unique<HourTransform>(source_type);
return HourTransform::Make(source_type);
case TransformType::kVoid:
return std::make_unique<VoidTransform>(source_type);
return VoidTransform::Make(source_type);

default:
return NotSupported("Unsupported transform type: '{}'", type_str);
Expand Down
145 changes: 133 additions & 12 deletions src/iceberg/transform_function.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,16 @@ Result<ArrowArray> IdentityTransform::Transform(const ArrowArray& input) {
}

Result<std::shared_ptr<Type>> IdentityTransform::ResultType() const {
auto src_type = source_type();
if (!src_type || !src_type->is_primitive()) {
return source_type();
}

Result<std::unique_ptr<TransformFunction>> IdentityTransform::Make(
std::shared_ptr<Type> const& source_type) {
if (!source_type || !source_type->is_primitive()) {
return NotSupported("{} is not a valid input type for identity transform",
src_type ? src_type->ToString() : "null");
source_type ? source_type->ToString() : "null");
}
return src_type;
return std::make_unique<IdentityTransform>(source_type);
}

BucketTransform::BucketTransform(std::shared_ptr<Type> const& source_type,
Expand All @@ -48,7 +52,32 @@ Result<ArrowArray> BucketTransform::Transform(const ArrowArray& input) {
}

Result<std::shared_ptr<Type>> BucketTransform::ResultType() const {
return NotImplemented("BucketTransform::result_type");
auto src_type = source_type();
switch (src_type->type_id()) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we check this in the Make function and blindly return int32() here? We can add static bool BucketTransform::Accepts(const std::shared_ptr<Type>& source_type). I'm open to discuss this but not a blocker for this PR.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True, changed as suggested. I did not add Accepts though.

case TypeId::kInt:
case TypeId::kLong:
case TypeId::kDecimal:
case TypeId::kDate:
case TypeId::kTime:
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
case TypeId::kString:
case TypeId::kUuid:
case TypeId::kFixed:
case TypeId::kBinary:
return iceberg::int32();
default:
return NotSupported("{} is not a valid input type for bucket transform",
src_type->ToString());
}
}

/// \brief Create a BucketTransform bound to the given source type.
///
/// Rejects a null source type and a non-positive bucket count up front, so a
/// transform that could never produce a bucket is not handed back to the caller.
/// Whether the source type is one bucketing accepts is not checked here.
///
/// \param source_type Type of the input data; must not be null.
/// \param num_buckets Number of buckets to hash into; must be greater than zero.
/// \return The new BucketTransform, or a NotSupported error if `source_type` is
///         null or `num_buckets` is not positive.
Result<std::unique_ptr<TransformFunction>> BucketTransform::Make(
    std::shared_ptr<Type> const& source_type, int32_t num_buckets) {
  if (!source_type) {
    return NotSupported("null is not a valid input type for bucket transform");
  }
  // Zero or negative bucket counts leave nothing to hash into.
  if (num_buckets <= 0) {
    return NotSupported("{} is not a valid number of buckets for bucket transform",
                        num_buckets);
  }
  return std::make_unique<BucketTransform>(source_type, num_buckets);
}

TruncateTransform::TruncateTransform(std::shared_ptr<Type> const& source_type,
Expand All @@ -60,7 +89,26 @@ Result<ArrowArray> TruncateTransform::Transform(const ArrowArray& input) {
}

Result<std::shared_ptr<Type>> TruncateTransform::ResultType() const {
return NotImplemented("TruncateTransform::result_type");
auto src_type = source_type();
switch (src_type->type_id()) {
case TypeId::kInt:
case TypeId::kLong:
case TypeId::kDecimal:
case TypeId::kString:
case TypeId::kBinary:
return src_type;
default:
return NotSupported("{} is not a valid input type for truncate transform",
src_type->ToString());
}
}

/// \brief Create a TruncateTransform bound to the given source type.
///
/// Rejects a null source type and a non-positive width up front, so a transform
/// with a meaningless truncation width is not handed back to the caller.
/// Whether the source type is one truncation accepts is not checked here.
///
/// \param source_type Type of the input data; must not be null.
/// \param width The width to truncate to; must be greater than zero.
/// \return The new TruncateTransform, or a NotSupported error if `source_type`
///         is null or `width` is not positive.
Result<std::unique_ptr<TransformFunction>> TruncateTransform::Make(
    std::shared_ptr<Type> const& source_type, int32_t width) {
  if (!source_type) {
    return NotSupported("null is not a valid input type for truncate transform");
  }
  // A width of zero or less cannot describe a truncation.
  if (width <= 0) {
    return NotSupported("{} is not a valid width for truncate transform", width);
  }
  return std::make_unique<TruncateTransform>(source_type, width);
}

YearTransform::YearTransform(std::shared_ptr<Type> const& source_type)
Expand All @@ -71,7 +119,24 @@ Result<ArrowArray> YearTransform::Transform(const ArrowArray& input) {
}

Result<std::shared_ptr<Type>> YearTransform::ResultType() const {
return NotImplemented("YearTransform::result_type");
auto src_type = source_type();
switch (src_type->type_id()) {
case TypeId::kDate:
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
return iceberg::int32();
default:
return NotSupported("{} is not a valid input type for year transform",
src_type->ToString());
}
}

/// \brief Factory for YearTransform.
///
/// Only the null check is performed here; the kind of the source type is not
/// inspected by this function.
///
/// \param source_type Type of the input data; a null pointer is rejected.
/// \return A YearTransform wrapped in a Result, or a NotSupported error when
///         `source_type` is null.
Result<std::unique_ptr<TransformFunction>> YearTransform::Make(
    std::shared_ptr<Type> const& source_type) {
  if (source_type != nullptr) {
    return std::make_unique<YearTransform>(source_type);
  }
  return NotSupported("null is not a valid input type for year transform");
}

MonthTransform::MonthTransform(std::shared_ptr<Type> const& source_type)
Expand All @@ -82,7 +147,24 @@ Result<ArrowArray> MonthTransform::Transform(const ArrowArray& input) {
}

Result<std::shared_ptr<Type>> MonthTransform::ResultType() const {
return NotImplemented("MonthTransform::result_type");
auto src_type = source_type();
switch (src_type->type_id()) {
case TypeId::kDate:
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
return iceberg::int32();
default:
return NotSupported("{} is not a valid input type for month transform",
src_type->ToString());
}
}

/// \brief Factory for MonthTransform.
///
/// Only the null check is performed here; the kind of the source type is not
/// inspected by this function.
///
/// \param source_type Type of the input data; a null pointer is rejected.
/// \return A MonthTransform wrapped in a Result, or a NotSupported error when
///         `source_type` is null.
Result<std::unique_ptr<TransformFunction>> MonthTransform::Make(
    std::shared_ptr<Type> const& source_type) {
  if (source_type != nullptr) {
    return std::make_unique<MonthTransform>(source_type);
  }
  return NotSupported("null is not a valid input type for month transform");
}

DayTransform::DayTransform(std::shared_ptr<Type> const& source_type)
Expand All @@ -93,7 +175,24 @@ Result<ArrowArray> DayTransform::Transform(const ArrowArray& input) {
}

Result<std::shared_ptr<Type>> DayTransform::ResultType() const {
return NotImplemented("DayTransform::result_type");
auto src_type = source_type();
switch (src_type->type_id()) {
case TypeId::kDate:
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
return iceberg::date();
default:
return NotSupported("{} is not a valid input type for day transform",
src_type->ToString());
}
}

/// \brief Factory for DayTransform.
///
/// Only the null check is performed here; the kind of the source type is not
/// inspected by this function.
///
/// \param source_type Type of the input data; a null pointer is rejected.
/// \return A DayTransform wrapped in a Result, or a NotSupported error when
///         `source_type` is null.
Result<std::unique_ptr<TransformFunction>> DayTransform::Make(
    std::shared_ptr<Type> const& source_type) {
  if (source_type != nullptr) {
    return std::make_unique<DayTransform>(source_type);
  }
  return NotSupported("null is not a valid input type for day transform");
}

HourTransform::HourTransform(std::shared_ptr<Type> const& source_type)
Expand All @@ -104,7 +203,23 @@ Result<ArrowArray> HourTransform::Transform(const ArrowArray& input) {
}

Result<std::shared_ptr<Type>> HourTransform::ResultType() const {
return NotImplemented("HourTransform::result_type");
auto src_type = source_type();
switch (src_type->type_id()) {
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
return iceberg::int32();
default:
return NotSupported("{} is not a valid input type for hour transform",
src_type->ToString());
}
}

/// \brief Factory for HourTransform.
///
/// Only the null check is performed here; the kind of the source type is not
/// inspected by this function.
///
/// \param source_type Type of the input data; a null pointer is rejected.
/// \return An HourTransform wrapped in a Result, or a NotSupported error when
///         `source_type` is null.
Result<std::unique_ptr<TransformFunction>> HourTransform::Make(
    std::shared_ptr<Type> const& source_type) {
  if (source_type != nullptr) {
    return std::make_unique<HourTransform>(source_type);
  }
  return NotSupported("null is not a valid input type for hour transform");
}

VoidTransform::VoidTransform(std::shared_ptr<Type> const& source_type)
Expand All @@ -114,8 +229,14 @@ Result<ArrowArray> VoidTransform::Transform(const ArrowArray& input) {
return NotImplemented("VoidTransform::Transform");
}

Result<std::shared_ptr<Type>> VoidTransform::ResultType() const {
return NotImplemented("VoidTransform::result_type");
/// \brief Result type of the void transform: the source type, passed through
/// unchanged.
Result<std::shared_ptr<Type>> VoidTransform::ResultType() const {
  return source_type();
}

/// \brief Factory for VoidTransform.
///
/// \param source_type Type of the input data; a null pointer is rejected.
/// \return A VoidTransform wrapped in a Result, or a NotSupported error when
///         `source_type` is null.
Result<std::unique_ptr<TransformFunction>> VoidTransform::Make(
    std::shared_ptr<Type> const& source_type) {
  if (source_type != nullptr) {
    return std::make_unique<VoidTransform>(source_type);
  }
  return NotSupported("null is not a valid input type for void transform");
}

} // namespace iceberg
50 changes: 50 additions & 0 deletions src/iceberg/transform_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ class IdentityTransform : public TransformFunction {

/// \brief Returns the same type as the source type if it is valid.
Result<std::shared_ptr<Type>> ResultType() const override;

/// \brief Create an IdentityTransform.
/// \param source_type Type of the input data; must be a non-null primitive type.
/// \return A Result containing the IdentityTransform, or a NotSupported error if
/// \p source_type is null or not a primitive type.
static Result<std::unique_ptr<TransformFunction>> Make(
std::shared_ptr<Type> const& source_type);
};

/// \brief Bucket transform that hashes input values into N buckets.
Expand All @@ -50,6 +56,13 @@ class BucketTransform : public TransformFunction {
/// \brief Returns INT32 as the output type.
Result<std::shared_ptr<Type>> ResultType() const override;

/// \brief Create a BucketTransform.
/// \param source_type Type of the input data; must not be null.
/// \param num_buckets Number of buckets to hash into.
/// \return A Result containing the BucketTransform, or a NotSupported error if
/// \p source_type is null.
static Result<std::unique_ptr<TransformFunction>> Make(
std::shared_ptr<Type> const& source_type, int32_t num_buckets);

private:
int32_t num_buckets_;
};
Expand All @@ -67,6 +80,13 @@ class TruncateTransform : public TransformFunction {
/// \brief Returns the same type as source_type.
Result<std::shared_ptr<Type>> ResultType() const override;

/// \brief Create a TruncateTransform.
/// \param source_type Type of the input data; must not be null.
/// \param width The width to truncate to.
/// \return A Result containing the TruncateTransform, or a NotSupported error if
/// \p source_type is null.
static Result<std::unique_ptr<TransformFunction>> Make(
std::shared_ptr<Type> const& source_type, int32_t width);

private:
int32_t width_;
};
Expand All @@ -82,6 +102,12 @@ class YearTransform : public TransformFunction {

/// \brief Returns INT32 as the output type.
Result<std::shared_ptr<Type>> ResultType() const override;

/// \brief Create a YearTransform.
/// \param source_type Type of the input data; must not be null.
/// \return A Result containing the YearTransform, or a NotSupported error if
/// \p source_type is null.
static Result<std::unique_ptr<TransformFunction>> Make(
std::shared_ptr<Type> const& source_type);
};

/// \brief Month transform that extracts the month component from timestamp inputs.
Expand All @@ -95,6 +121,12 @@ class MonthTransform : public TransformFunction {

/// \brief Returns INT32 as the output type.
Result<std::shared_ptr<Type>> ResultType() const override;

/// \brief Create a MonthTransform.
/// \param source_type Type of the input data; must not be null.
/// \return A Result containing the MonthTransform, or a NotSupported error if
/// \p source_type is null.
static Result<std::unique_ptr<TransformFunction>> Make(
std::shared_ptr<Type> const& source_type);
};

/// \brief Day transform that extracts the day of the month from timestamp inputs.
Expand All @@ -108,6 +140,12 @@ class DayTransform : public TransformFunction {

/// \brief Returns DATE as the output type.
Result<std::shared_ptr<Type>> ResultType() const override;

/// \brief Create a DayTransform.
/// \param source_type Type of the input data; must not be null.
/// \return A Result containing the DayTransform, or a NotSupported error if
/// \p source_type is null.
static Result<std::unique_ptr<TransformFunction>> Make(
std::shared_ptr<Type> const& source_type);
};

/// \brief Hour transform that extracts the hour component from timestamp inputs.
Expand All @@ -121,6 +159,12 @@ class HourTransform : public TransformFunction {

/// \brief Returns INT32 as the output type.
Result<std::shared_ptr<Type>> ResultType() const override;

/// \brief Create an HourTransform.
/// \param source_type Type of the input data; must not be null.
/// \return A Result containing the HourTransform, or a NotSupported error if
/// \p source_type is null.
static Result<std::unique_ptr<TransformFunction>> Make(
std::shared_ptr<Type> const& source_type);
};

/// \brief Void transform that discards the input and always returns null.
Expand All @@ -134,6 +178,12 @@ class VoidTransform : public TransformFunction {

/// \brief Returns the source type unchanged.
Result<std::shared_ptr<Type>> ResultType() const override;

/// \brief Create a VoidTransform.
/// \param source_type Type of the input data; must not be null. It is also the
/// type reported by ResultType().
/// \return A Result containing the VoidTransform, or a NotSupported error if
/// \p source_type is null.
static Result<std::unique_ptr<TransformFunction>> Make(
std::shared_ptr<Type> const& source_type);
};

} // namespace iceberg
Loading
Loading