Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions src/iceberg/transform.cc
Original file line number Diff line number Diff line change
Expand Up @@ -115,34 +115,34 @@ Result<std::unique_ptr<TransformFunction>> Transform::Bind(

switch (transform_type_) {
case TransformType::kIdentity:
return std::make_unique<IdentityTransform>(source_type);
return IdentityTransform::Make(source_type);

case TransformType::kBucket: {
if (auto param = std::get_if<int32_t>(&param_)) {
return std::make_unique<BucketTransform>(source_type, *param);
return BucketTransform::Make(source_type, *param);
}
return InvalidArgument("Bucket requires int32 param, none found in transform '{}'",
type_str);
}

case TransformType::kTruncate: {
if (auto param = std::get_if<int32_t>(&param_)) {
return std::make_unique<TruncateTransform>(source_type, *param);
return TruncateTransform::Make(source_type, *param);
}
return InvalidArgument(
"Truncate requires int32 param, none found in transform '{}'", type_str);
}

case TransformType::kYear:
return std::make_unique<YearTransform>(source_type);
return YearTransform::Make(source_type);
case TransformType::kMonth:
return std::make_unique<MonthTransform>(source_type);
return MonthTransform::Make(source_type);
case TransformType::kDay:
return std::make_unique<DayTransform>(source_type);
return DayTransform::Make(source_type);
case TransformType::kHour:
return std::make_unique<HourTransform>(source_type);
return HourTransform::Make(source_type);
case TransformType::kVoid:
return std::make_unique<VoidTransform>(source_type);
return VoidTransform::Make(source_type);

default:
return NotSupported("Unsupported transform type: '{}'", type_str);
Expand Down
151 changes: 138 additions & 13 deletions src/iceberg/transform_function.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,16 @@ Result<ArrowArray> IdentityTransform::Transform(const ArrowArray& input) {
}

Result<std::shared_ptr<Type>> IdentityTransform::ResultType() const {
auto src_type = source_type();
if (!src_type || !src_type->is_primitive()) {
return source_type();
}

Result<std::unique_ptr<TransformFunction>> IdentityTransform::Make(
std::shared_ptr<Type> const& source_type) {
if (!source_type || !source_type->is_primitive()) {
return NotSupported("{} is not a valid input type for identity transform",
src_type ? src_type->ToString() : "null");
source_type ? source_type->ToString() : "null");
}
return src_type;
return std::make_unique<IdentityTransform>(source_type);
}

BucketTransform::BucketTransform(std::shared_ptr<Type> const& source_type,
Expand All @@ -48,7 +52,35 @@ Result<ArrowArray> BucketTransform::Transform(const ArrowArray& input) {
}

Result<std::shared_ptr<Type>> BucketTransform::ResultType() const {
return NotImplemented("BucketTransform::result_type");
return iceberg::int32();
}

/// \brief Validating factory for BucketTransform.
///
/// Checks run in this order: the source type must be non-null, its type id must
/// be one of the ids enumerated in the switch below, and num_buckets must be
/// positive.
/// \param source_type Type of the input data.
/// \param num_buckets Number of buckets to hash into.
/// \return NotSupported for a null or unlisted source type, InvalidArgument for
/// a non-positive bucket count, otherwise the newly created transform.
Result<std::unique_ptr<TransformFunction>> BucketTransform::Make(
    std::shared_ptr<Type> const& source_type, int32_t num_buckets) {
  if (source_type == nullptr) {
    return NotSupported("null is not a valid input type for bucket transform");
  }

  // Decide whether the source type id is on the accepted list.
  bool accepted = false;
  switch (source_type->type_id()) {
    case TypeId::kInt:
    case TypeId::kLong:
    case TypeId::kDecimal:
    case TypeId::kDate:
    case TypeId::kTime:
    case TypeId::kTimestamp:
    case TypeId::kTimestampTz:
    case TypeId::kString:
    case TypeId::kUuid:
    case TypeId::kFixed:
    case TypeId::kBinary:
      accepted = true;
      break;
    default:
      break;
  }
  if (!accepted) {
    return NotSupported("{} is not a valid input type for bucket transform",
                        source_type->ToString());
  }

  // The type check deliberately precedes the parameter check.
  if (num_buckets < 1) {
    return InvalidArgument("Number of buckets must be positive, got {}", num_buckets);
  }
  return std::make_unique<BucketTransform>(source_type, num_buckets);
}

TruncateTransform::TruncateTransform(std::shared_ptr<Type> const& source_type,
Expand All @@ -60,7 +92,29 @@ Result<ArrowArray> TruncateTransform::Transform(const ArrowArray& input) {
}

Result<std::shared_ptr<Type>> TruncateTransform::ResultType() const {
return NotImplemented("TruncateTransform::result_type");
return source_type();
}

/// \brief Validating factory for TruncateTransform.
///
/// Checks run in this order: the source type must be non-null, its type id must
/// be one of the ids enumerated in the switch below, and width must be positive.
/// \param source_type Type of the input data.
/// \param width The width to truncate to.
/// \return NotSupported for a null or unlisted source type, InvalidArgument for
/// a non-positive width, otherwise the newly created transform.
Result<std::unique_ptr<TransformFunction>> TruncateTransform::Make(
    std::shared_ptr<Type> const& source_type, int32_t width) {
  if (source_type == nullptr) {
    return NotSupported("null is not a valid input type for truncate transform");
  }

  // Decide whether the source type id is on the accepted list.
  bool accepted = false;
  switch (source_type->type_id()) {
    case TypeId::kInt:
    case TypeId::kLong:
    case TypeId::kDecimal:
    case TypeId::kString:
    case TypeId::kBinary:
      accepted = true;
      break;
    default:
      break;
  }
  if (!accepted) {
    return NotSupported("{} is not a valid input type for truncate transform",
                        source_type->ToString());
  }

  // The type check deliberately precedes the parameter check.
  if (width < 1) {
    return InvalidArgument("Width must be positive, got {}", width);
  }
  return std::make_unique<TruncateTransform>(source_type, width);
}

YearTransform::YearTransform(std::shared_ptr<Type> const& source_type)
Expand All @@ -71,7 +125,24 @@ Result<ArrowArray> YearTransform::Transform(const ArrowArray& input) {
}

Result<std::shared_ptr<Type>> YearTransform::ResultType() const {
return NotImplemented("YearTransform::result_type");
return iceberg::int32();
}

/// \brief Validating factory for YearTransform.
///
/// Accepts only source types whose id is kDate, kTimestamp or kTimestampTz.
/// \param source_type Type of the input data.
/// \return NotSupported for a null or unaccepted source type, otherwise the
/// newly created transform.
Result<std::unique_ptr<TransformFunction>> YearTransform::Make(
    std::shared_ptr<Type> const& source_type) {
  if (source_type == nullptr) {
    return NotSupported("null is not a valid input type for year transform");
  }

  const auto id = source_type->type_id();
  const bool accepted = id == TypeId::kDate || id == TypeId::kTimestamp ||
                        id == TypeId::kTimestampTz;
  if (!accepted) {
    return NotSupported("{} is not a valid input type for year transform",
                        source_type->ToString());
  }
  return std::make_unique<YearTransform>(source_type);
}

MonthTransform::MonthTransform(std::shared_ptr<Type> const& source_type)
Expand All @@ -82,7 +153,24 @@ Result<ArrowArray> MonthTransform::Transform(const ArrowArray& input) {
}

Result<std::shared_ptr<Type>> MonthTransform::ResultType() const {
return NotImplemented("MonthTransform::result_type");
return iceberg::int32();
}

/// \brief Validating factory for MonthTransform.
///
/// Accepts only source types whose id is kDate, kTimestamp or kTimestampTz.
/// \param source_type Type of the input data.
/// \return NotSupported for a null or unaccepted source type, otherwise the
/// newly created transform.
Result<std::unique_ptr<TransformFunction>> MonthTransform::Make(
    std::shared_ptr<Type> const& source_type) {
  if (source_type == nullptr) {
    return NotSupported("null is not a valid input type for month transform");
  }

  const auto id = source_type->type_id();
  const bool accepted = id == TypeId::kDate || id == TypeId::kTimestamp ||
                        id == TypeId::kTimestampTz;
  if (!accepted) {
    return NotSupported("{} is not a valid input type for month transform",
                        source_type->ToString());
  }
  return std::make_unique<MonthTransform>(source_type);
}

DayTransform::DayTransform(std::shared_ptr<Type> const& source_type)
Expand All @@ -92,8 +180,23 @@ Result<ArrowArray> DayTransform::Transform(const ArrowArray& input) {
return NotImplemented("DayTransform::Transform");
}

Result<std::shared_ptr<Type>> DayTransform::ResultType() const {
return NotImplemented("DayTransform::result_type");
/// \brief Reports the output type of the day transform, which is the date type.
Result<std::shared_ptr<Type>> DayTransform::ResultType() const {
  return iceberg::date();
}

/// \brief Validating factory for DayTransform.
///
/// Accepts only source types whose id is kDate, kTimestamp or kTimestampTz.
/// \param source_type Type of the input data.
/// \return NotSupported for a null or unaccepted source type, otherwise the
/// newly created transform.
Result<std::unique_ptr<TransformFunction>> DayTransform::Make(
    std::shared_ptr<Type> const& source_type) {
  if (source_type == nullptr) {
    return NotSupported("null is not a valid input type for day transform");
  }

  const auto id = source_type->type_id();
  const bool accepted = id == TypeId::kDate || id == TypeId::kTimestamp ||
                        id == TypeId::kTimestampTz;
  if (!accepted) {
    return NotSupported("{} is not a valid input type for day transform",
                        source_type->ToString());
  }
  return std::make_unique<DayTransform>(source_type);
}

HourTransform::HourTransform(std::shared_ptr<Type> const& source_type)
Expand All @@ -104,7 +207,23 @@ Result<ArrowArray> HourTransform::Transform(const ArrowArray& input) {
}

Result<std::shared_ptr<Type>> HourTransform::ResultType() const {
return NotImplemented("HourTransform::result_type");
return iceberg::int32();
}

/// \brief Validating factory for HourTransform.
///
/// Accepts only source types whose id is kTimestamp or kTimestampTz.
/// \param source_type Type of the input data.
/// \return NotSupported for a null or unaccepted source type, otherwise the
/// newly created transform.
Result<std::unique_ptr<TransformFunction>> HourTransform::Make(
    std::shared_ptr<Type> const& source_type) {
  if (source_type == nullptr) {
    return NotSupported("null is not a valid input type for hour transform");
  }

  const auto id = source_type->type_id();
  const bool accepted = id == TypeId::kTimestamp || id == TypeId::kTimestampTz;
  if (!accepted) {
    return NotSupported("{} is not a valid input type for hour transform",
                        source_type->ToString());
  }
  return std::make_unique<HourTransform>(source_type);
}

VoidTransform::VoidTransform(std::shared_ptr<Type> const& source_type)
Expand All @@ -114,8 +233,14 @@ Result<ArrowArray> VoidTransform::Transform(const ArrowArray& input) {
return NotImplemented("VoidTransform::Transform");
}

Result<std::shared_ptr<Type>> VoidTransform::ResultType() const {
return NotImplemented("VoidTransform::result_type");
/// \brief Reports the output type of the void transform, which is the source type.
Result<std::shared_ptr<Type>> VoidTransform::ResultType() const {
  return source_type();
}

/// \brief Validating factory for VoidTransform.
///
/// Any non-null source type is accepted; no type-id filtering is applied.
/// \param source_type Type of the input data; must not be null.
/// \return NotSupported when source_type is null, otherwise the newly created
/// transform.
Result<std::unique_ptr<TransformFunction>> VoidTransform::Make(
    std::shared_ptr<Type> const& source_type) {
  if (source_type != nullptr) {
    return std::make_unique<VoidTransform>(source_type);
  }
  return NotSupported("null is not a valid input type for void transform");
}

} // namespace iceberg
50 changes: 50 additions & 0 deletions src/iceberg/transform_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ class IdentityTransform : public TransformFunction {

/// \brief Returns the source type; validity of the type is checked by Make().
Result<std::shared_ptr<Type>> ResultType() const override;

/// \brief Create an IdentityTransform.
/// \param source_type Type of the input data.
/// \return A Result containing the IdentityTransform or an error.
static Result<std::unique_ptr<TransformFunction>> Make(
std::shared_ptr<Type> const& source_type);
};

/// \brief Bucket transform that hashes input values into N buckets.
Expand All @@ -50,6 +56,13 @@ class BucketTransform : public TransformFunction {
/// \brief Returns INT32 as the output type.
Result<std::shared_ptr<Type>> ResultType() const override;

/// \brief Create a BucketTransform.
/// \param source_type Type of the input data.
/// \param num_buckets Number of buckets to hash into.
/// \return A Result containing the BucketTransform or an error.
static Result<std::unique_ptr<TransformFunction>> Make(
std::shared_ptr<Type> const& source_type, int32_t num_buckets);

private:
int32_t num_buckets_;
};
Expand All @@ -67,6 +80,13 @@ class TruncateTransform : public TransformFunction {
/// \brief Returns the same type as source_type.
Result<std::shared_ptr<Type>> ResultType() const override;

/// \brief Create a TruncateTransform.
/// \param source_type Type of the input data.
/// \param width The width to truncate to.
/// \return A Result containing the TruncateTransform or an error.
static Result<std::unique_ptr<TransformFunction>> Make(
std::shared_ptr<Type> const& source_type, int32_t width);

private:
int32_t width_;
};
Expand All @@ -82,6 +102,12 @@ class YearTransform : public TransformFunction {

/// \brief Returns INT32 as the output type.
Result<std::shared_ptr<Type>> ResultType() const override;

/// \brief Create a YearTransform.
/// \param source_type Type of the input data.
/// \return A Result containing the YearTransform or an error.
static Result<std::unique_ptr<TransformFunction>> Make(
std::shared_ptr<Type> const& source_type);
};

/// \brief Month transform that extracts the month component from timestamp inputs.
Expand All @@ -95,6 +121,12 @@ class MonthTransform : public TransformFunction {

/// \brief Returns INT32 as the output type.
Result<std::shared_ptr<Type>> ResultType() const override;

/// \brief Create a MonthTransform.
/// \param source_type Type of the input data.
/// \return A Result containing the MonthTransform or an error.
static Result<std::unique_ptr<TransformFunction>> Make(
std::shared_ptr<Type> const& source_type);
};

/// \brief Day transform that extracts the day of the month from timestamp inputs.
Expand All @@ -108,6 +140,12 @@ class DayTransform : public TransformFunction {

/// \brief Returns DATE as the output type.
Result<std::shared_ptr<Type>> ResultType() const override;

/// \brief Create a DayTransform.
/// \param source_type Type of the input data.
/// \return A Result containing the DayTransform or an error.
static Result<std::unique_ptr<TransformFunction>> Make(
std::shared_ptr<Type> const& source_type);
};

/// \brief Hour transform that extracts the hour component from timestamp inputs.
Expand All @@ -121,6 +159,12 @@ class HourTransform : public TransformFunction {

/// \brief Returns INT32 as the output type.
Result<std::shared_ptr<Type>> ResultType() const override;

/// \brief Create an HourTransform.
/// \param source_type Type of the input data.
/// \return A Result containing the HourTransform or an error.
static Result<std::unique_ptr<TransformFunction>> Make(
std::shared_ptr<Type> const& source_type);
};

/// \brief Void transform that discards the input and always returns null.
Expand All @@ -134,6 +178,12 @@ class VoidTransform : public TransformFunction {

/// \brief Returns the same type as source_type.
Result<std::shared_ptr<Type>> ResultType() const override;

/// \brief Create a VoidTransform.
/// \param source_type Type of the input data; must not be null.
/// \return A Result containing the VoidTransform or an error.
static Result<std::unique_ptr<TransformFunction>> Make(
std::shared_ptr<Type> const& source_type);
};

} // namespace iceberg
Loading
Loading