Skip to content

Commit 61b83ea

Browse files
authored
feat: add transform project (#371)
Add Transform::Project for inclusive predicate projection, which transforms a BoundPredicate to an inclusive predicate on partition values.
1 parent 4133aa2 commit 61b83ea

File tree

8 files changed

+1423
-1
lines changed

8 files changed

+1423
-1
lines changed

src/iceberg/expression/predicate.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "iceberg/expression/expression.h"
2828
#include "iceberg/expression/literal.h"
2929
#include "iceberg/expression/term.h"
30+
#include "iceberg/iceberg_export.h"
3031

3132
namespace iceberg {
3233

src/iceberg/test/transform_test.cc

Lines changed: 766 additions & 0 deletions
Large diffs are not rendered by default.

src/iceberg/transform.cc

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,14 @@
2323
#include <regex>
2424
#include <utility>
2525

26+
#include "iceberg/expression/predicate.h"
27+
#include "iceberg/expression/term.h"
28+
#include "iceberg/result.h"
2629
#include "iceberg/transform_function.h"
2730
#include "iceberg/type.h"
31+
#include "iceberg/util/checked_cast.h"
32+
#include "iceberg/util/macros.h"
33+
#include "iceberg/util/projection_util_internal.h"
2834

2935
namespace iceberg {
3036
namespace {
@@ -240,6 +246,66 @@ bool Transform::SatisfiesOrderOf(const Transform& other) const {
240246
std::unreachable();
241247
}
242248

249+
// Projects `predicate` onto the partition values produced by this transform.
//
// Dispatch:
//   - identity:       delegates to ProjectionUtil::IdentityProject.
//   - void / unknown: returns nullptr (no projection is produced).
//   - bucket / truncate / year / month / day / hour:
//       * if the predicate's term is itself a transform term, the predicate is
//         only projected (via ProjectionUtil::RemoveTransform) when that
//         transform compares equal to this one; otherwise nullptr is returned;
//       * otherwise this transform is bound to the term's type and the
//         matching ProjectionUtil::{Bucket,Truncate,Temporal}Project is called.
//
// `predicate` is dereferenced unconditionally, so callers must pass a non-null
// pointer. Errors from Bind() are propagated through the returned Result.
Result<std::unique_ptr<UnboundPredicate>> Transform::Project(
    std::string_view name, const std::shared_ptr<BoundPredicate>& predicate) {
  // Transforms whose outcome does not depend on the predicate's term.
  switch (transform_type_) {
    case TransformType::kIdentity:
      return ProjectionUtil::IdentityProject(name, predicate);
    case TransformType::kUnknown:
    case TransformType::kVoid:
      return nullptr;
    case TransformType::kBucket:
    case TransformType::kTruncate:
    case TransformType::kYear:
    case TransformType::kMonth:
    case TransformType::kDay:
    case TransformType::kHour:
      break;
  }

  // Shared by bucket, truncate and the temporal transforms: a predicate whose
  // term is already a transform can only be projected when that transform is
  // this one, in which case the transform is stripped from the predicate.
  if (predicate->term()->kind() == Term::Kind::kTransform) {
    const auto bound_transform =
        internal::checked_pointer_cast<BoundTransform>(predicate->term());
    if (*this == *bound_transform->transform()) {
      return ProjectionUtil::RemoveTransform(name, predicate);
    }
    return nullptr;
  }

  // The term is not a transform term: bind this transform to the term's type
  // and let the transform-specific projection build the result.
  ICEBERG_ASSIGN_OR_RAISE(auto func, Bind(predicate->term()->type()));
  switch (transform_type_) {
    case TransformType::kBucket:
      return ProjectionUtil::BucketProject(name, predicate, func);
    case TransformType::kTruncate:
      return ProjectionUtil::TruncateProject(name, predicate, func);
    case TransformType::kYear:
    case TransformType::kMonth:
    case TransformType::kDay:
    case TransformType::kHour:
      return ProjectionUtil::TemporalProject(name, predicate, func);
    case TransformType::kIdentity:
    case TransformType::kUnknown:
    case TransformType::kVoid:
      break;  // Already returned from the first switch.
  }
  std::unreachable();
}
308+
243309
bool TransformFunction::Equals(const TransformFunction& other) const {
244310
return transform_type_ == other.transform_type_ && *source_type_ == *other.source_type_;
245311
}

src/iceberg/transform.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
#include <cstdint>
2525
#include <memory>
26+
#include <string_view>
2627
#include <utility>
2728
#include <variant>
2829

@@ -164,11 +165,23 @@ class ICEBERG_EXPORT Transform : public util::Formattable {
164165
/// For example, sorting by day(ts) will produce an ordering that is also by month(ts)
165166
/// or year(ts). However, sorting by day(ts) will not satisfy the order of hour(ts) or
166167
/// identity(ts).
167-
///
168+
/// \param other The other transform to compare with.
168169
/// \return true if ordering by this transform is equivalent to ordering by the other
169170
/// transform.
170171
bool SatisfiesOrderOf(const Transform& other) const;
171172

173+
/// \brief Transforms a BoundPredicate to an inclusive predicate on the partition values
174+
/// produced by the transform.
175+
///
176+
/// The projection is inclusive: whenever predicate->Test(value) is true, the
177+
/// returned predicate, evaluated on transform(value), is also true.
178+
/// \param name The name of the partition column.
179+
/// \param predicate The predicate to project.
180+
/// \return A Result holding a unique pointer to the projected predicate, or nullptr
181+
/// if the predicate cannot be projected by this transform; an Error if projection fails.
182+
Result<std::unique_ptr<UnboundPredicate>> Project(
183+
std::string_view name, const std::shared_ptr<BoundPredicate>& predicate);
184+
172185
/// \brief Returns a string representation of this transform (e.g., "bucket[16]").
173186
std::string ToString() const override;
174187

src/iceberg/transform_function.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ class ICEBERG_EXPORT TruncateTransform : public TransformFunction {
8383
/// \brief Returns the same type as source_type.
8484
std::shared_ptr<Type> ResultType() const override;
8585

86+
/// \brief Returns the width to truncate to.
87+
int32_t width() const { return width_; }
88+
8689
/// \brief Create a TruncateTransform.
8790
/// \param source_type Type of the input data.
8891
/// \param width The width to truncate to.

src/iceberg/type_fwd.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,9 @@ class Uuid;
123123
class Expression;
124124
class Literal;
125125

126+
class BoundPredicate;
127+
class UnboundPredicate;
128+
126129
class DataTableScan;
127130
class FileScanTask;
128131
class ScanTask;

0 commit comments

Comments
 (0)