Skip to content

Commit 7ebdbff

Browse files
committed
feat: optimize truncate(col) == value to startsWith predicate
Implements an optimization that rewrites truncate equality predicates as startsWith predicates for better predicate pushdown and index usage. The optimization applies when all of the following hold: (a) the operation is equality; (b) the term is a truncate transform on a string column; and (c) the literal has exactly the truncate width in UTF-8 code points. The implementation uses transform_func()->Transform() to validate that: (1) truncate(literal) == literal, i.e. the literal is compatible with the transform; and (2) truncate(literal + 'x') == literal, i.e. the literal has exactly the truncate width. This approach leverages the transform function without duplicating the UTF-8 code point counting logic.
1 parent 3b59ef2 commit 7ebdbff

File tree

2 files changed

+49
-1
lines changed

2 files changed

+49
-1
lines changed

src/iceberg/expression/predicate.cc

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,10 @@
2525

2626
#include "iceberg/expression/expressions.h"
2727
#include "iceberg/expression/literal.h"
28+
#include "iceberg/expression/term.h"
2829
#include "iceberg/result.h"
30+
#include "iceberg/transform.h"
31+
#include "iceberg/transform_function.h"
2932
#include "iceberg/type.h"
3033
#include "iceberg/util/checked_cast.h"
3134
#include "iceberg/util/formatter_internal.h"
@@ -286,7 +289,48 @@ Result<std::shared_ptr<Expression>> UnboundPredicate<B>::BindLiteralOperation(
286289
}
287290
}
288291

289-
// TODO(gangwu): translate truncate(col) == value to startsWith(value)
292+
if (BASE::op() == Expression::Operation::kEq &&
293+
bound_term->kind() == Term::Kind::kTransform) {
294+
// Safe to cast after kind check confirms it's a transform
295+
auto* transform_term = internal::checked_cast<BoundTransform*>(bound_term.get());
296+
297+
if (transform_term->transform()->transform_type() == TransformType::kTruncate &&
298+
literal.type()->type_id() == TypeId::kString &&
299+
!literal.IsNull()) { // Null safety: skip null literals
300+
301+
// Apply truncate transform to the literal and check if result matches
302+
// This verifies the literal is compatible with the truncate operation
303+
auto transformed_result = transform_term->transform_func()->Transform(literal);
304+
if (!transformed_result.has_value() || transformed_result.value() != literal) {
305+
// Transform failed or modified the literal - can't optimize
306+
return BoundLiteralPredicate::Make(BASE::op(), std::move(bound_term),
307+
std::move(literal));
308+
}
309+
310+
// Literal passed truncate unchanged. Now check if adding one more character
311+
// would cause truncation. If yes, then the literal has EXACTLY the width.
312+
// Example:
313+
// - "Alice" with width=5: adding "x" makes "Alicex", truncate to "Alice" (can
314+
// optimize)
315+
// - "abc" with width=10: adding "x" makes "abcx", truncate to "abcx" != "abc"
316+
// (cannot optimize)
317+
318+
auto& string_value = std::get<std::string>(literal.value());
319+
auto extended_literal = Literal::String(string_value + "x");
320+
auto extended_result =
321+
transform_term->transform_func()->Transform(extended_literal);
322+
323+
if (extended_result.has_value() && extended_result.value() == literal) {
324+
// Adding a character gets truncated back to original - literal has exact width!
325+
// Rewrite: truncate(col, width) == "value" → col STARTS_WITH "value"
326+
return BoundLiteralPredicate::Make(Expression::Operation::kStartsWith,
327+
transform_term->reference(),
328+
std::move(literal));
329+
}
330+
// Literal is shorter than width - can't optimize
331+
}
332+
}
333+
290334
return BoundLiteralPredicate::Make(BASE::op(), std::move(bound_term),
291335
std::move(literal));
292336
}

src/iceberg/expression/term.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,10 @@ class ICEBERG_EXPORT BoundTransform : public BoundTerm {
250250

251251
const std::shared_ptr<Transform>& transform() const { return transform_; }
252252

253+
const std::shared_ptr<TransformFunction>& transform_func() const {
254+
return transform_func_;
255+
}
256+
253257
Kind kind() const override { return Kind::kTransform; }
254258

255259
private:

0 commit comments

Comments
 (0)