|
18 | 18 | */ |
19 | 19 |
|
20 | 20 | #include "iceberg/expression/expressions.h" |
| 21 | +#include "iceberg/expression/predicate.h" |
21 | 22 | #include "iceberg/schema.h" |
22 | 23 | #include "iceberg/test/matchers.h" |
23 | 24 | #include "iceberg/type.h" |
@@ -433,4 +434,127 @@ TEST_F(PredicateTest, ComplexExpressionCombinations) { |
433 | 434 | EXPECT_EQ(nested->op(), Expression::Operation::kAnd); |
434 | 435 | } |
435 | 436 |
|
| 437 | +TEST_F(PredicateTest, TruncateOptimizationToStartsWith) { |
| 438 | + // Test that truncate(col) == "value" is optimized to col STARTS_WITH "value" |
| 439 | + |
| 440 | + // Create a truncate transform expression: truncate(name, 5) |
| 441 | + auto truncate_expr = Expressions::Truncate("name", 5); |
| 442 | + |
| 443 | + // Create predicate: truncate(name, 5) == "Alice" |
| 444 | + auto truncate_eq_pred = std::make_shared<UnboundPredicate<BoundTransform>>( |
| 445 | + Expression::Operation::kEq, truncate_expr, Literal::String("Alice")); |
| 446 | + |
| 447 | + // Bind the predicate to the schema |
| 448 | + auto bound_result = truncate_eq_pred->Bind(*schema_, /*case_sensitive=*/true); |
| 449 | + ASSERT_THAT(bound_result, IsOk()); |
| 450 | + auto bound_pred = bound_result.value(); |
| 451 | + |
| 452 | + // After optimization, it should be a STARTS_WITH operation |
| 453 | + EXPECT_EQ(bound_pred->op(), Expression::Operation::kStartsWith); |
| 454 | + |
| 455 | + // Verify it's a BoundLiteralPredicate |
| 456 | + auto* literal_pred = dynamic_cast<BoundLiteralPredicate*>(bound_pred.get()); |
| 457 | + ASSERT_NE(literal_pred, nullptr); |
| 458 | + |
| 459 | + // The term should now be a direct reference to "name", not a transform |
| 460 | + EXPECT_EQ(literal_pred->term()->kind(), Term::Kind::kReference); |
| 461 | + |
| 462 | + // The literal should still be "Alice" |
| 463 | + EXPECT_EQ(literal_pred->literal(), Literal::String("Alice")); |
| 464 | +} |
| 465 | + |
| 466 | +TEST_F(PredicateTest, TruncateOptimizationNotAppliedForNonEquality) { |
| 467 | + // Test that optimization is NOT applied for non-equality operations |
| 468 | + |
| 469 | + auto truncate_expr = Expressions::Truncate("name", 5); |
| 470 | + |
| 471 | + // Test with less-than (should NOT be optimized) |
| 472 | + auto truncate_lt_pred = std::make_shared<UnboundPredicate<BoundTransform>>( |
| 473 | + Expression::Operation::kLt, truncate_expr, Literal::String("Bob")); |
| 474 | + auto bound_lt_result = truncate_lt_pred->Bind(*schema_, /*case_sensitive=*/true); |
| 475 | + ASSERT_THAT(bound_lt_result, IsOk()); |
| 476 | + auto bound_lt = bound_lt_result.value(); |
| 477 | + |
| 478 | + // Should remain as kLt, not converted to STARTS_WITH |
| 479 | + EXPECT_EQ(bound_lt->op(), Expression::Operation::kLt); |
| 480 | + |
| 481 | + // The term should still be a transform |
| 482 | + auto* literal_pred_lt = dynamic_cast<BoundLiteralPredicate*>(bound_lt.get()); |
| 483 | + ASSERT_NE(literal_pred_lt, nullptr); |
| 484 | + EXPECT_EQ(literal_pred_lt->term()->kind(), Term::Kind::kTransform); |
| 485 | +} |
| 486 | + |
| 487 | +TEST_F(PredicateTest, TruncateOptimizationNotAppliedForNonString) { |
| 488 | + // Test that optimization is NOT applied for non-string types |
| 489 | + // (truncate can also work on binary types, but optimization only applies to strings) |
| 490 | + |
| 491 | + // Create a schema with binary field |
| 492 | + auto binary_schema = std::make_shared<Schema>( |
| 493 | + std::vector<SchemaField>{SchemaField::MakeOptional(1, "data", binary())}, |
| 494 | + /*schema_id=*/0); |
| 495 | + |
| 496 | + auto truncate_expr = Expressions::Truncate("data", 10); |
| 497 | + auto truncate_eq_pred = std::make_shared<UnboundPredicate<BoundTransform>>( |
| 498 | + Expression::Operation::kEq, truncate_expr, |
| 499 | + Literal::Binary({0x01, 0x02, 0x03, 0x04, 0x05})); |
| 500 | + |
| 501 | + auto bound_result = truncate_eq_pred->Bind(*binary_schema, /*case_sensitive=*/true); |
| 502 | + ASSERT_THAT(bound_result, IsOk()); |
| 503 | + auto bound_pred = bound_result.value(); |
| 504 | + |
| 505 | + // Should remain as kEq, not converted to STARTS_WITH (binary doesn't support startsWith) |
| 506 | + EXPECT_EQ(bound_pred->op(), Expression::Operation::kEq); |
| 507 | + |
| 508 | + // The term should still be a transform |
| 509 | + auto* literal_pred = dynamic_cast<BoundLiteralPredicate*>(bound_pred.get()); |
| 510 | + ASSERT_NE(literal_pred, nullptr); |
| 511 | + EXPECT_EQ(literal_pred->term()->kind(), Term::Kind::kTransform); |
| 512 | +} |
| 513 | + |
| 514 | +TEST_F(PredicateTest, TruncateOptimizationNotAppliedForWidthMismatch) { |
| 515 | + // CRITICAL TEST: Optimization must NOT apply when literal length != truncate width |
| 516 | + // Example: truncate(col, 10) == "abc" should NOT become STARTS_WITH |
| 517 | + // Because "abc1234567" would match STARTS_WITH but NOT truncate equality |
| 518 | + |
| 519 | + auto truncate_expr = Expressions::Truncate("name", 10); |
| 520 | + |
| 521 | + // Literal "abc" has length 3, but truncate width is 10 |
| 522 | + auto truncate_eq_pred = std::make_shared<UnboundPredicate<BoundTransform>>( |
| 523 | + Expression::Operation::kEq, truncate_expr, Literal::String("abc")); |
| 524 | + |
| 525 | + auto bound_result = truncate_eq_pred->Bind(*schema_, /*case_sensitive=*/true); |
| 526 | + ASSERT_THAT(bound_result, IsOk()); |
| 527 | + auto bound_pred = bound_result.value(); |
| 528 | + |
| 529 | + // Should remain as kEq, NOT converted to STARTS_WITH |
| 530 | + EXPECT_EQ(bound_pred->op(), Expression::Operation::kEq); |
| 531 | + |
| 532 | + // The term should still be a transform (not optimized away) |
| 533 | + auto* literal_pred = dynamic_cast<BoundLiteralPredicate*>(bound_pred.get()); |
| 534 | + ASSERT_NE(literal_pred, nullptr); |
| 535 | + EXPECT_EQ(literal_pred->term()->kind(), Term::Kind::kTransform); |
| 536 | +} |
| 537 | + |
| 538 | +TEST_F(PredicateTest, TruncateOptimizationAppliedWhenLengthMatches) { |
| 539 | + // Test that optimization IS applied when literal length == truncate width |
| 540 | + |
| 541 | + auto truncate_expr = Expressions::Truncate("name", 5); |
| 542 | + |
| 543 | + // Literal "Alice" has length 5, matching truncate width 5 |
| 544 | + auto truncate_eq_pred = std::make_shared<UnboundPredicate<BoundTransform>>( |
| 545 | + Expression::Operation::kEq, truncate_expr, Literal::String("Alice")); |
| 546 | + |
| 547 | + auto bound_result = truncate_eq_pred->Bind(*schema_, /*case_sensitive=*/true); |
| 548 | + ASSERT_THAT(bound_result, IsOk()); |
| 549 | + auto bound_pred = bound_result.value(); |
| 550 | + |
| 551 | + // Should be optimized to STARTS_WITH |
| 552 | + EXPECT_EQ(bound_pred->op(), Expression::Operation::kStartsWith); |
| 553 | + |
| 554 | + // The term should be a direct reference (optimization applied) |
| 555 | + auto* literal_pred = dynamic_cast<BoundLiteralPredicate*>(bound_pred.get()); |
| 556 | + ASSERT_NE(literal_pred, nullptr); |
| 557 | + EXPECT_EQ(literal_pred->term()->kind(), Term::Kind::kReference); |
| 558 | +} |
| 559 | + |
436 | 560 | } // namespace iceberg |
0 commit comments