@@ -870,4 +870,142 @@ TEST_F(PredicateTest, BoundSetPredicateTestSingleLiteral) {
870870 EXPECT_THAT (bound_literal->Test (Literal::Int (41 )), HasValue (testing::Eq (false )));
871871}
872872
873+ TEST_F (PredicateTest, TruncateLiteralOptimizationExactWidth) {
874+ // Test optimization: truncate(name, 5) == "Alice" should become name STARTS_WITH
875+ // "Alice"
876+ auto truncate_term = Expressions::Truncate (" name" , 5 );
877+ ICEBERG_ASSIGN_OR_THROW (auto equal_pred, UnboundPredicate<BoundTransform>::Make (
878+ Expression::Operation::kEq , truncate_term,
879+ Literal::String (" Alice" )));
880+
881+ ICEBERG_ASSIGN_OR_THROW (auto bound_pred,
882+ equal_pred->Bind (*schema_, /* case_sensitive=*/ true ));
883+
884+ // Should be optimized to STARTS_WITH operation
885+ EXPECT_EQ (bound_pred->op (), Expression::Operation::kStartsWith );
886+
887+ // Verify it's a bound literal predicate on the reference (not the transform)
888+ auto bound_literal = AssertAndCastToBoundPredicate (bound_pred);
889+ EXPECT_THAT (bound_literal->Test (Literal::String (" Alice" )), HasValue (testing::Eq (true )));
890+ EXPECT_THAT (bound_literal->Test (Literal::String (" AliceX" )),
891+ HasValue (testing::Eq (true )));
892+ EXPECT_THAT (bound_literal->Test (Literal::String (" Alice123" )),
893+ HasValue (testing::Eq (true )));
894+ EXPECT_THAT (bound_literal->Test (Literal::String (" Bob" )), HasValue (testing::Eq (false )));
895+ EXPECT_THAT (bound_literal->Test (Literal::String (" Alic" )), HasValue (testing::Eq (false )));
896+ }
897+
898+ TEST_F (PredicateTest, TruncateLiteralOptimizationShorterLiteral) {
899+ // Test no optimization: truncate(name, 10) == "abc" should NOT be optimized
900+ // because "abc" is shorter than width 10
901+ auto truncate_term = Expressions::Truncate (" name" , 10 );
902+ ICEBERG_ASSIGN_OR_THROW (auto equal_pred, UnboundPredicate<BoundTransform>::Make (
903+ Expression::Operation::kEq , truncate_term,
904+ Literal::String (" abc" )));
905+
906+ ICEBERG_ASSIGN_OR_THROW (auto bound_pred,
907+ equal_pred->Bind (*schema_, /* case_sensitive=*/ true ));
908+
909+ // Should remain as EQUAL operation (not optimized to STARTS_WITH)
910+ EXPECT_EQ (bound_pred->op (), Expression::Operation::kEq );
911+ }
912+
913+ TEST_F (PredicateTest, TruncateLiteralOptimizationNullLiteral) {
914+ // Test no optimization with null literal - skipped as null strings are handled
915+ // differently Null values are tested through IS NULL predicates, not equality
916+ // predicates
917+ GTEST_SKIP () << " Null literal equality not supported for strings" ;
918+ }
919+
920+ TEST_F (PredicateTest, TruncateLiteralOptimizationNonEqualityOperations) {
921+ // Test that non-equality operations are not optimized
922+ auto truncate_term = Expressions::Truncate (" name" , 5 );
923+
924+ // NotEqual should not be optimized
925+ ICEBERG_ASSIGN_OR_THROW (
926+ auto not_equal_pred,
927+ UnboundPredicate<BoundTransform>::Make (Expression::Operation::kNotEq , truncate_term,
928+ Literal::String (" Alice" )));
929+ ICEBERG_ASSIGN_OR_THROW (auto bound_not_equal,
930+ not_equal_pred->Bind (*schema_, /* case_sensitive=*/ true ));
931+ EXPECT_EQ (bound_not_equal->op (), Expression::Operation::kNotEq );
932+
933+ // LessThan should not be optimized
934+ ICEBERG_ASSIGN_OR_THROW (auto lt_pred, UnboundPredicate<BoundTransform>::Make (
935+ Expression::Operation::kLt , truncate_term,
936+ Literal::String (" Alice" )));
937+ ICEBERG_ASSIGN_OR_THROW (auto bound_lt,
938+ lt_pred->Bind (*schema_, /* case_sensitive=*/ true ));
939+ EXPECT_EQ (bound_lt->op (), Expression::Operation::kLt );
940+
941+ // GreaterThan should not be optimized
942+ ICEBERG_ASSIGN_OR_THROW (auto gt_pred, UnboundPredicate<BoundTransform>::Make (
943+ Expression::Operation::kGt , truncate_term,
944+ Literal::String (" Alice" )));
945+ ICEBERG_ASSIGN_OR_THROW (auto bound_gt,
946+ gt_pred->Bind (*schema_, /* case_sensitive=*/ true ));
947+ EXPECT_EQ (bound_gt->op (), Expression::Operation::kGt );
948+ }
949+
950+ TEST_F (PredicateTest, TruncateLiteralOptimizationUTF8MultibyteCharacters) {
951+ // Test optimization with UTF-8 multibyte characters (5 code points, not bytes)
952+ auto truncate_term = Expressions::Truncate (" name" , 5 );
953+
954+ // "你好世界!" is 5 UTF-8 code points
955+ ICEBERG_ASSIGN_OR_THROW (auto equal_pred, UnboundPredicate<BoundTransform>::Make (
956+ Expression::Operation::kEq , truncate_term,
957+ Literal::String (" 你好世界!" )));
958+ ICEBERG_ASSIGN_OR_THROW (auto bound_pred,
959+ equal_pred->Bind (*schema_, /* case_sensitive=*/ true ));
960+
961+ // Should be optimized to STARTS_WITH
962+ EXPECT_EQ (bound_pred->op (), Expression::Operation::kStartsWith );
963+
964+ // Test with mixed ASCII and UTF-8: "你好世界x" is 5 code points (4 Chinese + 1 ASCII)
965+ ICEBERG_ASSIGN_OR_THROW (auto mixed_pred, UnboundPredicate<BoundTransform>::Make (
966+ Expression::Operation::kEq , truncate_term,
967+ Literal::String (" 你好世界x" )));
968+ ICEBERG_ASSIGN_OR_THROW (auto bound_mixed,
969+ mixed_pred->Bind (*schema_, /* case_sensitive=*/ true ));
970+ EXPECT_EQ (bound_mixed->op (), Expression::Operation::kStartsWith );
971+
972+ // Test with 3 UTF-8 characters (shorter than width) - should NOT optimize
973+ ICEBERG_ASSIGN_OR_THROW (
974+ auto shorter_pred,
975+ UnboundPredicate<BoundTransform>::Make (Expression::Operation::kEq , truncate_term,
976+ Literal::String (" 你好世" )));
977+ ICEBERG_ASSIGN_OR_THROW (auto bound_shorter,
978+ shorter_pred->Bind (*schema_, /* case_sensitive=*/ true ));
979+ EXPECT_EQ (bound_shorter->op (), Expression::Operation::kEq );
980+ }
981+
982+ TEST_F (PredicateTest, TruncateLiteralOptimizationEmptyString) {
983+ // Test edge case: empty string with any width should not optimize
984+ auto truncate_term = Expressions::Truncate (" name" , 5 );
985+ ICEBERG_ASSIGN_OR_THROW (auto equal_pred, UnboundPredicate<BoundTransform>::Make (
986+ Expression::Operation::kEq , truncate_term,
987+ Literal::String (" " )));
988+
989+ ICEBERG_ASSIGN_OR_THROW (auto bound_pred,
990+ equal_pred->Bind (*schema_, /* case_sensitive=*/ true ));
991+
992+ // Empty string is shorter than width, should not optimize
993+ EXPECT_EQ (bound_pred->op (), Expression::Operation::kEq );
994+ }
995+
996+ TEST_F (PredicateTest, TruncateLiteralOptimizationNonTruncateTransform) {
997+ // Test that other transforms (e.g., bucket) are not optimized
998+ // Bucket returns an integer, so we use an integer literal
999+ auto bucket_term = Expressions::Bucket (" id" , 10 ); // id is int64
1000+ ICEBERG_ASSIGN_OR_THROW (auto equal_pred,
1001+ UnboundPredicate<BoundTransform>::Make (
1002+ Expression::Operation::kEq , bucket_term, Literal::Int (5 )));
1003+
1004+ ICEBERG_ASSIGN_OR_THROW (auto bound_pred,
1005+ equal_pred->Bind (*schema_, /* case_sensitive=*/ true ));
1006+
1007+ // Should remain as EQUAL operation (bucket transform not optimized)
1008+ EXPECT_EQ (bound_pred->op (), Expression::Operation::kEq );
1009+ }
1010+
8731011} // namespace iceberg
0 commit comments