@@ -870,4 +870,144 @@ TEST_F(PredicateTest, BoundSetPredicateTestSingleLiteral) {
870870 EXPECT_THAT (bound_literal->Test (Literal::Int (41 )), HasValue (testing::Eq (false )));
871871}
872872
873+ TEST_F (PredicateTest, TruncateLiteralOptimizationExactWidth) {
874+ // Test optimization: truncate(name, 5) == "Alice" should become name STARTS_WITH "Alice"
875+ auto truncate_term = Expressions::Truncate (" name" , 5 );
876+ ICEBERG_ASSIGN_OR_THROW (
877+ auto equal_pred,
878+ UnboundPredicate<BoundTransform>::Make (Expression::Operation::kEq , truncate_term,
879+ Literal::String (" Alice" )));
880+
881+ ICEBERG_ASSIGN_OR_THROW (auto bound_pred,
882+ equal_pred->Bind (*schema_, /* case_sensitive=*/ true ));
883+
884+ // Should be optimized to STARTS_WITH operation
885+ EXPECT_EQ (bound_pred->op (), Expression::Operation::kStartsWith );
886+
887+ // Verify it's a bound literal predicate on the reference (not the transform)
888+ auto bound_literal = AssertAndCastToBoundPredicate (bound_pred);
889+ EXPECT_THAT (bound_literal->Test (Literal::String (" Alice" )), HasValue (testing::Eq (true )));
890+ EXPECT_THAT (bound_literal->Test (Literal::String (" AliceX" )), HasValue (testing::Eq (true )));
891+ EXPECT_THAT (bound_literal->Test (Literal::String (" Alice123" )),
892+ HasValue (testing::Eq (true )));
893+ EXPECT_THAT (bound_literal->Test (Literal::String (" Bob" )), HasValue (testing::Eq (false )));
894+ EXPECT_THAT (bound_literal->Test (Literal::String (" Alic" )), HasValue (testing::Eq (false )));
895+ }
896+
897+ TEST_F (PredicateTest, TruncateLiteralOptimizationShorterLiteral) {
898+ // Test no optimization: truncate(name, 10) == "abc" should NOT be optimized
899+ // because "abc" is shorter than width 10
900+ auto truncate_term = Expressions::Truncate (" name" , 10 );
901+ ICEBERG_ASSIGN_OR_THROW (
902+ auto equal_pred,
903+ UnboundPredicate<BoundTransform>::Make (Expression::Operation::kEq , truncate_term,
904+ Literal::String (" abc" )));
905+
906+ ICEBERG_ASSIGN_OR_THROW (auto bound_pred,
907+ equal_pred->Bind (*schema_, /* case_sensitive=*/ true ));
908+
909+ // Should remain as EQUAL operation (not optimized to STARTS_WITH)
910+ EXPECT_EQ (bound_pred->op (), Expression::Operation::kEq );
911+ }
912+
913+ TEST_F (PredicateTest, TruncateLiteralOptimizationNullLiteral) {
914+ // Test no optimization with null literal - skipped as null strings are handled differently
915+ // Null values are tested through IS NULL predicates, not equality predicates
916+ GTEST_SKIP () << " Null literal equality not supported for strings" ;
917+ }
918+
919+ TEST_F (PredicateTest, TruncateLiteralOptimizationNonEqualityOperations) {
920+ // Test that non-equality operations are not optimized
921+ auto truncate_term = Expressions::Truncate (" name" , 5 );
922+
923+ // NotEqual should not be optimized
924+ ICEBERG_ASSIGN_OR_THROW (
925+ auto not_equal_pred,
926+ UnboundPredicate<BoundTransform>::Make (Expression::Operation::kNotEq , truncate_term,
927+ Literal::String (" Alice" )));
928+ ICEBERG_ASSIGN_OR_THROW (auto bound_not_equal,
929+ not_equal_pred->Bind (*schema_, /* case_sensitive=*/ true ));
930+ EXPECT_EQ (bound_not_equal->op (), Expression::Operation::kNotEq );
931+
932+ // LessThan should not be optimized
933+ ICEBERG_ASSIGN_OR_THROW (
934+ auto lt_pred, UnboundPredicate<BoundTransform>::Make (Expression::Operation::kLt ,
935+ truncate_term,
936+ Literal::String (" Alice" )));
937+ ICEBERG_ASSIGN_OR_THROW (auto bound_lt, lt_pred->Bind (*schema_, /* case_sensitive=*/ true ));
938+ EXPECT_EQ (bound_lt->op (), Expression::Operation::kLt );
939+
940+ // GreaterThan should not be optimized
941+ ICEBERG_ASSIGN_OR_THROW (
942+ auto gt_pred, UnboundPredicate<BoundTransform>::Make (Expression::Operation::kGt ,
943+ truncate_term,
944+ Literal::String (" Alice" )));
945+ ICEBERG_ASSIGN_OR_THROW (auto bound_gt, gt_pred->Bind (*schema_, /* case_sensitive=*/ true ));
946+ EXPECT_EQ (bound_gt->op (), Expression::Operation::kGt );
947+ }
948+
949+ TEST_F (PredicateTest, TruncateLiteralOptimizationUTF8MultibyteCharacters) {
950+ // Test optimization with UTF-8 multibyte characters (5 code points, not bytes)
951+ auto truncate_term = Expressions::Truncate (" name" , 5 );
952+
953+ // "你好世界!" is 5 UTF-8 code points
954+ ICEBERG_ASSIGN_OR_THROW (
955+ auto equal_pred,
956+ UnboundPredicate<BoundTransform>::Make (Expression::Operation::kEq , truncate_term,
957+ Literal::String (" 你好世界!" )));
958+ ICEBERG_ASSIGN_OR_THROW (auto bound_pred,
959+ equal_pred->Bind (*schema_, /* case_sensitive=*/ true ));
960+
961+ // Should be optimized to STARTS_WITH
962+ EXPECT_EQ (bound_pred->op (), Expression::Operation::kStartsWith );
963+
964+ // Test with mixed ASCII and UTF-8: "你好世界x" is 5 code points (4 Chinese + 1 ASCII)
965+ ICEBERG_ASSIGN_OR_THROW (
966+ auto mixed_pred,
967+ UnboundPredicate<BoundTransform>::Make (Expression::Operation::kEq , truncate_term,
968+ Literal::String (" 你好世界x" )));
969+ ICEBERG_ASSIGN_OR_THROW (auto bound_mixed,
970+ mixed_pred->Bind (*schema_, /* case_sensitive=*/ true ));
971+ EXPECT_EQ (bound_mixed->op (), Expression::Operation::kStartsWith );
972+
973+ // Test with 3 UTF-8 characters (shorter than width) - should NOT optimize
974+ ICEBERG_ASSIGN_OR_THROW (
975+ auto shorter_pred,
976+ UnboundPredicate<BoundTransform>::Make (Expression::Operation::kEq , truncate_term,
977+ Literal::String (" 你好世" )));
978+ ICEBERG_ASSIGN_OR_THROW (auto bound_shorter,
979+ shorter_pred->Bind (*schema_, /* case_sensitive=*/ true ));
980+ EXPECT_EQ (bound_shorter->op (), Expression::Operation::kEq );
981+ }
982+
983+ TEST_F (PredicateTest, TruncateLiteralOptimizationEmptyString) {
984+ // Test edge case: empty string with any width should not optimize
985+ auto truncate_term = Expressions::Truncate (" name" , 5 );
986+ ICEBERG_ASSIGN_OR_THROW (
987+ auto equal_pred,
988+ UnboundPredicate<BoundTransform>::Make (Expression::Operation::kEq , truncate_term,
989+ Literal::String (" " )));
990+
991+ ICEBERG_ASSIGN_OR_THROW (auto bound_pred,
992+ equal_pred->Bind (*schema_, /* case_sensitive=*/ true ));
993+
994+ // Empty string is shorter than width, should not optimize
995+ EXPECT_EQ (bound_pred->op (), Expression::Operation::kEq );
996+ }
997+
998+ TEST_F (PredicateTest, TruncateLiteralOptimizationNonTruncateTransform) {
999+ // Test that other transforms (e.g., bucket) are not optimized
1000+ // Bucket returns an integer, so we use an integer literal
1001+ auto bucket_term = Expressions::Bucket (" id" , 10 ); // id is int64
1002+ ICEBERG_ASSIGN_OR_THROW (
1003+ auto equal_pred, UnboundPredicate<BoundTransform>::Make (Expression::Operation::kEq ,
1004+ bucket_term, Literal::Int (5 )));
1005+
1006+ ICEBERG_ASSIGN_OR_THROW (auto bound_pred,
1007+ equal_pred->Bind (*schema_, /* case_sensitive=*/ true ));
1008+
1009+ // Should remain as EQUAL operation (bucket transform not optimized)
1010+ EXPECT_EQ (bound_pred->op (), Expression::Operation::kEq );
1011+ }
1012+
8731013} // namespace iceberg
0 commit comments