35
35
BoundLessThan ,
36
36
BoundLessThanOrEqual ,
37
37
BoundLiteralPredicate ,
38
+ BoundNotEqualTo ,
38
39
BoundNotIn ,
39
40
BoundNotStartsWith ,
40
41
BoundPredicate ,
43
44
BoundTerm ,
44
45
BoundUnaryPredicate ,
45
46
EqualTo ,
47
+ GreaterThan ,
46
48
GreaterThanOrEqual ,
49
+ LessThan ,
47
50
LessThanOrEqual ,
51
+ NotEqualTo ,
48
52
NotStartsWith ,
49
53
Reference ,
50
54
StartsWith ,
@@ -144,6 +148,9 @@ def result_type(self, source: IcebergType) -> IcebergType: ...
144
148
@abstractmethod
def project(self, name: str, pred: BoundPredicate[L]) -> Optional[UnboundPredicate[Any]]:
    """Project the bound predicate ``pred`` onto the partition field ``name``.

    Abstract hook without a default implementation. The implementations
    visible in this module return an unbound predicate, or ``None`` when
    no projection is produced.
    """
146
150
151
@abstractmethod
def strict_project(self, name: str, pred: BoundPredicate[Any]) -> Optional[UnboundPredicate[Any]]:
    """Strictly project the bound predicate ``pred`` onto the partition field ``name``.

    Abstract hook without a default implementation. Implementations return
    an unbound predicate, or ``None`` when no strict projection exists.

    NOTE(review): "strict" presumably means the projected predicate may only
    match partitions in which every row satisfies ``pred`` -- confirm against
    the Iceberg specification.
    """
153
+
147
154
@property
def preserves_order(self) -> bool:
    """Order-preservation flag; this implementation always reports ``False``."""
    return False
@@ -216,6 +223,21 @@ def project(self, name: str, pred: BoundPredicate[L]) -> Optional[UnboundPredica
216
223
# For example, (x > 0) and (x < 3) can be turned into in({1, 2}) and projected.
217
224
return None
218
225
226
def strict_project(self, name: str, pred: BoundPredicate[Any]) -> Optional[UnboundPredicate[Any]]:
    """Strictly project ``pred`` onto the partition field ``name``.

    Dispatch, in order:

    * predicate whose term is already a transform -> ``_project_transform_predicate``
    * unary predicate -> same predicate re-targeted at ``name``
    * not-equal -> same predicate with the transformed literal
    * not-in -> same predicate with every literal transformed
    * anything else -> ``None``

    Args:
        name: Name of the partition field the result refers to.
        pred: Bound predicate on the source column.

    Returns:
        The projected unbound predicate, or ``None`` when none is produced.
    """
    # Built before any dispatch happens, so it is evaluated for every predicate kind.
    apply_transform = self.transform(pred.term.ref().field.field_type)

    if isinstance(pred.term, BoundTransform):
        return _project_transform_predicate(self, name, pred)
    if isinstance(pred, BoundUnaryPredicate):
        return pred.as_unbound(Reference(name))
    if isinstance(pred, BoundNotEqualTo):
        return pred.as_unbound(Reference(name), _transform_literal(apply_transform, pred.literal))
    if isinstance(pred, BoundNotIn):
        return pred.as_unbound(Reference(name), {_transform_literal(apply_transform, lit) for lit in pred.literals})

    # no strict projection for comparison or equality
    return None
240
+
219
241
def can_transform (self , source : IcebergType ) -> bool :
220
242
return isinstance (
221
243
source ,
@@ -306,6 +328,19 @@ def project(self, name: str, pred: BoundPredicate[L]) -> Optional[UnboundPredica
306
328
else :
307
329
return None
308
330
331
def strict_project(self, name: str, pred: BoundPredicate[Any]) -> Optional[UnboundPredicate[Any]]:
    """Strictly project ``pred`` onto the partition field ``name``.

    Dispatch, in order:

    * predicate whose term is already a transform -> ``_project_transform_predicate``
    * unary predicate -> same predicate re-targeted at ``name``
    * literal predicate -> ``_truncate_number_strict``
    * not-in -> ``_set_apply_transform``
    * anything else -> ``None``

    Args:
        name: Name of the partition field the result refers to.
        pred: Bound predicate on the source column.

    Returns:
        The projected unbound predicate, or ``None`` when none is produced.
    """
    # Built before any dispatch happens, so it is evaluated for every predicate kind.
    apply_transform = self.transform(pred.term.ref().field.field_type)

    if isinstance(pred.term, BoundTransform):
        return _project_transform_predicate(self, name, pred)
    if isinstance(pred, BoundUnaryPredicate):
        return pred.as_unbound(Reference(name))
    if isinstance(pred, BoundLiteralPredicate):
        return _truncate_number_strict(name, pred, apply_transform)
    if isinstance(pred, BoundNotIn):
        return _set_apply_transform(name, pred, apply_transform)
    return None
343
+
309
344
@property
def dedup_name(self) -> str:
    """Deduplication name of this transform; always the string ``"time"``."""
    return "time"
@@ -516,10 +551,20 @@ def project(self, name: str, pred: BoundPredicate[L]) -> Optional[UnboundPredica
516
551
return pred .as_unbound (Reference (name ))
517
552
elif isinstance (pred , BoundLiteralPredicate ):
518
553
return pred .as_unbound (Reference (name ), pred .literal )
519
- elif isinstance (pred , (BoundIn , BoundNotIn )):
554
+ elif isinstance (pred , BoundSetPredicate ):
555
+ return pred .as_unbound (Reference (name ), pred .literals )
556
+ else :
557
+ return None
558
+
559
def strict_project(self, name: str, pred: BoundPredicate[Any]) -> Optional[UnboundPredicate[Any]]:
    """Strictly project ``pred`` onto the partition field ``name`` without altering values.

    Unary, literal and set predicates are re-targeted at ``name`` with their
    literal(s) passed through unchanged; any other predicate kind yields
    ``None``.

    Args:
        name: Name of the partition field the result refers to.
        pred: Bound predicate on the source column.

    Returns:
        The re-targeted unbound predicate, or ``None`` for unsupported kinds.
    """
    target = Reference(name)

    if isinstance(pred, BoundUnaryPredicate):
        return pred.as_unbound(target)
    if isinstance(pred, BoundLiteralPredicate):
        return pred.as_unbound(target, pred.literal)
    if isinstance(pred, BoundSetPredicate):
        return pred.as_unbound(target, pred.literals)
    return None
523
568
524
569
@property
525
570
def preserves_order (self ) -> bool :
@@ -590,6 +635,47 @@ def project(self, name: str, pred: BoundPredicate[L]) -> Optional[UnboundPredica
590
635
return _truncate_array (name , pred , self .transform (field_type ))
591
636
return None
592
637
638
def strict_project(self, name: str, pred: BoundPredicate[Any]) -> Optional[UnboundPredicate[Any]]:
    """Strictly project ``pred`` onto the truncated partition field ``name``.

    A predicate whose term is already a transform is delegated to
    ``_project_transform_predicate``. Otherwise the source field type decides:

    * Integer / long / decimal: unary predicates are re-targeted, literal
      predicates go through ``_truncate_number_strict`` and not-in through
      ``_set_apply_transform``; everything else yields ``None``.
    * Any other type: starts-with / not-starts-with are resolved by comparing
      the literal length with ``self.width``; remaining literal predicates go
      through ``_truncate_array_strict``, not-in through
      ``_set_apply_transform``; everything else yields ``None``.

    Args:
        name: Name of the partition field the result refers to.
        pred: Bound predicate on the source column.

    Returns:
        The projected unbound predicate, or ``None`` when none is produced.
    """
    source_type = pred.term.ref().field.field_type

    if isinstance(pred.term, BoundTransform):
        return _project_transform_predicate(self, name, pred)

    if isinstance(source_type, (IntegerType, LongType, DecimalType)):
        if isinstance(pred, BoundUnaryPredicate):
            return pred.as_unbound(Reference(name))
        if isinstance(pred, BoundLiteralPredicate):
            return _truncate_number_strict(name, pred, self.transform(source_type))
        if isinstance(pred, BoundNotIn):
            return _set_apply_transform(name, pred, self.transform(source_type))
        return None

    if not isinstance(pred, BoundLiteralPredicate):
        if isinstance(pred, BoundNotIn):
            return _set_apply_transform(name, pred, self.transform(source_type))
        return None

    if isinstance(pred, BoundStartsWith):
        prefix_len = len(pred.literal.value)
        if prefix_len < self.width:
            # Prefix shorter than the truncation width: the same predicate is reused as-is.
            return pred.as_unbound(name, pred.literal.value)
        if prefix_len == self.width:
            # Prefix exactly as long as the width: projected as an equality test.
            return EqualTo(name, pred.literal.value)
        return None

    if isinstance(pred, BoundNotStartsWith):
        prefix_len = len(pred.literal.value)
        if prefix_len < self.width:
            return pred.as_unbound(name, pred.literal.value)
        if prefix_len == self.width:
            return NotEqualTo(name, pred.literal.value)
        # Prefix longer than the width: compare against the truncated prefix.
        return pred.as_unbound(name, self.transform(source_type)(pred.literal.value))

    # ProjectionUtil.truncateArrayStrict(name, pred, this);
    return _truncate_array_strict(name, pred, self.transform(source_type))
678
+
593
679
@property
def width(self) -> int:
    """Expose the value stored in the private ``_width`` attribute."""
    return self._width
@@ -714,6 +800,9 @@ def result_type(self, source: IcebergType) -> StringType:
714
800
def project(self, name: str, pred: BoundPredicate[L]) -> Optional[UnboundPredicate[Any]]:
    """Never project: this implementation returns ``None`` for every predicate."""
    return None
716
802
803
def strict_project(self, name: str, pred: BoundPredicate[Any]) -> Optional[UnboundPredicate[Any]]:
    """Never strictly project: this implementation returns ``None`` for every predicate."""
    return None
805
+
717
806
def __repr__(self) -> str:
    """Return the string representation of the UnknownTransform class."""
    # ``!r`` applies repr() to the wrapped transform, same as an explicit repr() call.
    return f"UnknownTransform(transform={self._transform!r} )"
@@ -736,6 +825,9 @@ def result_type(self, source: IcebergType) -> IcebergType:
736
825
def project(self, name: str, pred: BoundPredicate[L]) -> Optional[UnboundPredicate[Any]]:
    """Never project: this implementation returns ``None`` for every predicate."""
    return None
738
827
828
def strict_project(self, name: str, pred: BoundPredicate[L]) -> Optional[UnboundPredicate[Any]]:
    """Never strictly project: this implementation returns ``None`` for every predicate."""
    return None
830
+
739
831
def to_human_string(self, _: IcebergType, value: Optional[S]) -> str:
    """Render any value as the fixed text ``"null"``; both arguments are ignored."""
    return "null"
741
833
@@ -766,6 +858,47 @@ def _truncate_number(
766
858
return None
767
859
768
860
861
def _truncate_number_strict(
    name: str, pred: BoundLiteralPredicate[L], transform: Callable[[Optional[L]], Optional[L]]
) -> Optional[UnboundPredicate[Any]]:
    """Build the strict projection of a numeric literal predicate.

    The projected predicate is expressed on the partition field ``name`` and
    compares against the transformed boundary literal.

    Args:
        name: Name of the partition field the result refers to.
        pred: Bound literal predicate on the source column.
        transform: Callable applied to the boundary value via ``_transform_literal``.

    Returns:
        The projected unbound predicate, or ``None`` when the predicate kind
        has no strict projection (equality and any unhandled kind).

    Raises:
        ValueError: If the predicate's literal is not a long, decimal, date or
            timestamp literal.
    """
    boundary = pred.literal

    if not isinstance(boundary, (LongLiteral, DecimalLiteral, DateLiteral, TimestampLiteral)):
        raise ValueError(f"Expected a numeric literal, got: {type(boundary)} ")

    if isinstance(pred, BoundLessThan):
        return LessThan(Reference(name), _transform_literal(transform, boundary))
    elif isinstance(pred, BoundLessThanOrEqual):
        # x <= v is rewritten as x < v + 1 so the projection can stay a strict "<".
        return LessThan(Reference(name), _transform_literal(transform, boundary.increment()))  # type: ignore
    elif isinstance(pred, BoundGreaterThan):
        return GreaterThan(Reference(name), _transform_literal(transform, boundary))
    elif isinstance(pred, BoundGreaterThanOrEqual):
        # x >= v is rewritten as x > v - 1 so the projection can stay a strict ">".
        return GreaterThan(Reference(name), _transform_literal(transform, boundary.decrement()))  # type: ignore
    elif isinstance(pred, BoundNotEqualTo):
        # transform(x) != transform(v) guarantees x != v. Projecting to an
        # equality here would select exactly the partitions that may contain v.
        return NotEqualTo(Reference(name), _transform_literal(transform, boundary))
    elif isinstance(pred, BoundEqualTo):
        # there is no predicate that guarantees equality because adjacent longs transform to the
        # same value
        return None
    else:
        return None
885
+
886
+
887
def _truncate_array_strict(
    name: str, pred: BoundLiteralPredicate[L], transform: Callable[[Optional[L]], Optional[L]]
) -> Optional[UnboundPredicate[Any]]:
    """Build the strict projection of a literal predicate on the partition field ``name``.

    ``<`` and ``<=`` both project to ``LessThan``, ``>`` and ``>=`` both
    project to ``GreaterThan``, and ``!=`` projects to ``NotEqualTo``; each
    compares against the transformed boundary literal. Every other predicate
    kind yields ``None``.

    Args:
        name: Name of the partition field the result refers to.
        pred: Bound literal predicate on the source column.
        transform: Callable applied to the boundary value via ``_transform_literal``.

    Returns:
        The projected unbound predicate, or ``None`` when none is produced.
    """
    boundary = pred.literal

    # Pick the projected predicate class first, then build it in a single place.
    if isinstance(pred, (BoundLessThan, BoundLessThanOrEqual)):
        projected_cls = LessThan
    elif isinstance(pred, (BoundGreaterThan, BoundGreaterThanOrEqual)):
        projected_cls = GreaterThan
    elif isinstance(pred, BoundNotEqualTo):
        projected_cls = NotEqualTo
    else:
        return None

    return projected_cls(Reference(name), _transform_literal(transform, boundary))
900
+
901
+
769
902
def _truncate_array (
770
903
name : str , pred : BoundLiteralPredicate [L ], transform : Callable [[Optional [L ]], Optional [L ]]
771
904
) -> Optional [UnboundPredicate [Any ]]:
@@ -808,7 +941,8 @@ def _remove_transform(partition_name: str, pred: BoundPredicate[L]) -> UnboundPr
808
941
def _set_apply_transform(name: str, pred: BoundSetPredicate[L], transform: Callable[[L], L]) -> UnboundPredicate[Any]:
    """Re-target a set predicate at ``name`` with every literal transformed.

    Args:
        name: Name of the partition field the result refers to.
        pred: Bound set predicate on the source column.
        transform: Callable applied to each literal via ``_transform_literal``.

    Returns:
        The unbound form of ``pred`` on ``name`` over the transformed literals.

    Raises:
        ValueError: If ``pred`` is not a ``BoundSetPredicate``.
    """
    # Validate before touching ``pred.literals`` so a wrong predicate kind
    # surfaces as the ValueError below instead of an attribute lookup failure.
    if not isinstance(pred, BoundSetPredicate):
        raise ValueError(f"Unknown BoundSetPredicate: {pred} ")

    transformed_literals = {_transform_literal(transform, literal) for literal in pred.literals}
    return pred.as_unbound(Reference(name=name), literals=transformed_literals)
814
948
0 commit comments