@@ -269,7 +269,7 @@ def _get_hidden_ordering_column(self, key: str) -> ibis_types.Column:
269
269
return typing .cast (ibis_types .Column , self ._hidden_ordering_column_names [key ])
270
270
271
271
def apply_limit (self , max_results : int ) -> ArrayValue :
272
- table = self .to_ibis_expr (
272
+ table = self ._to_ibis_expr (
273
273
ordering_mode = "order_by" ,
274
274
expose_hidden_cols = True ,
275
275
).limit (max_results )
@@ -285,11 +285,23 @@ def apply_limit(self, max_results: int) -> ArrayValue:
285
285
ordering = self ._ordering ,
286
286
)
287
287
288
- def filter (self , predicate : ibis_types .BooleanValue ) -> ArrayValue :
288
def filter(self, predicate_id: str, keep_null: bool = False) -> ArrayValue:
    """Filter rows using the column identified by ``predicate_id``.

    Args:
        predicate_id: id of the column, looked up through ``get_column``,
            that serves as the filter condition. It is treated as a boolean
            value (the cast is for type checking only).
        keep_null: when true, null entries of the condition are filled with
            ``True`` before filtering, so a null condition does not exclude
            a row.

    Returns:
        The result of ``_filter`` applied to the (possibly null-filled)
        condition.
    """
    mask = typing.cast(ibis_types.BooleanValue, self.get_column(predicate_id))
    if not keep_null:
        return self._filter(mask)

    # Replace nulls by True so they pass the predicate.
    true_scalar = typing.cast(ibis_types.BooleanScalar, ibis_types.literal(True))
    null_as_true = typing.cast(ibis_types.BooleanValue, mask.fillna(true_scalar))
    return self._filter(null_as_true)
299
+
300
def _filter(self, predicate_value: ibis_types.BooleanValue) -> ArrayValue:
    """Return a new value with ``predicate_value`` added to its predicates.

    The predicate must be a boolean series aligned with the table
    expression. The result's ordering is replaced by its
    ``with_non_sequential()`` variant.
    """
    builder = self.builder()
    builder.ordering = builder.ordering.with_non_sequential()

    # Existing predicates first, the new one last.
    combined = list(self._predicates)
    combined.append(predicate_value)
    builder.predicates = combined
    return builder.build()
294
306
295
307
def order_by (
@@ -310,7 +322,7 @@ def _uniform_sampling(self, fraction: float) -> ArrayValue:
310
322
.. warning::
311
323
The row numbers of the result are non-deterministic; avoid relying on them.
312
324
"""
313
- table = self .to_ibis_expr (
325
+ table = self ._to_ibis_expr (
314
326
ordering_mode = "order_by" , expose_hidden_cols = True , fraction = fraction
315
327
)
316
328
columns = [table [column_name ] for column_name in self ._column_names ]
@@ -342,7 +354,7 @@ def project_offsets(self) -> ArrayValue:
342
354
if self ._ordering .is_sequential :
343
355
return self
344
356
# TODO(tbergeron): Enforce total ordering
345
- table = self .to_ibis_expr (
357
+ table = self ._to_ibis_expr (
346
358
ordering_mode = "offset_col" , order_col_name = ORDER_ID_COLUMN
347
359
)
348
360
columns = [table [column_name ] for column_name in self ._column_names ]
@@ -412,7 +424,7 @@ def projection(self, columns: Iterable[ibis_types.Value]) -> ArrayValue:
412
424
def shape (self ) -> typing .Tuple [int , int ]:
413
425
"""Returns dimensions as (length, width) tuple."""
414
426
width = len (self .columns )
415
- count_expr = self .to_ibis_expr (ordering_mode = "unordered" ).count ()
427
+ count_expr = self ._to_ibis_expr (ordering_mode = "unordered" ).count ()
416
428
sql = self ._session .ibis_client .compile (count_expr )
417
429
row_iterator , _ = self ._session ._start_query (
418
430
sql = sql ,
@@ -435,7 +447,7 @@ def concat(self, other: typing.Sequence[ArrayValue]) -> ArrayValue:
435
447
)
436
448
for i , expr in enumerate ([self , * other ]):
437
449
ordering_prefix = str (i ).zfill (prefix_size )
438
- table = expr .to_ibis_expr (
450
+ table = expr ._to_ibis_expr (
439
451
ordering_mode = "string_encoded" , order_col_name = ORDER_ID_COLUMN
440
452
)
441
453
# Rename the value columns based on horizontal offset before applying union.
@@ -522,7 +534,7 @@ def aggregate(
522
534
by_column_id: column id of the aggregation key, this is preserved through the transform
523
535
dropna: whether null keys should be dropped
524
536
"""
525
- table = self .to_ibis_expr (ordering_mode = "unordered" )
537
+ table = self ._to_ibis_expr (ordering_mode = "unordered" )
526
538
stats = {
527
539
col_out : agg_op ._as_ibis (table [col_in ])
528
540
for col_in , agg_op , col_out in aggregations
@@ -541,7 +553,7 @@ def aggregate(
541
553
expr = ArrayValue (self ._session , result , columns = columns , ordering = ordering )
542
554
if dropna :
543
555
for column_id in by_column_ids :
544
- expr = expr .filter (
556
+ expr = expr ._filter (
545
557
ops .notnull_op ._as_ibis (expr .get_column (column_id ))
546
558
)
547
559
# Can maybe remove this as Ordering id is redundant as by_column is unique after aggregation
@@ -572,7 +584,7 @@ def corr_aggregate(
572
584
Arguments:
573
585
corr_aggregations: left_column_id, right_column_id, output_column_id tuples
574
586
"""
575
- table = self .to_ibis_expr (ordering_mode = "unordered" )
587
+ table = self ._to_ibis_expr (ordering_mode = "unordered" )
576
588
stats = {
577
589
col_out : table [col_left ].corr (table [col_right ], how = "pop" )
578
590
for col_left , col_right , col_out in corr_aggregations
@@ -646,7 +658,24 @@ def project_window_op(
646
658
# TODO(tbergeron): Automatically track analytic expression usage and defer reprojection until required for valid query generation.
647
659
return result ._reproject_to_table () if not skip_reproject_unsafe else result
648
660
649
- def to_ibis_expr (
661
def to_sql(
    self,
    ordering_mode: Literal[
        "order_by", "string_encoded", "offset_col", "unordered"
    ] = "order_by",
    order_col_name: Optional[str] = ORDER_ID_COLUMN,
    # This default is only forwarded below, never mutated in this method.
    col_id_overrides: typing.Mapping[str, str] = {},
) -> str:
    """Compile this value to SQL text with the session's ibis client.

    Args:
        ordering_mode: forwarded unchanged to ``_to_ibis_expr``.
        order_col_name: forwarded unchanged to ``_to_ibis_expr``.
        col_id_overrides: forwarded unchanged to ``_to_ibis_expr``.

    Returns:
        The output of ``ibis_client.compile`` for the generated expression,
        typed as ``str``.
    """
    compile_sql = self._session.ibis_client.compile
    ibis_expr = self._to_ibis_expr(
        ordering_mode=ordering_mode,
        order_col_name=order_col_name,
        col_id_overrides=col_id_overrides,
    )
    return typing.cast(str, compile_sql(ibis_expr))
677
+
678
+ def _to_ibis_expr (
650
679
self ,
651
680
ordering_mode : Literal [
652
681
"order_by" , "string_encoded" , "offset_col" , "unordered"
@@ -814,7 +843,7 @@ def start_query(
814
843
# a LocalSession for unit testing.
815
844
# TODO(swast): Add a timeout here? If the query is taking a long time,
816
845
# maybe we just print the job metadata that we have so far?
817
- table = self .to_ibis_expr (expose_hidden_cols = expose_extra_columns )
846
+ table = self ._to_ibis_expr (expose_hidden_cols = expose_extra_columns )
818
847
sql = self ._session .ibis_client .compile (table ) # type:ignore
819
848
return self ._session ._start_query (
820
849
sql = sql ,
@@ -833,7 +862,7 @@ def _reproject_to_table(self) -> ArrayValue:
833
862
some operations such as window operations that cannot be used
834
863
recursively in projections.
835
864
"""
836
- table = self .to_ibis_expr (
865
+ table = self ._to_ibis_expr (
837
866
ordering_mode = "unordered" ,
838
867
expose_hidden_cols = True ,
839
868
)
@@ -912,7 +941,7 @@ def unpivot(
912
941
Returns:
913
942
ArrayValue: The unpivoted ArrayValue
914
943
"""
915
- table = self .to_ibis_expr (ordering_mode = "offset_col" )
944
+ table = self ._to_ibis_expr (ordering_mode = "offset_col" )
916
945
sub_expressions = []
917
946
918
947
# Use ibis memtable to infer type of rowlabels (if possible)
@@ -1054,7 +1083,7 @@ def slice(
1054
1083
start = start if (start is not None ) else last_offset
1055
1084
cond_list .append ((start - expr_with_offsets .offsets ) % (- step ) == 0 )
1056
1085
1057
- sliced_expr = expr_with_offsets .filter (
1086
+ sliced_expr = expr_with_offsets ._filter (
1058
1087
functools .reduce (lambda x , y : x & y , cond_list )
1059
1088
)
1060
1089
return sliced_expr if step > 0 else sliced_expr .reversed ()
0 commit comments