File tree Expand file tree Collapse file tree 3 files changed +19
-9
lines changed Expand file tree Collapse file tree 3 files changed +19
-9
lines changed Original file line number Diff line number Diff line change @@ -382,9 +382,8 @@ def explode(self, column_ids: typing.Sequence[str]) -> ArrayValue:
382
382
for column_id in column_ids :
383
383
assert bigframes .dtypes .is_array_like (self .get_column_type (column_id ))
384
384
385
- return ArrayValue (
386
- nodes .ExplodeNode (child = self .node , column_ids = tuple (column_ids ))
387
- )
385
+ offsets = tuple (self .get_offset_for_name (id ) for id in column_ids )
386
+ return ArrayValue (nodes .ExplodeNode (child = self .node , column_ids = offsets ))
388
387
389
388
def _uniform_sampling (self , fraction : float ) -> ArrayValue :
390
389
"""Sampling the table on given fraction.
@@ -393,3 +392,6 @@ def _uniform_sampling(self, fraction: float) -> ArrayValue:
393
392
The row numbers of the result are non-deterministic; avoid using this.
394
393
"""
395
394
return ArrayValue (nodes .RandomSampleNode (self .node , fraction ))
395
+
396
+ def get_offset_for_name (self , name : str ):
397
+ return self .schema .names .index (name )
Original file line number Diff line number Diff line change @@ -401,8 +401,9 @@ def _uniform_sampling(self, fraction: float) -> UnorderedIR:
401
401
columns = columns ,
402
402
)
403
403
404
- def explode (self , column_ids : typing .Sequence [str ]) -> UnorderedIR :
404
+ def explode (self , offsets : typing .Sequence [int ]) -> UnorderedIR :
405
405
table = self ._to_ibis_expr ()
406
+ column_ids = tuple (table .columns [offset ] for offset in offsets )
406
407
407
408
# The offset array ensures null represents empty arrays after unnesting.
408
409
offset_array_id = bigframes .core .guid .generate_guid ("offset_array_" )
@@ -712,16 +713,20 @@ def _uniform_sampling(self, fraction: float) -> OrderedIR:
712
713
ordering = self ._ordering ,
713
714
)
714
715
715
- def explode (self , column_ids : typing .Sequence [str ]) -> OrderedIR :
716
+ def explode (self , offsets : typing .Sequence [int ]) -> OrderedIR :
716
717
table = self ._to_ibis_expr (ordering_mode = "unordered" , expose_hidden_cols = True )
718
+ column_ids = tuple (table .columns [offset ] for offset in offsets )
717
719
718
720
offset_array_id = bigframes .core .guid .generate_guid ("offset_array_" )
719
721
offset_array = (
720
722
vendored_ibis_ops .GenerateArray (
721
723
ibis .greatest (
722
724
0 ,
723
725
ibis .least (
724
- * [table [column_id ].length () - 1 for column_id in column_ids ]
726
+ * [
727
+ table [table .columns [offset ]].length () - 1
728
+ for offset in offsets
729
+ ]
725
730
),
726
731
)
727
732
)
Original file line number Diff line number Diff line change 42
42
OVERHEAD_VARIABLES = 5
43
43
44
44
45
+ COL_OFFSET = int
46
+
47
+
45
48
@dataclass (frozen = True )
46
49
class BigFrameNode :
47
50
"""
@@ -826,7 +829,7 @@ def variables_introduced(self) -> int:
826
829
827
830
@dataclass (frozen = True )
828
831
class ExplodeNode (UnaryNode ):
829
- column_ids : typing .Tuple [str , ...]
832
+ column_ids : typing .Tuple [COL_OFFSET , ...]
830
833
831
834
@property
832
835
def row_preserving (self ) -> bool :
@@ -844,9 +847,9 @@ def schema(self) -> schemata.ArraySchema:
844
847
self .child .schema .get_type (name ).pyarrow_dtype .value_type
845
848
),
846
849
)
847
- if name in self .column_ids
850
+ if offset in self .column_ids
848
851
else schemata .SchemaItem (name , self .child .schema .get_type (name ))
849
- for name in self .child .schema .names
852
+ for offset , name in enumerate ( self .child .schema .names )
850
853
)
851
854
return schemata .ArraySchema (items )
852
855
You can’t perform that action at this time.
0 commit comments