@@ -336,8 +336,7 @@ def reset_index(self, drop: bool = True) -> Block:
             self.session._default_index_type
             == bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64
         ):
-            new_index_col_id = guid.generate_guid()
-            expr = expr.promote_offsets(new_index_col_id)
+            expr, new_index_col_id = expr.promote_offsets()
             new_index_cols = [new_index_col_id]
         elif self.session._default_index_type == bigframes.enums.DefaultIndexKind.NULL:
             new_index_cols = []
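This hunk shows the recurring shape of the whole refactor: `promote_offsets` (and the sibling methods below) no longer accepts a caller-minted guid and instead returns the freshly generated column id alongside the new expression. A minimal sketch of the convention, assuming a toy stand-in rather than the real `ArrayValue` (the class and the `offsets_N` id format are invented here):

```python
# Illustrative sketch only: ToyArrayValue is not the bigframes API.
import itertools
from typing import Tuple

_ids = itertools.count()


class ToyArrayValue:
    def __init__(self, columns=()):
        self.columns = tuple(columns)

    def promote_offsets(self) -> Tuple["ToyArrayValue", str]:
        # The expression mints its own id and hands it back to the caller.
        new_id = f"offsets_{next(_ids)}"
        return ToyArrayValue((*self.columns, new_id)), new_id


expr = ToyArrayValue(("a", "b"))
expr, new_index_col_id = expr.promote_offsets()  # same call shape as the diff
assert new_index_col_id in expr.columns
```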
@@ -846,9 +845,7 @@ def project_expr(
         """
         Apply a scalar expression to the block. Creates a new column to store the result.
         """
-        # TODO(tbergeron): handle labels safely so callers don't need to
-        result_id = guid.generate_guid()
-        array_val = self._expr.project_to_id(expr, result_id)
+        array_val, result_id = self._expr.project_to_id(expr)
         block = Block(
             array_val,
             index_columns=self.index_columns,
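`project_to_id` follows the same pattern: the output-id parameter is dropped and the id comes back in the return tuple. A hedged before/after sketch, with stand-in names that only mirror the call shape of the diff:

```python
# ToyExpr is illustrative; only the signatures match the change above.
import itertools
from typing import Tuple

_guids = itertools.count()


class ToyExpr:
    def __init__(self, cols=None):
        self.cols = dict(cols or {})

    # Old style: the caller generated and passed the output id.
    def project_to_id_old(self, expression, output_id: str) -> "ToyExpr":
        return ToyExpr({**self.cols, output_id: expression})

    # New style: the id is allocated internally and returned.
    def project_to_id(self, expression) -> Tuple["ToyExpr", str]:
        output_id = f"col_{next(_guids)}"
        return ToyExpr({**self.cols, output_id: expression}), output_id


array_val, result_id = ToyExpr({"x": "input"}).project_to_id("x + 1")
assert result_id in array_val.cols
```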
@@ -971,12 +968,10 @@ def apply_window_op(
         for key in window_spec.grouping_keys:
             block, not_null_id = block.apply_unary_op(key, ops.notnull_op)
             block = block.filter_by_id(not_null_id).drop_columns([not_null_id])
-        result_id = guid.generate_guid()
-        expr = block._expr.project_window_op(
+        expr, result_id = block._expr.project_window_op(
             column,
             op,
             window_spec,
-            result_id,
             skip_reproject_unsafe=skip_reproject_unsafe,
             never_skip_nulls=never_skip_nulls,
         )
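For `project_window_op` the change is purely in the signature: `result_id` disappears from the argument list while the keyword options stay put. A sketch of the new shape; the body here is hypothetical and only the parameter list mirrors the diff:

```python
import itertools
from typing import Any, Tuple

_win_ids = itertools.count()


class ToyWindowExpr:
    def project_window_op(
        self,
        column: str,
        op: Any,
        window_spec: Any,
        *,
        skip_reproject_unsafe: bool = False,
        never_skip_nulls: bool = False,
    ) -> Tuple["ToyWindowExpr", str]:
        # result_id is no longer supplied by the caller; it is minted here.
        return self, f"win_{next(_win_ids)}"


expr, result_id = ToyWindowExpr().project_window_op(
    "col_a", op="sum", window_spec=None, never_skip_nulls=True
)
```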
@@ -1005,8 +1000,7 @@ def create_constant(
         label: Label = None,
         dtype: typing.Optional[bigframes.dtypes.Dtype] = None,
     ) -> typing.Tuple[Block, str]:
-        result_id = guid.generate_guid()
-        expr = self.expr.create_constant(result_id, scalar_constant, dtype=dtype)
+        expr, result_id = self.expr.create_constant(scalar_constant, dtype=dtype)
         # Create index copy with label inserted
         # See: https://pandas.pydata.org/docs/reference/api/pandas.Index.insert.html
         labels = self.column_labels.insert(len(self.column_labels), label)
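`create_constant` additionally changes its argument order: the id that used to lead the parameter list is gone, so the scalar moves to the front. A toy illustration under the same stand-in assumptions (invented class and id format):

```python
import itertools
from typing import Optional, Tuple

_const_ids = itertools.count()


class ToyConstExpr:
    def __init__(self, columns=()):
        self.columns = tuple(columns)

    # Previously: create_constant(result_id, scalar_constant, dtype=dtype)
    def create_constant(
        self, scalar_constant, dtype: Optional[str] = None
    ) -> Tuple["ToyConstExpr", str]:
        result_id = f"const_{next(_const_ids)}"
        new_col = (result_id, scalar_constant, dtype)
        return ToyConstExpr((*self.columns, new_col)), result_id


expr, result_id = ToyConstExpr().create_constant(0, dtype="Int64")
```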
@@ -1063,10 +1057,9 @@ def aggregate_all_and_stack(
                 )
                 for col_id in self.value_columns
             ]
-            index_id = guid.generate_guid()
-            result_expr = self.expr.aggregate(
+            result_expr, index_id = self.expr.aggregate(
                 aggregations, dropna=dropna
-            ).create_constant(index_id, None, None)
+            ).create_constant(None, None)
             # Transpose as last operation so that final block has valid transpose cache
             return Block(
                 result_expr,
@@ -1077,8 +1070,7 @@ def aggregate_all_and_stack(
         else:  # axis_n == 1
             # using offsets as identity to group on.
             # TODO: Allow to promote identity/total_order columns instead for better perf
-            offset_col = guid.generate_guid()
-            expr_with_offsets = self.expr.promote_offsets(offset_col)
+            expr_with_offsets, offset_col = self.expr.promote_offsets()
             stacked_expr, (_, value_col_ids, passthrough_cols,) = unpivot(
                 expr_with_offsets,
                 row_labels=self.column_labels,
@@ -1224,8 +1216,7 @@ def aggregate(

         names: typing.List[Label] = []
         if len(by_column_ids) == 0:
-            label_id = guid.generate_guid()
-            result_expr = result_expr.create_constant(label_id, 0, pd.Int64Dtype())
+            result_expr, label_id = result_expr.create_constant(0, pd.Int64Dtype())
             index_columns = (label_id,)
             names = [None]
         else:
@@ -1275,8 +1266,7 @@ def get_stat(
             for stat in stats_to_fetch
         ]
         expr = self.expr.aggregate(aggregations)
-        offset_index_id = guid.generate_guid()
-        expr = expr.promote_offsets(offset_index_id)
+        expr, offset_index_id = expr.promote_offsets()
         block = Block(
             expr,
             index_columns=[offset_index_id],
@@ -1303,8 +1293,7 @@ def get_binary_stat(
             )
         ]
         expr = self.expr.aggregate(aggregations)
-        offset_index_id = guid.generate_guid()
-        expr = expr.promote_offsets(offset_index_id)
+        expr, offset_index_id = expr.promote_offsets()
         block = Block(
             expr,
             index_columns=[offset_index_id],
@@ -1406,9 +1395,10 @@ def explode(
         expr = self.expr.explode(column_ids)

         if ignore_index:
-            new_index_ids = guid.generate_guid()
+            expr = expr.drop_columns(self.index_columns)
+            expr, new_index_ids = expr.promote_offsets()
             return Block(
-                expr.drop_columns(self.index_columns).promote_offsets(new_index_ids),
+                expr,
                 column_labels=self.column_labels,
                 # Initiates default index creation using the block constructor.
                 index_columns=[new_index_ids],
@@ -1593,8 +1583,7 @@ def retrieve_repr_request_results(
         return computed_df, count, query_job

     def promote_offsets(self, label: Label = None) -> typing.Tuple[Block, str]:
-        result_id = guid.generate_guid()
-        expr = self._expr.promote_offsets(result_id)
+        expr, result_id = self._expr.promote_offsets()
         return (
             Block(
                 expr,
@@ -1611,13 +1600,11 @@ def add_prefix(self, prefix: str, axis: str | int | None = None) -> Block:
             expr = self._expr
             new_index_cols = []
             for index_col in self._index_columns:
-                new_col = guid.generate_guid()
-                expr = expr.project_to_id(
+                expr, new_col = expr.project_to_id(
                     expression=ops.add_op.as_expr(
                         ex.const(prefix),
                         ops.AsTypeOp(to_type="string").as_expr(index_col),
                     ),
-                    output_id=new_col,
                 )
                 new_index_cols.append(new_col)
             expr = expr.select_columns((*new_index_cols, *self.value_columns))
@@ -1637,13 +1624,11 @@ def add_suffix(self, suffix: str, axis: str | int | None = None) -> Block:
             expr = self._expr
             new_index_cols = []
             for index_col in self._index_columns:
-                new_col = guid.generate_guid()
-                expr = expr.project_to_id(
+                expr, new_col = expr.project_to_id(
                     expression=ops.add_op.as_expr(
                         ops.AsTypeOp(to_type="string").as_expr(index_col),
                         ex.const(suffix),
                     ),
-                    output_id=new_col,
                 )
                 new_index_cols.append(new_col)
             expr = expr.select_columns((*new_index_cols, *self.value_columns))
@@ -1785,8 +1770,7 @@ def melt(
         )

         if create_offsets_index:
-            index_id = guid.generate_guid()
-            unpivot_expr = unpivot_expr.promote_offsets(index_id)
+            unpivot_expr, index_id = unpivot_expr.promote_offsets()
             index_cols = [index_id]
         else:
             index_cols = []
@@ -2012,12 +1996,10 @@ def merge(

         coalesced_ids = []
         for left_id, right_id in zip(left_join_ids, right_join_ids):
-            coalesced_id = guid.generate_guid()
-            joined_expr = joined_expr.project_to_id(
+            joined_expr, coalesced_id = joined_expr.project_to_id(
                 ops.coalesce_op.as_expr(
                     get_column_left[left_id], get_column_right[right_id]
                 ),
-                coalesced_id,
             )
             coalesced_ids.append(coalesced_id)

@@ -2076,8 +2058,7 @@ def merge(
             expr = joined_expr
             index_columns = []
         else:
-            offset_index_id = guid.generate_guid()
-            expr = joined_expr.promote_offsets(offset_index_id)
+            expr, offset_index_id = joined_expr.promote_offsets()
             index_columns = [offset_index_id]

         return Block(expr, index_columns=index_columns, column_labels=labels)
@@ -2442,8 +2423,7 @@ def _get_rows_as_json_values(self) -> Block:
         # expression.
         # TODO(shobs): Replace direct SQL manipulation by structured expression
         # manipulation
-        ordering_column_name = guid.generate_guid()
-        expr = self.expr.promote_offsets(ordering_column_name)
+        expr, ordering_column_name = self.expr.promote_offsets()
         expr_sql = self.session._to_sql(expr)

         # Names of the columns to serialize for the row.
@@ -2869,8 +2849,7 @@ def coalesce_columns(
            expr = expr.drop_columns([left_id])
        elif how == "outer":
            coalesced_id = guid.generate_guid()
-            expr = expr.project_to_id(
-                ops.coalesce_op.as_expr(left_id, right_id), coalesced_id
+            expr, coalesced_id = expr.project_to_id(
+                ops.coalesce_op.as_expr(left_id, right_id)
            )
            expr = expr.drop_columns([left_id, right_id])
            result_ids.append(coalesced_id)
@@ -3047,7 +3027,7 @@ def unpivot(
     explode_offsets_id = labels_mapping[labels_array.column_ids[-1]]

     # Build the output rows as a case statement that selects between the N input columns
-    unpivot_exprs: List[Tuple[ex.Expression, str]] = []
+    unpivot_exprs: List[ex.Expression] = []
     # Supports producing multiple stacked output columns for stacking only part of hierarchical index
     for input_ids in unpivot_columns:
         # row explode offset used to choose the input column
@@ -3064,11 +3044,11 @@ def unpivot(
             )
         )
         col_expr = ops.case_when_op.as_expr(*cases)
-        unpivot_exprs.append((col_expr, guid.generate_guid()))
+        unpivot_exprs.append(col_expr)

-    unpivot_col_ids = [id for _, id in unpivot_exprs]
+    joined_array, unpivot_col_ids = joined_array.compute_values(unpivot_exprs)

-    return joined_array.compute_values(unpivot_exprs).select_columns(
+    return joined_array.select_columns(
         [*index_col_ids, *unpivot_col_ids, *new_passthrough_cols]
     ), (tuple(index_col_ids), tuple(unpivot_col_ids), tuple(new_passthrough_cols))

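The `unpivot` hunks close the loop: instead of pairing each case expression with a pre-made guid, the caller hands `compute_values` a bare list of expressions and receives the generated column ids back. A minimal sketch under the same stand-in assumptions as the earlier examples (invented class, invented id format):

```python
import itertools
from typing import List, Sequence, Tuple

_uids = itertools.count()


class ToyArray:
    def __init__(self, cols=()):
        self.cols = tuple(cols)

    def compute_values(self, exprs: Sequence[object]) -> Tuple["ToyArray", List[str]]:
        # One fresh id per expression, returned in order.
        new_ids = [f"uval_{next(_uids)}" for _ in exprs]
        return ToyArray((*self.cols, *new_ids)), new_ids

    def select_columns(self, ids: Sequence[str]) -> "ToyArray":
        return ToyArray(tuple(ids))


joined_array = ToyArray(("passthrough",))
joined_array, unpivot_col_ids = joined_array.compute_values(["case_a", "case_b"])
result = joined_array.select_columns(unpivot_col_ids)
assert result.cols == tuple(unpivot_col_ids)
```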