Skip to content

Commit b00db7e

Browse files
refactor: Make column id namespaces explicit (#982)
1 parent f7c03dc commit b00db7e

File tree

9 files changed

+165
-109
lines changed

9 files changed

+165
-109
lines changed

bigframes/core/__init__.py

Lines changed: 62 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import functools
1919
import io
2020
import typing
21-
from typing import Iterable, Optional, Sequence, Tuple
21+
from typing import Iterable, List, Optional, Sequence, Tuple
2222
import warnings
2323

2424
import google.cloud.bigquery
@@ -106,6 +106,7 @@ def from_table(
106106

107107
@property
108108
def column_ids(self) -> typing.Sequence[str]:
109+
"""Returns column ids as strings."""
109110
return self.schema.names
110111

111112
@property
@@ -182,10 +183,11 @@ def order_by(self, by: Sequence[OrderingExpression]) -> ArrayValue:
182183
def reversed(self) -> ArrayValue:
183184
return ArrayValue(nodes.ReversedNode(child=self.node))
184185

185-
def promote_offsets(self, col_id: str) -> ArrayValue:
186+
def promote_offsets(self) -> Tuple[ArrayValue, str]:
186187
"""
187188
Convenience function to promote copy of column offsets to a value column. Can be used to reset index.
188189
"""
190+
col_id = self._gen_namespaced_uid()
189191
if self.node.order_ambiguous and not (self.session._strictly_ordered):
190192
if not self.session._allows_ambiguity:
191193
raise ValueError(
@@ -197,21 +199,30 @@ def promote_offsets(self, col_id: str) -> ArrayValue:
197199
bigframes.exceptions.AmbiguousWindowWarning,
198200
)
199201

200-
return ArrayValue(nodes.PromoteOffsetsNode(child=self.node, col_id=col_id))
202+
return (
203+
ArrayValue(nodes.PromoteOffsetsNode(child=self.node, col_id=col_id)),
204+
col_id,
205+
)
201206

202207
def concat(self, other: typing.Sequence[ArrayValue]) -> ArrayValue:
203208
"""Append together multiple ArrayValue objects."""
204209
return ArrayValue(
205210
nodes.ConcatNode(children=tuple([self.node, *[val.node for val in other]]))
206211
)
207212

208-
def compute_values(self, assignments: Sequence[Tuple[ex.Expression, str]]):
209-
return ArrayValue(
210-
nodes.ProjectionNode(child=self.node, assignments=tuple(assignments))
213+
def compute_values(self, assignments: Sequence[ex.Expression]):
214+
col_ids = self._gen_namespaced_uids(len(assignments))
215+
ex_id_pairs = tuple((ex, id) for ex, id in zip(assignments, col_ids))
216+
return (
217+
ArrayValue(nodes.ProjectionNode(child=self.node, assignments=ex_id_pairs)),
218+
col_ids,
211219
)
212220

213-
def project_to_id(self, expression: ex.Expression, output_id: str):
214-
return self.compute_values(((expression, output_id),))
221+
def project_to_id(self, expression: ex.Expression):
222+
array_val, ids = self.compute_values(
223+
[expression],
224+
)
225+
return array_val, ids[0]
215226

216227
def assign(self, source_id: str, destination_id: str) -> ArrayValue:
217228
if destination_id in self.column_ids: # Mutate case
@@ -234,19 +245,22 @@ def assign(self, source_id: str, destination_id: str) -> ArrayValue:
234245

235246
def create_constant(
236247
self,
237-
destination_id: str,
238248
value: typing.Any,
239249
dtype: typing.Optional[bigframes.dtypes.Dtype],
240-
) -> ArrayValue:
250+
) -> Tuple[ArrayValue, str]:
251+
destination_id = self._gen_namespaced_uid()
241252
if pandas.isna(value):
242253
# Need to assign a data type when value is NaN.
243254
dtype = dtype or bigframes.dtypes.DEFAULT_DTYPE
244255

245-
return ArrayValue(
246-
nodes.ProjectionNode(
247-
child=self.node,
248-
assignments=((ex.const(value, dtype), destination_id),),
249-
)
256+
return (
257+
ArrayValue(
258+
nodes.ProjectionNode(
259+
child=self.node,
260+
assignments=((ex.const(value, dtype), destination_id),),
261+
)
262+
),
263+
destination_id,
250264
)
251265

252266
def select_columns(self, column_ids: typing.Sequence[str]) -> ArrayValue:
@@ -297,11 +311,10 @@ def project_window_op(
297311
column_name: str,
298312
op: agg_ops.UnaryWindowOp,
299313
window_spec: WindowSpec,
300-
output_name=None,
301314
*,
302315
never_skip_nulls=False,
303316
skip_reproject_unsafe: bool = False,
304-
) -> ArrayValue:
317+
) -> Tuple[ArrayValue, str]:
305318
"""
306319
Creates a new expression based on this expression with unary operation applied to one column.
307320
column_name: the id of the input column present in the expression
@@ -324,16 +337,20 @@ def project_window_op(
324337
bigframes.exceptions.AmbiguousWindowWarning,
325338
)
326339

327-
return ArrayValue(
328-
nodes.WindowOpNode(
329-
child=self.node,
330-
column_name=column_name,
331-
op=op,
332-
window_spec=window_spec,
333-
output_name=output_name,
334-
never_skip_nulls=never_skip_nulls,
335-
skip_reproject_unsafe=skip_reproject_unsafe,
336-
)
340+
output_name = self._gen_namespaced_uid()
341+
return (
342+
ArrayValue(
343+
nodes.WindowOpNode(
344+
child=self.node,
345+
column_name=column_name,
346+
op=op,
347+
window_spec=window_spec,
348+
output_name=output_name,
349+
never_skip_nulls=never_skip_nulls,
350+
skip_reproject_unsafe=skip_reproject_unsafe,
351+
)
352+
),
353+
output_name,
337354
)
338355

339356
def _reproject_to_table(self) -> ArrayValue:
@@ -410,3 +427,21 @@ def _uniform_sampling(self, fraction: float) -> ArrayValue:
410427

411428
def get_offset_for_name(self, name: str):
412429
return self.schema.names.index(name)
430+
431+
# Deterministically generate namespaced ids for new variables
432+
# These new ids are only unique within the current namespace.
433+
# Many operations, such as joins, create new namespaces. See: BigFrameNode.defines_namespace
434+
# When migrating to integer ids, these will generate the next available integer, in order to densely pack ids
435+
# this will help represent variables sets as compact bitsets
436+
def _gen_namespaced_uid(self) -> str:
437+
return self._gen_namespaced_uids(1)[0]
438+
439+
def _gen_namespaced_uids(self, n: int) -> List[str]:
440+
i = len(self.node.defined_variables)
441+
genned_ids: List[str] = []
442+
while len(genned_ids) < n:
443+
attempted_id = f"col_{i}"
444+
if attempted_id not in self.node.defined_variables:
445+
genned_ids.append(attempted_id)
446+
i = i + 1
447+
return genned_ids

bigframes/core/blocks.py

Lines changed: 26 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -336,8 +336,7 @@ def reset_index(self, drop: bool = True) -> Block:
336336
self.session._default_index_type
337337
== bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64
338338
):
339-
new_index_col_id = guid.generate_guid()
340-
expr = expr.promote_offsets(new_index_col_id)
339+
expr, new_index_col_id = expr.promote_offsets()
341340
new_index_cols = [new_index_col_id]
342341
elif self.session._default_index_type == bigframes.enums.DefaultIndexKind.NULL:
343342
new_index_cols = []
@@ -846,9 +845,7 @@ def project_expr(
846845
"""
847846
Apply a scalar expression to the block. Creates a new column to store the result.
848847
"""
849-
# TODO(tbergeron): handle labels safely so callers don't need to
850-
result_id = guid.generate_guid()
851-
array_val = self._expr.project_to_id(expr, result_id)
848+
array_val, result_id = self._expr.project_to_id(expr)
852849
block = Block(
853850
array_val,
854851
index_columns=self.index_columns,
@@ -971,12 +968,10 @@ def apply_window_op(
971968
for key in window_spec.grouping_keys:
972969
block, not_null_id = block.apply_unary_op(key, ops.notnull_op)
973970
block = block.filter_by_id(not_null_id).drop_columns([not_null_id])
974-
result_id = guid.generate_guid()
975-
expr = block._expr.project_window_op(
971+
expr, result_id = block._expr.project_window_op(
976972
column,
977973
op,
978974
window_spec,
979-
result_id,
980975
skip_reproject_unsafe=skip_reproject_unsafe,
981976
never_skip_nulls=never_skip_nulls,
982977
)
@@ -1005,8 +1000,7 @@ def create_constant(
10051000
label: Label = None,
10061001
dtype: typing.Optional[bigframes.dtypes.Dtype] = None,
10071002
) -> typing.Tuple[Block, str]:
1008-
result_id = guid.generate_guid()
1009-
expr = self.expr.create_constant(result_id, scalar_constant, dtype=dtype)
1003+
expr, result_id = self.expr.create_constant(scalar_constant, dtype=dtype)
10101004
# Create index copy with label inserted
10111005
# See: https://pandas.pydata.org/docs/reference/api/pandas.Index.insert.html
10121006
labels = self.column_labels.insert(len(self.column_labels), label)
@@ -1063,10 +1057,9 @@ def aggregate_all_and_stack(
10631057
)
10641058
for col_id in self.value_columns
10651059
]
1066-
index_id = guid.generate_guid()
1067-
result_expr = self.expr.aggregate(
1060+
result_expr, index_id = self.expr.aggregate(
10681061
aggregations, dropna=dropna
1069-
).create_constant(index_id, None, None)
1062+
).create_constant(None, None)
10701063
# Transpose as last operation so that final block has valid transpose cache
10711064
return Block(
10721065
result_expr,
@@ -1077,8 +1070,7 @@ def aggregate_all_and_stack(
10771070
else: # axis_n == 1
10781071
# using offsets as identity to group on.
10791072
# TODO: Allow to promote identity/total_order columns instead for better perf
1080-
offset_col = guid.generate_guid()
1081-
expr_with_offsets = self.expr.promote_offsets(offset_col)
1073+
expr_with_offsets, offset_col = self.expr.promote_offsets()
10821074
stacked_expr, (_, value_col_ids, passthrough_cols,) = unpivot(
10831075
expr_with_offsets,
10841076
row_labels=self.column_labels,
@@ -1224,8 +1216,7 @@ def aggregate(
12241216

12251217
names: typing.List[Label] = []
12261218
if len(by_column_ids) == 0:
1227-
label_id = guid.generate_guid()
1228-
result_expr = result_expr.create_constant(label_id, 0, pd.Int64Dtype())
1219+
result_expr, label_id = result_expr.create_constant(0, pd.Int64Dtype())
12291220
index_columns = (label_id,)
12301221
names = [None]
12311222
else:
@@ -1275,8 +1266,7 @@ def get_stat(
12751266
for stat in stats_to_fetch
12761267
]
12771268
expr = self.expr.aggregate(aggregations)
1278-
offset_index_id = guid.generate_guid()
1279-
expr = expr.promote_offsets(offset_index_id)
1269+
expr, offset_index_id = expr.promote_offsets()
12801270
block = Block(
12811271
expr,
12821272
index_columns=[offset_index_id],
@@ -1303,8 +1293,7 @@ def get_binary_stat(
13031293
)
13041294
]
13051295
expr = self.expr.aggregate(aggregations)
1306-
offset_index_id = guid.generate_guid()
1307-
expr = expr.promote_offsets(offset_index_id)
1296+
expr, offset_index_id = expr.promote_offsets()
13081297
block = Block(
13091298
expr,
13101299
index_columns=[offset_index_id],
@@ -1406,9 +1395,10 @@ def explode(
14061395
expr = self.expr.explode(column_ids)
14071396

14081397
if ignore_index:
1409-
new_index_ids = guid.generate_guid()
1398+
expr = expr.drop_columns(self.index_columns)
1399+
expr, new_index_ids = expr.promote_offsets()
14101400
return Block(
1411-
expr.drop_columns(self.index_columns).promote_offsets(new_index_ids),
1401+
expr,
14121402
column_labels=self.column_labels,
14131403
# Initiates default index creation using the block constructor.
14141404
index_columns=[new_index_ids],
@@ -1593,8 +1583,7 @@ def retrieve_repr_request_results(
15931583
return computed_df, count, query_job
15941584

15951585
def promote_offsets(self, label: Label = None) -> typing.Tuple[Block, str]:
1596-
result_id = guid.generate_guid()
1597-
expr = self._expr.promote_offsets(result_id)
1586+
expr, result_id = self._expr.promote_offsets()
15981587
return (
15991588
Block(
16001589
expr,
@@ -1611,13 +1600,11 @@ def add_prefix(self, prefix: str, axis: str | int | None = None) -> Block:
16111600
expr = self._expr
16121601
new_index_cols = []
16131602
for index_col in self._index_columns:
1614-
new_col = guid.generate_guid()
1615-
expr = expr.project_to_id(
1603+
expr, new_col = expr.project_to_id(
16161604
expression=ops.add_op.as_expr(
16171605
ex.const(prefix),
16181606
ops.AsTypeOp(to_type="string").as_expr(index_col),
16191607
),
1620-
output_id=new_col,
16211608
)
16221609
new_index_cols.append(new_col)
16231610
expr = expr.select_columns((*new_index_cols, *self.value_columns))
@@ -1637,13 +1624,11 @@ def add_suffix(self, suffix: str, axis: str | int | None = None) -> Block:
16371624
expr = self._expr
16381625
new_index_cols = []
16391626
for index_col in self._index_columns:
1640-
new_col = guid.generate_guid()
1641-
expr = expr.project_to_id(
1627+
expr, new_col = expr.project_to_id(
16421628
expression=ops.add_op.as_expr(
16431629
ops.AsTypeOp(to_type="string").as_expr(index_col),
16441630
ex.const(suffix),
16451631
),
1646-
output_id=new_col,
16471632
)
16481633
new_index_cols.append(new_col)
16491634
expr = expr.select_columns((*new_index_cols, *self.value_columns))
@@ -1785,8 +1770,7 @@ def melt(
17851770
)
17861771

17871772
if create_offsets_index:
1788-
index_id = guid.generate_guid()
1789-
unpivot_expr = unpivot_expr.promote_offsets(index_id)
1773+
unpivot_expr, index_id = unpivot_expr.promote_offsets()
17901774
index_cols = [index_id]
17911775
else:
17921776
index_cols = []
@@ -2012,12 +1996,10 @@ def merge(
20121996

20131997
coalesced_ids = []
20141998
for left_id, right_id in zip(left_join_ids, right_join_ids):
2015-
coalesced_id = guid.generate_guid()
2016-
joined_expr = joined_expr.project_to_id(
1999+
joined_expr, coalesced_id = joined_expr.project_to_id(
20172000
ops.coalesce_op.as_expr(
20182001
get_column_left[left_id], get_column_right[right_id]
20192002
),
2020-
coalesced_id,
20212003
)
20222004
coalesced_ids.append(coalesced_id)
20232005

@@ -2076,8 +2058,7 @@ def merge(
20762058
expr = joined_expr
20772059
index_columns = []
20782060
else:
2079-
offset_index_id = guid.generate_guid()
2080-
expr = joined_expr.promote_offsets(offset_index_id)
2061+
expr, offset_index_id = joined_expr.promote_offsets()
20812062
index_columns = [offset_index_id]
20822063

20832064
return Block(expr, index_columns=index_columns, column_labels=labels)
@@ -2442,8 +2423,7 @@ def _get_rows_as_json_values(self) -> Block:
24422423
# expression.
24432424
# TODO(shobs): Replace direct SQL manipulation by structured expression
24442425
# manipulation
2445-
ordering_column_name = guid.generate_guid()
2446-
expr = self.expr.promote_offsets(ordering_column_name)
2426+
expr, ordering_column_name = self.expr.promote_offsets()
24472427
expr_sql = self.session._to_sql(expr)
24482428

24492429
# Names of the columns to serialize for the row.
@@ -2869,8 +2849,8 @@ def coalesce_columns(
28692849
expr = expr.drop_columns([left_id])
28702850
elif how == "outer":
28712851
coalesced_id = guid.generate_guid()
2872-
expr = expr.project_to_id(
2873-
ops.coalesce_op.as_expr(left_id, right_id), coalesced_id
2852+
expr, coalesced_id = expr.project_to_id(
2853+
ops.coalesce_op.as_expr(left_id, right_id)
28742854
)
28752855
expr = expr.drop_columns([left_id, right_id])
28762856
result_ids.append(coalesced_id)
@@ -3047,7 +3027,7 @@ def unpivot(
30473027
explode_offsets_id = labels_mapping[labels_array.column_ids[-1]]
30483028

30493029
# Build the output rows as a case statement that selects between the N input columns
3050-
unpivot_exprs: List[Tuple[ex.Expression, str]] = []
3030+
unpivot_exprs: List[ex.Expression] = []
30513031
# Supports producing multiple stacked output columns for stacking only part of hierarchical index
30523032
for input_ids in unpivot_columns:
30533033
# row explode offset used to choose the input column
@@ -3064,11 +3044,11 @@ def unpivot(
30643044
)
30653045
)
30663046
col_expr = ops.case_when_op.as_expr(*cases)
3067-
unpivot_exprs.append((col_expr, guid.generate_guid()))
3047+
unpivot_exprs.append(col_expr)
30683048

3069-
unpivot_col_ids = [id for _, id in unpivot_exprs]
3049+
joined_array, unpivot_col_ids = joined_array.compute_values(unpivot_exprs)
30703050

3071-
return joined_array.compute_values(unpivot_exprs).select_columns(
3051+
return joined_array.select_columns(
30723052
[*index_col_ids, *unpivot_col_ids, *new_passthrough_cols]
30733053
), (tuple(index_col_ids), tuple(unpivot_col_ids), tuple(new_passthrough_cols))
30743054

0 commit comments

Comments
 (0)