diff --git a/.flake8 b/.flake8 index 32986c7928..10de52cbec 100644 --- a/.flake8 +++ b/.flake8 @@ -14,9 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Generated by synthtool. DO NOT EDIT! [flake8] -ignore = E203, E231, E266, E501, W503 +ignore = E203, E231, E266, E501, W503, E704 exclude = # Exclude generated code. **/proto/** diff --git a/.github/header-checker-lint.yml b/.github/header-checker-lint.yml index 6fe78aa798..3058bec338 100644 --- a/.github/header-checker-lint.yml +++ b/.github/header-checker-lint.yml @@ -2,14 +2,14 @@ "allowedLicenses": ["Apache-2.0", "MIT", "BSD-3"], "ignoreFiles": ["**/requirements.txt", "**/requirements-test.txt", "**/__init__.py", "samples/**/constraints.txt", "samples/**/constraints-test.txt"], "sourceFileExtensions": [ - "ts", - "js", - "java", - "sh", - "Dockerfile", - "yaml", + "ts", + "js", + "java", + "sh", + "Dockerfile", + "yaml", "py", "html", "txt" ] -} \ No newline at end of file +} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f839c3c0a4..d9d4d3a6c1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,7 +16,7 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.0.1 + rev: v5.0.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer @@ -28,11 +28,11 @@ repos: - id: isort name: isort (python) - repo: https://github.com/psf/black - rev: 22.3.0 + rev: 25.1.0 hooks: - id: black - repo: https://github.com/pycqa/flake8 - rev: 7.1.2 + rev: 7.3.0 hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-mypy diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 039f436812..84ff396aef 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -92,4 +92,4 @@ harassment or threats to anyone's safety, we may take action without notice. This Code of Conduct is adapted from the Contributor Covenant, version 1.4, available at -https://www.contributor-covenant.org/version/1/4/code-of-conduct.html \ No newline at end of file +https://www.contributor-covenant.org/version/1/4/code-of-conduct.html diff --git a/OWNERS b/OWNERS index 562ee0f19b..24e6752edd 100644 --- a/OWNERS +++ b/OWNERS @@ -5,4 +5,4 @@ jiaxun@google.com mlaurencechen@google.com shobs@google.com swast@google.com -tbergeron@google.com \ No newline at end of file +tbergeron@google.com diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py index 7ca7fb693b..6050f22925 100644 --- a/bigframes/bigquery/__init__.py +++ b/bigframes/bigquery/__init__.py @@ -14,7 +14,7 @@ """This module integrates BigQuery built-in functions for use with DataFrame objects, such as array functions: -https://cloud.google.com/bigquery/docs/reference/standard-sql/array_functions. """ +https://cloud.google.com/bigquery/docs/reference/standard-sql/array_functions.""" from bigframes.bigquery._operations.approx_agg import approx_top_count from bigframes.bigquery._operations.array import ( diff --git a/bigframes/bigquery/_operations/struct.py b/bigframes/bigquery/_operations/struct.py index 7cb826351c..93fb81eb54 100644 --- a/bigframes/bigquery/_operations/struct.py +++ b/bigframes/bigquery/_operations/struct.py @@ -15,7 +15,7 @@ """This module integrates BigQuery built-in functions for use with DataFrame objects, such as array functions: -https://cloud.google.com/bigquery/docs/reference/standard-sql/array_functions. 
""" +https://cloud.google.com/bigquery/docs/reference/standard-sql/array_functions.""" from __future__ import annotations diff --git a/bigframes/core/array_value.py b/bigframes/core/array_value.py index b47637cb59..9c21d528d2 100644 --- a/bigframes/core/array_value.py +++ b/bigframes/core/array_value.py @@ -524,9 +524,11 @@ def prepare_join_names( ) -> Tuple[bigframes.core.nodes.BigFrameNode, dict[str, str]]: if set(other.node.ids) & set(self.node.ids): r_mapping = { # Rename conflicting names - rcol.name: rcol.name - if (rcol.name not in self.column_ids) - else bigframes.core.guid.generate_guid() + rcol.name: ( + rcol.name + if (rcol.name not in self.column_ids) + else bigframes.core.guid.generate_guid() + ) for rcol in other.node.ids } return ( diff --git a/bigframes/core/bigframe_node.py b/bigframes/core/bigframe_node.py index 9054ab9ba0..31a702d4ae 100644 --- a/bigframes/core/bigframe_node.py +++ b/bigframes/core/bigframe_node.py @@ -155,8 +155,7 @@ def roots(self) -> typing.Set[BigFrameNode]: # TODO: Store some local data lazily for select, aggregate nodes. @property @abc.abstractmethod - def fields(self) -> Sequence[field.Field]: - ... + def fields(self) -> Sequence[field.Field]: ... @property def ids(self) -> Iterable[identifiers.ColumnId]: @@ -322,9 +321,9 @@ def iter_nodes_topo(self: BigFrameNode) -> Generator[BigFrameNode, None, None]: [node for node in self.unique_nodes() if not node.child_nodes] ) - child_to_parents: Dict[ - BigFrameNode, Set[BigFrameNode] - ] = collections.defaultdict(set) + child_to_parents: Dict[BigFrameNode, Set[BigFrameNode]] = ( + collections.defaultdict(set) + ) for parent, child in self.edges(): child_to_parents[child].add(parent) diff --git a/bigframes/core/block_transforms.py b/bigframes/core/block_transforms.py index cb7c1923cf..8ba56b4b55 100644 --- a/bigframes/core/block_transforms.py +++ b/bigframes/core/block_transforms.py @@ -376,9 +376,11 @@ def value_counts( [ ordering.OrderingExpression( ex.deref(count_id), - direction=ordering.OrderingDirection.ASC - if ascending - else ordering.OrderingDirection.DESC, + direction=( + ordering.OrderingDirection.ASC + if ascending + else ordering.OrderingDirection.DESC + ), ) ] ) @@ -435,9 +437,11 @@ def rank( window_ordering = ( ordering.OrderingExpression( ex.deref(col), - ordering.OrderingDirection.ASC - if ascending - else ordering.OrderingDirection.DESC, + ( + ordering.OrderingDirection.ASC + if ascending + else ordering.OrderingDirection.DESC + ), na_last=(na_option in ["bottom", "keep"]), ), ) @@ -445,12 +449,12 @@ def rank( block, rownum_id = block.apply_window_op( col if na_option == "keep" else nullity_col_id, agg_ops.dense_rank_op if method == "dense" else agg_ops.count_op, - window_spec=windows.unbound( - grouping_keys=grouping_cols, ordering=window_ordering - ) - if method == "dense" - else windows.rows( - end=0, ordering=window_ordering, grouping_keys=grouping_cols + window_spec=( + windows.unbound(grouping_keys=grouping_cols, ordering=window_ordering) + if method == "dense" + else windows.rows( + end=0, ordering=window_ordering, grouping_keys=grouping_cols + ) ), skip_reproject_unsafe=(col != columns[-1]), ) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index c8632ebc8c..650232c2b9 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -630,7 +630,8 @@ def to_pandas_batches( """Download results one message at a time. 
page_size and max_results determine the size and number of batches, - see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJob#google_cloud_bigquery_job_QueryJob_result""" + see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJob#google_cloud_bigquery_job_QueryJob_result + """ execute_result = self.session._executor.execute( self.expr, ordered=True, @@ -1058,9 +1059,9 @@ def project_exprs( return Block( new_array, index_columns=self.index_columns, - column_labels=labels - if drop - else self.column_labels.append(pd.Index(labels)), + column_labels=( + labels if drop else self.column_labels.append(pd.Index(labels)) + ), index_labels=self._index_labels, ) @@ -1180,9 +1181,11 @@ def aggregate_all_and_stack( if axis_n == 0: aggregations = [ ( - ex.UnaryAggregation(operation, ex.deref(col_id)) - if isinstance(operation, agg_ops.UnaryAggregateOp) - else ex.NullaryAggregation(operation), + ( + ex.UnaryAggregation(operation, ex.deref(col_id)) + if isinstance(operation, agg_ops.UnaryAggregateOp) + else ex.NullaryAggregation(operation) + ), col_id, ) for col_id in self.value_columns @@ -1201,7 +1204,11 @@ def aggregate_all_and_stack( # using offsets as identity to group on. # TODO: Allow to promote identity/total_order columns instead for better perf expr_with_offsets, offset_col = self.expr.promote_offsets() - stacked_expr, (_, value_col_ids, passthrough_cols,) = unpivot( + stacked_expr, ( + _, + value_col_ids, + passthrough_cols, + ) = unpivot( expr_with_offsets, row_labels=self.column_labels, unpivot_columns=[tuple(self.value_columns)], @@ -1387,9 +1394,11 @@ def get_stat( aggregations = [ ( - ex.UnaryAggregation(stat, ex.deref(column_id)) - if isinstance(stat, agg_ops.UnaryAggregateOp) - else ex.NullaryAggregation(stat), + ( + ex.UnaryAggregation(stat, ex.deref(column_id)) + if isinstance(stat, agg_ops.UnaryAggregateOp) + else ex.NullaryAggregation(stat) + ), stat.name, ) for stat in stats_to_fetch @@ -1442,9 +1451,11 @@ def summarize( labels = pd.Index([stat.name for stat in stats]) aggregations = [ ( - ex.UnaryAggregation(stat, ex.deref(col_id)) - if isinstance(stat, agg_ops.UnaryAggregateOp) - else ex.NullaryAggregation(stat), + ( + ex.UnaryAggregation(stat, ex.deref(col_id)) + if isinstance(stat, agg_ops.UnaryAggregateOp) + else ex.NullaryAggregation(stat) + ), f"{col_id}-{stat.name}", ) for stat in stats @@ -2314,16 +2325,19 @@ def _align_both_axes( rcol_indexer if (rcol_indexer is not None) else range(len(columns)) ) - left_input_lookup = ( - lambda index: ex.deref(get_column_left[self.value_columns[index]]) - if index != -1 - else ex.const(None) - ) - righ_input_lookup = ( - lambda index: ex.deref(get_column_right[other.value_columns[index]]) - if index != -1 - else ex.const(None) - ) + def left_input_lookup(index): + return ( + ex.deref(get_column_left[self.value_columns[index]]) + if index != -1 + else ex.const(None) + ) + + def righ_input_lookup(index): + return ( + ex.deref(get_column_right[other.value_columns[index]]) + if index != -1 + else ex.const(None) + ) left_inputs = [left_input_lookup(i) for i in lcol_indexer] right_inputs = [righ_input_lookup(i) for i in rcol_indexer] @@ -2373,18 +2387,19 @@ def _align_series_block_axis_1( rcol_indexer if (rcol_indexer is not None) else range(len(columns)) ) - left_input_lookup = ( - lambda index: ex.deref(get_column_left[self.value_columns[index]]) - if index != -1 - else ex.const(None) - ) - righ_input_lookup = ( - lambda index: ex.deref( - 
get_column_right[other.transpose().value_columns[index]] + def left_input_lookup(index): + return ( + ex.deref(get_column_left[self.value_columns[index]]) + if index != -1 + else ex.const(None) + ) + + def righ_input_lookup(index): + return ( + ex.deref(get_column_right[other.transpose().value_columns[index]]) + if index != -1 + else ex.const(None) ) - if index != -1 - else ex.const(None) - ) left_inputs = [left_input_lookup(i) for i in lcol_indexer] right_inputs = [righ_input_lookup(i) for i in rcol_indexer] @@ -2408,14 +2423,13 @@ def _align_pd_series_axis_1( rcol_indexer if (rcol_indexer is not None) else range(len(columns)) ) - left_input_lookup = ( - lambda index: ex.deref(self.value_columns[index]) - if index != -1 - else ex.const(None) - ) - righ_input_lookup = ( - lambda index: ex.const(other.iloc[index]) if index != -1 else ex.const(None) - ) + def left_input_lookup(index): + return ( + ex.deref(self.value_columns[index]) if index != -1 else ex.const(None) + ) + + def righ_input_lookup(index): + return ex.const(other.iloc[index]) if index != -1 else ex.const(None) left_inputs = [left_input_lookup(i) for i in lcol_indexer] right_inputs = [righ_input_lookup(i) for i in rcol_indexer] @@ -2447,7 +2461,10 @@ def join( sort: bool = False, block_identity_join: bool = False, always_order: bool = False, - ) -> Tuple[Block, Tuple[Mapping[str, str], Mapping[str, str]],]: + ) -> Tuple[ + Block, + Tuple[Mapping[str, str], Mapping[str, str]], + ]: """ Join two blocks objects together, and provide mappings between source columns and output columns. @@ -2637,9 +2654,11 @@ def cached(self, *, force: bool = False, session_aware: bool = False) -> None: self.session._executor.cached( self.expr, config=executors.CacheConfig( - optimize_for="auto" - if session_aware - else executors.HierarchicalKey(tuple(self.index_columns)), + optimize_for=( + "auto" + if session_aware + else executors.HierarchicalKey(tuple(self.index_columns)) + ), if_cached="replace" if force else "reuse-any", ), ) @@ -2920,9 +2939,12 @@ def is_uniquely_named(self: BlockIndexProperties): return len(set(self.names)) == len(self.names) -def try_new_row_join( - left: Block, right: Block -) -> Optional[Tuple[Block, Tuple[Mapping[str, str], Mapping[str, str]],]]: +def try_new_row_join(left: Block, right: Block) -> Optional[ + Tuple[ + Block, + Tuple[Mapping[str, str], Mapping[str, str]], + ] +]: join_keys = tuple( (left_id, right_id) for left_id, right_id in zip(left.index_columns, right.index_columns) @@ -2953,7 +2975,12 @@ def try_legacy_row_join( right: Block, *, how="left", -) -> Optional[Tuple[Block, Tuple[Mapping[str, str], Mapping[str, str]],]]: +) -> Optional[ + Tuple[ + Block, + Tuple[Mapping[str, str], Mapping[str, str]], + ] +]: """Joins two blocks that have a common root expression by merging the projections.""" left_expr = left.expr right_expr = right.expr @@ -3007,7 +3034,10 @@ def try_legacy_row_join( def join_with_single_row( left: Block, single_row_block: Block, -) -> Tuple[Block, Tuple[Mapping[str, str], Mapping[str, str]],]: +) -> Tuple[ + Block, + Tuple[Mapping[str, str], Mapping[str, str]], +]: """ Special join case where other is a single row block. This property is not validated, caller responsible for not passing multi-row block. 
@@ -3042,7 +3072,10 @@ def join_mono_indexed( how="left", sort: bool = False, propogate_order: bool = False, -) -> Tuple[Block, Tuple[Mapping[str, str], Mapping[str, str]],]: +) -> Tuple[ + Block, + Tuple[Mapping[str, str], Mapping[str, str]], +]: left_expr = left.expr right_expr = right.expr @@ -3072,9 +3105,9 @@ def join_mono_indexed( combined_expr, index_columns=coalesced_join_cols, column_labels=[*left.column_labels, *right.column_labels], - index_labels=[left.index.name] - if left.index.name == right.index.name - else [None], + index_labels=( + [left.index.name] if left.index.name == right.index.name else [None] + ), ) return ( block, @@ -3089,7 +3122,10 @@ def join_multi_indexed( how="left", sort: bool = False, propogate_order: bool = False, -) -> Tuple[Block, Tuple[Mapping[str, str], Mapping[str, str]],]: +) -> Tuple[ + Block, + Tuple[Mapping[str, str], Mapping[str, str]], +]: if not (left.index.is_uniquely_named() and right.index.is_uniquely_named()): raise ValueError("Joins not supported on indices with non-unique level names") @@ -3373,9 +3409,11 @@ def unpivot( *( ( ops.eq_op.as_expr(explode_offsets_id, ex.const(i)), - ex.deref(column_mapping[id_or_null]) - if (id_or_null is not None) - else ex.const(None), + ( + ex.deref(column_mapping[id_or_null]) + if (id_or_null is not None) + else ex.const(None) + ), ) for i, id_or_null in enumerate(input_ids) ) diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index 314b54fc6d..626d2f0a08 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -57,10 +57,13 @@ def __init__( # TODO(swast): Validate that each column references the same table (or # no table for literal values). self._columns = tuple( - column.resolve(table) # type:ignore - # TODO(https://github.com/ibis-project/ibis/issues/7613): use - # public API to refer to Deferred type. - if isinstance(column, ibis_deferred.Deferred) else column + ( + column.resolve(table) # type:ignore + # TODO(https://github.com/ibis-project/ibis/issues/7613): use + # public API to refer to Deferred type. 
+ if isinstance(column, ibis_deferred.Deferred) + else column + ) for column in columns ) # To allow for more efficient lookup by column name, create a diff --git a/bigframes/core/compile/default_ordering.py b/bigframes/core/compile/default_ordering.py index 1a1350cfd6..a48f8a62b3 100644 --- a/bigframes/core/compile/default_ordering.py +++ b/bigframes/core/compile/default_ordering.py @@ -48,9 +48,11 @@ def _convert_to_nonnull_string(column: ibis_types.Value) -> ibis_types.StringVal # Escape backslashes and use backslash as delineator escaped = cast( ibis_types.StringColumn, - result.fill_null(ibis_types.literal("")) - if hasattr(result, "fill_null") - else result.fillna(""), + ( + result.fill_null(ibis_types.literal("")) + if hasattr(result, "fill_null") + else result.fillna("") + ), ).replace( "\\", # type: ignore "\\\\", # type: ignore diff --git a/bigframes/core/compile/explode.py b/bigframes/core/compile/explode.py index 59e3a13d02..4197343898 100644 --- a/bigframes/core/compile/explode.py +++ b/bigframes/core/compile/explode.py @@ -59,9 +59,11 @@ def explode_unordered( output_cols = tuple(input.column_ids) + ((offsets_id,) if offsets_id else ()) unnested_columns = [ - table_w_offset[column_id][table_w_offset[unnest_offset_id]].name(column_id) - if column_id in column_ids - else table_w_offset[column_id] + ( + table_w_offset[column_id][table_w_offset[unnest_offset_id]].name(column_id) + if column_id in column_ids + else table_w_offset[column_id] + ) for column_id in output_cols ] table_w_unnest = table_w_offset.select(*unnested_columns) diff --git a/bigframes/core/compile/googlesql/abc.py b/bigframes/core/compile/googlesql/abc.py index 081836467c..8c3ea70cf7 100644 --- a/bigframes/core/compile/googlesql/abc.py +++ b/bigframes/core/compile/googlesql/abc.py @@ -21,5 +21,4 @@ class SQLSyntax(abc.ABC): """Abstract base class provides GoogleSQL syntax.""" @abc.abstractmethod - def sql(self): - ... + def sql(self): ... diff --git a/bigframes/core/compile/googlesql/query.py b/bigframes/core/compile/googlesql/query.py index f591216b3a..276a7152af 100644 --- a/bigframes/core/compile/googlesql/query.py +++ b/bigframes/core/compile/googlesql/query.py @@ -53,9 +53,9 @@ def sql(self) -> str: class Select(abc.SQLSyntax): """This class represents GoogleSQL `select` syntax.""" - select_list: typing.Sequence[ - typing.Union[SelectExpression, SelectAll] - ] = dataclasses.field(default_factory=list) + select_list: typing.Sequence[typing.Union[SelectExpression, SelectAll]] = ( + dataclasses.field(default_factory=list) + ) from_clause_list: typing.Sequence[FromClause] = dataclasses.field( default_factory=list ) @@ -86,9 +86,7 @@ def _select_field(self, field) -> SelectExpression: alias = ( expr.AliasExpression(field[1]) if isinstance(field[1], str) - else field[1] - if (field[0] != field[1]) - else None + else field[1] if (field[0] != field[1]) else None ) return SelectExpression( expression=expr.ColumnExpression(name=field[0]), alias=alias diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index 95517ead35..e078f28ade 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -1290,8 +1290,7 @@ def remote_function_op_impl(x: ibis_types.Value, op: ops.RemoteFunctionOp): @ibis_udf.scalar.builtin( name=str(op.function_def.routine_ref), signature=ibis_py_sig ) - def udf(input): - ... + def udf(input): ... 
x_transformed = udf(x) if not op.apply_on_null: @@ -1309,8 +1308,7 @@ def binary_remote_function_op_impl( @ibis_udf.scalar.builtin( name=str(op.function_def.routine_ref), signature=ibis_py_sig ) - def udf(input1, input2): - ... + def udf(input1, input2): ... x_transformed = udf(x, y) return x_transformed @@ -1329,8 +1327,7 @@ def nary_remote_function_op_impl( signature=ibis_py_sig, param_name_overrides=arg_names, ) - def udf(*inputs): - ... + def udf(*inputs): ... result = udf(*operands) return result diff --git a/bigframes/core/expression.py b/bigframes/core/expression.py index 7b20e430ff..a011e77c94 100644 --- a/bigframes/core/expression.py +++ b/bigframes/core/expression.py @@ -53,8 +53,7 @@ class Aggregation(abc.ABC): @abc.abstractmethod def output_type( self, input_fields: Mapping[ids.ColumnId, field.Field] - ) -> dtypes.ExpressionType: - ... + ) -> dtypes.ExpressionType: ... @property def column_references(self) -> typing.Tuple[ids.ColumnId, ...]: @@ -65,8 +64,7 @@ def remap_column_refs( self, name_mapping: Mapping[ids.ColumnId, ids.ColumnId], allow_partial_bindings: bool = False, - ) -> Aggregation: - ... + ) -> Aggregation: ... @dataclasses.dataclass(frozen=True) @@ -183,8 +181,7 @@ def nullable(self) -> bool: @property @abc.abstractmethod - def column_references(self) -> typing.Tuple[ids.ColumnId, ...]: - ... + def column_references(self) -> typing.Tuple[ids.ColumnId, ...]: ... def remap_column_refs( self: TExpression, @@ -198,8 +195,7 @@ def remap_column_refs( @property @abc.abstractmethod - def is_const(self) -> bool: - ... + def is_const(self) -> bool: ... @property @abc.abstractmethod @@ -211,8 +207,7 @@ def is_resolved(self) -> bool: @property @abc.abstractmethod - def output_type(self) -> dtypes.ExpressionType: - ... + def output_type(self) -> dtypes.ExpressionType: ... @abc.abstractmethod def bind_refs( @@ -250,8 +245,9 @@ def is_identity(self) -> bool: return False @abc.abstractmethod - def transform_children(self, t: Callable[[Expression], Expression]) -> Expression: - ... + def transform_children( + self, t: Callable[[Expression], Expression] + ) -> Expression: ... def walk(self) -> Generator[Expression, None, None]: yield self diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py index c60e40880b..2be8704938 100644 --- a/bigframes/core/indexers.py +++ b/bigframes/core/indexers.py @@ -139,16 +139,14 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame): @typing.overload def __getitem__( self, key: LocSingleKey - ) -> Union[bigframes.dataframe.DataFrame, pd.Series]: - ... + ) -> Union[bigframes.dataframe.DataFrame, pd.Series]: ... # Technically this is wrong since we can have duplicate column labels, but # this is expected to be rare. @typing.overload def __getitem__( self, key: Tuple[LocSingleKey, str] - ) -> Union[bigframes.series.Series, bigframes.core.scalar.Scalar]: - ... + ) -> Union[bigframes.series.Series, bigframes.core.scalar.Scalar]: ... def __getitem__(self, key): # TODO(tbergeron): Pandas will try both splitting 2-tuple into row, index or as 2-part @@ -283,15 +281,13 @@ def __getitem__( @typing.overload def _loc_getitem_series_or_dataframe( series_or_dataframe: bigframes.series.Series, key -) -> Union[bigframes.core.scalar.Scalar, bigframes.series.Series]: - ... +) -> Union[bigframes.core.scalar.Scalar, bigframes.series.Series]: ... @typing.overload def _loc_getitem_series_or_dataframe( series_or_dataframe: bigframes.dataframe.DataFrame, key -) -> Union[bigframes.dataframe.DataFrame, pd.Series]: - ... 
+) -> Union[bigframes.dataframe.DataFrame, pd.Series]: ... def _loc_getitem_series_or_dataframe( @@ -353,8 +349,7 @@ def _perform_loc_list_join( series_or_dataframe: bigframes.series.Series, keys_index: indexes.Index, drop_levels: bool = False, -) -> bigframes.series.Series: - ... +) -> bigframes.series.Series: ... @typing.overload @@ -362,8 +357,7 @@ def _perform_loc_list_join( series_or_dataframe: bigframes.dataframe.DataFrame, keys_index: indexes.Index, drop_levels: bool = False, -) -> bigframes.dataframe.DataFrame: - ... +) -> bigframes.dataframe.DataFrame: ... def _perform_loc_list_join( @@ -421,15 +415,13 @@ def _struct_accessor_check_and_warn( @typing.overload def _iloc_getitem_series_or_dataframe( series_or_dataframe: bigframes.series.Series, key -) -> Union[bigframes.series.Series, bigframes.core.scalar.Scalar]: - ... +) -> Union[bigframes.series.Series, bigframes.core.scalar.Scalar]: ... @typing.overload def _iloc_getitem_series_or_dataframe( series_or_dataframe: bigframes.dataframe.DataFrame, key -) -> Union[bigframes.dataframe.DataFrame, pd.Series, bigframes.core.scalar.Scalar]: - ... +) -> Union[bigframes.dataframe.DataFrame, pd.Series, bigframes.core.scalar.Scalar]: ... def _iloc_getitem_series_or_dataframe( diff --git a/bigframes/core/indexes/base.py b/bigframes/core/indexes/base.py index 9ad201c73d..83c610befc 100644 --- a/bigframes/core/indexes/base.py +++ b/bigframes/core/indexes/base.py @@ -407,9 +407,11 @@ def sort_values( na_last = na_position == "last" index_columns = self._block.index_columns ordering = [ - order.ascending_over(column, na_last) - if ascending - else order.descending_over(column, na_last) + ( + order.ascending_over(column, na_last) + if ascending + else order.descending_over(column, na_last) + ) for column in index_columns ] return Index(self._block.order_by(ordering)) @@ -506,8 +508,7 @@ def fillna(self, value=None) -> Index: def rename( self, name: Union[blocks.Label, Sequence[blocks.Label]], - ) -> Index: - ... + ) -> Index: ... @overload def rename( @@ -515,8 +516,7 @@ def rename( name: Union[blocks.Label, Sequence[blocks.Label]], *, inplace: Literal[False], - ) -> Index: - ... + ) -> Index: ... @overload def rename( @@ -524,8 +524,7 @@ def rename( name: Union[blocks.Label, Sequence[blocks.Label]], *, inplace: Literal[True], - ) -> None: - ... + ) -> None: ... def rename( self, @@ -683,14 +682,12 @@ def to_pandas( # type: ignore[overload-overlap] *, allow_large_results: Optional[bool] = ..., dry_run: Literal[False] = ..., - ) -> pandas.Index: - ... + ) -> pandas.Index: ... @overload def to_pandas( self, *, allow_large_results: Optional[bool] = ..., dry_run: Literal[True] = ... - ) -> pandas.Series: - ... + ) -> pandas.Series: ... 
def to_pandas( self, diff --git a/bigframes/core/local_data.py b/bigframes/core/local_data.py index 958113dda3..b1ac6bbe95 100644 --- a/bigframes/core/local_data.py +++ b/bigframes/core/local_data.py @@ -216,10 +216,11 @@ def iter_array( elif dtype == bigframes.dtypes.TIMEDELTA_DTYPE: if duration_type == "int": yield from map( - lambda x: ((x.days * 3600 * 24) + x.seconds) * 1_000_000 - + x.microseconds - if x is not None - else x, + lambda x: ( + ((x.days * 3600 * 24) + x.seconds) * 1_000_000 + x.microseconds + if x is not None + else x + ), values, ) else: @@ -234,7 +235,7 @@ def _( value_generator = iter_array( array.flatten(), bigframes.dtypes.get_array_inner_type(dtype) ) - for (start, end) in _pairwise(array.offsets): + for start, end in _pairwise(array.offsets): arr_size = end.as_py() - start.as_py() yield list(itertools.islice(value_generator, arr_size)) @@ -374,7 +375,7 @@ def _get_managed_storage_type(dtype: bigframes.dtypes.Dtype) -> pa.DataType: def _recursive_map_types( - f: Callable[[pa.DataType], pa.DataType] + f: Callable[[pa.DataType], pa.DataType], ) -> Callable[[pa.DataType], pa.DataType]: @functools.wraps(f) def recursive_f(type: pa.DataType) -> pa.DataType: diff --git a/bigframes/core/nodes.py b/bigframes/core/nodes.py index cf6e8a7e5c..1c2f271a01 100644 --- a/bigframes/core/nodes.py +++ b/bigframes/core/nodes.py @@ -66,17 +66,14 @@ class AdditiveNode: @property @abc.abstractmethod - def added_fields(self) -> Tuple[Field, ...]: - ... + def added_fields(self) -> Tuple[Field, ...]: ... @property @abc.abstractmethod - def additive_base(self) -> BigFrameNode: - ... + def additive_base(self) -> BigFrameNode: ... @abc.abstractmethod - def replace_additive_base(self, BigFrameNode) -> BigFrameNode: - ... + def replace_additive_base(self, BigFrameNode) -> BigFrameNode: ... 
@dataclasses.dataclass(frozen=True, eq=False) @@ -591,7 +588,9 @@ def transform_children(self, t: Callable[[BigFrameNode], BigFrameNode]) -> LeafN class ScanItem(typing.NamedTuple): id: identifiers.ColumnId - dtype: bigframes.dtypes.Dtype # Might be multiple logical types for a given physical source type + dtype: ( + bigframes.dtypes.Dtype + ) # Might be multiple logical types for a given physical source type source_id: str # Flexible enough for both local data and bq data def with_id(self, id: identifiers.ColumnId) -> ScanItem: @@ -759,9 +758,11 @@ def from_table(table: bq.Table, columns: Sequence[str] = ()) -> GbqTable: table_id=table.table_id, physical_schema=schema, is_physically_stored=(table.table_type in ["TABLE", "MATERIALIZED_VIEW"]), - cluster_cols=None - if table.clustering_fields is None - else tuple(table.clustering_fields), + cluster_cols=( + None + if table.clustering_fields is None + else tuple(table.clustering_fields) + ), ) def get_table_ref(self) -> bq.TableReference: @@ -1602,15 +1603,17 @@ def row_preserving(self) -> bool: @property def fields(self) -> Sequence[Field]: fields = ( - Field( - field.id, - bigframes.dtypes.arrow_dtype_to_bigframes_dtype( - self.child.get_type(field.id).pyarrow_dtype.value_type # type: ignore - ), - nullable=True, + ( + Field( + field.id, + bigframes.dtypes.arrow_dtype_to_bigframes_dtype( + self.child.get_type(field.id).pyarrow_dtype.value_type # type: ignore + ), + nullable=True, + ) + if field.id in set(map(lambda x: x.id, self.column_ids)) + else field ) - if field.id in set(map(lambda x: x.id, self.column_ids)) - else field for field in self.child.fields ) if self.offsets_col is not None: diff --git a/bigframes/core/ordering.py b/bigframes/core/ordering.py index 2fc7573b21..aeded31660 100644 --- a/bigframes/core/ordering.py +++ b/bigframes/core/ordering.py @@ -332,15 +332,13 @@ def remap_column_refs( def join( self, other: TotalOrdering, - ) -> TotalOrdering: - ... + ) -> TotalOrdering: ... @typing.overload def join( self, other: RowOrdering, - ) -> RowOrdering: - ... + ) -> RowOrdering: ... def join( self, diff --git a/bigframes/core/reshape/concat.py b/bigframes/core/reshape/concat.py index a42488cbe8..cc81319ae6 100644 --- a/bigframes/core/reshape/concat.py +++ b/bigframes/core/reshape/concat.py @@ -31,8 +31,7 @@ def concat( axis: typing.Literal["index", 0] = ..., join=..., ignore_index=..., -) -> bigframes.series.Series: - ... +) -> bigframes.series.Series: ... @typing.overload @@ -42,8 +41,7 @@ def concat( axis: typing.Literal["index", 0] = ..., join=..., ignore_index=..., -) -> bigframes.dataframe.DataFrame: - ... +) -> bigframes.dataframe.DataFrame: ... @typing.overload @@ -53,8 +51,7 @@ def concat( axis: typing.Literal["columns", 1], join=..., ignore_index=..., -) -> bigframes.dataframe.DataFrame: - ... +) -> bigframes.dataframe.DataFrame: ... @typing.overload @@ -64,8 +61,7 @@ def concat( axis=..., join=..., ignore_index=..., -) -> Union[bigframes.dataframe.DataFrame, bigframes.series.Series]: - ... +) -> Union[bigframes.dataframe.DataFrame, bigframes.series.Series]: ... 
def concat( diff --git a/bigframes/core/rewrite/identifiers.py b/bigframes/core/rewrite/identifiers.py index 0093e183b4..63b5554321 100644 --- a/bigframes/core/rewrite/identifiers.py +++ b/bigframes/core/rewrite/identifiers.py @@ -51,7 +51,9 @@ def remap_variables( return ( with_new_vars, - node_var_mapping - if root.defines_namespace - else (ref_mapping | node_var_mapping), + ( + node_var_mapping + if root.defines_namespace + else (ref_mapping | node_var_mapping) + ), ) diff --git a/bigframes/core/rewrite/op_lowering.py b/bigframes/core/rewrite/op_lowering.py index a64a4cc8c4..e063eeb7a7 100644 --- a/bigframes/core/rewrite/op_lowering.py +++ b/bigframes/core/rewrite/op_lowering.py @@ -23,12 +23,10 @@ class OpLoweringRule(abc.ABC): @property @abc.abstractmethod - def op(self) -> type[ops.ScalarOp]: - ... + def op(self) -> type[ops.ScalarOp]: ... @abc.abstractmethod - def lower(self, expr: expression.OpExpression) -> expression.Expression: - ... + def lower(self, expr: expression.OpExpression) -> expression.Expression: ... def lower_ops( diff --git a/bigframes/core/rewrite/pruning.py b/bigframes/core/rewrite/pruning.py index 8a07f0b87e..3fa6c2a7f1 100644 --- a/bigframes/core/rewrite/pruning.py +++ b/bigframes/core/rewrite/pruning.py @@ -170,8 +170,7 @@ def prune_aggregate( def prune_leaf( node: nodes.BigFrameNode, used_cols: typing.AbstractSet[identifiers.ColumnId], -): - ... +): ... @prune_leaf.register diff --git a/bigframes/core/utils.py b/bigframes/core/utils.py index dd37a352a7..a019709c76 100644 --- a/bigframes/core/utils.py +++ b/bigframes/core/utils.py @@ -112,15 +112,19 @@ def get_standardized_ids( Tuple of (standardized_column_ids, standardized_index_ids) """ col_ids = [ - UNNAMED_COLUMN_ID - if col_label is None - else label_to_identifier(col_label, strict=strict) + ( + UNNAMED_COLUMN_ID + if col_label is None + else label_to_identifier(col_label, strict=strict) + ) for col_label in col_labels ] idx_ids = [ - UNNAMED_INDEX_ID - if idx_label is None - else label_to_identifier(idx_label, strict=strict) + ( + UNNAMED_INDEX_ID + if idx_label is None + else label_to_identifier(idx_label, strict=strict) + ) for idx_label in idx_labels ] @@ -234,7 +238,7 @@ def wrapper(*args, **kwargs): def timedelta_to_micros( - timedelta: typing.Union[pd.Timedelta, datetime.timedelta, np.timedelta64] + timedelta: typing.Union[pd.Timedelta, datetime.timedelta, np.timedelta64], ) -> int: if isinstance(timedelta, pd.Timedelta): # pd.Timedelta.value returns total nanoseconds. diff --git a/bigframes/core/validations.py b/bigframes/core/validations.py index 701752c9fc..d7b51e19ba 100644 --- a/bigframes/core/validations.py +++ b/bigframes/core/validations.py @@ -32,12 +32,10 @@ class HasSession(Protocol): @property - def _session(self) -> Session: - ... + def _session(self) -> Session: ... @property - def _block(self) -> Block: - ... + def _block(self) -> Block: ... def requires_index(meth): diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 7de4bdbc91..0e1065761b 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -1672,8 +1672,7 @@ def to_pandas( # type: ignore[overload-overlap] ordered: bool = ..., dry_run: Literal[False] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.DataFrame: - ... + ) -> pandas.DataFrame: ... @overload def to_pandas( @@ -1685,8 +1684,7 @@ def to_pandas( ordered: bool = ..., dry_run: Literal[True] = ..., allow_large_results: Optional[bool] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... 
def to_pandas( self, @@ -2111,20 +2109,17 @@ def _resolve_levels(self, level: LevelsType) -> typing.Sequence[str]: return self._block.index.resolve_level(level) @overload - def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: - ... + def rename(self, *, columns: Mapping[blocks.Label, blocks.Label]) -> DataFrame: ... @overload def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[False] - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: Literal[True] - ) -> None: - ... + ) -> None: ... def rename( self, *, columns: Mapping[blocks.Label, blocks.Label], inplace: bool = False @@ -2141,8 +2136,7 @@ def rename( def rename_axis( self, mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def rename_axis( @@ -2151,8 +2145,7 @@ def rename_axis( *, inplace: Literal[False], **kwargs, - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def rename_axis( @@ -2161,8 +2154,7 @@ def rename_axis( *, inplace: Literal[True], **kwargs, - ) -> None: - ... + ) -> None: ... def rename_axis( self, @@ -2341,8 +2333,7 @@ def sort_index( ascending: bool = ..., inplace: Literal[False] = ..., na_position: Literal["first", "last"] = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def sort_index( @@ -2351,8 +2342,7 @@ def sort_index( ascending: bool = ..., inplace: Literal[True] = ..., na_position: Literal["first", "last"] = ..., - ) -> None: - ... + ) -> None: ... @validations.requires_index def sort_index( @@ -2367,9 +2357,11 @@ def sort_index( na_last = na_position == "last" index_columns = self._block.index_columns ordering = [ - order.ascending_over(column, na_last) - if ascending - else order.descending_over(column, na_last) + ( + order.ascending_over(column, na_last) + if ascending + else order.descending_over(column, na_last) + ) for column in index_columns ] block = self._block.order_by(ordering) @@ -2388,8 +2380,7 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> DataFrame: - ... + ) -> DataFrame: ... @overload def sort_values( @@ -2400,8 +2391,7 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> None: - ... + ) -> None: ... 
def sort_values( self, @@ -2684,11 +2674,11 @@ def replace( ): if utils.is_dict_like(value): return self.apply( - lambda x: x.replace( - to_replace=to_replace, value=value[x.name], regex=regex + lambda x: ( + x.replace(to_replace=to_replace, value=value[x.name], regex=regex) + if (x.name in value) + else x ) - if (x.name in value) - else x ) return self.apply( lambda x: x.replace(to_replace=to_replace, value=value, regex=regex) @@ -3042,9 +3032,11 @@ def nunique(self) -> bigframes.series.Series: def agg( self, - func: str - | typing.Sequence[str] - | typing.Mapping[blocks.Label, typing.Sequence[str] | str], + func: ( + str + | typing.Sequence[str] + | typing.Mapping[blocks.Label, typing.Sequence[str] | str] + ), ) -> DataFrame | bigframes.series.Series: if utils.is_dict_like(func): # Must check dict-like first because dictionaries are list-like diff --git a/bigframes/formatting_helpers.py b/bigframes/formatting_helpers.py index 48afb4fdbd..21b69244bf 100644 --- a/bigframes/formatting_helpers.py +++ b/bigframes/formatting_helpers.py @@ -43,7 +43,7 @@ def add_feedback_link( exception: Union[ api_core_exceptions.RetryError, api_core_exceptions.GoogleAPICallError - ] + ], ): exception.message = exception.message + f" {constants.FEEDBACK_LINK}" diff --git a/bigframes/functions/_function_session.py b/bigframes/functions/_function_session.py index 371784332c..7ab964a699 100644 --- a/bigframes/functions/_function_session.py +++ b/bigframes/functions/_function_session.py @@ -565,9 +565,11 @@ def wrapper(func): bq_connection_manager, cloud_function_region, cloud_functions_client, - None - if cloud_function_service_account == "default" - else cloud_function_service_account, + ( + None + if cloud_function_service_account == "default" + else cloud_function_service_account + ), cloud_function_kms_key_name, cloud_function_docker_repository, cloud_build_service_account=cloud_build_service_account, diff --git a/bigframes/functions/_utils.py b/bigframes/functions/_utils.py index 0b7222db86..8383e34b32 100644 --- a/bigframes/functions/_utils.py +++ b/bigframes/functions/_utils.py @@ -250,9 +250,9 @@ def get_bigframes_metadata(*, python_output_type: Optional[type] = None) -> str: python_output_array_type in function_typing.RF_SUPPORTED_ARRAY_OUTPUT_PYTHON_TYPES ): - inner_metadata[ - "python_array_output_type" - ] = python_output_array_type.__name__ + inner_metadata["python_array_output_type"] = ( + python_output_array_type.__name__ + ) metadata = {"value": inner_metadata} metadata_ser = json.dumps(metadata) diff --git a/bigframes/ml/compose.py b/bigframes/ml/compose.py index 46d40d5fc8..60ce5a591b 100644 --- a/bigframes/ml/compose.py +++ b/bigframes/ml/compose.py @@ -172,7 +172,13 @@ def _keys(self): @property def transformers_( self, - ) -> List[Tuple[str, SingleColTransformer, str,]]: + ) -> List[ + Tuple[ + str, + SingleColTransformer, + str, + ] + ]: """The collection of transformers as tuples of (name, transformer, column).""" result: List[ Tuple[ diff --git a/bigframes/ml/imported.py b/bigframes/ml/imported.py index a73ee352d0..b5bba1960a 100644 --- a/bigframes/ml/imported.py +++ b/bigframes/ml/imported.py @@ -72,7 +72,8 @@ def predict(self, X: utils.ArrayType) -> bpd.DataFrame: Input DataFrame. Schema is defined by the model. Returns: - bigframes.dataframe.DataFrame: Output DataFrame. Schema is defined by the model.""" + bigframes.dataframe.DataFrame: Output DataFrame. Schema is defined by the model. 
+ """ if not self._bqml_model: if self.model_path is None: @@ -151,7 +152,8 @@ def predict(self, X: utils.ArrayType) -> bpd.DataFrame: Input DataFrame or Series. Schema is defined by the model. Returns: - bigframes.dataframe.DataFrame: Output DataFrame, schema is defined by the model.""" + bigframes.dataframe.DataFrame: Output DataFrame, schema is defined by the model. + """ if not self._bqml_model: if self.model_path is None: @@ -270,7 +272,8 @@ def predict(self, X: utils.ArrayType) -> bpd.DataFrame: Input DataFrame or Series. Schema is defined by the model. Returns: - bigframes.dataframe.DataFrame: Output DataFrame. Schema is defined by the model.""" + bigframes.dataframe.DataFrame: Output DataFrame. Schema is defined by the model. + """ if not self._bqml_model: if self.model_path is None: diff --git a/bigframes/ml/impute.py b/bigframes/ml/impute.py index f19c8e2cd3..ab849efe41 100644 --- a/bigframes/ml/impute.py +++ b/bigframes/ml/impute.py @@ -13,7 +13,8 @@ # limitations under the License. """Transformers for missing value imputation. This module is styled after -scikit-learn's preprocessing module: https://scikit-learn.org/stable/modules/impute.html.""" +scikit-learn's preprocessing module: https://scikit-learn.org/stable/modules/impute.html. +""" from __future__ import annotations diff --git a/bigframes/ml/loader.py b/bigframes/ml/loader.py index a6366273fe..162c0359f0 100644 --- a/bigframes/ml/loader.py +++ b/bigframes/ml/loader.py @@ -79,9 +79,7 @@ ) -def from_bq( - session: bigframes.session.Session, bq_model: bigquery.Model -) -> Union[ +def from_bq(session: bigframes.session.Session, bq_model: bigquery.Model) -> Union[ decomposition.MatrixFactorization, decomposition.PCA, cluster.KMeans, diff --git a/bigframes/ml/preprocessing.py b/bigframes/ml/preprocessing.py index 0448d8544a..7ca81b4bbd 100644 --- a/bigframes/ml/preprocessing.py +++ b/bigframes/ml/preprocessing.py @@ -13,7 +13,8 @@ # limitations under the License. """Transformers that prepare data for other estimators. This module is styled after -scikit-learn's preprocessing module: https://scikit-learn.org/stable/modules/preprocessing.html.""" +scikit-learn's preprocessing module: https://scikit-learn.org/stable/modules/preprocessing.html. +""" from __future__ import annotations diff --git a/bigframes/ml/sql.py b/bigframes/ml/sql.py index 2937368c92..09a46b235d 100644 --- a/bigframes/ml/sql.py +++ b/bigframes/ml/sql.py @@ -160,7 +160,8 @@ def ml_one_hot_encoder( name: str, ) -> str: """Encode ML.ONE_HOT_ENCODER for BQML. - https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-one-hot-encoder for params.""" + https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-one-hot-encoder for params. + """ return f"""ML.ONE_HOT_ENCODER({sql_utils.identifier(numeric_expr_sql)}, '{drop}', {top_k}, {frequency_threshold}) OVER() AS {sql_utils.identifier(name)}""" def ml_label_encoder( @@ -171,14 +172,16 @@ def ml_label_encoder( name: str, ) -> str: """Encode ML.LABEL_ENCODER for BQML. - https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-label-encoder for params.""" + https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-label-encoder for params. + """ return f"""ML.LABEL_ENCODER({sql_utils.identifier(numeric_expr_sql)}, {top_k}, {frequency_threshold}) OVER() AS {sql_utils.identifier(name)}""" def ml_polynomial_expand( self, columns: Iterable[str], degree: int, name: str ) -> str: """Encode ML.POLYNOMIAL_EXPAND. 
- https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-polynomial-expand""" + https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-polynomial-expand + """ return f"""ML.POLYNOMIAL_EXPAND({self.struct_columns(columns)}, {degree}) AS {sql_utils.identifier(name)}""" def ml_distance( @@ -190,7 +193,8 @@ def ml_distance( name: str, ) -> str: """Encode ML.DISTANCE for BQML. - https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-distance""" + https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-distance + """ return f"""SELECT *, ML.DISTANCE({sql_utils.identifier(col_x)}, {sql_utils.identifier(col_y)}, '{type}') AS {sql_utils.identifier(name)} FROM ({source_sql})""" def ai_forecast( @@ -199,7 +203,8 @@ def ai_forecast( options: Mapping[str, Union[int, float, bool, Iterable[str]]], ): """Encode AI.FORECAST. - https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-ai-forecast""" + https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-ai-forecast + """ named_parameters_sql = self.build_named_parameters(**options) return f"""SELECT * FROM AI.FORECAST(({source_sql}),{named_parameters_sql})""" diff --git a/bigframes/operations/aggregations.py b/bigframes/operations/aggregations.py index 1c321c0bf8..b400d25348 100644 --- a/bigframes/operations/aggregations.py +++ b/bigframes/operations/aggregations.py @@ -55,8 +55,9 @@ def order_independent(self): return False @abc.abstractmethod - def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: - ... + def output_type( + self, *input_types: dtypes.ExpressionType + ) -> dtypes.ExpressionType: ... @dataclasses.dataclass(frozen=True) @@ -82,13 +83,11 @@ class AggregateOp(WindowOp): @property @abc.abstractmethod - def name(self) -> str: - ... + def name(self) -> str: ... @property @abc.abstractmethod - def arguments(self) -> int: - ... + def arguments(self) -> int: ... @property def order_independent(self): diff --git a/bigframes/operations/base.py b/bigframes/operations/base.py index f2bbcb3320..4a7f1fd784 100644 --- a/bigframes/operations/base.py +++ b/bigframes/operations/base.py @@ -224,20 +224,28 @@ def _apply_binary_aggregation( AlignedExprT = Union[ex.ScalarConstantExpression, ex.DerefOp] @typing.overload - def _align( - self, other: series.Series, how="outer" - ) -> tuple[ex.DerefOp, ex.DerefOp, blocks.Block,]: - ... + def _align(self, other: series.Series, how="outer") -> tuple[ + ex.DerefOp, + ex.DerefOp, + blocks.Block, + ]: ... @typing.overload def _align( self, other: typing.Union[series.Series, scalars.Scalar], how="outer" - ) -> tuple[ex.DerefOp, AlignedExprT, blocks.Block,]: - ... + ) -> tuple[ + ex.DerefOp, + AlignedExprT, + blocks.Block, + ]: ... def _align( self, other: typing.Union[series.Series, scalars.Scalar], how="outer" - ) -> tuple[ex.DerefOp, AlignedExprT, blocks.Block,]: + ) -> tuple[ + ex.DerefOp, + AlignedExprT, + blocks.Block, + ]: """Aligns the series value with another scalar or series object. Returns new left column id, right column id and joined tabled expression.""" values, block = self._align_n( [ diff --git a/bigframes/operations/base_ops.py b/bigframes/operations/base_ops.py index c0145a6711..de7276ac6b 100644 --- a/bigframes/operations/base_ops.py +++ b/bigframes/operations/base_ops.py @@ -27,11 +27,11 @@ class RowOp(typing.Protocol): @property - def name(self) -> str: - ... + def name(self) -> str: ... 
- def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType: - ... + def output_type( + self, *input_types: dtypes.ExpressionType + ) -> dtypes.ExpressionType: ... @property def is_monotonic(self) -> bool: @@ -168,7 +168,7 @@ def as_expr( def _convert_expr_input( - input: typing.Union[str, bigframes.core.expression.Expression] + input: typing.Union[str, bigframes.core.expression.Expression], ) -> bigframes.core.expression.Expression: """Allows creating column references with just a string""" import bigframes.core.expression diff --git a/bigframes/operations/blob.py b/bigframes/operations/blob.py index 63875ded99..aeae675eec 100644 --- a/bigframes/operations/blob.py +++ b/bigframes/operations/blob.py @@ -79,7 +79,8 @@ def metadata(self) -> bigframes.series.Series: """Retrieve the metadata of the Blob. Returns: - bigframes.series.Series: JSON metadata of the Blob. Contains fields: content_type, md5_hash, size and updated(time).""" + bigframes.series.Series: JSON metadata of the Blob. Contains fields: content_type, md5_hash, size and updated(time). + """ details_json = self._apply_unary_op(ops.obj_fetch_metadata_op).struct.field( "details" ) diff --git a/bigframes/operations/type.py b/bigframes/operations/type.py index b4029d74c7..84cad1efa8 100644 --- a/bigframes/operations/type.py +++ b/bigframes/operations/type.py @@ -37,8 +37,7 @@ def as_method(self): class UnaryTypeSignature(TypeSignature): @abc.abstractmethod - def output_type(self, input_type: ExpressionType) -> ExpressionType: - ... + def output_type(self, input_type: ExpressionType) -> ExpressionType: ... @property def as_method(self): @@ -53,8 +52,7 @@ class BinaryTypeSignature(TypeSignature): @abc.abstractmethod def output_type( self, left_type: ExpressionType, right_type: ExpressionType - ) -> ExpressionType: - ... + ) -> ExpressionType: ... @property def as_method(self): diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 76e0f8719b..82c3d66589 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -188,8 +188,7 @@ def to_datetime( utc: bool = False, format: Optional[str] = None, unit: Optional[str] = None, -) -> bigframes.series.Series: - ... +) -> bigframes.series.Series: ... @typing.overload @@ -199,8 +198,7 @@ def to_datetime( utc: bool = False, format: Optional[str] = None, unit: Optional[str] = None, -) -> Union[pandas.Timestamp, datetime]: - ... +) -> Union[pandas.Timestamp, datetime]: ... def to_datetime( diff --git a/bigframes/pandas/io/api.py b/bigframes/pandas/io/api.py index a88cc7a011..7797492827 100644 --- a/bigframes/pandas/io/api.py +++ b/bigframes/pandas/io/api.py @@ -186,8 +186,7 @@ def read_gbq( # type: ignore[overload-overlap] use_cache: Optional[bool] = ..., col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., -) -> bigframes.dataframe.DataFrame: - ... +) -> bigframes.dataframe.DataFrame: ... @overload @@ -202,8 +201,7 @@ def read_gbq( use_cache: Optional[bool] = ..., col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., -) -> pandas.Series: - ... +) -> pandas.Series: ... def read_gbq( @@ -281,8 +279,7 @@ def _read_gbq_colab( # type: ignore[overload-overlap] *, pyformat_args: Optional[Dict[str, Any]] = ..., dry_run: Literal[False] = ..., -) -> bigframes.dataframe.DataFrame: - ... +) -> bigframes.dataframe.DataFrame: ... @overload @@ -291,8 +288,7 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = ..., dry_run: Literal[True] = ..., -) -> pandas.Series: - ... +) -> pandas.Series: ... 
def _read_gbq_colab( @@ -395,8 +391,7 @@ def read_gbq_query( # type: ignore[overload-overlap] col_order: Iterable[str] = ..., filters: vendored_pandas_gbq.FiltersType = ..., dry_run: Literal[False] = ..., -) -> bigframes.dataframe.DataFrame: - ... +) -> bigframes.dataframe.DataFrame: ... @overload @@ -411,8 +406,7 @@ def read_gbq_query( col_order: Iterable[str] = ..., filters: vendored_pandas_gbq.FiltersType = ..., dry_run: Literal[True] = ..., -) -> pandas.Series: - ... +) -> pandas.Series: ... def read_gbq_query( @@ -456,8 +450,7 @@ def read_gbq_table( # type: ignore[overload-overlap] use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., -) -> bigframes.dataframe.DataFrame: - ... +) -> bigframes.dataframe.DataFrame: ... @overload @@ -471,8 +464,7 @@ def read_gbq_table( use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., -) -> pandas.Series: - ... +) -> pandas.Series: ... def read_gbq_table( @@ -508,8 +500,7 @@ def read_pandas( pandas_dataframe: pandas.DataFrame, *, write_engine: constants.WriteEngineType = "default", -) -> bigframes.dataframe.DataFrame: - ... +) -> bigframes.dataframe.DataFrame: ... @typing.overload @@ -517,8 +508,7 @@ def read_pandas( pandas_dataframe: pandas.Series, *, write_engine: constants.WriteEngineType = "default", -) -> bigframes.series.Series: - ... +) -> bigframes.series.Series: ... @typing.overload @@ -526,8 +516,7 @@ def read_pandas( pandas_dataframe: pandas.Index, *, write_engine: constants.WriteEngineType = "default", -) -> bigframes.core.indexes.Index: - ... +) -> bigframes.core.indexes.Index: ... def read_pandas( diff --git a/bigframes/series.py b/bigframes/series.py index 3a1af0bb1d..b24ff0633b 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -267,8 +267,7 @@ def copy(self) -> Series: def rename( self, index: Union[blocks.Label, Mapping[Any, Any]] = None, - ) -> Series: - ... + ) -> Series: ... @overload def rename( @@ -277,8 +276,7 @@ def rename( *, inplace: Literal[False], **kwargs, - ) -> Series: - ... + ) -> Series: ... @overload def rename( @@ -287,8 +285,7 @@ def rename( *, inplace: Literal[True], **kwargs, - ) -> None: - ... + ) -> None: ... def rename( self, @@ -350,8 +347,7 @@ def rename( def rename_axis( self, mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]], - ) -> Series: - ... + ) -> Series: ... @overload def rename_axis( @@ -360,8 +356,7 @@ def rename_axis( *, inplace: Literal[False], **kwargs, - ) -> Series: - ... + ) -> Series: ... @overload def rename_axis( @@ -370,8 +365,7 @@ def rename_axis( *, inplace: Literal[True], **kwargs, - ) -> None: - ... + ) -> None: ... @validations.requires_index def rename_axis( @@ -1644,8 +1638,7 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> None: - ... + ) -> None: ... @typing.overload def sort_values( @@ -1656,8 +1649,7 @@ def sort_values( ascending: bool | typing.Sequence[bool] = ..., kind: str = ..., na_position: typing.Literal["first", "last"] = ..., - ) -> Series: - ... + ) -> Series: ... 
def sort_values( self, @@ -1674,9 +1666,13 @@ def sort_values( raise ValueError("Param na_position must be one of 'first' or 'last'") block = self._block.order_by( [ - order.ascending_over(self._value_column, (na_position == "last")) - if ascending - else order.descending_over(self._value_column, (na_position == "last")) + ( + order.ascending_over(self._value_column, (na_position == "last")) + if ascending + else order.descending_over( + self._value_column, (na_position == "last") + ) + ) ], ) if inplace: @@ -1688,14 +1684,12 @@ def sort_values( @typing.overload # type: ignore[override] def sort_index( self, *, axis=..., inplace: Literal[False] = ..., ascending=..., na_position=... - ) -> Series: - ... + ) -> Series: ... @typing.overload def sort_index( self, *, axis=0, inplace: Literal[True] = ..., ascending=..., na_position=... - ) -> None: - ... + ) -> None: ... @validations.requires_index def sort_index( @@ -1709,9 +1703,11 @@ def sort_index( block = self._block na_last = na_position == "last" ordering = [ - order.ascending_over(column, na_last) - if ascending - else order.descending_over(column, na_last) + ( + order.ascending_over(column, na_last) + if ascending + else order.descending_over(column, na_last) + ) for column in block.index_columns ] block = block.order_by(ordering) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index d27cd48cdd..beb71ebea8 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -394,8 +394,7 @@ def read_gbq( # type: ignore[overload-overlap] use_cache: Optional[bool] = ..., col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def read_gbq( @@ -410,8 +409,7 @@ def read_gbq( use_cache: Optional[bool] = ..., col_order: Iterable[str] = ..., dry_run: Literal[True] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def read_gbq( self, @@ -424,7 +422,7 @@ def read_gbq( filters: third_party_pandas_gbq.FiltersType = (), use_cache: Optional[bool] = None, col_order: Iterable[str] = (), - dry_run: bool = False + dry_run: bool = False, # Add a verify index argument that fails if the index is not unique. ) -> dataframe.DataFrame | pandas.Series: # TODO(b/281571214): Generate prompt to show the progress of read_gbq. @@ -479,8 +477,7 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def _read_gbq_colab( @@ -489,8 +486,7 @@ def _read_gbq_colab( *, pyformat_args: Optional[Dict[str, Any]] = None, dry_run: Literal[True] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... @log_adapter.log_name_override("read_gbq_colab") def _read_gbq_colab( @@ -551,8 +547,7 @@ def read_gbq_query( # type: ignore[overload-overlap] col_order: Iterable[str] = ..., filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... @overload def read_gbq_query( @@ -567,8 +562,7 @@ def read_gbq_query( col_order: Iterable[str] = ..., filters: third_party_pandas_gbq.FiltersType = ..., dry_run: Literal[True] = ..., - ) -> pandas.Series: - ... + ) -> pandas.Series: ... def read_gbq_query( self, @@ -671,8 +665,7 @@ def read_gbq_table( # type: ignore[overload-overlap] use_cache: bool = ..., col_order: Iterable[str] = ..., dry_run: Literal[False] = ..., - ) -> dataframe.DataFrame: - ... + ) -> dataframe.DataFrame: ... 
     @overload
     def read_gbq_table(
@@ -686,8 +679,7 @@ def read_gbq_table(
         use_cache: bool = ...,
         col_order: Iterable[str] = ...,
         dry_run: Literal[True] = ...,
-    ) -> pandas.Series:
-        ...
+    ) -> pandas.Series: ...

     def read_gbq_table(
         self,
@@ -813,8 +805,7 @@ def read_pandas(
         pandas_dataframe: pandas.Index,
         *,
         write_engine: constants.WriteEngineType = "default",
-    ) -> bigframes.core.indexes.Index:
-        ...
+    ) -> bigframes.core.indexes.Index: ...

     @typing.overload
     def read_pandas(
@@ -822,8 +813,7 @@ def read_pandas(
         pandas_dataframe: pandas.Series,
         *,
         write_engine: constants.WriteEngineType = "default",
-    ) -> bigframes.series.Series:
-        ...
+    ) -> bigframes.series.Series: ...

     @typing.overload
     def read_pandas(
@@ -831,8 +821,7 @@ def read_pandas(
         pandas_dataframe: pandas.DataFrame,
         *,
         write_engine: constants.WriteEngineType = "default",
-    ) -> dataframe.DataFrame:
-        ...
+    ) -> dataframe.DataFrame: ...

     def read_pandas(
         self,
@@ -2135,7 +2124,8 @@ def _create_bq_connection(
         iam_role: Optional[str] = None,
     ) -> str:
         """Create the connection with the session settings and try to attach iam role to the connection SA.
-        If any of project, location or connection isn't specified, use the session defaults. Returns fully-qualified connection name."""
+        If any of project, location or connection isn't specified, use the session defaults. Returns fully-qualified connection name.
+        """
         connection = self._bq_connection if not connection else connection
         connection = bigframes.clients.get_canonical_bq_connection_id(
             connection_id=connection,
diff --git a/bigframes/session/_io/bigquery/__init__.py b/bigframes/session/_io/bigquery/__init__.py
index fdc240fa69..4ba875bfdd 100644
--- a/bigframes/session/_io/bigquery/__init__.py
+++ b/bigframes/session/_io/bigquery/__init__.py
@@ -247,8 +247,7 @@ def start_query_with_client(
     timeout: Optional[float],
     metrics: Optional[bigframes.session.metrics.ExecutionMetrics] = None,
     query_with_job: Literal[True],
-) -> Tuple[bigquery.table.RowIterator, bigquery.QueryJob]:
-    ...
+) -> Tuple[bigquery.table.RowIterator, bigquery.QueryJob]: ...


 @overload
@@ -262,8 +261,7 @@ def start_query_with_client(
     timeout: Optional[float],
     metrics: Optional[bigframes.session.metrics.ExecutionMetrics] = None,
     query_with_job: Literal[False],
-) -> Tuple[bigquery.table.RowIterator, Optional[bigquery.QueryJob]]:
-    ...
+) -> Tuple[bigquery.table.RowIterator, Optional[bigquery.QueryJob]]: ...


 def start_query_with_client(
diff --git a/bigframes/session/_io/bigquery/read_gbq_table.py b/bigframes/session/_io/bigquery/read_gbq_table.py
index 6322040428..370f29bcdd 100644
--- a/bigframes/session/_io/bigquery/read_gbq_table.py
+++ b/bigframes/session/_io/bigquery/read_gbq_table.py
@@ -237,11 +237,9 @@ def _is_table_clustered_or_partitioned(

 def get_index_cols(
     table: bigquery.table.Table,
-    index_col: Iterable[str]
-    | str
-    | Iterable[int]
-    | int
-    | bigframes.enums.DefaultIndexKind,
+    index_col: (
+        Iterable[str] | str | Iterable[int] | int | bigframes.enums.DefaultIndexKind
+    ),
     *,
     rename_to_schema: Optional[Dict[str, str]] = None,
 ) -> List[str]:
diff --git a/bigframes/session/_io/pandas.py b/bigframes/session/_io/pandas.py
index 9340e060ac..b443bc8a1a 100644
--- a/bigframes/session/_io/pandas.py
+++ b/bigframes/session/_io/pandas.py
@@ -95,12 +95,16 @@ def arrow_to_pandas(
         # location since pandas 1.2.0. See:
         # https://pandas.pydata.org/docs/dev/reference/api/pandas.arrays.FloatingArray.html
         pd_array = pandas.arrays.FloatingArray(  # type: ignore
-            nonnull.to_numpy()
-            if isinstance(nonnull, pyarrow.ChunkedArray)
-            else nonnull.to_numpy(zero_copy_only=False),
-            mask.to_numpy()
-            if isinstance(mask, pyarrow.ChunkedArray)
-            else mask.to_numpy(zero_copy_only=False),
+            (
+                nonnull.to_numpy()
+                if isinstance(nonnull, pyarrow.ChunkedArray)
+                else nonnull.to_numpy(zero_copy_only=False)
+            ),
+            (
+                mask.to_numpy()
+                if isinstance(mask, pyarrow.ChunkedArray)
+                else mask.to_numpy(zero_copy_only=False)
+            ),
         )
         series = pandas.Series(pd_array, dtype=dtype)
     elif dtype == pandas.Int64Dtype():
@@ -109,12 +113,16 @@ def arrow_to_pandas(
         mask = pyarrow.compute.is_null(column)
         nonnull = pyarrow.compute.fill_null(column, 0)
         pd_array = pandas.arrays.IntegerArray(
-            nonnull.to_numpy()
-            if isinstance(nonnull, pyarrow.ChunkedArray)
-            else nonnull.to_numpy(zero_copy_only=False),
-            mask.to_numpy()
-            if isinstance(mask, pyarrow.ChunkedArray)
-            else mask.to_numpy(zero_copy_only=False),
+            (
+                nonnull.to_numpy()
+                if isinstance(nonnull, pyarrow.ChunkedArray)
+                else nonnull.to_numpy(zero_copy_only=False)
+            ),
+            (
+                mask.to_numpy()
+                if isinstance(mask, pyarrow.ChunkedArray)
+                else mask.to_numpy(zero_copy_only=False)
+            ),
         )
         series = pandas.Series(pd_array, dtype=dtype)
     elif dtype == bigframes.dtypes.STRING_DTYPE:
diff --git a/bigframes/session/bq_caching_executor.py b/bigframes/session/bq_caching_executor.py
index a970e75a0f..c45c1a57fb 100644
--- a/bigframes/session/bq_caching_executor.py
+++ b/bigframes/session/bq_caching_executor.py
@@ -734,7 +734,7 @@ def _if_schema_match(


 def _sanitize(
-    schema: Tuple[bigquery.SchemaField, ...]
+    schema: Tuple[bigquery.SchemaField, ...],
 ) -> Tuple[bigquery.SchemaField, ...]:
     # Schema inferred from SQL strings and Ibis expressions contain only names, types and modes,
     # so we disregard other fields (e.g timedelta description for timedelta columns) for validations.
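# NOTE -----------------------------------------------------------------------
# The arrow_to_pandas hunks above build pandas masked extension arrays from
# two numpy arrays: the values with nulls filled, plus a boolean "is null"
# mask. The isinstance branches exist because ChunkedArray and Array expose
# slightly different to_numpy() signatures. A small repro of the underlying
# API (the sample data is illustrative):
import pandas
import pyarrow
import pyarrow.compute

column = pyarrow.chunked_array([[1.5, None, 3.0]])
mask = pyarrow.compute.is_null(column)  # True where the value is NULL
nonnull = pyarrow.compute.fill_null(column, 0)  # placeholder under the mask

pd_array = pandas.arrays.FloatingArray(nonnull.to_numpy(), mask.to_numpy())
print(pandas.Series(pd_array, dtype=pandas.Float64Dtype()))
# 0     1.5
# 1    <NA>
# 2     3.0
# -----------------------------------------------------------------------------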
diff --git a/bigframes/session/executor.py b/bigframes/session/executor.py
index cc8f086f9f..10a338205c 100644
--- a/bigframes/session/executor.py
+++ b/bigframes/session/executor.py
@@ -88,9 +88,9 @@ def to_pandas_batches(
     ) -> Iterator[pd.DataFrame]:
         assert (page_size is None) or (page_size > 0)
         assert (max_results is None) or (max_results > 0)
-        batch_iter: Iterator[
-            Union[pyarrow.Table, pyarrow.RecordBatch]
-        ] = self.arrow_batches
+        batch_iter: Iterator[Union[pyarrow.Table, pyarrow.RecordBatch]] = (
+            self.arrow_batches
+        )
         if max_results is not None:
             batch_iter = pyarrow_utils.truncate_pyarrow_iterable(
                 batch_iter, max_results
diff --git a/bigframes/session/loader.py b/bigframes/session/loader.py
index c264abd860..46dd663546 100644
--- a/bigframes/session/loader.py
+++ b/bigframes/session/loader.py
@@ -193,11 +193,9 @@ def _check_columns_param(columns: Iterable[str], table_columns: Iterable[str]):

 def _check_names_param(
     names: Iterable[str],
-    index_col: Iterable[str]
-    | str
-    | Iterable[int]
-    | int
-    | bigframes.enums.DefaultIndexKind,
+    index_col: (
+        Iterable[str] | str | Iterable[int] | int | bigframes.enums.DefaultIndexKind
+    ),
     columns: Iterable[str],
     table_columns: Iterable[str],
 ):
@@ -483,11 +481,9 @@ def read_gbq_table(  # type: ignore[overload-overlap]
         self,
         table_id: str,
         *,
-        index_col: Iterable[str]
-        | str
-        | Iterable[int]
-        | int
-        | bigframes.enums.DefaultIndexKind = ...,
+        index_col: (
+            Iterable[str] | str | Iterable[int] | int | bigframes.enums.DefaultIndexKind
+        ) = ...,
         columns: Iterable[str] = ...,
         names: Optional[Iterable[str]] = ...,
         max_results: Optional[int] = ...,
@@ -498,19 +494,16 @@ def read_gbq_table(  # type: ignore[overload-overlap]
         force_total_order: Optional[bool] = ...,
         n_rows: Optional[int] = None,
         index_col_in_columns: bool = False,
-    ) -> dataframe.DataFrame:
-        ...
+    ) -> dataframe.DataFrame: ...

     @overload
     def read_gbq_table(
         self,
         table_id: str,
         *,
-        index_col: Iterable[str]
-        | str
-        | Iterable[int]
-        | int
-        | bigframes.enums.DefaultIndexKind = ...,
+        index_col: (
+            Iterable[str] | str | Iterable[int] | int | bigframes.enums.DefaultIndexKind
+        ) = ...,
         columns: Iterable[str] = ...,
         names: Optional[Iterable[str]] = ...,
         max_results: Optional[int] = ...,
@@ -521,18 +514,15 @@ def read_gbq_table(
         force_total_order: Optional[bool] = ...,
         n_rows: Optional[int] = None,
         index_col_in_columns: bool = False,
-    ) -> pandas.Series:
-        ...
+    ) -> pandas.Series: ...

     def read_gbq_table(
         self,
         table_id: str,
         *,
-        index_col: Iterable[str]
-        | str
-        | Iterable[int]
-        | int
-        | bigframes.enums.DefaultIndexKind = (),
+        index_col: (
+            Iterable[str] | str | Iterable[int] | int | bigframes.enums.DefaultIndexKind
+        ) = (),
         columns: Iterable[str] = (),
         names: Optional[Iterable[str]] = None,
         max_results: Optional[int] = None,
@@ -706,9 +696,9 @@ def read_gbq_table(
             query = bf_io_bigquery.to_query(
                 table_id,
                 columns=all_columns,
-                sql_predicate=bf_io_bigquery.compile_filters(filters)
-                if filters
-                else None,
+                sql_predicate=(
+                    bf_io_bigquery.compile_filters(filters) if filters else None
+                ),
                 max_results=max_results,
                 # We're executing the query, so we don't need time travel for
                 # determinism.
@@ -899,8 +889,7 @@ def read_gbq_query(  # type: ignore[overload-overlap]
         dry_run: Literal[False] = ...,
         force_total_order: Optional[bool] = ...,
         allow_large_results: bool = ...,
-    ) -> dataframe.DataFrame:
-        ...
+    ) -> dataframe.DataFrame: ...
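# NOTE -----------------------------------------------------------------------
# The loader.py hunks above fold a long PEP 604 union that was previously
# broken at every `|` into one parenthesized annotation (black only adds the
# parentheses when the union overflows the line). A runnable sketch with
# illustrative names, not the bigframes signature:
from collections.abc import Iterable

IndexColType = Iterable[str] | str | Iterable[int] | int  # runtime union, 3.10+

def normalize_index_cols(index_col: IndexColType = ()) -> list[str]:
    # Accept one column name/offset or any iterable of them.
    if isinstance(index_col, (str, int)):
        return [str(index_col)]
    return [str(col) for col in index_col]

print(normalize_index_cols(("a", "b")))  # ['a', 'b']
# -----------------------------------------------------------------------------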
     @overload
     def read_gbq_query(
@@ -916,8 +905,7 @@ def read_gbq_query(
         dry_run: Literal[True] = ...,
         force_total_order: Optional[bool] = ...,
         allow_large_results: bool = ...,
-    ) -> pandas.Series:
-        ...
+    ) -> pandas.Series: ...

     def read_gbq_query(
         self,
diff --git a/bigframes/session/planner.py b/bigframes/session/planner.py
index 2a562abadf..e346fe5c8d 100644
--- a/bigframes/session/planner.py
+++ b/bigframes/session/planner.py
@@ -39,9 +39,9 @@ def session_aware_cache_plan(
     caching_target = cur_node = root
     caching_target_refs = node_counts.get(caching_target, 0)

-    filters: list[
-        ex.Expression
-    ] = []  # accumulate filters into this as traverse downwards
+    filters: list[ex.Expression] = (
+        []
+    )  # accumulate filters into this as traverse downwards
     clusterable_cols: set[ids.ColumnId] = set()
     while isinstance(cur_node, de_cachable_types):
         if isinstance(cur_node, nodes.FilterNode):
diff --git a/bigframes/session/temporary_storage.py b/bigframes/session/temporary_storage.py
index 0c2a36f3fe..42617c8f6c 100644
--- a/bigframes/session/temporary_storage.py
+++ b/bigframes/session/temporary_storage.py
@@ -19,14 +19,11 @@ class TemporaryStorageManager(Protocol):
     @property
-    def location(self) -> str:
-        ...
+    def location(self) -> str: ...

     def create_temp_table(
         self, schema: Sequence[bigquery.SchemaField], cluster_cols: Sequence[str] = []
-    ) -> bigquery.TableReference:
-        ...
+    ) -> bigquery.TableReference: ...

     # implementations should be robust to repeatedly closing
-    def close(self) -> None:
-        ...
+    def close(self) -> None: ...
diff --git a/docs/_templates/layout.html b/docs/_templates/layout.html
index 6316a537f7..95e9c77fcf 100644
--- a/docs/_templates/layout.html
+++ b/docs/_templates/layout.html
@@ -20,8 +20,8 @@
 {% endblock %}
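# NOTE -----------------------------------------------------------------------
# TemporaryStorageManager above is a typing.Protocol: any object with a string
# `location` and matching methods satisfies it structurally, no inheritance
# required. A trimmed-down sketch of the idea (create_temp_table is dropped to
# stay self-contained; both class names here are hypothetical):
from typing import Protocol

class StorageManager(Protocol):
    @property
    def location(self) -> str: ...

    def close(self) -> None: ...

class InMemoryStorage:
    location = "us-central1"  # a plain attribute satisfies the property

    def close(self) -> None:
        pass

manager: StorageManager = InMemoryStorage()  # accepted by static type checkers
manager.close()
# -----------------------------------------------------------------------------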