Skip to content

Commit ac13caf

Browse files
authored
Merge branch 'main' into deps-adjust-required-kokoro-checks
2 parents 98329c4 + 2c19681 commit ac13caf

File tree

14 files changed

+545
-126
lines changed

14 files changed

+545
-126
lines changed

google/cloud/bigquery/_helpers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -978,11 +978,11 @@ def _build_resource_from_properties(obj, filter_fields):
978978
"""
979979
partial = {}
980980
for filter_field in filter_fields:
981-
api_field = obj._PROPERTY_TO_API_FIELD.get(filter_field)
981+
api_field = _get_sub_prop(obj._PROPERTY_TO_API_FIELD, filter_field)
982982
if api_field is None and filter_field not in obj._properties:
983983
raise ValueError("No property %s" % filter_field)
984984
elif api_field is not None:
985-
partial[api_field] = obj._properties.get(api_field)
985+
_set_sub_prop(partial, api_field, _get_sub_prop(obj._properties, api_field))
986986
else:
987987
# allows properties that are not defined in the library
988988
# and properties that have the same name as API resource key

google/cloud/bigquery/_pandas_helpers.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,12 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
"""Shared helper functions for connecting BigQuery and pandas."""
15+
"""Shared helper functions for connecting BigQuery and pandas.
16+
17+
NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package,
18+
instead. See: go/pandas-gbq-and-bigframes-redundancy and
19+
https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pandas_to_bigquery.py
20+
"""
1621

1722
import concurrent.futures
1823
from datetime import datetime
@@ -40,6 +45,16 @@
4045
else:
4146
import numpy
4247

48+
49+
try:
50+
import pandas_gbq.schema.pandas_to_bigquery # type: ignore
51+
52+
pandas_gbq_import_exception = None
53+
except ImportError as exc:
54+
pandas_gbq = None
55+
pandas_gbq_import_exception = exc
56+
57+
4358
try:
4459
import db_dtypes # type: ignore
4560

@@ -445,6 +460,10 @@ def _first_array_valid(series):
445460
def dataframe_to_bq_schema(dataframe, bq_schema):
446461
"""Convert a pandas DataFrame schema to a BigQuery schema.
447462
463+
DEPRECATED: Use
464+
pandas_gbq.schema.pandas_to_bigquery.dataframe_to_bigquery_fields(),
465+
instead. See: go/pandas-gbq-and-bigframes-redundancy.
466+
448467
Args:
449468
dataframe (pandas.DataFrame):
450469
DataFrame for which the client determines the BigQuery schema.
@@ -460,6 +479,20 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
460479
The automatically determined schema. Returns None if the type of
461480
any column cannot be determined.
462481
"""
482+
if pandas_gbq is None:
483+
warnings.warn(
484+
"Loading pandas DataFrame into BigQuery will require pandas-gbq "
485+
"package version 0.26.1 or greater in the future. "
486+
f"Tried to import pandas-gbq and got: {pandas_gbq_import_exception}",
487+
category=FutureWarning,
488+
)
489+
else:
490+
return pandas_gbq.schema.pandas_to_bigquery.dataframe_to_bigquery_fields(
491+
dataframe,
492+
override_bigquery_fields=bq_schema,
493+
index=True,
494+
)
495+
463496
if bq_schema:
464497
bq_schema = schema._to_schema_fields(bq_schema)
465498
bq_schema_index = {field.name: field for field in bq_schema}

google/cloud/bigquery/_pyarrow_helpers.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,12 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
"""Shared helper functions for connecting BigQuery and pyarrow."""
15+
"""Shared helper functions for connecting BigQuery and pyarrow.
16+
17+
NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package,
18+
instead. See: go/pandas-gbq-and-bigframes-redundancy and
19+
https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pyarrow_to_bigquery.py
20+
"""
1621

1722
from typing import Any
1823

google/cloud/bigquery/schema.py

Lines changed: 32 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,9 @@
1515
"""Schemas for BigQuery tables / queries."""
1616

1717
from __future__ import annotations
18-
import collections
1918
import enum
2019
import typing
21-
from typing import Any, cast, Dict, Iterable, Optional, Union
20+
from typing import Any, cast, Dict, Iterable, Optional, Union, Sequence
2221

2322
from google.cloud.bigquery import _helpers
2423
from google.cloud.bigquery import standard_sql
@@ -489,6 +488,8 @@ def _parse_schema_resource(info):
489488
Optional[Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`]]:
490489
A list of parsed fields, or ``None`` if no "fields" key found.
491490
"""
491+
if isinstance(info, list):
492+
return [SchemaField.from_api_repr(f) for f in info]
492493
return [SchemaField.from_api_repr(f) for f in info.get("fields", ())]
493494

494495

@@ -501,40 +502,46 @@ def _build_schema_resource(fields):
501502
Returns:
502503
Sequence[Dict]: Mappings describing the schema of the supplied fields.
503504
"""
504-
return [field.to_api_repr() for field in fields]
505+
if isinstance(fields, Sequence):
506+
# Input is a Sequence (e.g. a list): convert each field to its API representation (a mapping)
507+
return [field.to_api_repr() for field in fields]
508+
509+
else:
510+
raise TypeError("Schema must be a Sequence (e.g. a list) or None.")
505511

506512

507513
def _to_schema_fields(schema):
508-
"""Coerce `schema` to a list of schema field instances.
514+
"""Coerces schema to a list of SchemaField instances while
515+
preserving the original structure as much as possible.
509516
510517
Args:
511-
schema(Sequence[Union[ \
512-
:class:`~google.cloud.bigquery.schema.SchemaField`, \
513-
Mapping[str, Any] \
514-
]]):
515-
Table schema to convert. If some items are passed as mappings,
516-
their content must be compatible with
517-
:meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`.
518+
schema (Sequence[Union[ \
519+
:class:`~google.cloud.bigquery.schema.SchemaField`, \
520+
Mapping[str, Any] \
521+
] \
522+
] \
523+
):
524+
Table schema to convert. Can be a list of SchemaField
525+
objects or mappings.
518526
519527
Returns:
520-
Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`]
528+
A list of SchemaField objects.
521529
522530
Raises:
523-
Exception: If ``schema`` is not a sequence, or if any item in the
524-
sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField`
525-
instance or a compatible mapping representation of the field.
531+
TypeError: If schema is not a Sequence.
526532
"""
527-
for field in schema:
528-
if not isinstance(field, (SchemaField, collections.abc.Mapping)):
529-
raise ValueError(
530-
"Schema items must either be fields or compatible "
531-
"mapping representations."
532-
)
533533

534-
return [
535-
field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field)
536-
for field in schema
537-
]
534+
if isinstance(schema, Sequence):
535+
# Input is a Sequence (e.g. a list): Process and return a list of SchemaFields
536+
return [
537+
field
538+
if isinstance(field, SchemaField)
539+
else SchemaField.from_api_repr(field)
540+
for field in schema
541+
]
542+
543+
else:
544+
raise TypeError("Schema must be a Sequence (e.g. a list) or None.")
538545

539546

540547
class PolicyTagList(object):

google/cloud/bigquery/table.py

Lines changed: 69 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
import functools
2222
import operator
2323
import typing
24-
from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union
24+
from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union, Sequence
25+
2526
import warnings
2627

2728
try:
@@ -66,6 +67,7 @@
6667
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
6768
from google.cloud.bigquery.enums import DefaultPandasDTypes
6869
from google.cloud.bigquery.external_config import ExternalConfig
70+
from google.cloud.bigquery import schema as _schema
6971
from google.cloud.bigquery.schema import _build_schema_resource
7072
from google.cloud.bigquery.schema import _parse_schema_resource
7173
from google.cloud.bigquery.schema import _to_schema_fields
@@ -398,7 +400,7 @@ class Table(_TableBase):
398400
"partitioning_type": "timePartitioning",
399401
"range_partitioning": "rangePartitioning",
400402
"time_partitioning": "timePartitioning",
401-
"schema": "schema",
403+
"schema": ["schema", "fields"],
402404
"snapshot_definition": "snapshotDefinition",
403405
"clone_definition": "cloneDefinition",
404406
"streaming_buffer": "streamingBuffer",
@@ -411,6 +413,7 @@ class Table(_TableBase):
411413
"max_staleness": "maxStaleness",
412414
"resource_tags": "resourceTags",
413415
"external_catalog_table_options": "externalCatalogTableOptions",
416+
"foreign_type_info": ["schema", "foreignTypeInfo"],
414417
}
415418

416419
def __init__(self, table_ref, schema=None) -> None:
@@ -451,8 +454,20 @@ def schema(self):
451454
If ``schema`` is not a sequence, or if any item in the sequence
452455
is not a :class:`~google.cloud.bigquery.schema.SchemaField`
453456
instance or a compatible mapping representation of the field.
457+
458+
.. Note::
459+
If you are referencing a schema for an external catalog table such
460+
as a Hive table, it will also be necessary to populate the foreign_type_info
461+
attribute. This is not necessary if defining the schema for a BigQuery table.
462+
463+
For details, see:
464+
https://cloud.google.com/bigquery/docs/external-tables
465+
https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets
466+
454467
"""
455-
prop = self._properties.get(self._PROPERTY_TO_API_FIELD["schema"])
468+
prop = _helpers._get_sub_prop(
469+
self._properties, self._PROPERTY_TO_API_FIELD["schema"]
470+
)
456471
if not prop:
457472
return []
458473
else:
@@ -463,10 +478,21 @@ def schema(self, value):
463478
api_field = self._PROPERTY_TO_API_FIELD["schema"]
464479

465480
if value is None:
466-
self._properties[api_field] = None
467-
else:
481+
_helpers._set_sub_prop(
482+
self._properties,
483+
api_field,
484+
None,
485+
)
486+
elif isinstance(value, Sequence):
468487
value = _to_schema_fields(value)
469-
self._properties[api_field] = {"fields": _build_schema_resource(value)}
488+
value = _build_schema_resource(value)
489+
_helpers._set_sub_prop(
490+
self._properties,
491+
api_field,
492+
value,
493+
)
494+
else:
495+
raise TypeError("Schema must be a Sequence (e.g. a list) or None.")
470496

471497
@property
472498
def labels(self):
@@ -1075,6 +1101,43 @@ def external_catalog_table_options(
10751101
self._PROPERTY_TO_API_FIELD["external_catalog_table_options"]
10761102
] = value
10771103

1104+
@property
1105+
def foreign_type_info(self) -> Optional[_schema.ForeignTypeInfo]:
1106+
"""Optional. Specifies metadata of the foreign data type definition in
1107+
field schema (TableFieldSchema.foreign_type_definition).
1108+
1109+
Returns:
1110+
Optional[schema.ForeignTypeInfo]:
1111+
Foreign type information, or :data:`None` if not set.
1112+
1113+
.. Note::
1114+
foreign_type_info is only required if you are referencing an
1115+
external catalog such as a Hive table.
1116+
For details, see:
1117+
https://cloud.google.com/bigquery/docs/external-tables
1118+
https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets
1119+
"""
1120+
1121+
prop = _helpers._get_sub_prop(
1122+
self._properties, self._PROPERTY_TO_API_FIELD["foreign_type_info"]
1123+
)
1124+
if prop is not None:
1125+
return _schema.ForeignTypeInfo.from_api_repr(prop)
1126+
return None
1127+
1128+
@foreign_type_info.setter
1129+
def foreign_type_info(self, value: Union[_schema.ForeignTypeInfo, dict, None]):
1130+
value = _helpers._isinstance_or_raise(
1131+
value,
1132+
(_schema.ForeignTypeInfo, dict),
1133+
none_allowed=True,
1134+
)
1135+
if isinstance(value, _schema.ForeignTypeInfo):
1136+
value = value.to_api_repr()
1137+
_helpers._set_sub_prop(
1138+
self._properties, self._PROPERTY_TO_API_FIELD["foreign_type_info"], value
1139+
)
1140+
10781141
@classmethod
10791142
def from_string(cls, full_table_id: str) -> "Table":
10801143
"""Construct a table from fully-qualified table ID.

noxfile.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,14 @@ def default(session, install_extras=True):
110110
else:
111111
install_target = "."
112112
session.install("-e", install_target, "-c", constraints_path)
113+
114+
# Test with some broken "extras" in case the user didn't install the extra
115+
# directly. For example, pandas-gbq is recommended for pandas features, but
116+
# we want to test that we fall back to the previous behavior. For context,
117+
# see internal document go/pandas-gbq-and-bigframes-redundancy.
118+
if session.python == UNIT_TEST_PYTHON_VERSIONS[0]:
119+
session.run("python", "-m", "pip", "uninstall", "pandas-gbq", "-y")
120+
113121
session.run("python", "-m", "pip", "freeze")
114122

115123
# Run py.test against the unit tests.
@@ -228,6 +236,13 @@ def system(session):
228236
extras = "[all]"
229237
session.install("-e", f".{extras}", "-c", constraints_path)
230238

239+
# Test with some broken "extras" in case the user didn't install the extra
240+
# directly. For example, pandas-gbq is recommended for pandas features, but
241+
# we want to test that we fall back to the previous behavior. For context,
242+
# see internal document go/pandas-gbq-and-bigframes-redundancy.
243+
if session.python == SYSTEM_TEST_PYTHON_VERSIONS[0]:
244+
session.run("python", "-m", "pip", "uninstall", "pandas-gbq", "-y")
245+
231246
# print versions of all dependencies
232247
session.run("python", "-m", "pip", "freeze")
233248

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@ bqstorage = [
7474
]
7575
pandas = [
7676
"pandas >= 1.1.0",
77+
"pandas-gbq >= 0.26.1; python_version >= '3.8'",
78+
"grpcio >= 1.47.0, < 2.0dev",
79+
"grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'",
7780
"pyarrow >= 3.0.0",
7881
"db-dtypes >= 0.3.0, < 2.0.0dev",
7982
"importlib_metadata >= 1.0.0; python_version < '3.8'",

testing/constraints-3.8.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,11 @@
11
grpcio==1.47.0
22
pandas==1.2.0
3+
4+
# This constraints file is used to check that lower bounds
5+
# are correct in setup.py
6+
#
7+
# Pin the version to the lower bound.
8+
#
9+
# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev",
10+
# Then this file should have foo==1.14.0
11+
pandas-gbq==0.26.1

tests/system/test_pandas.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1259,7 +1259,7 @@ def test_upload_time_and_datetime_56(bigquery_client, dataset_id):
12591259
df = pandas.DataFrame(
12601260
dict(
12611261
dt=[
1262-
datetime.datetime(2020, 1, 8, 8, 0, 0),
1262+
datetime.datetime(2020, 1, 8, 8, 0, 0, tzinfo=datetime.timezone.utc),
12631263
datetime.datetime(
12641264
2020,
12651265
1,

tests/unit/job/test_load.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ def test_schema_setter_invalid_field(self):
272272

273273
config = LoadJobConfig()
274274
full_name = SchemaField("full_name", "STRING", mode="REQUIRED")
275-
with self.assertRaises(ValueError):
275+
with self.assertRaises(TypeError):
276276
config.schema = [full_name, object()]
277277

278278
def test_schema_setter(self):

0 commit comments

Comments
 (0)