Merge branch 'main' into deps-adjust-required-kokoro-checks

chalmerlowe · web-flow · commit ac13caf8097e · 2025-02-21T13:00:38.000-05:00
diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py
@@ -978,11 +978,11 @@ def _build_resource_from_properties(obj, filter_fields):
     """
     partial = {}
     for filter_field in filter_fields:
-        api_field = obj._PROPERTY_TO_API_FIELD.get(filter_field)
+        api_field = _get_sub_prop(obj._PROPERTY_TO_API_FIELD, filter_field)
         if api_field is None and filter_field not in obj._properties:
             raise ValueError("No property %s" % filter_field)
         elif api_field is not None:
-            partial[api_field] = obj._properties.get(api_field)
+            _set_sub_prop(partial, api_field, _get_sub_prop(obj._properties, api_field))
         else:
             # allows properties that are not defined in the library
             # and properties that have the same name as API resource key
diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py
@@ -12,7 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Shared helper functions for connecting BigQuery and pandas."""
+"""Shared helper functions for connecting BigQuery and pandas.
+
+NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package,
+instead. See: go/pandas-gbq-and-bigframes-redundancy and
+https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pandas_to_bigquery.py
+"""
 
 import concurrent.futures
 from datetime import datetime
@@ -40,6 +45,16 @@
 else:
     import numpy
 
+
+try:
+    import pandas_gbq.schema.pandas_to_bigquery  # type: ignore
+
+    pandas_gbq_import_exception = None
+except ImportError as exc:
+    pandas_gbq = None
+    pandas_gbq_import_exception = exc
+
+
 try:
     import db_dtypes  # type: ignore
 
@@ -445,6 +460,10 @@ def _first_array_valid(series):
 def dataframe_to_bq_schema(dataframe, bq_schema):
     """Convert a pandas DataFrame schema to a BigQuery schema.
 
+    DEPRECATED: Use
+    pandas_gbq.schema.pandas_to_bigquery.dataframe_to_bigquery_fields(),
+    instead. See: go/pandas-gbq-and-bigframes-redundancy.
+
     Args:
         dataframe (pandas.DataFrame):
             DataFrame for which the client determines the BigQuery schema.
@@ -460,6 +479,20 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
             The automatically determined schema. Returns None if the type of
             any column cannot be determined.
     """
+    if pandas_gbq is None:
+        warnings.warn(
+            "Loading pandas DataFrame into BigQuery will require pandas-gbq "
+            "package version 0.26.1 or greater in the future. "
+            f"Tried to import pandas-gbq and got: {pandas_gbq_import_exception}",
+            category=FutureWarning,
+        )
+    else:
+        return pandas_gbq.schema.pandas_to_bigquery.dataframe_to_bigquery_fields(
+            dataframe,
+            override_bigquery_fields=bq_schema,
+            index=True,
+        )
+
     if bq_schema:
         bq_schema = schema._to_schema_fields(bq_schema)
         bq_schema_index = {field.name: field for field in bq_schema}
diff --git a/google/cloud/bigquery/_pyarrow_helpers.py b/google/cloud/bigquery/_pyarrow_helpers.py
@@ -12,7 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Shared helper functions for connecting BigQuery and pyarrow."""
+"""Shared helper functions for connecting BigQuery and pyarrow.
+
+NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package,
+instead. See: go/pandas-gbq-and-bigframes-redundancy and
+https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pyarrow_to_bigquery.py
+"""
 
 from typing import Any
 
diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py
@@ -15,10 +15,9 @@
 """Schemas for BigQuery tables / queries."""
 
 from __future__ import annotations
-import collections
 import enum
 import typing
-from typing import Any, cast, Dict, Iterable, Optional, Union
+from typing import Any, cast, Dict, Iterable, Optional, Union, Sequence
 
 from google.cloud.bigquery import _helpers
 from google.cloud.bigquery import standard_sql
@@ -489,6 +488,8 @@ def _parse_schema_resource(info):
         Optional[Sequence[google.cloud.bigquery.schema.SchemaField`]:
             A list of parsed fields, or ``None`` if no "fields" key found.
     """
+    if isinstance(info, list):
+        return [SchemaField.from_api_repr(f) for f in info]
     return [SchemaField.from_api_repr(f) for f in info.get("fields", ())]
 
 
@@ -501,40 +502,46 @@ def _build_schema_resource(fields):
     Returns:
         Sequence[Dict]: Mappings describing the schema of the supplied fields.
     """
-    return [field.to_api_repr() for field in fields]
+    if isinstance(fields, Sequence):
+        # Input is a Sequence (e.g. a list): convert each field to its API resource mapping
+        return [field.to_api_repr() for field in fields]
+
+    else:
+        raise TypeError("Schema must be a Sequence (e.g. a list) or None.")
 
 
 def _to_schema_fields(schema):
-    """Coerce `schema` to a list of schema field instances.
+    """Coerces schema to a list of SchemaField instances while
+    preserving the original structure as much as possible.
 
     Args:
-        schema(Sequence[Union[ \
-            :class:`~google.cloud.bigquery.schema.SchemaField`, \
-            Mapping[str, Any] \
-        ]]):
-            Table schema to convert. If some items are passed as mappings,
-            their content must be compatible with
-            :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`.
+        schema (Sequence[Union[ \
+                   :class:`~google.cloud.bigquery.schema.SchemaField`, \
+                   Mapping[str, Any] \
+                       ]
+                   ]
+               ):
+            Table schema to convert. Can be a list of SchemaField
+            objects or mappings.
 
     Returns:
-        Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`]
+        A list of SchemaField objects.
 
     Raises:
-        Exception: If ``schema`` is not a sequence, or if any item in the
-        sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField`
-        instance or a compatible mapping representation of the field.
+        TypeError: If schema is not a Sequence.
     """
-    for field in schema:
-        if not isinstance(field, (SchemaField, collections.abc.Mapping)):
-            raise ValueError(
-                "Schema items must either be fields or compatible "
-                "mapping representations."
-            )
 
-    return [
-        field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field)
-        for field in schema
-    ]
+    if isinstance(schema, Sequence):
+        # Input is a Sequence (e.g. a list): Process and return a list of SchemaFields
+        return [
+            field
+            if isinstance(field, SchemaField)
+            else SchemaField.from_api_repr(field)
+            for field in schema
+        ]
+
+    else:
+        raise TypeError("Schema must be a Sequence (e.g. a list) or None.")
 
 
 class PolicyTagList(object):
diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py
@@ -21,7 +21,8 @@
 import functools
 import operator
 import typing
-from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union
+from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union, Sequence
+
 import warnings
 
 try:
@@ -66,6 +67,7 @@
 from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
 from google.cloud.bigquery.enums import DefaultPandasDTypes
 from google.cloud.bigquery.external_config import ExternalConfig
+from google.cloud.bigquery import schema as _schema
 from google.cloud.bigquery.schema import _build_schema_resource
 from google.cloud.bigquery.schema import _parse_schema_resource
 from google.cloud.bigquery.schema import _to_schema_fields
@@ -398,7 +400,7 @@ class Table(_TableBase):
         "partitioning_type": "timePartitioning",
         "range_partitioning": "rangePartitioning",
         "time_partitioning": "timePartitioning",
-        "schema": "schema",
+        "schema": ["schema", "fields"],
         "snapshot_definition": "snapshotDefinition",
         "clone_definition": "cloneDefinition",
         "streaming_buffer": "streamingBuffer",
@@ -411,6 +413,7 @@ class Table(_TableBase):
         "max_staleness": "maxStaleness",
         "resource_tags": "resourceTags",
         "external_catalog_table_options": "externalCatalogTableOptions",
+        "foreign_type_info": ["schema", "foreignTypeInfo"],
     }
 
     def __init__(self, table_ref, schema=None) -> None:
@@ -451,8 +454,20 @@ def schema(self):
                 If ``schema`` is not a sequence, or if any item in the sequence
                 is not a :class:`~google.cloud.bigquery.schema.SchemaField`
                 instance or a compatible mapping representation of the field.
+
+        .. Note::
+            If you are referencing a schema for an external catalog table such
+            as a Hive table, it will also be necessary to populate the foreign_type_info
+            attribute. This is not necessary if defining the schema for a BigQuery table.
+
+            For details, see:
+            https://cloud.google.com/bigquery/docs/external-tables
+            https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets
+
         """
-        prop = self._properties.get(self._PROPERTY_TO_API_FIELD["schema"])
+        prop = _helpers._get_sub_prop(
+            self._properties, self._PROPERTY_TO_API_FIELD["schema"]
+        )
         if not prop:
             return []
         else:
@@ -463,10 +478,21 @@ def schema(self, value):
         api_field = self._PROPERTY_TO_API_FIELD["schema"]
 
         if value is None:
-            self._properties[api_field] = None
-        else:
+            _helpers._set_sub_prop(
+                self._properties,
+                api_field,
+                None,
+            )
+        elif isinstance(value, Sequence):
             value = _to_schema_fields(value)
-            self._properties[api_field] = {"fields": _build_schema_resource(value)}
+            value = _build_schema_resource(value)
+            _helpers._set_sub_prop(
+                self._properties,
+                api_field,
+                value,
+            )
+        else:
+            raise TypeError("Schema must be a Sequence (e.g. a list) or None.")
 
     @property
     def labels(self):
@@ -1075,6 +1101,43 @@ def external_catalog_table_options(
                 self._PROPERTY_TO_API_FIELD["external_catalog_table_options"]
             ] = value
 
+    @property
+    def foreign_type_info(self) -> Optional[_schema.ForeignTypeInfo]:
+        """Optional. Specifies metadata of the foreign data type definition in
+        field schema (TableFieldSchema.foreign_type_definition).
+
+        Returns:
+            Optional[schema.ForeignTypeInfo]:
+                Foreign type information, or :data:`None` if not set.
+
+        .. Note::
+            foreign_type_info is only required if you are referencing an
+            external catalog such as a Hive table.
+            For details, see:
+            https://cloud.google.com/bigquery/docs/external-tables
+            https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets
+        """
+
+        prop = _helpers._get_sub_prop(
+            self._properties, self._PROPERTY_TO_API_FIELD["foreign_type_info"]
+        )
+        if prop is not None:
+            return _schema.ForeignTypeInfo.from_api_repr(prop)
+        return None
+
+    @foreign_type_info.setter
+    def foreign_type_info(self, value: Union[_schema.ForeignTypeInfo, dict, None]):
+        value = _helpers._isinstance_or_raise(
+            value,
+            (_schema.ForeignTypeInfo, dict),
+            none_allowed=True,
+        )
+        if isinstance(value, _schema.ForeignTypeInfo):
+            value = value.to_api_repr()
+        _helpers._set_sub_prop(
+            self._properties, self._PROPERTY_TO_API_FIELD["foreign_type_info"], value
+        )
+
     @classmethod
     def from_string(cls, full_table_id: str) -> "Table":
         """Construct a table from fully-qualified table ID.
diff --git a/noxfile.py b/noxfile.py
@@ -110,6 +110,14 @@ def default(session, install_extras=True):
     else:
         install_target = "."
     session.install("-e", install_target, "-c", constraints_path)
+
+    # Test with some broken "extras" in case the user didn't install the extra
+    # directly. For example, pandas-gbq is recommended for pandas features, but
+    # we want to test that we fall back to the previous behavior. For context,
+    # see internal document go/pandas-gbq-and-bigframes-redundancy.
+    if session.python == UNIT_TEST_PYTHON_VERSIONS[0]:
+        session.run("python", "-m", "pip", "uninstall", "pandas-gbq", "-y")
+
     session.run("python", "-m", "pip", "freeze")
 
     # Run py.test against the unit tests.
@@ -228,6 +236,13 @@ def system(session):
         extras = "[all]"
     session.install("-e", f".{extras}", "-c", constraints_path)
 
+    # Test with some broken "extras" in case the user didn't install the extra
+    # directly. For example, pandas-gbq is recommended for pandas features, but
+    # we want to test that we fall back to the previous behavior. For context,
+    # see internal document go/pandas-gbq-and-bigframes-redundancy.
+    if session.python == SYSTEM_TEST_PYTHON_VERSIONS[0]:
+        session.run("python", "-m", "pip", "uninstall", "pandas-gbq", "-y")
+
     # print versions of all dependencies
     session.run("python", "-m", "pip", "freeze")
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -74,6 +74,9 @@ bqstorage = [
 ]
 pandas = [
   "pandas >= 1.1.0",
+  "pandas-gbq >= 0.26.1; python_version >= '3.8'",
+  "grpcio >= 1.47.0, < 2.0dev",
+  "grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'",
   "pyarrow >= 3.0.0",
   "db-dtypes >= 0.3.0, < 2.0.0dev",
   "importlib_metadata >= 1.0.0; python_version < '3.8'",
diff --git a/testing/constraints-3.8.txt b/testing/constraints-3.8.txt
@@ -1,2 +1,11 @@
 grpcio==1.47.0
 pandas==1.2.0
+
+# This constraints file is used to check that lower bounds
+# are correct in pyproject.toml
+#
+# Pin the version to the lower bound.
+#
+# e.g., if pyproject.toml has "foo >= 1.14.0, < 2.0.0dev",
+# Then this file should have foo==1.14.0
+pandas-gbq==0.26.1
diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py
@@ -1259,7 +1259,7 @@ def test_upload_time_and_datetime_56(bigquery_client, dataset_id):
     df = pandas.DataFrame(
         dict(
             dt=[
-                datetime.datetime(2020, 1, 8, 8, 0, 0),
+                datetime.datetime(2020, 1, 8, 8, 0, 0, tzinfo=datetime.timezone.utc),
                 datetime.datetime(
                     2020,
                     1,
diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py
@@ -272,7 +272,7 @@ def test_schema_setter_invalid_field(self):
 
         config = LoadJobConfig()
         full_name = SchemaField("full_name", "STRING", mode="REQUIRED")
-        with self.assertRaises(ValueError):
+        with self.assertRaises(TypeError):
             config.schema = [full_name, object()]
 
     def test_schema_setter(self):
diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py
diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py
diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py