Skip to content

Commit 47474a9

Browse files
committed
Adds Schema class and modifies schema handling
1 parent 48c8cc6 commit 47474a9

File tree

5 files changed

+121
-31
lines changed

5 files changed

+121
-31
lines changed

google/cloud/bigquery/client.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3680,7 +3680,7 @@ def insert_rows(
36803680
if selected_fields is not None:
36813681
schema = selected_fields
36823682

3683-
if len(schema) == 0:
3683+
if not schema:
36843684
raise ValueError(
36853685
(
36863686
"Could not determine schema for table '{}'. Call client.get_table() "
@@ -4029,7 +4029,7 @@ def list_rows(
40294029

40304030
# No schema, but no selected_fields. Assume the developer wants all
40314031
# columns, so get the table resource for them rather than failing.
4032-
elif len(schema) == 0:
4032+
elif not schema:
40334033
table = self.get_table(table.reference, retry=retry, timeout=timeout)
40344034
schema = table.schema
40354035

google/cloud/bigquery/schema.py

Lines changed: 101 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -547,40 +547,66 @@ def _build_schema_resource(fields):
547547
"""
548548
return [field.to_api_repr() for field in fields]
549549

550-
551550
def _to_schema_fields(schema):
    """Coerce the items of ``schema`` to schema field instances.

    A :class:`Schema` input yields a new :class:`Schema` that carries over the
    input's ``foreign_type_info``; any other iterable yields a plain list.

    Args:
        schema (Union[ \
            Schema, \
            Iterable[Union[ \
                :class:`~google.cloud.bigquery.schema.SchemaField`, \
                Mapping[str, Any] \
            ]] \
        ]):
            Table schema to convert. If some items are passed as mappings,
            their content must be compatible with
            :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`.

    Returns:
        Union[Schema, List[:class:`~google.cloud.bigquery.schema.SchemaField`]]:
            The converted fields, wrapped in a ``Schema`` only when the input
            was a ``Schema``.

    Raises:
        TypeError: If ``schema`` is not iterable.
        ValueError: If any item is neither a
            :class:`~google.cloud.bigquery.schema.SchemaField` instance nor a
            mapping.
    """
    # Materialize once: the items are walked twice (validation, conversion),
    # and a one-shot iterable such as a generator would otherwise be empty on
    # the second pass.
    items = list(schema)

    # Validate everything up front so no conversion happens on bad input.
    for field in items:
        if not isinstance(field, (SchemaField, collections.abc.Mapping)):
            raise ValueError(
                "Schema items must either be fields or compatible "
                "mapping representations."
            )

    fields = [
        field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field)
        for field in items
    ]

    if isinstance(schema, Schema):
        # Preserve the extra attribute that a plain list cannot carry.
        return Schema(fields, foreign_type_info=schema.foreign_type_info)
    return fields
583575

576+
# OLD TO DELETE
577+
# def _to_schema_fields(schema):
578+
# """Coerce `schema` to a list of schema field instances.
579+
580+
# Args:
581+
# schema(Sequence[Union[ \
582+
# :class:`~google.cloud.bigquery.schema.SchemaField`, \
583+
# Mapping[str, Any] \
584+
# ]]):
585+
# Table schema to convert. If some items are passed as mappings,
586+
# their content must be compatible with
587+
# :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`.
588+
589+
# Returns:
590+
# Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`]
591+
592+
# Raises:
593+
# Exception: If ``schema`` is not a sequence, or if any item in the
594+
# sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField`
595+
# instance or a compatible mapping representation of the field.
596+
# """
597+
598+
# for field in schema:
599+
# if not isinstance(field, (SchemaField, collections.abc.Mapping)):
600+
# raise ValueError(
601+
# "Schema items must either be fields or compatible "
602+
# "mapping representations."
603+
# )
604+
605+
# return [
606+
# field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field)
607+
# for field in schema
608+
# ]
609+
584610

585611
class PolicyTagList(object):
586612
"""Define Policy Tags for a column.
@@ -921,3 +947,58 @@ def from_api_repr(cls, resource: dict) -> SerDeInfo:
921947
config = cls("")
922948
config._properties = copy.deepcopy(resource)
923949
return config
950+
951+
952+
class Schema:
    """List-like container of schema fields with a ``foreign_type_info`` value.

    Both the fields and ``foreign_type_info`` are kept in the instance's
    ``_properties`` dict, under the keys ``"_fields"`` and
    ``"foreignTypeInfo"`` respectively.

    Args:
        fields: Optional iterable of fields. It is copied into a new list;
            ``None`` results in an empty list.
        foreign_type_info: Optional string stored under ``"foreignTypeInfo"``.
            NOTE(review): presumably mirrors the ``foreignTypeInfo`` member of
            the BigQuery table schema resource -- confirm.
    """

    def __init__(self, fields=None, foreign_type_info=None):
        self._properties = {}
        self._fields = [] if fields is None else list(fields)  # Internal list
        self.foreign_type_info = foreign_type_info

    @property
    def foreign_type_info(self) -> Any:
        """Value stored under ``"foreignTypeInfo"``, or ``None`` if unset."""
        return self._properties.get("foreignTypeInfo")

    @foreign_type_info.setter
    def foreign_type_info(self, value: str) -> None:
        # The helper is called with ``none_allowed=True``; its return value is
        # what gets stored.
        value = _isinstance_or_raise(value, str, none_allowed=True)
        self._properties["foreignTypeInfo"] = value

    @property
    def _fields(self) -> Any:
        """Internal list of fields, or ``None`` if it was never set."""
        return self._properties.get("_fields")

    @_fields.setter
    def _fields(self, value: list) -> None:
        value = _isinstance_or_raise(value, list, none_allowed=True)
        self._properties["_fields"] = value

    def __len__(self):
        return len(self._properties["_fields"])

    def __getitem__(self, index):
        return self._properties["_fields"][index]

    def __setitem__(self, index, value):
        self._properties["_fields"][index] = value

    def __delitem__(self, index):
        del self._properties["_fields"][index]

    def __iter__(self):
        return iter(self._properties["_fields"])

    def __eq__(self, other):
        """Compare by value: same fields and same ``foreign_type_info``.

        Without this, two separately built but identical schemas compare
        unequal (identity comparison), so membership checks such as
        ``schema in [Schema(), [], None]`` can never match the ``Schema()``
        entry.
        """
        if not isinstance(other, Schema):
            return NotImplemented
        return (
            self._fields == other._fields
            and self.foreign_type_info == other.foreign_type_info
        )

    # A mutable container with value-based equality must not be hashable.
    __hash__ = None

    def __str__(self):
        return str(self._properties["_fields"])

    def __repr__(self):
        return f"Schema({self.foreign_type_info!r}, {self._properties['_fields']!r})"

    def append(self, item):
        """Append ``item`` to the internal list of fields."""
        self._properties["_fields"].append(item)

    def extend(self, iterable):
        """Extend the internal list of fields with the items of ``iterable``."""
        self._properties["_fields"].extend(iterable)

google/cloud/bigquery/table.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
from google.cloud.bigquery.schema import _build_schema_resource
7171
from google.cloud.bigquery.schema import _parse_schema_resource
7272
from google.cloud.bigquery.schema import _to_schema_fields
73+
from google.cloud.bigquery.schema import Schema
7374
from google.cloud.bigquery.external_config import ExternalCatalogTableOptions
7475

7576
if typing.TYPE_CHECKING: # pragma: NO COVER
@@ -451,10 +452,13 @@ def schema(self):
451452
instance or a compatible mapping representation of the field.
452453
"""
453454
prop = self._properties.get(self._PROPERTY_TO_API_FIELD["schema"])
454-
if not prop:
455-
return []
456-
else:
457-
return _parse_schema_resource(prop)
455+
if not prop: # if empty Schema, empty list, None
456+
if prop is None:
457+
return None
458+
return prop
459+
elif isinstance(prop, Schema):
460+
return prop
461+
return _parse_schema_resource(prop)
458462

459463
@schema.setter
460464
def schema(self, value):
@@ -1336,7 +1340,8 @@ def _row_from_mapping(mapping, schema):
13361340
Raises:
13371341
ValueError: If schema is empty.
13381342
"""
1339-
if len(schema) == 0:
1343+
1344+
if not schema:
13401345
raise ValueError(_TABLE_HAS_NO_SCHEMA)
13411346

13421347
row = []

tests/unit/test_client.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
from google.cloud.bigquery import ParquetOptions
6161
import google.cloud.bigquery.retry
6262
from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
63+
from google.cloud.bigquery.schema import Schema
6364
import google.cloud.bigquery.table
6465

6566
from test_utils.imports import maybe_fail_import
@@ -2608,7 +2609,8 @@ def test_update_table_w_schema_None(self):
26082609
sent = {"schema": None}
26092610
self.assertEqual(req[1]["data"], sent)
26102611
self.assertEqual(req[1]["path"], "/%s" % path)
2611-
self.assertEqual(len(updated_table.schema), 0)
2612+
valid_options = [Schema(), [], None]
2613+
self.assertIn(updated_table.schema, valid_options)
26122614

26132615
def test_update_table_delete_property(self):
26142616
from google.cloud.bigquery.table import Table

tests/unit/test_table.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
from google.cloud.bigquery import exceptions
3232
from google.cloud.bigquery.table import TableReference
3333
from google.cloud.bigquery.dataset import DatasetReference
34-
from google.cloud.bigquery.schema import SerDeInfo, StorageDescriptor
34+
from google.cloud.bigquery.schema import SerDeInfo, StorageDescriptor, Schema
3535

3636

3737
def _mock_client():
@@ -499,7 +499,8 @@ def _verifyResourceProperties(self, table, resource):
499499
if "schema" in resource:
500500
self._verifySchema(table.schema, resource)
501501
else:
502-
self.assertEqual(table.schema, [])
502+
valid_options = [Schema(), [], None]
503+
self.assertIn(table.schema, valid_options)
503504

504505
if "externalDataConfiguration" in resource:
505506
edc = table.external_data_configuration
@@ -536,7 +537,8 @@ def test_ctor(self):
536537
"/projects/%s/datasets/%s/tables/%s"
537538
% (self.PROJECT, self.DS_ID, self.TABLE_NAME),
538539
)
539-
self.assertEqual(table.schema, [])
540+
valid_options = [Schema(), [], None]
541+
self.assertIn(table.schema, valid_options)
540542

541543
self.assertIsNone(table.created)
542544
self.assertIsNone(table.etag)

0 commit comments

Comments
 (0)