Skip to content

Commit 39f7ce0

Browse files
committed
feat: preserve unknown fields from the REST API representaton in SchemaField
1 parent 887e126 commit 39f7ce0

File tree

4 files changed

+100
-63
lines changed

4 files changed

+100
-63
lines changed

google/cloud/bigquery/schema.py

Lines changed: 20 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -203,8 +203,8 @@ def __init__(
203203
self._properties["rangeElementType"] = {"type": range_element_type}
204204
if isinstance(range_element_type, FieldElementType):
205205
self._properties["rangeElementType"] = range_element_type.to_api_repr()
206-
207-
self._fields = tuple(fields)
206+
if fields: # Don't set the property if it's not set.
207+
self._properties["fields"] = [field.to_api_repr() for field in fields]
208208

209209
@staticmethod
210210
def __get_int(api_repr, name):
@@ -225,43 +225,19 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
225225
Returns:
226226
google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object.
227227
"""
228-
field_type = api_repr["type"].upper()
229-
230-
# Handle optional properties with default values
231-
mode = api_repr.get("mode", "NULLABLE")
232-
description = api_repr.get("description", _DEFAULT_VALUE)
233-
fields = api_repr.get("fields", ())
234-
policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE)
228+
placeholder = cls("this_will_be_replaced", "PLACEHOLDER")
235229

236-
default_value_expression = api_repr.get("defaultValueExpression", None)
230+
# Note: we don't make a copy of api_repr because this can cause
231+
# unnecessary slowdowns, especially on deeply nested STRUCT / RECORD
232+
# fields. See https://github.com/googleapis/python-bigquery/issues/6
233+
placeholder._properties = api_repr
237234

238-
if policy_tags is not None and policy_tags is not _DEFAULT_VALUE:
239-
policy_tags = PolicyTagList.from_api_repr(policy_tags)
240-
241-
if api_repr.get("rangeElementType"):
242-
range_element_type = cast(dict, api_repr.get("rangeElementType"))
243-
element_type = range_element_type.get("type")
244-
else:
245-
element_type = None
246-
247-
return cls(
248-
field_type=field_type,
249-
fields=[cls.from_api_repr(f) for f in fields],
250-
mode=mode.upper(),
251-
default_value_expression=default_value_expression,
252-
description=description,
253-
name=api_repr["name"],
254-
policy_tags=policy_tags,
255-
precision=cls.__get_int(api_repr, "precision"),
256-
scale=cls.__get_int(api_repr, "scale"),
257-
max_length=cls.__get_int(api_repr, "maxLength"),
258-
range_element_type=element_type,
259-
)
235+
return placeholder
260236

261237
@property
262238
def name(self):
263239
"""str: The name of the field."""
264-
return self._properties["name"]
240+
return self._properties.get("name", "")
265241

266242
@property
267243
def field_type(self):
@@ -270,7 +246,10 @@ def field_type(self):
270246
See:
271247
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type
272248
"""
273-
return self._properties["type"]
249+
type_ = self._properties.get("type")
250+
if type_ is None: # Shouldn't happen, but some unit tests do this.
251+
return None
252+
return type_.upper()
274253

275254
@property
276255
def mode(self):
@@ -279,7 +258,7 @@ def mode(self):
279258
See:
280259
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode
281260
"""
282-
return self._properties.get("mode")
261+
return self._properties.get("mode", "NULLABLE").upper()
283262

284263
@property
285264
def is_nullable(self):
@@ -329,7 +308,7 @@ def fields(self):
329308
330309
Must be empty unset if ``field_type`` is not 'RECORD'.
331310
"""
332-
return self._fields
311+
return tuple(_to_schema_fields(self._properties.get("fields", [])))
333312

334313
@property
335314
def policy_tags(self):
@@ -345,14 +324,10 @@ def to_api_repr(self) -> dict:
345324
Returns:
346325
Dict: A dictionary representing the SchemaField in a serialized form.
347326
"""
348-
answer = self._properties.copy()
349-
350-
# If this is a RECORD type, then sub-fields are also included,
351-
# add this to the serialized representation.
352-
if self.field_type.upper() in _STRUCT_TYPES:
353-
answer["fields"] = [f.to_api_repr() for f in self.fields]
354-
355-
# Done; return the serialized dictionary.
327+
# Note: we don't make a copy of _properties because this can cause
328+
# unnecessary slowdowns, especially on deeply nested STRUCT / RECORD
329+
# fields. See https://github.com/googleapis/python-bigquery/issues/6
330+
answer = self._properties
356331
return answer
357332

358333
def _key(self):
@@ -389,7 +364,7 @@ def _key(self):
389364
self.mode.upper(), # pytype: disable=attribute-error
390365
self.default_value_expression,
391366
self.description,
392-
self._fields,
367+
self.fields,
393368
policy_tags,
394369
)
395370

tests/unit/job/test_load_config.py

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import copy
1516
import warnings
1617

1718
import pytest
@@ -571,16 +572,34 @@ def test_schema_setter_valid_mappings_list(self):
571572
config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]}
572573
)
573574

574-
def test_schema_setter_invalid_mappings_list(self):
575+
def test_schema_setter_allows_unknown_properties(self):
575576
config = self._get_target_class()()
576577

577578
schema = [
578-
{"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
579-
{"name": "age", "typeoo": "INTEGER", "mode": "REQUIRED"},
579+
{
580+
"name": "full_name",
581+
"type": "STRING",
582+
"mode": "REQUIRED",
583+
"someNewProperty": "test-value",
584+
},
585+
{
586+
"name": "age",
587+
# Note: This type should be included, too. Avoid client-side
588+
# validation, as it could prevent backwards-compatible
589+
# evolution of the server-side behavior.
590+
"typo": "INTEGER",
591+
"mode": "REQUIRED",
592+
"anotherNewProperty": "another-test",
593+
},
580594
]
581595

582-
with self.assertRaises(Exception):
583-
config.schema = schema
596+
# Make sure the setter doesn't mutate schema.
597+
expected_schema = copy.deepcopy(schema)
598+
599+
config.schema = schema
600+
601+
# _properties should include all fields, including unknown ones.
602+
assert config._properties["load"]["schema"]["fields"] == expected_schema
584603

585604
def test_schema_setter_unsetting_schema(self):
586605
from google.cloud.bigquery.schema import SchemaField

tests/unit/test_schema.py

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,16 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from google.cloud import bigquery
16-
from google.cloud.bigquery.standard_sql import StandardSqlStructType
17-
from google.cloud.bigquery.schema import PolicyTagList
15+
import copy
1816
import unittest
1917
from unittest import mock
2018

2119
import pytest
2220

21+
from google.cloud import bigquery
22+
from google.cloud.bigquery.standard_sql import StandardSqlStructType
23+
from google.cloud.bigquery.schema import PolicyTagList
24+
2325

2426
class TestSchemaField(unittest.TestCase):
2527
@staticmethod
@@ -821,13 +823,32 @@ def test_schema_fields_sequence(self):
821823
result = self._call_fut(schema)
822824
self.assertEqual(result, schema)
823825

824-
def test_invalid_mapping_representation(self):
826+
def test_unknown_properties(self):
825827
schema = [
826-
{"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
827-
{"name": "address", "typeooo": "STRING", "mode": "REQUIRED"},
828+
{
829+
"name": "full_name",
830+
"type": "STRING",
831+
"mode": "REQUIRED",
832+
"someNewProperty": "test-value",
833+
},
834+
{
835+
"name": "age",
836+
# Note: This type should be included, too. Avoid client-side
837+
# validation, as it could prevent backwards-compatible
838+
# evolution of the server-side behavior.
839+
"typo": "INTEGER",
840+
"mode": "REQUIRED",
841+
"anotherNewProperty": "another-test",
842+
},
828843
]
829-
with self.assertRaises(Exception):
830-
self._call_fut(schema)
844+
845+
# Make sure the setter doesn't mutate schema.
846+
expected_schema = copy.deepcopy(schema)
847+
848+
result = self._call_fut(schema)
849+
850+
for api_repr, field in zip(expected_schema, result):
851+
assert field.to_api_repr() == api_repr
831852

832853
def test_valid_mapping_representation(self):
833854
from google.cloud.bigquery.schema import SchemaField

tests/unit/test_table.py

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import copy
1516
import datetime
1617
import logging
1718
import re
@@ -711,14 +712,35 @@ def test_schema_setter_valid_fields(self):
711712
table.schema = [full_name, age]
712713
self.assertEqual(table.schema, [full_name, age])
713714

714-
def test_schema_setter_invalid_mapping_representation(self):
715+
def test_schema_setter_allows_unknown_properties(self):
715716
dataset = DatasetReference(self.PROJECT, self.DS_ID)
716717
table_ref = dataset.table(self.TABLE_NAME)
717718
table = self._make_one(table_ref)
718-
full_name = {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}
719-
invalid_field = {"name": "full_name", "typeooo": "STRING", "mode": "REQUIRED"}
720-
with self.assertRaises(Exception):
721-
table.schema = [full_name, invalid_field]
719+
schema = [
720+
{
721+
"name": "full_name",
722+
"type": "STRING",
723+
"mode": "REQUIRED",
724+
"someNewProperty": "test-value",
725+
},
726+
{
727+
"name": "age",
728+
# Note: This type should be included, too. Avoid client-side
729+
# validation, as it could prevent backwards-compatible
730+
# evolution of the server-side behavior.
731+
"typo": "INTEGER",
732+
"mode": "REQUIRED",
733+
"anotherNewProperty": "another-test",
734+
},
735+
]
736+
737+
# Make sure the setter doesn't mutate schema.
738+
expected_schema = copy.deepcopy(schema)
739+
740+
table.schema = schema
741+
742+
# _properties should include all fields, including unknown ones.
743+
assert table._properties["schema"]["fields"] == expected_schema
722744

723745
def test_schema_setter_valid_mapping_representation(self):
724746
from google.cloud.bigquery.schema import SchemaField

0 commit comments

Comments
 (0)