Skip to content

Commit 48c8cc6

Browse files
authored
feat: Adds attributes to SchemaField (#2077)
* Updates SchemaField with new fields and adds tests * Adds additional reference to Foreign * adds test for type vs ftd and test for enums * Updates enums, schema, and tests, plus linting * Updates some formatting of a comment to accommodate black
1 parent 74beca6 commit 48c8cc6

File tree

4 files changed

+160
-11
lines changed

4 files changed

+160
-11
lines changed

google/cloud/bigquery/enums.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,11 @@ class KeyResultStatementKind:
246246

247247

248248
class StandardSqlTypeNames(str, enum.Enum):
249+
"""Enum of allowed SQL type names in schema.SchemaField.
250+
251+
Datatype used in GoogleSQL.
252+
"""
253+
249254
def _generate_next_value_(name, start, count, last_values):
250255
return name
251256

@@ -267,6 +272,9 @@ def _generate_next_value_(name, start, count, last_values):
267272
ARRAY = enum.auto()
268273
STRUCT = enum.auto()
269274
RANGE = enum.auto()
275+
# NOTE: FOREIGN acts as a wrapper for data types
276+
# not natively understood by BigQuery unless translated
277+
FOREIGN = enum.auto()
270278

271279

272280
class EntityTypes(str, enum.Enum):
@@ -285,7 +293,10 @@ class EntityTypes(str, enum.Enum):
285293
# See also: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
286294
# and https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
287295
class SqlTypeNames(str, enum.Enum):
288-
"""Enum of allowed SQL type names in schema.SchemaField."""
296+
"""Enum of allowed SQL type names in schema.SchemaField.
297+
298+
Datatype used in Legacy SQL.
299+
"""
289300

290301
STRING = "STRING"
291302
BYTES = "BYTES"
@@ -306,6 +317,9 @@ class SqlTypeNames(str, enum.Enum):
306317
DATETIME = "DATETIME"
307318
INTERVAL = "INTERVAL" # NOTE: not available in legacy types
308319
RANGE = "RANGE" # NOTE: not available in legacy types
320+
# NOTE: FOREIGN acts as a wrapper for data types
321+
# not natively understood by BigQuery unless translated
322+
FOREIGN = "FOREIGN"
309323

310324

311325
class WriteDisposition(object):
@@ -344,3 +358,9 @@ class DeterminismLevel:
344358

345359
NOT_DETERMINISTIC = "NOT_DETERMINISTIC"
346360
"""The UDF is not deterministic."""
361+
362+
363+
class RoundingMode(enum.Enum):
364+
ROUNDING_MODE_UNSPECIFIED = 0
365+
ROUND_HALF_AWAY_FROM_ZERO = 1
366+
ROUND_HALF_EVEN = 2

google/cloud/bigquery/schema.py

Lines changed: 75 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,14 @@
2626
_isinstance_or_raise,
2727
_get_sub_prop,
2828
)
29-
from google.cloud.bigquery.enums import StandardSqlTypeNames
29+
from google.cloud.bigquery.enums import StandardSqlTypeNames, RoundingMode
3030

3131

3232
_STRUCT_TYPES = ("RECORD", "STRUCT")
3333

3434
# SQL types reference:
35-
# https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
36-
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
35+
# LEGACY SQL: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
36+
# GoogleSQL: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
3737
LEGACY_TO_STANDARD_TYPES = {
3838
"STRING": StandardSqlTypeNames.STRING,
3939
"BYTES": StandardSqlTypeNames.BYTES,
@@ -52,6 +52,7 @@
5252
"DATE": StandardSqlTypeNames.DATE,
5353
"TIME": StandardSqlTypeNames.TIME,
5454
"DATETIME": StandardSqlTypeNames.DATETIME,
55+
"FOREIGN": StandardSqlTypeNames.FOREIGN,
5556
# no direct conversion from ARRAY, the latter is represented by mode="REPEATED"
5657
}
5758
"""String names of the legacy SQL types to integer codes of Standard SQL standard_sql."""
@@ -170,6 +171,34 @@ class SchemaField(object):
170171
the type is RANGE, this field is required. Possible values for the
171172
field element type of a RANGE include `DATE`, `DATETIME` and
172173
`TIMESTAMP`.
174+
175+
rounding_mode: Union[RoundingMode, str, None]
176+
Specifies the rounding mode to be used when storing values of
177+
NUMERIC and BIGNUMERIC type.
178+
179+
Unspecified will default to using ROUND_HALF_AWAY_FROM_ZERO.
180+
181+
ROUND_HALF_AWAY_FROM_ZERO rounds half values away from zero
182+
when applying precision and scale upon writing of NUMERIC and BIGNUMERIC
183+
values.
184+
For Scale: 0
185+
1.1, 1.2, 1.3, 1.4 => 1
186+
1.5, 1.6, 1.7, 1.8, 1.9 => 2
187+
188+
ROUND_HALF_EVEN rounds half values to the nearest even value
189+
when applying precision and scale upon writing of NUMERIC and BIGNUMERIC
190+
values.
191+
For Scale: 0
192+
1.1, 1.2, 1.3, 1.4 => 1
193+
1.5 => 2
194+
1.6, 1.7, 1.8, 1.9 => 2
195+
2.5 => 2
196+
197+
foreign_type_definition: Optional[str]
198+
Definition of the foreign data type.
199+
200+
Only valid for top-level schema fields (not nested fields).
201+
If the type is FOREIGN, this field is required.
173202
"""
174203

175204
def __init__(
@@ -185,11 +214,12 @@ def __init__(
185214
scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
186215
max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
187216
range_element_type: Union[FieldElementType, str, None] = None,
217+
rounding_mode: Union[RoundingMode, str, None] = None,
218+
foreign_type_definition: Optional[str] = None,
188219
):
189-
self._properties: Dict[str, Any] = {
190-
"name": name,
191-
"type": field_type,
192-
}
220+
self._properties: Dict[str, Any] = {}
221+
222+
self._properties["name"] = name
193223
if mode is not None:
194224
self._properties["mode"] = mode.upper()
195225
if description is not _DEFAULT_VALUE:
@@ -210,6 +240,23 @@ def __init__(
210240
self._properties["rangeElementType"] = {"type": range_element_type}
211241
if isinstance(range_element_type, FieldElementType):
212242
self._properties["rangeElementType"] = range_element_type.to_api_repr()
243+
if isinstance(rounding_mode, RoundingMode):
244+
self._properties["roundingMode"] = rounding_mode.name
245+
if isinstance(rounding_mode, str):
246+
self._properties["roundingMode"] = rounding_mode
247+
if isinstance(foreign_type_definition, str):
248+
self._properties["foreignTypeDefinition"] = foreign_type_definition
249+
250+
# The order of operations is important:
251+
# If field_type is FOREIGN, then foreign_type_definition must be set.
252+
if field_type != "FOREIGN":
253+
self._properties["type"] = field_type
254+
else:
255+
if self._properties.get("foreignTypeDefinition") is None:
256+
raise ValueError(
257+
"If the 'field_type' is 'FOREIGN', then 'foreign_type_definition' is required."
258+
)
259+
self._properties["type"] = field_type
213260

214261
self._fields = tuple(fields)
215262

@@ -251,6 +298,9 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
251298
else:
252299
element_type = None
253300

301+
rounding_mode = api_repr.get("roundingMode")
302+
foreign_type_definition = api_repr.get("foreignTypeDefinition")
303+
254304
return cls(
255305
field_type=field_type,
256306
fields=[cls.from_api_repr(f) for f in fields],
@@ -263,6 +313,8 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
263313
scale=cls.__get_int(api_repr, "scale"),
264314
max_length=cls.__get_int(api_repr, "maxLength"),
265315
range_element_type=element_type,
316+
rounding_mode=rounding_mode,
317+
foreign_type_definition=foreign_type_definition,
266318
)
267319

268320
@property
@@ -330,6 +382,22 @@ def range_element_type(self):
330382
ret = self._properties.get("rangeElementType")
331383
return FieldElementType.from_api_repr(ret)
332384

385+
@property
386+
def rounding_mode(self):
387+
"""Enum that specifies the rounding mode to be used when storing values of
388+
NUMERIC and BIGNUMERIC type.
389+
"""
390+
return self._properties.get("roundingMode")
391+
392+
@property
393+
def foreign_type_definition(self):
394+
"""Definition of the foreign data type.
395+
396+
Only valid for top-level schema fields (not nested fields).
397+
If the type is FOREIGN, this field is required.
398+
"""
399+
return self._properties.get("foreignTypeDefinition")
400+
333401
@property
334402
def fields(self):
335403
"""Optional[tuple]: Subfields contained in this field.

tests/unit/test_schema.py

Lines changed: 63 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
from google.cloud import bigquery
16+
from google.cloud.bigquery.enums import RoundingMode
1617
from google.cloud.bigquery.standard_sql import StandardSqlStructType
1718
from google.cloud.bigquery.schema import (
1819
PolicyTagList,
@@ -52,9 +53,12 @@ def test_constructor_defaults(self):
5253
self.assertEqual(field.fields, ())
5354
self.assertIsNone(field.policy_tags)
5455
self.assertIsNone(field.default_value_expression)
56+
self.assertEqual(field.rounding_mode, None)
57+
self.assertEqual(field.foreign_type_definition, None)
5558

5659
def test_constructor_explicit(self):
5760
FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field"
61+
ROUNDINGMODE = RoundingMode.ROUNDING_MODE_UNSPECIFIED
5862
field = self._make_one(
5963
"test",
6064
"STRING",
@@ -67,6 +71,8 @@ def test_constructor_explicit(self):
6771
)
6872
),
6973
default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION,
74+
rounding_mode=ROUNDINGMODE,
75+
foreign_type_definition="INTEGER",
7076
)
7177
self.assertEqual(field.name, "test")
7278
self.assertEqual(field.field_type, "STRING")
@@ -83,9 +89,16 @@ def test_constructor_explicit(self):
8389
)
8490
),
8591
)
92+
self.assertEqual(field.rounding_mode, ROUNDINGMODE.name)
93+
self.assertEqual(field.foreign_type_definition, "INTEGER")
8694

8795
def test_constructor_explicit_none(self):
88-
field = self._make_one("test", "STRING", description=None, policy_tags=None)
96+
field = self._make_one(
97+
"test",
98+
"STRING",
99+
description=None,
100+
policy_tags=None,
101+
)
89102
self.assertIsNone(field.description)
90103
self.assertIsNone(field.policy_tags)
91104

@@ -141,10 +154,18 @@ def test_to_api_repr(self):
141154
policy.to_api_repr(),
142155
{"names": ["foo", "bar"]},
143156
)
157+
ROUNDINGMODE = RoundingMode.ROUNDING_MODE_UNSPECIFIED
144158

145159
field = self._make_one(
146-
"foo", "INTEGER", "NULLABLE", description="hello world", policy_tags=policy
160+
"foo",
161+
"INTEGER",
162+
"NULLABLE",
163+
description="hello world",
164+
policy_tags=policy,
165+
rounding_mode=ROUNDINGMODE,
166+
foreign_type_definition=None,
147167
)
168+
print(f"DINOSAUR: {field}\n\n{field.to_api_repr()}")
148169
self.assertEqual(
149170
field.to_api_repr(),
150171
{
@@ -153,6 +174,7 @@ def test_to_api_repr(self):
153174
"type": "INTEGER",
154175
"description": "hello world",
155176
"policyTags": {"names": ["foo", "bar"]},
177+
"roundingMode": "ROUNDING_MODE_UNSPECIFIED",
156178
},
157179
)
158180

@@ -186,6 +208,7 @@ def test_from_api_repr(self):
186208
"description": "test_description",
187209
"name": "foo",
188210
"type": "record",
211+
"roundingMode": "ROUNDING_MODE_UNSPECIFIED",
189212
}
190213
)
191214
self.assertEqual(field.name, "foo")
@@ -197,6 +220,7 @@ def test_from_api_repr(self):
197220
self.assertEqual(field.fields[0].field_type, "INTEGER")
198221
self.assertEqual(field.fields[0].mode, "NULLABLE")
199222
self.assertEqual(field.range_element_type, None)
223+
self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED")
200224

201225
def test_from_api_repr_policy(self):
202226
field = self._get_target_class().from_api_repr(
@@ -462,6 +486,32 @@ def test_to_standard_sql_unknown_type(self):
462486
bigquery.StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED,
463487
)
464488

489+
def test_to_standard_sql_foreign_type_valid(self):
490+
legacy_type = "FOREIGN"
491+
standard_type = bigquery.StandardSqlTypeNames.FOREIGN
492+
foreign_type_definition = "INTEGER"
493+
494+
field = self._make_one(
495+
"some_field",
496+
field_type=legacy_type,
497+
foreign_type_definition=foreign_type_definition,
498+
)
499+
standard_field = field.to_standard_sql()
500+
self.assertEqual(standard_field.name, "some_field")
501+
self.assertEqual(standard_field.type.type_kind, standard_type)
502+
503+
def test_to_standard_sql_foreign_type_invalid(self):
504+
legacy_type = "FOREIGN"
505+
foreign_type_definition = None
506+
507+
with self.assertRaises(ValueError) as context:
508+
self._make_one(
509+
"some_field",
510+
field_type=legacy_type,
511+
foreign_type_definition=foreign_type_definition,
512+
)
513+
self.assertTrue("If the 'field_type'" in context.exception.args[0])
514+
465515
def test___eq___wrong_type(self):
466516
field = self._make_one("test", "STRING")
467517
other = object()
@@ -1117,7 +1167,17 @@ def test_to_api_repr_parameterized(field, api):
11171167

11181168

11191169
class TestForeignTypeInfo:
1120-
"""TODO: add doc string."""
1170+
"""Tests metadata re: the foreign data type definition in field schema.
1171+
1172+
Specifies the system which defines the foreign data type.
1173+
1174+
TypeSystems are external systems, such as query engines or table formats,
1175+
that have their own data types.
1176+
1177+
TypeSystem may be:
1178+
TypeSystem not specified: TYPE_SYSTEM_UNSPECIFIED
1179+
Represents Hive data types: HIVE
1180+
"""
11211181

11221182
@staticmethod
11231183
def _get_target_class():

tests/unit/test_table.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5928,6 +5928,7 @@ def test_external_catalog_table_options_setter(
59285928
result = table.to_api_repr()
59295929
assert result == expected
59305930

5931+
59315932
@pytest.mark.parametrize("preserve_order", [True, False])
59325933
def test_to_arrow_iterable_w_bqstorage_max_stream_count(preserve_order):
59335934
pytest.importorskip("pandas")

0 commit comments

Comments
 (0)