Skip to content

Commit 74310c5

Browse files
authored
Support bigquery parameterized types (#168)
* Support bigquery parameterized types
* Update tbl2tbl types
* Fix fixed point type test
* quality
* Update all bigquery types
* Clean up quality container
* Add correct bigquery df types
* bigquery: handle out-of-range numerics
* Fix test
* Quality
* Attempt to fix numeric tests
* more numeric fixes
* Fix handling of fixed point constraints
* quality
* ignore bigquery typecheck
* quality
1 parent e3b71d6 commit 74310c5

File tree

11 files changed

+83
-80
lines changed

11 files changed

+83
-80
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ quality-mdl:
7676
# to run a single item, you can do: make QUALITY_TOOL=bigfiles quality
7777
quality:
7878
@quality_gem_version=$$(python -c 'import yaml; print(yaml.safe_load(open(".circleci/config.yml","r"))["quality_gem_version"])'); \
79-
docker run \
79+
docker run --rm \
8080
-v "$$(pwd):/usr/app" \
8181
-v "$$(pwd)/Rakefile.quality:/usr/quality/Rakefile" \
8282
"apiology/quality:$${quality_gem_version}" ${QUALITY_TOOL}

metrics/coverage_high_water_mark

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
93.7200
1+
93.700

metrics/mypy_high_water_mark

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
92.2300
1+
92.3000

records_mover/db/bigquery/bigquery_db_driver.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -72,22 +72,34 @@ def fp_constraints(self,
7272
def fixed_point_constraints(self,
7373
type_: sqlalchemy.types.Numeric) ->\
7474
Optional[Tuple[int, int]]:
75-
if isinstance(type_, sqlalchemy.sql.sqltypes.DECIMAL):
75+
from sqlalchemy_bigquery import BIGNUMERIC
76+
base_type = super().fixed_point_constraints(type_)
77+
if base_type:
78+
return base_type
79+
elif isinstance(type_, sqlalchemy.sql.sqltypes.DECIMAL):
7680
return (38, 9)
81+
elif isinstance(type_, sqlalchemy.sql.sqltypes.NUMERIC):
82+
return (38, 9)
83+
elif isinstance(type_, BIGNUMERIC):
84+
return (76, 38)
7785
else:
7886
logger.warning(f"Don't know how to handle unexpected BigQuery type {type(type_)}")
7987
return None
8088

8189
def type_for_fixed_point(self,
8290
precision: int,
8391
scale: int) -> sqlalchemy.sql.sqltypes.Numeric:
84-
# BigQuery NUMERIC() type takes no arguments and supports 38
85-
# digits of precision, of which 9 digits are scale.
86-
if precision > 38 or scale > 9:
92+
# BigQuery now supports precision/scale args and higher precision/scale in the BIGNUMERIC:
93+
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#parameterized_decimal_type
94+
#
95+
# sqlalchemy-bigquery uses the Numeric sqlalchemy type and chooses NUMERIC/BIGNUMERIC
96+
# as needed.
97+
if scale > 38 or precision > 76:
8798
logger.warning('Using BigQuery FLOAT64 type to represent '
8899
f'NUMERIC({precision},{scale}))')
89100
return sqlalchemy.types.Float()
90-
return sqlalchemy.sql.sqltypes.Numeric()
101+
102+
return sqlalchemy.sql.sqltypes.Numeric(precision, scale)
91103

92104
def type_for_integer(self,
93105
min_value: Optional[int],

setup.cfg

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ ignore_missing_imports = True
6868
[mypy-sqlalchemy_redshift.*]
6969
ignore_missing_imports = True
7070

71+
[mypy-sqlalchemy_bigquery.*]
72+
ignore_missing_imports = True
73+
7174
[mypy-odictliteral.*]
7275
ignore_missing_imports = True
7376

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ def initialize_options(self) -> None:
183183
# https://github.com/jmcarp/sqlalchemy-postgres-copy/pull/14
184184
#
185185
# 'sqlalchemy-postgres-copy>=0.5,<0.6',
186-
'pybigquery',
186+
'sqlalchemy-bigquery',
187187
] + gcs_dependencies + db_dependencies
188188

189189

tests/integration/records/expected_column_types.py

Lines changed: 17 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,8 @@
1717
'TIMESTAMP WITHOUT TIME ZONE', 'TIMESTAMP WITH TIME ZONE'
1818
],
1919
'bigquery': [
20-
"<class 'sqlalchemy.sql.sqltypes.Integer'>",
21-
"<class 'sqlalchemy.sql.sqltypes.String'>",
22-
"<class 'sqlalchemy.sql.sqltypes.String'>",
23-
"<class 'sqlalchemy.sql.sqltypes.String'>",
24-
"<class 'sqlalchemy.sql.sqltypes.String'>",
25-
"<class 'sqlalchemy.sql.sqltypes.String'>",
26-
"<class 'sqlalchemy.sql.sqltypes.String'>",
27-
"<class 'sqlalchemy.sql.sqltypes.DATE'>",
28-
"<class 'sqlalchemy.sql.sqltypes.TIME'>",
29-
"<class 'sqlalchemy.sql.sqltypes.DATETIME'>",
30-
"<class 'sqlalchemy.sql.sqltypes.TIMESTAMP'>"
20+
'INTEGER', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)', 'VARCHAR(3)',
21+
'VARCHAR(111)', 'DATE', 'TIME', 'DATETIME', 'TIMESTAMP'
3122
],
3223
'mysql': [
3324
'INTEGER(11)', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)', 'VARCHAR(3)',
@@ -55,6 +46,11 @@
5546
'VARCHAR(12)', 'VARCHAR(444)', 'DATE', 'VARCHAR(8)',
5647
'TIMESTAMP WITHOUT TIME ZONE', 'TIMESTAMP WITH TIME ZONE'
5748
],
49+
'bigquery': [
50+
'INTEGER', 'VARCHAR(12)', 'VARCHAR(12)', 'VARCHAR(4)', 'VARCHAR(4)',
51+
'VARCHAR(12)', 'VARCHAR(444)', 'DATE', 'TIME',
52+
'DATETIME', 'TIMESTAMP'
53+
]
5854
}
5955

6056
expected_table2table_column_types = {
@@ -133,6 +129,10 @@
133129
'VARCHAR(256)', 'VARCHAR(256)', 'DATE', 'VARCHAR(8)',
134130
'TIMESTAMP WITHOUT TIME ZONE', 'TIMESTAMP WITH TIME ZONE'
135131
],
132+
('postgresql', 'bigquery'): [
133+
'INTEGER', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)',
134+
'VARCHAR(256)', 'VARCHAR(256)', 'DATE', 'TIME', 'DATETIME', 'TIMESTAMP'
135+
],
136136
('redshift', 'vertica'): [
137137
'INTEGER', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)',
138138
'VARCHAR(3)', 'VARCHAR(111)', 'DATE', 'VARCHAR(8)',
@@ -144,13 +144,8 @@
144144
'VARCHAR(256)', 'VARCHAR(256)'
145145
],
146146
('bigquery', 'bigquery'): [
147-
"<class 'sqlalchemy.sql.sqltypes.Integer'>", "<class 'sqlalchemy.sql.sqltypes.String'>",
148-
"<class 'sqlalchemy.sql.sqltypes.String'>", "<class 'sqlalchemy.sql.sqltypes.String'>",
149-
"<class 'sqlalchemy.sql.sqltypes.String'>", "<class 'sqlalchemy.sql.sqltypes.String'>",
150-
"<class 'sqlalchemy.sql.sqltypes.String'>", "<class 'sqlalchemy.sql.sqltypes.DATE'>",
151-
"<class 'sqlalchemy.sql.sqltypes.TIME'>",
152-
"<class 'sqlalchemy.sql.sqltypes.String'>",
153-
"<class 'sqlalchemy.sql.sqltypes.TIMESTAMP'>"
147+
'INTEGER', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)',
148+
'VARCHAR(256)', 'VARCHAR(256)', 'DATE', 'TIME', 'VARCHAR(256)', 'TIMESTAMP'
154149
],
155150
('bigquery', 'postgresql'): [
156151
'BIGINT', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)',
@@ -162,30 +157,12 @@
162157
'VARCHAR(256)', 'VARCHAR(256)', 'DATE', 'TIME', 'TIMESTAMP', 'TIMESTAMP'
163158
],
164159
('redshift', 'bigquery'): [
165-
"<class 'sqlalchemy.sql.sqltypes.Integer'>",
166-
"<class 'sqlalchemy.sql.sqltypes.String'>",
167-
"<class 'sqlalchemy.sql.sqltypes.String'>",
168-
"<class 'sqlalchemy.sql.sqltypes.String'>",
169-
"<class 'sqlalchemy.sql.sqltypes.String'>",
170-
"<class 'sqlalchemy.sql.sqltypes.String'>",
171-
"<class 'sqlalchemy.sql.sqltypes.String'>",
172-
"<class 'sqlalchemy.sql.sqltypes.DATE'>",
173-
"<class 'sqlalchemy.sql.sqltypes.String'>",
174-
"<class 'sqlalchemy.sql.sqltypes.TIMESTAMP'>",
175-
"<class 'sqlalchemy.sql.sqltypes.TIMESTAMP'>",
160+
'INTEGER', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)', 'VARCHAR(3)',
161+
'VARCHAR(111)', 'DATE', 'VARCHAR(8)', 'TIMESTAMP', 'TIMESTAMP',
176162
],
177163
('mysql', 'bigquery'): [
178-
"<class 'sqlalchemy.sql.sqltypes.Integer'>",
179-
"<class 'sqlalchemy.sql.sqltypes.String'>",
180-
"<class 'sqlalchemy.sql.sqltypes.String'>",
181-
"<class 'sqlalchemy.sql.sqltypes.String'>",
182-
"<class 'sqlalchemy.sql.sqltypes.String'>",
183-
"<class 'sqlalchemy.sql.sqltypes.String'>",
184-
"<class 'sqlalchemy.sql.sqltypes.String'>",
185-
"<class 'sqlalchemy.sql.sqltypes.DATE'>",
186-
"<class 'sqlalchemy.sql.sqltypes.TIME'>",
187-
"<class 'sqlalchemy.sql.sqltypes.DATETIME'>",
188-
"<class 'sqlalchemy.sql.sqltypes.DATETIME'>"
164+
'INTEGER', 'VARCHAR(12)', 'VARCHAR(12)', 'VARCHAR(4)', 'VARCHAR(4)', 'VARCHAR(12)',
165+
'VARCHAR(444)', 'DATE', 'TIME', 'DATETIME', 'DATETIME',
189166
],
190167
('redshift', 'mysql'): [
191168
'INTEGER(11)', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)', 'VARCHAR(3)',

tests/integration/records/records_numeric_database_fixture.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ class RecordsNumericDatabaseFixture:
77
representing their constraints so we can validate behavior against
88
them later on.
99
"""
10+
1011
def __init__(self, db_engine, schema_name, table_name):
1112
self.engine = db_engine
1213
self.schema_name = schema_name
@@ -37,11 +38,15 @@ def bring_up(self):
3738
elif self.engine.name == 'bigquery':
3839
# BigQuery only supports a few large numeric types
3940
create_tables = [f"""
40-
CREATE TABLE {self.schema_name}.{self.table_name} AS
41-
SELECT CAST(9223372036854775807 AS INT64) AS int64,
42-
CAST(1234.56 AS NUMERIC) AS fixed_38_9,
43-
CAST(19223372036854775807.78 AS FLOAT64) AS float64;
44-
""" # noqa
41+
CREATE TABLE {self.schema_name}.{self.table_name} (
42+
`int64` INT64,
43+
`fixed_6_2` NUMERIC(6, 2),
44+
`float64` FLOAT64);
45+
""", # noqa
46+
f"""
47+
INSERT INTO {self.schema_name}.{self.table_name} (`int64`, `fixed_6_2`, `float64`)
48+
VALUES (9223372036854775807, 1234.56, 19223372036854775807.78);
49+
""", # noqa
4550
]
4651
elif self.engine.name == 'postgresql':
4752
# Postgres supports a number of different numeric types

tests/integration/records/single_db/numeric_expectations.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -145,28 +145,28 @@
145145
'fixed_100_4': 'NUMERIC(100, 4)' # Vertica supports precision <= 1024
146146
},
147147
'bigquery': {
148-
'int8': "<class 'sqlalchemy.sql.sqltypes.Integer'>",
149-
'int16': "<class 'sqlalchemy.sql.sqltypes.Integer'>",
150-
'int32': "<class 'sqlalchemy.sql.sqltypes.Integer'>",
151-
'int64': "<class 'sqlalchemy.sql.sqltypes.Integer'>",
152-
'ubyte': "<class 'sqlalchemy.sql.sqltypes.Integer'>",
153-
'uint8': "<class 'sqlalchemy.sql.sqltypes.Integer'>",
154-
'uint16': "<class 'sqlalchemy.sql.sqltypes.Integer'>",
155-
'uint32': "<class 'sqlalchemy.sql.sqltypes.Integer'>",
148+
'int8': 'INTEGER',
149+
'int16': 'INTEGER',
150+
'int32': 'INTEGER',
151+
'int64': 'INTEGER',
152+
'ubyte': 'INTEGER',
153+
'uint8': 'INTEGER',
154+
'uint16': 'INTEGER',
155+
'uint32': 'INTEGER',
156156
# Numeric has 29=38-9 digits of integer precision, and
157157
# uint64 has fewer.
158-
'uint64': "<class 'sqlalchemy.sql.sqltypes.DECIMAL'>",
159-
'float16': "<class 'sqlalchemy.sql.sqltypes.Float'>",
160-
'float32': "<class 'sqlalchemy.sql.sqltypes.Float'>",
161-
'float64': "<class 'sqlalchemy.sql.sqltypes.Float'>",
158+
'uint64': 'NUMERIC(20)',
159+
'float16': 'FLOAT',
160+
'float32': 'FLOAT',
161+
'float64': 'FLOAT',
162162
# BigQuery doesn't support >float64
163-
'float128': "<class 'sqlalchemy.sql.sqltypes.Float'>",
163+
'float128': 'FLOAT',
164164
# NUMERIC is precision=38, scale=9, so it fits
165-
'fixed_6_2': "<class 'sqlalchemy.sql.sqltypes.DECIMAL'>",
165+
'fixed_6_2': 'NUMERIC(6, 2)',
166166
# NUMERIC is precision=38, scale=9, so it fits
167-
'fixed_38_9': "<class 'sqlalchemy.sql.sqltypes.DECIMAL'>",
167+
'fixed_38_9': 'NUMERIC(38, 9)',
168168
# Escape to floating point, as this is larger than NUMERIC
169-
'fixed_100_4': "<class 'sqlalchemy.sql.sqltypes.Float'>"
169+
'fixed_100_4': 'FLOAT',
170170
},
171171
'postgresql': {
172172
'int8': 'SMALLINT',

tests/integration/records/single_db/test_records_numeric.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
records_schema_schema = json.load(records_schema_schema_data)
1919

2020
with open(f"{dir_path}/../../resources/example_numeric_records_schema.json")\
21-
as example_numeric_records_schema_file:
21+
as example_numeric_records_schema_file:
2222
example_numeric_records_schema = json.load(example_numeric_records_schema_file)
2323

2424

@@ -74,12 +74,10 @@ def validate_table(self):
7474
actual_column_types = {
7575
column['name']: str(column['type']) for column in columns
7676
}
77-
for colname in actual_column_types:
78-
assert actual_column_types[colname] ==\
79-
expected_column_types[self.engine.name][colname],\
80-
f"For {colname} on {self.engine.name}, " \
81-
f"expected {expected_column_types[self.engine.name][colname]}, "\
82-
f"got {actual_column_types[colname]}"
77+
assert actual_column_types ==\
78+
expected_column_types[self.engine.name],\
79+
f'Could not find column types filed under {self.engine.name}: ' +\
80+
f'{actual_column_types}'
8381

8482
def test_numeric_database_columns_created(self):
8583
records_schema = RecordsSchema.from_data(example_numeric_records_schema)

0 commit comments

Comments
 (0)