Skip to content

Commit 8bde85d

Browse files
committed
feat: updates time partitioning
1 parent a2974c5 commit 8bde85d

File tree

1 file changed

+36
-11
lines changed

1 file changed

+36
-11
lines changed

sqlalchemy_bigquery/base.py

Lines changed: 36 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -832,27 +832,52 @@ def _process_time_partitioning(
832832
function returns:
833833
"PARTITION BY TIMESTAMP_TRUNC(event_timestamp, DAY)".
834834
835-
Current inputs allowed by BQ and covered by this function include:
835+
Current inputs allowed by BQ AND covered by this function include:
836836
* _PARTITIONDATE
837837
* DATETIME_TRUNC(<datetime_column>, DAY/HOUR/MONTH/YEAR)
838838
* TIMESTAMP_TRUNC(<timestamp_column>, DAY/HOUR/MONTH/YEAR)
839839
* DATE_TRUNC(<date_column>, MONTH/YEAR)
840-
841-
Additional options allowed by BQ but not explicitly covered by this
842-
function include:
843840
* DATE(_PARTITIONTIME)
844841
* DATE(<timestamp_column>)
845842
* DATE(<datetime_column>)
846843
* DATE column
847844
"""
848845

849846
sqltypes = {
850-
"_PARTITIONDATE": ("_PARTITIONDATE", None),
851-
"TIMESTAMP": ("TIMESTAMP_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}),
852-
"DATETIME": ("DATETIME_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}),
853-
"DATE": ("DATE_TRUNC", {"MONTH", "YEAR"}),
847+
# column_type | truncation func OR default value | partitioning_period(s)
848+
849+
"_PARTITIONDATE": ("_PARTITIONDATE", None), # default value, no period
850+
"_PARTITIONTIME": ("DATE", None), # trunc_fn, no period
851+
"DATE": {
852+
"no_period": (None, None), # date_column, no trunc_fn, no period
853+
"period": ("DATE_TRUNC", {"MONTH", "YEAR"}), # date_column, trunc_fn, period(s)
854+
},
855+
"DATETIME": {
856+
"no_period": ("DATE", None), # datetime_column, trunc_fn, no period
857+
"period": ("DATETIME_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}), # datetime_column, trunc_fn, period(s)
858+
},
859+
"TIMESTAMP": {
860+
"no_period": ("DATE", None), # timestamp_column, trunc_fn, no period
861+
"period": ("TIMESTAMP_TRUNC", {"DAY", "HOUR", "MONTH", "YEAR"}), # timestamp_column, trunc_fn, period(s)
862+
},
854863
}
855864

865+
def parse_sqltypes(coltype, partitioning_period):
    """Look up the partitioning rule that applies to a column type.

    Returns the matching ``sqltypes`` entry: a 2-tuple whose first item is
    either a default value or the name of a truncation function (or None),
    and whose second item is the set of allowed partitioning periods
    (or None when no period applies).
    """
    entry = sqltypes[coltype]

    # The pseudo-columns map directly to a tuple; there is nothing to pick.
    if coltype in {"_PARTITIONDATE", "_PARTITIONTIME"}:
        return entry

    # Every other column type maps to a nested dict, keyed by whether a
    # partitioning period was supplied.
    variant = "no_period" if partitioning_period is None else "period"
    return entry[variant]
880+
856881
# Extract field (i.e. <column_name> or _PARTITIONDATE)
857882
# AND extract the name of the column_type (i.e. "TIMESTAMP", "DATE",
858883
# "DATETIME", "_PARTITIONDATE")
@@ -870,14 +895,14 @@ def _process_time_partitioning(
870895
# immediately overwritten by python-bigquery to a default of DAY.
871896
partitioning_period = time_partitioning.type_
872897

873-
# Extract the truncation_function (i.e. DATE_TRUNC)
898+
# Extract the default value or truncation_function (e.g. DATE_TRUNC)
874899
# and the set of allowable partition_periods
875900
# that can be used in that function
876-
trunc_fn, allowed_partitions = sqltypes[column_type]
901+
# Pass the extracted period (time_partitioning.type_), not the
# time_partitioning object itself: parse_sqltypes selects the "no_period"
# entry only when this argument is None.
trunc_fn, allowed_partitions = parse_sqltypes(column_type, partitioning_period)
877902

878903
# Create output:
879904
# Special Case: _PARTITIONDATE and a bare DATE column (trunc_fn is None) do NOT use a function or partitioning_period
880-
if trunc_fn == "_PARTITIONDATE":
905+
if trunc_fn is None or trunc_fn in {"_PARTITIONDATE"}:
881906
return f"PARTITION BY {field}"
882907

883908
# Special Case: BigQuery will not accept DAY as partitioning_period for

0 commit comments

Comments
 (0)