Skip to content

Commit fe75a18

Browse files
authored
Merge branch 'main' into feat-374142081-add-date-format
2 parents 46abd11 + b2300d0 commit fe75a18

File tree

5 files changed

+194
-6
lines changed

5 files changed

+194
-6
lines changed

google/cloud/bigquery/external_config.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -862,6 +862,22 @@ def date_format(self) -> Optional[str]:
862862
def date_format(self, value: Optional[str]):
863863
self._properties["dateFormat"] = value
864864

865+
@property
def time_zone(self) -> Optional[str]:
    """Optional[str]: Time zone used when parsing timestamp values that do not
    have specific time zone information (e.g. 2024-04-20 12:34:56). The expected
    format is an IANA timezone string (e.g. America/Los_Angeles).

    Returns ``None`` when ``timeZone`` has not been set on the resource.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_zone
    """
    # ``dict.get`` yields ``None`` for a missing key, so the cast target must be
    # ``Optional[str]`` to agree with the declared return type.
    result = self._properties.get("timeZone")
    return typing.cast(Optional[str], result)

@time_zone.setter
def time_zone(self, value: Optional[str]):
    # Stored verbatim under the REST field name; no validation is done here.
    self._properties["timeZone"] = value
880+
865881
@property
866882
def connection_id(self):
867883
"""Optional[str]: [Experimental] ID of a BigQuery Connection API

google/cloud/bigquery/job/load.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -561,6 +561,19 @@ def date_format(self) -> Optional[str]:
561561
def date_format(self, value: Optional[str]):
562562
self._set_sub_prop("dateFormat", value)
563563

564+
@property
def time_zone(self) -> Optional[str]:
    """Optional[str]: Default time zone that will apply when parsing timestamp
    values that have no specific time zone.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.time_zone
    """
    return self._get_sub_prop("timeZone")

@time_zone.setter
def time_zone(self, value: Optional[str]):
    # Written under the REST field name via the config's sub-property helper.
    self._set_sub_prop("timeZone", value)
576+
564577
@property
565578
def time_partitioning(self):
566579
"""Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based
@@ -909,6 +922,12 @@ def date_format(self):
909922
"""
910923
return self.configuration.date_format
911924

925+
@property
def time_zone(self):
    """See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.time_zone`.
    """
    # Read-only pass-through to the job's configuration object.
    return self.configuration.time_zone
930+
912931
@property
913932
def schema_update_options(self):
914933
"""See

tests/unit/job/test_load.py

Lines changed: 27 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -37,15 +37,15 @@ def _setUpConstants(self):
3737
self.OUTPUT_BYTES = 23456
3838
self.OUTPUT_ROWS = 345
3939
self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference"
40-
4140
self.DATE_FORMAT = "%Y-%m-%d"
41+
self.TIME_ZONE = "UTC"
4242

4343
def _make_resource(self, started=False, ended=False):
4444
resource = super(TestLoadJob, self)._make_resource(started, ended)
4545
config = resource["configuration"]["load"]
4646
config["sourceUris"] = [self.SOURCE1]
47-
4847
config["dateFormat"] = self.DATE_FORMAT
48+
config["timeZone"] = self.TIME_ZONE
4949
config["destinationTable"] = {
5050
"projectId": self.PROJECT,
5151
"datasetId": self.DS_ID,
@@ -156,6 +156,10 @@ def _verifyResourceProperties(self, job, resource):
156156
)
157157
else:
158158
self.assertIsNone(job.destination_encryption_configuration)
159+
if "timeZone" in config:
160+
self.assertEqual(job.time_zone, config["timeZone"])
161+
else:
162+
self.assertIsNone(job.time_zone)
159163

160164
if "dateFormat" in config:
161165
self.assertEqual(job.date_format, config["dateFormat"])
@@ -203,8 +207,8 @@ def test_ctor(self):
203207
self.assertIsNone(job.clustering_fields)
204208
self.assertIsNone(job.schema_update_options)
205209
self.assertIsNone(job.reference_file_schema_uri)
206-
207210
self.assertIsNone(job.date_format)
211+
self.assertIsNone(job.time_zone)
208212

209213
def test_ctor_w_config(self):
210214
from google.cloud.bigquery.schema import SchemaField
@@ -442,6 +446,24 @@ def test_from_api_repr_w_properties(self):
442446
self.assertIs(job._client, client)
443447
self._verifyResourceProperties(job, RESOURCE)
444448

449+
def test_to_api_repr(self):
    """A job built from an API resource serializes back to only its
    ``jobReference`` and ``configuration`` sections."""
    self._setUpConstants()
    client = _make_client(project=self.PROJECT)
    RESOURCE = self._make_resource(ended=False)

    klass = self._get_target_class()
    job = klass.from_api_repr(RESOURCE, client)
    api_repr = job.to_api_repr()

    # as per the documentation in load.py -> LoadJob.to_api_repr(),
    # the return value from to_api_repr should not include statistics
    expected = {
        "jobReference": RESOURCE["jobReference"],
        "configuration": RESOURCE["configuration"],
    }

    self.assertEqual(api_repr, expected)
466+
445467
def test_begin_w_already_running(self):
446468
conn = make_connection()
447469
client = _make_client(project=self.PROJECT, connection=conn)
@@ -583,6 +605,7 @@ def test_begin_w_alternate_client(self):
583605
},
584606
"schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION],
585607
"dateFormat": self.DATE_FORMAT,
608+
"timeZone": self.TIME_ZONE,
586609
}
587610
RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION
588611
conn1 = make_connection()
@@ -611,8 +634,8 @@ def test_begin_w_alternate_client(self):
611634
config.write_disposition = WriteDisposition.WRITE_TRUNCATE
612635
config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION]
613636
config.reference_file_schema_uri = "gs://path/to/reference"
614-
615637
config.date_format = self.DATE_FORMAT
638+
config.time_zone = self.TIME_ZONE
616639

617640
with mock.patch(
618641
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"

tests/unit/job/test_load_config.py

Lines changed: 127 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -844,6 +844,22 @@ def test_date_format_setter(self):
844844
config.date_format = date_format
845845
self.assertEqual(config._properties["load"]["dateFormat"], date_format)
846846

847+
def test_time_zone_missing(self):
848+
config = self._get_target_class()()
849+
self.assertIsNone(config.time_zone)
850+
851+
def test_time_zone_hit(self):
852+
time_zone = "UTC"
853+
config = self._get_target_class()()
854+
config._properties["load"]["timeZone"] = time_zone
855+
self.assertEqual(config.time_zone, time_zone)
856+
857+
def test_time_zone_setter(self):
858+
time_zone = "America/New_York"
859+
config = self._get_target_class()()
860+
config.time_zone = time_zone
861+
self.assertEqual(config._properties["load"]["timeZone"], time_zone)
862+
847863
def test_parquet_options_missing(self):
848864
config = self._get_target_class()()
849865
self.assertIsNone(config.parquet_options)
@@ -917,3 +933,114 @@ def test_column_name_character_map_none(self):
917933
config._properties["load"]["columnNameCharacterMap"],
918934
ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED,
919935
)
936+
937+
RESOURCE = {
938+
"load": {
939+
"allowJaggedRows": True,
940+
"createDisposition": "CREATE_NEVER",
941+
"encoding": "UTF-8",
942+
"fieldDelimiter": ",",
943+
"ignoreUnknownValues": True,
944+
"maxBadRecords": 10,
945+
"nullMarker": "\\N",
946+
"quote": '"',
947+
"schema": {
948+
"fields": [
949+
{"name": "name", "type": "STRING", "mode": "NULLABLE"},
950+
{"name": "age", "type": "INTEGER", "mode": "NULLABLE"},
951+
]
952+
},
953+
"skipLeadingRows": "1",
954+
"sourceFormat": "CSV",
955+
"timePartitioning": {
956+
"type": "DAY",
957+
"field": "transaction_date",
958+
},
959+
"useAvroLogicalTypes": True,
960+
"writeDisposition": "WRITE_TRUNCATE",
961+
"timeZone": "America/New_York",
962+
"parquetOptions": {"enableListInference": True},
963+
"columnNameCharacterMap": "V2",
964+
"someNewField": "some-value",
965+
}
966+
}
967+
968+
def test_from_api_repr(self):
    """``LoadJobConfig.from_api_repr`` exposes each ``RESOURCE`` field through
    its property and keeps the unrecognized ``someNewField`` key."""
    from google.cloud.bigquery.job import (
        CreateDisposition,
        LoadJobConfig,
        SourceFormat,
        WriteDisposition,
    )
    from google.cloud.bigquery.schema import SchemaField
    from google.cloud.bigquery.table import TimePartitioning, TimePartitioningType

    from google.cloud.bigquery.job.load import ColumnNameCharacterMap

    config = LoadJobConfig.from_api_repr(self.RESOURCE)

    self.assertTrue(config.allow_jagged_rows)
    self.assertEqual(config.create_disposition, CreateDisposition.CREATE_NEVER)
    self.assertEqual(config.encoding, "UTF-8")
    self.assertEqual(config.field_delimiter, ",")
    self.assertTrue(config.ignore_unknown_values)
    self.assertEqual(config.max_bad_records, 10)
    self.assertEqual(config.null_marker, "\\N")
    self.assertEqual(config.quote_character, '"')
    self.assertEqual(
        config.schema,
        [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")],
    )
    # The resource carries "1" as a string; the property is compared as an int.
    self.assertEqual(config.skip_leading_rows, 1)
    self.assertEqual(config.source_format, SourceFormat.CSV)
    self.assertEqual(
        config.time_partitioning,
        TimePartitioning(type_=TimePartitioningType.DAY, field="transaction_date"),
    )
    self.assertTrue(config.use_avro_logical_types)
    self.assertEqual(config.write_disposition, WriteDisposition.WRITE_TRUNCATE)
    self.assertEqual(config.time_zone, "America/New_York")
    self.assertTrue(config.parquet_options.enable_list_inference)
    self.assertEqual(config.column_name_character_map, ColumnNameCharacterMap.V2)
    self.assertEqual(config._properties["load"]["someNewField"], "some-value")
1006+
1007+
def test_to_api_repr(self):
    """A config populated through its property setters serializes to exactly
    the shared ``RESOURCE`` fixture."""
    from google.cloud.bigquery.job import (
        CreateDisposition,
        LoadJobConfig,
        SourceFormat,
        WriteDisposition,
    )
    from google.cloud.bigquery.schema import SchemaField
    from google.cloud.bigquery.table import TimePartitioning, TimePartitioningType
    from google.cloud.bigquery.format_options import ParquetOptions
    from google.cloud.bigquery.job.load import ColumnNameCharacterMap

    config = LoadJobConfig()
    config.allow_jagged_rows = True
    config.create_disposition = CreateDisposition.CREATE_NEVER
    config.encoding = "UTF-8"
    config.field_delimiter = ","
    config.ignore_unknown_values = True
    config.max_bad_records = 10
    config.null_marker = r"\N"
    config.quote_character = '"'
    config.schema = [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")]
    config.skip_leading_rows = 1
    config.source_format = SourceFormat.CSV
    config.time_partitioning = TimePartitioning(
        type_=TimePartitioningType.DAY, field="transaction_date"
    )
    config.use_avro_logical_types = True
    config.write_disposition = WriteDisposition.WRITE_TRUNCATE
    config.time_zone = "America/New_York"
    parquet_options = ParquetOptions()
    parquet_options.enable_list_inference = True
    config.parquet_options = parquet_options
    config.column_name_character_map = ColumnNameCharacterMap.V2
    # No property exists for this key, so it is written into the raw mapping.
    config._properties["load"]["someNewField"] = "some-value"

    api_repr = config.to_api_repr()

    expected = self.RESOURCE
    self.assertEqual(api_repr, expected)

tests/unit/test_external_config.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,8 +25,8 @@
2525

2626
class TestExternalConfig(unittest.TestCase):
2727
SOURCE_URIS = ["gs://foo", "gs://bar"]
28-
2928
DATE_FORMAT = "MM/DD/YYYY"
29+
TIME_ZONE = "America/Los_Angeles"
3030

3131
BASE_RESOURCE = {
3232
"sourceFormat": "",
@@ -36,6 +36,7 @@ class TestExternalConfig(unittest.TestCase):
3636
"ignoreUnknownValues": False,
3737
"compression": "compression",
3838
"dateFormat": DATE_FORMAT,
39+
"timeZone": TIME_ZONE,
3940
}
4041

4142
def test_from_api_repr_base(self):
@@ -83,6 +84,7 @@ def test_to_api_repr_base(self):
8384
ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")]
8485

8586
ec.date_format = self.DATE_FORMAT
87+
ec.time_zone = self.TIME_ZONE
8688
exp_schema = {
8789
"fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}]
8890
}
@@ -97,6 +99,7 @@ def test_to_api_repr_base(self):
9799
"connectionId": "path/to/connection",
98100
"schema": exp_schema,
99101
"dateFormat": self.DATE_FORMAT,
102+
"timeZone": self.TIME_ZONE,
100103
}
101104
self.assertEqual(got_resource, exp_resource)
102105

@@ -132,8 +135,8 @@ def _verify_base(self, ec):
132135
self.assertEqual(ec.ignore_unknown_values, False)
133136
self.assertEqual(ec.max_bad_records, 17)
134137
self.assertEqual(ec.source_uris, self.SOURCE_URIS)
135-
136138
self.assertEqual(ec.date_format, self.DATE_FORMAT)
139+
self.assertEqual(ec.time_zone, self.TIME_ZONE)
137140

138141
def test_to_api_repr_source_format(self):
139142
ec = external_config.ExternalConfig("CSV")

0 commit comments

Comments
 (0)