diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 9a1e4880c..e27b13ae7 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -462,3 +462,22 @@ class JobCreationMode(object): The conditions under which BigQuery can decide to not create a Job are subject to change. """ + + +class SourceColumnMatch(str, enum.Enum): + """Uses sensible defaults based on how the schema is provided. + + If autodetect is used, then columns are matched by name. Otherwise, columns + are matched by position. This is done to keep the behavior backward-compatible. + """ + + SOURCE_COLUMN_MATCH_UNSPECIFIED = "SOURCE_COLUMN_MATCH_UNSPECIFIED" + """Unspecified column name match option.""" + + POSITION = "POSITION" + """Matches by position. This assumes that the columns are ordered the same + way as the schema.""" + + NAME = "NAME" + """Matches by name. This reads the header row as column names and reorders + columns to match the field names in the schema.""" diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index cb8141cd0..df2f9c05d 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -23,13 +23,14 @@ import base64 import copy import typing -from typing import Any, Dict, FrozenSet, Iterable, Optional, Union +from typing import Any, Dict, FrozenSet, Iterable, List, Optional, Union from google.cloud.bigquery._helpers import _to_bytes from google.cloud.bigquery._helpers import _bytes_to_json from google.cloud.bigquery._helpers import _int_or_none from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery import _helpers +from google.cloud.bigquery.enums import SourceColumnMatch from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions from google.cloud.bigquery import schema from google.cloud.bigquery.schema import SchemaField @@ -474,6 +475,60 @@ def skip_leading_rows(self): def skip_leading_rows(self, value): self._properties["skipLeadingRows"] = str(value) + @property + def null_markers(self) -> Optional[List[str]]: + """Optional[List[str]]: A list of strings represented as SQL NULL value in a CSV file. + + null_marker and null_markers can't be set at the same time. + If null_marker is set, null_markers has to be not set. + If null_markers is set, null_marker has to be not set. + If both null_marker and null_markers are set at the same time, a user + error would be thrown. + Any strings listed in null_markers, including + empty string would be interpreted as SQL NULL. This applies to all column + types. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.null_markers + """ + return self._properties.get("nullMarkers") + + @null_markers.setter + def null_markers(self, value: Optional[List[str]]): + self._properties["nullMarkers"] = value + + @property + def source_column_match(self) -> Optional[SourceColumnMatch]: + """Optional[google.cloud.bigquery.enums.SourceColumnMatch]: Controls the + strategy used to match loaded columns to the schema. If not set, a sensible + default is chosen based on how the schema is provided. If autodetect is + used, then columns are matched by name. Otherwise, columns are matched by + position. This is done to keep the behavior backward-compatible. + + Acceptable values are: + SOURCE_COLUMN_MATCH_UNSPECIFIED - Unspecified column name match option. + POSITION - matches by position. This assumes that the columns are ordered + the same way as the schema. + NAME - matches by name. This reads the header row as column names and + reorders columns to match the field names in the schema. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.source_column_match + """ + + value = self._properties.get("sourceColumnMatch") + if value is not None: + return SourceColumnMatch(value) + return None + + @source_column_match.setter + def source_column_match(self, value: Optional[SourceColumnMatch]): + if value is not None and not isinstance(value, SourceColumnMatch): + raise TypeError( + "value must be a google.cloud.bigquery.enums.SourceColumnMatch or None" + ) + self._properties["sourceColumnMatch"] = value.value if value else None + def to_api_repr(self) -> dict: """Build an API representation of this object. @@ -848,6 +903,87 @@ def schema(self, value): prop = {"fields": [field.to_api_repr() for field in value]} self._properties["schema"] = prop + @property + def time_zone(self) -> Optional[str]: + """Optional[str]: Time zone used when parsing timestamp values that do not + have specific time zone information (e.g. 2024-04-20 12:34:56). The expected + format is an IANA timezone string (e.g. America/Los_Angeles). + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_zone + """ + + result = self._properties.get("timeZone") + return typing.cast(str, result) + + @time_zone.setter + def time_zone(self, value: Optional[str]): + self._properties["timeZone"] = value + + @property + def date_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing DATE values. + + (Valid for CSV and NEWLINE_DELIMITED_JSON) + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.date_format + """ + result = self._properties.get("dateFormat") + return typing.cast(str, result) + + @date_format.setter + def date_format(self, value: Optional[str]): + self._properties["dateFormat"] = value + + @property + def datetime_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing DATETIME values. + + (Valid for CSV and NEWLINE_DELIMITED_JSON) + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.datetime_format + """ + result = self._properties.get("datetimeFormat") + return typing.cast(str, result) + + @datetime_format.setter + def datetime_format(self, value: Optional[str]): + self._properties["datetimeFormat"] = value + + @property + def time_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing TIME values. + + (Valid for CSV and NEWLINE_DELIMITED_JSON) + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_format + """ + result = self._properties.get("timeFormat") + return typing.cast(str, result) + + @time_format.setter + def time_format(self, value: Optional[str]): + self._properties["timeFormat"] = value + + @property + def timestamp_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing TIMESTAMP values. + + (Valid for CSV and NEWLINE_DELIMITED_JSON) + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.timestamp_format + """ + result = self._properties.get("timestampFormat") + return typing.cast(str, result) + + @timestamp_format.setter + def timestamp_format(self, value: Optional[str]): + self._properties["timestampFormat"] = value + @property def connection_id(self): """Optional[str]: [Experimental] ID of a BigQuery Connection API diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index e56ce16f0..092b58dde 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -30,6 +30,7 @@ from google.cloud.bigquery.job.base import _JobConfig from google.cloud.bigquery.job.base import _JobReference from google.cloud.bigquery.query import ConnectionProperty +from google.cloud.bigquery.enums import SourceColumnMatch class ColumnNameCharacterMap: @@ -548,6 +549,129 @@ def source_format(self): def source_format(self, value): self._set_sub_prop("sourceFormat", value) + @property + def time_zone(self) -> Optional[str]: + """Optional[str]: Default time zone that will apply when parsing timestamp + values that have no specific time zone. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.time_zone + """ + return self._get_sub_prop("timeZone") + + @time_zone.setter + def time_zone(self, value: Optional[str]): + self._set_sub_prop("timeZone", value) + + @property + def date_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing DATE values. + This option is valid for CSV and JSON sources. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.date_format + """ + return self._get_sub_prop("dateFormat") + + @date_format.setter + def date_format(self, value: Optional[str]): + self._set_sub_prop("dateFormat", value) + + @property + def datetime_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing DATETIME values. + This option is valid for CSV and JSON sources. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.datetime_format + """ + return self._get_sub_prop("datetimeFormat") + + @datetime_format.setter + def datetime_format(self, value: Optional[str]): + self._set_sub_prop("datetimeFormat", value) + + @property + def time_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing TIME values. + This option is valid for CSV and JSON sources. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.time_format + """ + return self._get_sub_prop("timeFormat") + + @time_format.setter + def time_format(self, value: Optional[str]): + self._set_sub_prop("timeFormat", value) + + @property + def timestamp_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing TIMESTAMP values. + This option is valid for CSV and JSON sources. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.timestamp_format + """ + return self._get_sub_prop("timestampFormat") + + @timestamp_format.setter + def timestamp_format(self, value: Optional[str]): + self._set_sub_prop("timestampFormat", value) + + @property + def null_markers(self) -> Optional[List[str]]: + """Optional[List[str]]: A list of strings represented as SQL NULL value in a CSV file. + + null_marker and null_markers can't be set at the same time. + If null_marker is set, null_markers has to be not set. + If null_markers is set, null_marker has to be not set. + If both null_marker and null_markers are set at the same time, a user + error would be thrown. + Any strings listed in null_markers, including + empty string would be interpreted as SQL NULL. This applies to all column + types. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.null_markers + """ + return self._get_sub_prop("nullMarkers") + + @null_markers.setter + def null_markers(self, value: Optional[List[str]]): + self._set_sub_prop("nullMarkers", value) + + @property + def source_column_match(self) -> Optional[SourceColumnMatch]: + """Optional[google.cloud.bigquery.enums.SourceColumnMatch]: Controls the + strategy used to match loaded columns to the schema. If not set, a sensible + default is chosen based on how the schema is provided. If autodetect is + used, then columns are matched by name. Otherwise, columns are matched by + position. This is done to keep the behavior backward-compatible. + + Acceptable values are: + SOURCE_COLUMN_MATCH_UNSPECIFIED - Unspecified column name match option. + POSITION - matches by position. This assumes that the columns are ordered + the same way as the schema. + NAME - matches by name. This reads the header row as column names and + reorders columns to match the field names in the schema. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_column_match + """ + value = self._get_sub_prop("sourceColumnMatch") + if value is not None: + return SourceColumnMatch(value) + return None + + @source_column_match.setter + def source_column_match(self, value: Optional[SourceColumnMatch]): + if value is not None and not isinstance(value, SourceColumnMatch): + raise TypeError( + "value must be a google.cloud.bigquery.enums.SourceColumnMatch or None" + ) + self._set_sub_prop("sourceColumnMatch", value.value if value else None) + @property def time_partitioning(self): """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based @@ -889,6 +1013,55 @@ def clustering_fields(self): """ return self.configuration.clustering_fields + @property + def time_zone(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.time_zone`. + """ + return self.configuration.time_zone + + @property + def date_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.date_format`. + """ + return self.configuration.date_format + + @property + def datetime_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.datetime_format`. + """ + return self.configuration.datetime_format + + @property + def time_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.time_format`. + """ + return self.configuration.time_format + + @property + def timestamp_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.timestamp_format`. + """ + return self.configuration.timestamp_format + + @property + def null_markers(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.null_markers`. + """ + return self.configuration.null_markers + + @property + def source_column_match(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.source_column_match`. + """ + return self.configuration.source_column_match + @property def schema_update_options(self): """See diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index b3046227c..4b9c515a7 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.1 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index ee895a4f4..824a1df4a 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ -pytest==8.4.1 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 68f6c1662..0bb3b7892 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==25.3.0 -certifi==2025.6.15 +certifi==2025.4.26 cffi==1.17.1 charset-normalizer==3.4.2 click===8.1.8; python_version == '3.9' diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index b3046227c..4b9c515a7 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.1 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index b3046227c..4b9c515a7 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.1 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index d71018b3f..d311187ec 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.6.4 -pytest==8.4.1 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 10df46fb3..e06515aa5 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -37,11 +37,27 @@ def _setUpConstants(self): self.OUTPUT_BYTES = 23456 self.OUTPUT_ROWS = 345 self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" + self.TIME_ZONE = "UTC" + self.DATE_FORMAT = "%Y-%m-%d" + self.DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S" + self.TIME_FORMAT = "%H:%M:%S" + self.TIMESTAMP_FORMAT = "YYYY-MM-DD HH:MM:SS.SSSSSSZ" + self.NULL_MARKERS = ["N/A", "NA"] + self.SOURCE_COLUMN_MATCH = "NAME" def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) config = resource["configuration"]["load"] config["sourceUris"] = [self.SOURCE1] + config["timeZone"] = self.TIME_ZONE + config["dateFormat"] = self.DATE_FORMAT + config["datetimeFormat"] = self.DATETIME_FORMAT + config["timeFormat"] = self.TIME_FORMAT + config["timestampFormat"] = self.TIMESTAMP_FORMAT + config["nullMarkers"] = self.NULL_MARKERS + config[ + "sourceColumnMatch" + ] = self.SOURCE_COLUMN_MATCH # Keep value as string for mock API repr config["destinationTable"] = { "projectId": self.PROJECT, "datasetId": self.DS_ID, @@ -153,6 +169,39 @@ def _verifyResourceProperties(self, job, resource): else: self.assertIsNone(job.destination_encryption_configuration) + if "timeZone" in config: + self.assertEqual(job.time_zone, config["timeZone"]) + else: + self.assertIsNone(job.time_zone) + if "dateFormat" in config: + self.assertEqual(job.date_format, config["dateFormat"]) + else: + self.assertIsNone(job.date_format) + if "datetimeFormat" in config: + self.assertEqual(job.datetime_format, config["datetimeFormat"]) + else: + self.assertIsNone(job.datetime_format) + if "timeFormat" in config: + self.assertEqual(job.time_format, config["timeFormat"]) + else: + self.assertIsNone(job.time_format) + if "timestampFormat" in config: + self.assertEqual(job.timestamp_format, config["timestampFormat"]) + else: + self.assertIsNone(job.timestamp_format) + if "nullMarkers" in config: + self.assertEqual(job.null_markers, config["nullMarkers"]) + else: + self.assertIsNone(job.null_markers) + if "sourceColumnMatch" in config: + # job.source_column_match will be an Enum, config[...] is a string + self.assertEqual( + job.source_column_match.value, + config["sourceColumnMatch"], + ) + else: + self.assertIsNone(job.source_column_match) + def test_ctor(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) @@ -194,6 +243,13 @@ def test_ctor(self): self.assertIsNone(job.clustering_fields) self.assertIsNone(job.schema_update_options) self.assertIsNone(job.reference_file_schema_uri) + self.assertIsNone(job.time_zone) + self.assertIsNone(job.date_format) + self.assertIsNone(job.datetime_format) + self.assertIsNone(job.time_format) + self.assertIsNone(job.timestamp_format) + self.assertIsNone(job.null_markers) + self.assertIsNone(job.source_column_match) def test_ctor_w_config(self): from google.cloud.bigquery.schema import SchemaField @@ -571,6 +627,13 @@ def test_begin_w_alternate_client(self): ] }, "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], + "timeZone": self.TIME_ZONE, + "dateFormat": self.DATE_FORMAT, + "datetimeFormat": self.DATETIME_FORMAT, + "timeFormat": self.TIME_FORMAT, + "timestampFormat": self.TIMESTAMP_FORMAT, + "nullMarkers": self.NULL_MARKERS, + "sourceColumnMatch": self.SOURCE_COLUMN_MATCH, } RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION conn1 = make_connection() @@ -599,6 +662,16 @@ def test_begin_w_alternate_client(self): config.write_disposition = WriteDisposition.WRITE_TRUNCATE config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] config.reference_file_schema_uri = "gs://path/to/reference" + config.time_zone = self.TIME_ZONE + config.date_format = self.DATE_FORMAT + config.datetime_format = self.DATETIME_FORMAT + config.time_format = self.TIME_FORMAT + config.timestamp_format = self.TIMESTAMP_FORMAT + config.null_markers = self.NULL_MARKERS + # Ensure we are setting with the Enum type if that's what the setter expects + from google.cloud.bigquery.enums import SourceColumnMatch + + config.source_column_match = SourceColumnMatch(self.SOURCE_COLUMN_MATCH) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 3a681c476..5640f421c 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -828,6 +828,133 @@ def test_write_disposition_setter(self): config._properties["load"]["writeDisposition"], write_disposition ) + def test_time_zone_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.time_zone) + + def test_time_zone_hit(self): + time_zone = "UTC" + config = self._get_target_class()() + config._properties["load"]["timeZone"] = time_zone + self.assertEqual(config.time_zone, time_zone) + + def test_time_zone_setter(self): + time_zone = "America/New_York" + config = self._get_target_class()() + config.time_zone = time_zone + self.assertEqual(config._properties["load"]["timeZone"], time_zone) + + def test_date_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.date_format) + + def test_date_format_hit(self): + date_format = "%Y-%m-%d" + config = self._get_target_class()() + config._properties["load"]["dateFormat"] = date_format + self.assertEqual(config.date_format, date_format) + + def test_date_format_setter(self): + date_format = "YYYY/MM/DD" + config = self._get_target_class()() + config.date_format = date_format + self.assertEqual(config._properties["load"]["dateFormat"], date_format) + + def test_datetime_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.datetime_format) + + def test_datetime_format_hit(self): + datetime_format = "%Y-%m-%dT%H:%M:%S" + config = self._get_target_class()() + config._properties["load"]["datetimeFormat"] = datetime_format + self.assertEqual(config.datetime_format, datetime_format) + + def test_datetime_format_setter(self): + datetime_format = "YYYY/MM/DD HH24:MI:SS" + config = self._get_target_class()() + config.datetime_format = datetime_format + self.assertEqual(config._properties["load"]["datetimeFormat"], datetime_format) + + def test_time_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.time_format) + + def test_time_format_hit(self): + time_format = "%H:%M:%S" + config = self._get_target_class()() + config._properties["load"]["timeFormat"] = time_format + self.assertEqual(config.time_format, time_format) + + def test_time_format_setter(self): + time_format = "HH24:MI:SS" + config = self._get_target_class()() + config.time_format = time_format + self.assertEqual(config._properties["load"]["timeFormat"], time_format) + + def test_timestamp_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.timestamp_format) + + def test_timestamp_format_hit(self): + timestamp_format = "%Y-%m-%dT%H:%M:%S.%fZ" + config = self._get_target_class()() + config._properties["load"]["timestampFormat"] = timestamp_format + self.assertEqual(config.timestamp_format, timestamp_format) + + def test_timestamp_format_setter(self): + timestamp_format = "YYYY/MM/DD HH24:MI:SS.FF6 TZR" + config = self._get_target_class()() + config.timestamp_format = timestamp_format + self.assertEqual( + config._properties["load"]["timestampFormat"], timestamp_format + ) + + def test_null_markers_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.null_markers) + + def test_null_markers_hit(self): + null_markers = ["", "NA", "\\N"] + config = self._get_target_class()() + config._properties["load"]["nullMarkers"] = null_markers + self.assertEqual(config.null_markers, null_markers) + + def test_null_markers_setter(self): + null_markers = ["custom_null"] + config = self._get_target_class()() + config.null_markers = null_markers + self.assertEqual(config._properties["load"]["nullMarkers"], null_markers) + + def test_source_column_match_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.source_column_match) + + def test_source_column_match_hit(self): + from google.cloud.bigquery.enums import SourceColumnMatch + + option_enum = SourceColumnMatch.NAME + config = self._get_target_class()() + # Assume API stores the string value of the enum + config._properties["load"]["sourceColumnMatch"] = option_enum.value + self.assertEqual(config.source_column_match, option_enum) + + def test_source_column_match_setter(self): + from google.cloud.bigquery.enums import SourceColumnMatch + + option_enum = SourceColumnMatch.POSITION + config = self._get_target_class()() + config.source_column_match = option_enum + # Assert that the string value of the enum is stored + self.assertEqual( + config._properties["load"]["sourceColumnMatch"], option_enum.value + ) + + def test_source_column_match_setter_invalid_type(self): + config = self._get_target_class()() + with self.assertRaises(TypeError): + config.source_column_match = "INVALID_STRING_TYPE" + def test_parquet_options_missing(self): config = self._get_target_class()() self.assertIsNone(config.parquet_options) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 7f84a9f5b..55ede9978 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -19,12 +19,20 @@ from google.cloud.bigquery import external_config from google.cloud.bigquery import schema +from google.cloud.bigquery.enums import SourceColumnMatch import pytest class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] + TIME_ZONE = "America/Los_Angeles" + DATE_FORMAT = "MM/DD/YYYY" + DATETIME_FORMAT = "MM/DD/YYYY HH24:MI:SS" + TIME_FORMAT = "HH24:MI:SS" + TIMESTAMP_FORMAT = "MM/DD/YYYY HH24:MI:SS.FF6 TZR" + NULL_MARKERS = ["", "N/A"] + SOURCE_COLUMN_MATCH = SourceColumnMatch.NAME BASE_RESOURCE = { "sourceFormat": "", @@ -33,6 +41,11 @@ class TestExternalConfig(unittest.TestCase): "autodetect": True, "ignoreUnknownValues": False, "compression": "compression", + "timeZone": TIME_ZONE, + "dateFormat": DATE_FORMAT, + "datetimeFormat": DATETIME_FORMAT, + "timeFormat": TIME_FORMAT, + "timestampFormat": TIMESTAMP_FORMAT, } def test_from_api_repr_base(self): @@ -78,6 +91,11 @@ def test_to_api_repr_base(self): ec.compression = "compression" ec.connection_id = "path/to/connection" ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] + ec.time_zone = self.TIME_ZONE + ec.date_format = self.DATE_FORMAT + ec.datetime_format = self.DATETIME_FORMAT + ec.time_format = self.TIME_FORMAT + ec.timestamp_format = self.TIMESTAMP_FORMAT exp_schema = { "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] @@ -92,6 +110,11 @@ def test_to_api_repr_base(self): "compression": "compression", "connectionId": "path/to/connection", "schema": exp_schema, + "timeZone": self.TIME_ZONE, + "dateFormat": self.DATE_FORMAT, + "datetimeFormat": self.DATETIME_FORMAT, + "timeFormat": self.TIME_FORMAT, + "timestampFormat": self.TIMESTAMP_FORMAT, } self.assertEqual(got_resource, exp_resource) @@ -121,12 +144,31 @@ def test_schema_empty(self): want = {"sourceFormat": "", "schema": {"fields": []}} self.assertEqual(got, want) + def test_source_column_match_None(self): + ec = external_config.ExternalConfig("") + ec.source_column_match = None + expected = None + result = ec.source_column_match + self.assertEqual(expected, result) + + def test_source_column_match_valid_input(self): + ec = external_config.ExternalConfig("") + ec.source_column_match = SourceColumnMatch.NAME + expected = "NAME" + result = ec.source_column_match + self.assertEqual(expected, result) + def _verify_base(self, ec): self.assertEqual(ec.autodetect, True) self.assertEqual(ec.compression, "compression") self.assertEqual(ec.ignore_unknown_values, False) self.assertEqual(ec.max_bad_records, 17) self.assertEqual(ec.source_uris, self.SOURCE_URIS) + self.assertEqual(ec.time_zone, self.TIME_ZONE) + self.assertEqual(ec.date_format, self.DATE_FORMAT) + self.assertEqual(ec.datetime_format, self.DATETIME_FORMAT) + self.assertEqual(ec.time_format, self.TIME_FORMAT) + self.assertEqual(ec.timestamp_format, self.TIMESTAMP_FORMAT) def test_to_api_repr_source_format(self): ec = external_config.ExternalConfig("CSV") @@ -251,6 +293,8 @@ def test_from_api_repr_csv(self): "allowJaggedRows": False, "encoding": "encoding", "preserveAsciiControlCharacters": False, + "nullMarkers": self.NULL_MARKERS, + "sourceColumnMatch": self.SOURCE_COLUMN_MATCH, }, }, ) @@ -267,6 +311,11 @@ def test_from_api_repr_csv(self): self.assertEqual(ec.options.allow_jagged_rows, False) self.assertEqual(ec.options.encoding, "encoding") self.assertEqual(ec.options.preserve_ascii_control_characters, False) + self.assertEqual(ec.options.null_markers, self.NULL_MARKERS) + self.assertEqual( + ec.options.source_column_match, + self.SOURCE_COLUMN_MATCH, + ) got_resource = ec.to_api_repr() @@ -288,6 +337,8 @@ def test_to_api_repr_csv(self): options.skip_leading_rows = 123 options.allow_jagged_rows = False options.preserve_ascii_control_characters = False + options.null_markers = self.NULL_MARKERS + options.source_column_match = self.SOURCE_COLUMN_MATCH ec.csv_options = options exp_resource = { @@ -300,6 +351,8 @@ def test_to_api_repr_csv(self): "allowJaggedRows": False, "encoding": "encoding", "preserveAsciiControlCharacters": False, + "nullMarkers": self.NULL_MARKERS, + "sourceColumnMatch": self.SOURCE_COLUMN_MATCH, }, } @@ -852,6 +905,9 @@ def test_to_api_repr(self): class CSVOptions(unittest.TestCase): + NULL_MARKERS = ["", "N/A"] + SOURCE_COLUMN_MATCH = SourceColumnMatch.NAME + def test_to_api_repr(self): options = external_config.CSVOptions() options.field_delimiter = "\t" @@ -861,6 +917,8 @@ def test_to_api_repr(self): options.allow_jagged_rows = False options.encoding = "UTF-8" options.preserve_ascii_control_characters = False + options.null_markers = self.NULL_MARKERS + options.source_column_match = self.SOURCE_COLUMN_MATCH resource = options.to_api_repr() @@ -874,6 +932,8 @@ def test_to_api_repr(self): "allowJaggedRows": False, "encoding": "UTF-8", "preserveAsciiControlCharacters": False, + "nullMarkers": self.NULL_MARKERS, + "sourceColumnMatch": self.SOURCE_COLUMN_MATCH, }, ) diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 814150693..ceb2530f2 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -985,7 +985,7 @@ def test_bigquery_magic_dryrun_option_sets_job_config(monkeypatch): magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - + magics.context.project = "project-from-context" run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) @@ -1007,6 +1007,7 @@ def test_bigquery_magic_dryrun_option_returns_query_job(monkeypatch): magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "project-from-context" query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1034,7 +1035,7 @@ def test_bigquery_magic_dryrun_option_variable_error_message( magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - + magics.context.project = "project-from-context" ipython_ns_cleanup.append((ip, "q_job")) run_query_patch = mock.patch( @@ -1064,6 +1065,7 @@ def test_bigquery_magic_dryrun_option_saves_query_job_to_variable( magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "project-from-context" query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1094,9 +1096,12 @@ def test_bigquery_magic_saves_query_job_to_variable_on_error( ip = IPython.get_ipython() monkeypatch.setattr(bigquery, "bigquery_magics", None) bigquery.load_ipython_extension(ip) - magics.context.credentials = mock.create_autospec( - google.auth.credentials.Credentials, instance=True + monkeypatch.setattr( + magics.context, + "credentials", + mock.create_autospec(google.auth.credentials.Credentials, instance=True), ) + magics.context.project = "project-from-context" ipython_ns_cleanup.append((ip, "result"))