-
Notifications
You must be signed in to change notification settings - Fork 322
feat: Adds source_column_match and associated tests #2227
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
fc3dbf7
88c3775
9403046
b6261dc
964b1d0
c25727d
3b39fc3
5686794
c31b1ad
b7eabe1
3b10ee1
a1eddb5
4ac00ca
8351a1c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,6 +30,7 @@ | |
| from google.cloud.bigquery._helpers import _int_or_none | ||
| from google.cloud.bigquery._helpers import _str_or_none | ||
| from google.cloud.bigquery import _helpers | ||
| from google.cloud.bigquery.enums import SourceColumnMatch | ||
| from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions | ||
| from google.cloud.bigquery import schema | ||
| from google.cloud.bigquery.schema import SchemaField | ||
|
|
@@ -474,6 +475,39 @@ def skip_leading_rows(self): | |
| def skip_leading_rows(self, value): | ||
| self._properties["skipLeadingRows"] = str(value) | ||
|
|
||
| @property | ||
| def source_column_match(self) -> Optional[SourceColumnMatch]: | ||
| """Optional[google.cloud.bigquery.enums.SourceColumnMatch]: Controls the | ||
| strategy used to match loaded columns to the schema. If not set, a sensible | ||
| default is chosen based on how the schema is provided. If autodetect is | ||
| used, then columns are matched by name. Otherwise, columns are matched by | ||
| position. This is done to keep the behavior backward-compatible. | ||
|
|
||
| Acceptable values are: | ||
|
|
||
| SOURCE_COLUMN_MATCH_UNSPECIFIED: Unspecified column name match option. | ||
| POSITION: matches by position. This assumes that the columns are ordered | ||
| the same way as the schema. | ||
| NAME: matches by name. This reads the header row as column names and | ||
| reorders columns to match the field names in the schema. | ||
|
|
||
| See | ||
| https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.source_column_match | ||
chalmerlowe marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| """ | ||
|
|
||
| value = self._properties.get("sourceColumnMatch") | ||
| # if value is not None: | ||
| return SourceColumnMatch(value) if value is not None else None | ||
| # return None | ||
|
|
||
| @source_column_match.setter | ||
| def source_column_match(self, value: Optional[SourceColumnMatch]): | ||
|
||
| if value is not None and not isinstance(value, SourceColumnMatch): | ||
| raise TypeError( | ||
| "value must be a google.cloud.bigquery.enums.SourceColumnMatch or None" | ||
| ) | ||
| self._properties["sourceColumnMatch"] = value.value if value else None | ||
|
|
||
| @property | ||
| def null_markers(self) -> Optional[Iterable[str]]: | ||
| """Optional[Iterable[str]]: A list of strings represented as SQL NULL values in a CSV file. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,7 @@ | |
| from typing import FrozenSet, List, Iterable, Optional | ||
|
|
||
| from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration | ||
| from google.cloud.bigquery.enums import SourceColumnMatch | ||
| from google.cloud.bigquery.external_config import HivePartitioningOptions | ||
| from google.cloud.bigquery.format_options import ParquetOptions | ||
| from google.cloud.bigquery import _helpers | ||
|
|
@@ -569,6 +570,39 @@ def source_format(self): | |
| def source_format(self, value): | ||
| self._set_sub_prop("sourceFormat", value) | ||
|
|
||
| @property | ||
| def source_column_match(self) -> Optional[SourceColumnMatch]: | ||
| """Optional[google.cloud.bigquery.enums.SourceColumnMatch]: Controls the | ||
| strategy used to match loaded columns to the schema. If not set, a sensible | ||
| default is chosen based on how the schema is provided. If autodetect is | ||
| used, then columns are matched by name. Otherwise, columns are matched by | ||
| position. This is done to keep the behavior backward-compatible. | ||
|
|
||
| Acceptable values are: | ||
chalmerlowe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| SOURCE_COLUMN_MATCH_UNSPECIFIED: Unspecified column name match option. | ||
| POSITION: matches by position. This assumes that the columns are ordered | ||
| the same way as the schema. | ||
| NAME: matches by name. This reads the header row as column names and | ||
| reorders columns to match the field names in the schema. | ||
|
|
||
| See: | ||
|
|
||
| https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_column_match | ||
| """ | ||
| value = self._get_sub_prop("sourceColumnMatch") | ||
| if value is not None: | ||
| return SourceColumnMatch(value) | ||
| return None | ||
|
|
||
| @source_column_match.setter | ||
| def source_column_match(self, value: Optional[SourceColumnMatch]): | ||
|
||
| if value is not None and not isinstance(value, SourceColumnMatch): | ||
| raise TypeError( | ||
| "value must be a google.cloud.bigquery.enums.SourceColumnMatch or None" | ||
| ) | ||
| self._set_sub_prop("sourceColumnMatch", value.value if value else None) | ||
|
|
||
| @property | ||
| def date_format(self) -> Optional[str]: | ||
| """Optional[str]: Date format used for parsing DATE values. | ||
|
|
@@ -983,6 +1017,13 @@ def clustering_fields(self): | |
| """ | ||
| return self.configuration.clustering_fields | ||
|
|
||
    @property
    def source_column_match(self):
        """See
        :attr:`google.cloud.bigquery.job.LoadJobConfig.source_column_match`.
        """
        # Read-only delegation to the underlying job configuration.
        return self.configuration.source_column_match
|
|
||
| @property | ||
| def date_format(self): | ||
| """See | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -844,6 +844,35 @@ def test_write_disposition_setter(self): | |
| config._properties["load"]["writeDisposition"], write_disposition | ||
| ) | ||
|
|
||
| def test_source_column_match_missing(self): | ||
| config = self._get_target_class()() | ||
| self.assertIsNone(config.source_column_match) | ||
|
|
||
| def test_source_column_match_hit(self): | ||
| from google.cloud.bigquery.enums import SourceColumnMatch | ||
|
|
||
| option_enum = SourceColumnMatch.NAME | ||
| config = self._get_target_class()() | ||
| # Assume API stores the string value of the enum | ||
| config._properties["load"]["sourceColumnMatch"] = option_enum.value | ||
| self.assertEqual(config.source_column_match, option_enum) | ||
|
|
||
| def test_source_column_match_setter(self): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we add another test where we use the setter with a
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, both here and in test_external_config.py |
||
| from google.cloud.bigquery.enums import SourceColumnMatch | ||
|
|
||
| option_enum = SourceColumnMatch.POSITION | ||
| config = self._get_target_class()() | ||
| config.source_column_match = option_enum | ||
| # Assert that the string value of the enum is stored | ||
| self.assertEqual( | ||
| config._properties["load"]["sourceColumnMatch"], option_enum.value | ||
| ) | ||
|
|
||
| def test_source_column_match_setter_invalid_type(self): | ||
| config = self._get_target_class()() | ||
| with self.assertRaises(TypeError): | ||
| config.source_column_match = "INVALID_STRING_TYPE" | ||
|
|
||
| def test_date_format_missing(self): | ||
| config = self._get_target_class()() | ||
| self.assertIsNone(config.date_format) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.