Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
ac7b1fe
feat: Add new options to LoadJobConfig and ExternalConfig
google-labs-jules[bot] Jun 25, 2025
bd75b96
Merge branch 'main' into feat-new-load-options
chalmerlowe Jun 26, 2025
a9b187f
Adds enum, revises some docstrings, and attribute names
chalmerlowe Jun 26, 2025
c963716
test: Add unit tests for new LoadJobConfig options
google-labs-jules[bot] Jun 26, 2025
d721ea4
refactor: Integrate user's changes and new LoadJobConfig tests
google-labs-jules[bot] Jun 26, 2025
13ccbe7
updates branch with a number of minor tweaks
chalmerlowe Jun 26, 2025
0d74392
renames some attributes and adds some typehinting
chalmerlowe Jun 27, 2025
9862782
troubleshooting an issue with magics and output capture
chalmerlowe Jun 27, 2025
548017b
update magics for more troubleshooting
chalmerlowe Jun 27, 2025
c75000a
update magics for more troubleshooting II
chalmerlowe Jun 27, 2025
4e87993
update magics for more troubleshooting III
chalmerlowe Jun 27, 2025
b2a5308
update magics by adding magics.context.project for several tests
chalmerlowe Jun 27, 2025
2c8bca2
update docstring formatting
chalmerlowe Jun 27, 2025
f2ad535
update enums docstring formatting
chalmerlowe Jun 27, 2025
2766b2c
Merge branch 'main' into feat-new-load-options
chalmerlowe Jul 2, 2025
c267a3c
updates some tests and constants
chalmerlowe Jul 2, 2025
04cb59e
updates docstring and removes comment
chalmerlowe Jul 2, 2025
f8ae243
remove debug statement
chalmerlowe Jul 2, 2025
a93a98c
updates to ensure coverage of external_config.py
chalmerlowe Jul 2, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions google/cloud/bigquery/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,3 +462,22 @@ class JobCreationMode(object):
The conditions under which BigQuery can decide to not create a Job are
subject to change.
"""


class SourceColumnMatch(str, enum.Enum):
    """Strategies for matching loaded columns to the schema.

    When no strategy is specified, a default is chosen based on how the
    schema is provided: if autodetect is used, columns are matched by name;
    otherwise, columns are matched by position. This keeps the behavior
    backward-compatible.
    """

    SOURCE_COLUMN_MATCH_UNSPECIFIED = "SOURCE_COLUMN_MATCH_UNSPECIFIED"
    """Unspecified column name match option."""

    POSITION = "POSITION"
    """Match columns by position, assuming the columns are ordered the same
    way as the schema."""

    NAME = "NAME"
    """Match columns by name: the header row is read as column names and the
    columns are reordered to match the field names in the schema."""
138 changes: 137 additions & 1 deletion google/cloud/bigquery/external_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,14 @@
import base64
import copy
import typing
from typing import Any, Dict, FrozenSet, Iterable, Optional, Union
from typing import Any, Dict, FrozenSet, Iterable, List, Optional, Union

from google.cloud.bigquery._helpers import _to_bytes
from google.cloud.bigquery._helpers import _bytes_to_json
from google.cloud.bigquery._helpers import _int_or_none
from google.cloud.bigquery._helpers import _str_or_none
from google.cloud.bigquery import _helpers
from google.cloud.bigquery.enums import SourceColumnMatch
from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions
from google.cloud.bigquery import schema
from google.cloud.bigquery.schema import SchemaField
Expand Down Expand Up @@ -474,6 +475,60 @@ def skip_leading_rows(self):
def skip_leading_rows(self, value):
    # The value is stored in its string form under the API field name.
    serialized = str(value)
    self._properties["skipLeadingRows"] = serialized

@property
def null_markers(self) -> Optional[List[str]]:
    """Optional[List[str]]: Strings that are read as SQL NULL in a CSV file.

    ``null_marker`` and ``null_markers`` are mutually exclusive: if one of
    them is set, the other must be left unset. Setting both at the same
    time results in a user error.

    Every string listed in ``null_markers``, including the empty string, is
    interpreted as SQL NULL. This applies to all column types.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.null_markers
    """
    markers = self._properties.get("nullMarkers")
    return markers

@null_markers.setter
def null_markers(self, value: Optional[List[str]]):
    # Stored as given; no client-side check against ``null_marker`` is made here.
    properties = self._properties
    properties["nullMarkers"] = value

@property
def source_column_match(self) -> Optional[SourceColumnMatch]:
    """Optional[google.cloud.bigquery.enums.SourceColumnMatch]: Controls the
    strategy used to match loaded columns to the schema. If not set, a sensible
    default is chosen based on how the schema is provided. If autodetect is
    used, then columns are matched by name. Otherwise, columns are matched by
    position. This is done to keep the behavior backward-compatible.

    Acceptable values are:
        SOURCE_COLUMN_MATCH_UNSPECIFIED - Unspecified column name match option.
        POSITION - matches by position. This assumes that the columns are ordered
        the same way as the schema.
        NAME - matches by name. This reads the header row as column names and
        reorders columns to match the field names in the schema.

    Returns ``None`` when no value is stored for the option.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.source_column_match
    """
    raw = self._properties.get("sourceColumnMatch")
    if raw is None:
        return None
    # The stored value is the plain API string; convert it back to the enum.
    return SourceColumnMatch(raw)

@source_column_match.setter
def source_column_match(self, value: Optional[SourceColumnMatch]):
    # ``None`` clears the option; anything else must be an enum member.
    if value is None:
        self._properties["sourceColumnMatch"] = None
        return
    if not isinstance(value, SourceColumnMatch):
        raise TypeError(
            "value must be a google.cloud.bigquery.enums.SourceColumnMatch or None"
        )
    # Store the member's string value rather than the enum object itself.
    self._properties["sourceColumnMatch"] = value.value

def to_api_repr(self) -> dict:
"""Build an API representation of this object.

Expand Down Expand Up @@ -848,6 +903,87 @@ def schema(self, value):
prop = {"fields": [field.to_api_repr() for field in value]}
self._properties["schema"] = prop

@property
def time_zone(self) -> Optional[str]:
    """Optional[str]: Time zone used when parsing timestamp values that do not
    have specific time zone information (e.g. 2024-04-20 12:34:56). The expected
    format is an IANA timezone string (e.g. America/Los_Angeles).

    Returns ``None`` when no value is stored for the option.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_zone
    """
    # The key may be absent, so narrow to Optional[str] rather than str.
    return typing.cast(Optional[str], self._properties.get("timeZone"))

@time_zone.setter
def time_zone(self, value: Optional[str]):
    self._properties["timeZone"] = value

@property
def date_format(self) -> Optional[str]:
    """Optional[str]: Date format used for parsing DATE values.

    (Valid for CSV and NEWLINE_DELIMITED_JSON)

    Returns ``None`` when no value is stored for the option.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.date_format
    """
    # The key may be absent, so narrow to Optional[str] rather than str.
    return typing.cast(Optional[str], self._properties.get("dateFormat"))

@date_format.setter
def date_format(self, value: Optional[str]):
    self._properties["dateFormat"] = value

@property
def datetime_format(self) -> Optional[str]:
    """Optional[str]: Format used for parsing DATETIME values.

    (Valid for CSV and NEWLINE_DELIMITED_JSON)

    Returns ``None`` when no value is stored for the option.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.datetime_format
    """
    # The key may be absent, so narrow to Optional[str] rather than str.
    return typing.cast(Optional[str], self._properties.get("datetimeFormat"))

@datetime_format.setter
def datetime_format(self, value: Optional[str]):
    self._properties["datetimeFormat"] = value

@property
def time_format(self) -> Optional[str]:
    """Optional[str]: Format used for parsing TIME values.

    (Valid for CSV and NEWLINE_DELIMITED_JSON)

    Returns ``None`` when no value is stored for the option.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_format
    """
    # The key may be absent, so narrow to Optional[str] rather than str.
    return typing.cast(Optional[str], self._properties.get("timeFormat"))

@time_format.setter
def time_format(self, value: Optional[str]):
    self._properties["timeFormat"] = value

@property
def timestamp_format(self) -> Optional[str]:
    """Optional[str]: Format used for parsing TIMESTAMP values.

    (Valid for CSV and NEWLINE_DELIMITED_JSON)

    Returns ``None`` when no value is stored for the option.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.timestamp_format
    """
    # The key may be absent, so narrow to Optional[str] rather than str.
    return typing.cast(Optional[str], self._properties.get("timestampFormat"))

@timestamp_format.setter
def timestamp_format(self, value: Optional[str]):
    self._properties["timestampFormat"] = value

@property
def connection_id(self):
"""Optional[str]: [Experimental] ID of a BigQuery Connection API
Expand Down
173 changes: 173 additions & 0 deletions google/cloud/bigquery/job/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from google.cloud.bigquery.job.base import _JobConfig
from google.cloud.bigquery.job.base import _JobReference
from google.cloud.bigquery.query import ConnectionProperty
from google.cloud.bigquery.enums import SourceColumnMatch


class ColumnNameCharacterMap:
Expand Down Expand Up @@ -548,6 +549,129 @@ def source_format(self):
def source_format(self, value):
    # Written through the sub-property helper under the API field name.
    api_field = "sourceFormat"
    self._set_sub_prop(api_field, value)

@property
def time_zone(self) -> Optional[str]:
    """Optional[str]: Default time zone applied when parsing timestamp values
    that carry no specific time zone.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.time_zone
    """
    configured = self._get_sub_prop("timeZone")
    return configured

@time_zone.setter
def time_zone(self, value: Optional[str]):
    api_field = "timeZone"
    self._set_sub_prop(api_field, value)

@property
def date_format(self) -> Optional[str]:
    """Optional[str]: Date format used for parsing DATE values.

    This option is valid for CSV and JSON sources.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.date_format
    """
    configured = self._get_sub_prop("dateFormat")
    return configured

@date_format.setter
def date_format(self, value: Optional[str]):
    api_field = "dateFormat"
    self._set_sub_prop(api_field, value)

@property
def datetime_format(self) -> Optional[str]:
    """Optional[str]: Date format used for parsing DATETIME values.

    This option is valid for CSV and JSON sources.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.datetime_format
    """
    configured = self._get_sub_prop("datetimeFormat")
    return configured

@datetime_format.setter
def datetime_format(self, value: Optional[str]):
    api_field = "datetimeFormat"
    self._set_sub_prop(api_field, value)

@property
def time_format(self) -> Optional[str]:
    """Optional[str]: Date format used for parsing TIME values.

    This option is valid for CSV and JSON sources.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.time_format
    """
    configured = self._get_sub_prop("timeFormat")
    return configured

@time_format.setter
def time_format(self, value: Optional[str]):
    api_field = "timeFormat"
    self._set_sub_prop(api_field, value)

@property
def timestamp_format(self) -> Optional[str]:
    """Optional[str]: Date format used for parsing TIMESTAMP values.

    This option is valid for CSV and JSON sources.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.timestamp_format
    """
    configured = self._get_sub_prop("timestampFormat")
    return configured

@timestamp_format.setter
def timestamp_format(self, value: Optional[str]):
    api_field = "timestampFormat"
    self._set_sub_prop(api_field, value)

@property
def null_markers(self) -> Optional[List[str]]:
    """Optional[List[str]]: Strings that are read as SQL NULL in a CSV file.

    ``null_marker`` and ``null_markers`` are mutually exclusive: if one of
    them is set, the other must be left unset. Setting both at the same
    time results in a user error.

    Every string listed in ``null_markers``, including the empty string, is
    interpreted as SQL NULL. This applies to all column types.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.null_markers
    """
    markers = self._get_sub_prop("nullMarkers")
    return markers

@null_markers.setter
def null_markers(self, value: Optional[List[str]]):
    # Stored as given; no client-side check against ``null_marker`` is made here.
    api_field = "nullMarkers"
    self._set_sub_prop(api_field, value)

@property
def source_column_match(self) -> Optional[SourceColumnMatch]:
    """Optional[google.cloud.bigquery.enums.SourceColumnMatch]: Strategy used
    to match loaded columns to the schema.

    If not set, a sensible default is chosen based on how the schema is
    provided: with autodetect, columns are matched by name; otherwise they
    are matched by position. This is done to keep the behavior
    backward-compatible.

    Acceptable values are:
        SOURCE_COLUMN_MATCH_UNSPECIFIED - Unspecified column name match option.
        POSITION - matches by position. This assumes that the columns are ordered
        the same way as the schema.
        NAME - matches by name. This reads the header row as column names and
        reorders columns to match the field names in the schema.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_column_match
    """
    raw = self._get_sub_prop("sourceColumnMatch")
    # The stored value is the plain API string; convert it back to the enum.
    return None if raw is None else SourceColumnMatch(raw)

@source_column_match.setter
def source_column_match(self, value: Optional[SourceColumnMatch]):
    # ``None`` clears the option; anything else must be an enum member.
    if value is None:
        self._set_sub_prop("sourceColumnMatch", None)
        return
    if not isinstance(value, SourceColumnMatch):
        raise TypeError(
            "value must be a google.cloud.bigquery.enums.SourceColumnMatch or None"
        )
    # Store the member's string value rather than the enum object itself.
    self._set_sub_prop("sourceColumnMatch", value.value)

@property
def time_partitioning(self):
"""Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based
Expand Down Expand Up @@ -889,6 +1013,55 @@ def clustering_fields(self):
"""
return self.configuration.clustering_fields

@property
def time_zone(self):
    """Value of ``time_zone`` on this job's configuration.

    See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.time_zone`.
    """
    config = self.configuration
    return config.time_zone

@property
def date_format(self):
    """Value of ``date_format`` on this job's configuration.

    See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.date_format`.
    """
    config = self.configuration
    return config.date_format

@property
def datetime_format(self):
    """Value of ``datetime_format`` on this job's configuration.

    See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.datetime_format`.
    """
    config = self.configuration
    return config.datetime_format

@property
def time_format(self):
    """Value of ``time_format`` on this job's configuration.

    See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.time_format`.
    """
    config = self.configuration
    return config.time_format

@property
def timestamp_format(self):
    """Value of ``timestamp_format`` on this job's configuration.

    See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.timestamp_format`.
    """
    config = self.configuration
    return config.timestamp_format

@property
def null_markers(self):
    """Value of ``null_markers`` on this job's configuration.

    See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.null_markers`.
    """
    config = self.configuration
    return config.null_markers

@property
def source_column_match(self):
    """Value of ``source_column_match`` on this job's configuration.

    See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.source_column_match`.
    """
    config = self.configuration
    return config.source_column_match

@property
def schema_update_options(self):
"""See
Expand Down
2 changes: 1 addition & 1 deletion samples/desktopapp/requirements-test.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
google-cloud-testutils==1.6.4
pytest==8.4.1
pytest==8.4.0
mock==5.2.0
pytest-xdist==3.7.0
2 changes: 1 addition & 1 deletion samples/geography/requirements-test.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
pytest==8.4.1
pytest==8.4.0
mock==5.2.0
pytest-xdist==3.7.0
2 changes: 1 addition & 1 deletion samples/geography/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
attrs==25.3.0
certifi==2025.6.15
certifi==2025.4.26
cffi==1.17.1
charset-normalizer==3.4.2
click===8.1.8; python_version == '3.9'
Expand Down
Loading
Loading