Skip to content

Commit 8c28a21

Browse files
authored
Merge branch 'main' into fix-dataset-output-only-docs-final
2 parents b73f946 + 45643a2 commit 8c28a21

27 files changed

+474
-93
lines changed

CHANGELOG.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,24 @@
55
[1]: https://pypi.org/project/google-cloud-bigquery/#history
66

77

8+
## [3.34.0](https://github.com/googleapis/python-bigquery/compare/v3.33.0...v3.34.0) (2025-05-27)
9+
10+
11+
### Features
12+
13+
* Job creation mode GA ([#2190](https://github.com/googleapis/python-bigquery/issues/2190)) ([64cd39f](https://github.com/googleapis/python-bigquery/commit/64cd39fb395c4a03ef6d2ec8261e1709477b2186))
14+
15+
16+
### Bug Fixes
17+
18+
* **deps:** Update all dependencies ([#2184](https://github.com/googleapis/python-bigquery/issues/2184)) ([12490f2](https://github.com/googleapis/python-bigquery/commit/12490f2f03681516465fc34217dcdf57000f6fdd))
19+
20+
21+
### Documentation
22+
23+
* Update query.py ([#2192](https://github.com/googleapis/python-bigquery/issues/2192)) ([9b5ee78](https://github.com/googleapis/python-bigquery/commit/9b5ee78f046d9ca3f758eeca6244b8485fe35875))
24+
* Use query_and_wait in the array parameters sample ([#2202](https://github.com/googleapis/python-bigquery/issues/2202)) ([28a9994](https://github.com/googleapis/python-bigquery/commit/28a9994792ec90a6a4d16835faf2137c09c0fb02))
25+
826
## [3.33.0](https://github.com/googleapis/python-bigquery/compare/v3.32.0...v3.33.0) (2025-05-19)
927

1028

google/cloud/bigquery/_job_helpers.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@
3737

3838
import copy
3939
import functools
40-
import os
4140
import uuid
4241
import textwrap
4342
from typing import Any, Dict, Optional, TYPE_CHECKING, Union
@@ -400,12 +399,6 @@ def query_and_wait(
400399
) -> table.RowIterator:
401400
"""Run the query, wait for it to finish, and return the results.
402401
403-
While ``jobCreationMode=JOB_CREATION_OPTIONAL`` is in preview in the
404-
``jobs.query`` REST API, use the default ``jobCreationMode`` unless
405-
the environment variable ``QUERY_PREVIEW_ENABLED=true``. After
406-
``jobCreationMode`` is GA, this method will always use
407-
``jobCreationMode=JOB_CREATION_OPTIONAL``. See:
408-
https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query
409402
410403
Args:
411404
client:
@@ -500,9 +493,8 @@ def query_and_wait(
500493
request_body["maxResults"] = min(page_size, max_results)
501494
elif page_size is not None or max_results is not None:
502495
request_body["maxResults"] = page_size or max_results
503-
504-
if os.getenv("QUERY_PREVIEW_ENABLED", "").casefold() == "true":
505-
request_body["jobCreationMode"] = "JOB_CREATION_OPTIONAL"
496+
if client.default_job_creation_mode:
497+
request_body["jobCreationMode"] = client.default_job_creation_mode
506498

507499
def do_query():
508500
request_body["requestId"] = make_job_id()

google/cloud/bigquery/client.py

Lines changed: 69 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,8 @@
9090
from google.cloud.bigquery.dataset import Dataset
9191
from google.cloud.bigquery.dataset import DatasetListItem
9292
from google.cloud.bigquery.dataset import DatasetReference
93-
from google.cloud.bigquery.enums import AutoRowIDs
93+
94+
from google.cloud.bigquery.enums import AutoRowIDs, DatasetView, UpdateMode
9495
from google.cloud.bigquery.format_options import ParquetOptions
9596
from google.cloud.bigquery.job import (
9697
CopyJob,
@@ -221,6 +222,10 @@ class Client(ClientWithProject):
221222
client_options (Optional[Union[google.api_core.client_options.ClientOptions, Dict]]):
222223
Client options used to set user options on the client. API Endpoint
223224
should be set through client_options.
225+
default_job_creation_mode (Optional[str]):
226+
Sets the default job creation mode used by query methods such as
227+
query_and_wait(). For lightweight queries, JOB_CREATION_OPTIONAL is
228+
generally recommended.
224229
225230
Raises:
226231
google.auth.exceptions.DefaultCredentialsError:
@@ -243,6 +248,7 @@ def __init__(
243248
client_options: Optional[
244249
Union[google.api_core.client_options.ClientOptions, Dict[str, Any]]
245250
] = None,
251+
default_job_creation_mode: Optional[str] = None,
246252
) -> None:
247253
if client_options is None:
248254
client_options = {}
@@ -277,6 +283,7 @@ def __init__(
277283
self._connection = Connection(self, **kw_args)
278284
self._location = location
279285
self._default_load_job_config = copy.deepcopy(default_load_job_config)
286+
self.default_job_creation_mode = default_job_creation_mode
280287

281288
# Use property setter so validation can run.
282289
self.default_query_job_config = default_query_job_config
@@ -286,6 +293,15 @@ def location(self):
286293
"""Default location for jobs / datasets / tables."""
287294
return self._location
288295

296+
@property
297+
def default_job_creation_mode(self):
298+
"""Default job creation mode used for query execution."""
299+
return self._default_job_creation_mode
300+
301+
@default_job_creation_mode.setter
302+
def default_job_creation_mode(self, value: Optional[str]):
303+
self._default_job_creation_mode = value
304+
289305
@property
290306
def default_query_job_config(self) -> Optional[QueryJobConfig]:
291307
"""Default ``QueryJobConfig`` or ``None``.
@@ -849,6 +865,7 @@ def get_dataset(
849865
dataset_ref: Union[DatasetReference, str],
850866
retry: retries.Retry = DEFAULT_RETRY,
851867
timeout: TimeoutType = DEFAULT_TIMEOUT,
868+
dataset_view: Optional[DatasetView] = None,
852869
) -> Dataset:
853870
"""Fetch the dataset referenced by ``dataset_ref``
854871
@@ -866,7 +883,21 @@ def get_dataset(
866883
timeout (Optional[float]):
867884
The number of seconds to wait for the underlying HTTP transport
868885
before using ``retry``.
869-
886+
dataset_view (Optional[google.cloud.bigquery.enums.DatasetView]):
887+
Specifies the view that determines which dataset information is
888+
returned. By default, dataset metadata (e.g. friendlyName, description,
889+
labels, etc) and ACL information are returned. This argument can
890+
take on the following possible enum values.
891+
892+
* :attr:`~google.cloud.bigquery.enums.DatasetView.ACL`:
893+
Includes dataset metadata and the ACL.
894+
* :attr:`~google.cloud.bigquery.enums.DatasetView.FULL`:
895+
Includes all dataset metadata, including the ACL and table metadata.
896+
This view is not supported by the `datasets.list` API method.
897+
* :attr:`~google.cloud.bigquery.enums.DatasetView.METADATA`:
898+
Includes basic dataset metadata, but not the ACL.
899+
* :attr:`~google.cloud.bigquery.enums.DatasetView.DATASET_VIEW_UNSPECIFIED`:
900+
The server will decide which view to use. Currently defaults to FULL.
870901
Returns:
871902
google.cloud.bigquery.dataset.Dataset:
872903
A ``Dataset`` instance.
@@ -876,6 +907,12 @@ def get_dataset(
876907
dataset_ref, default_project=self.project
877908
)
878909
path = dataset_ref.path
910+
911+
if dataset_view:
912+
query_params = {"datasetView": dataset_view.value}
913+
else:
914+
query_params = {}
915+
879916
span_attributes = {"path": path}
880917
api_response = self._call_api(
881918
retry,
@@ -884,6 +921,7 @@ def get_dataset(
884921
method="GET",
885922
path=path,
886923
timeout=timeout,
924+
query_params=query_params,
887925
)
888926
return Dataset.from_api_repr(api_response)
889927

@@ -1183,6 +1221,7 @@ def update_dataset(
11831221
fields: Sequence[str],
11841222
retry: retries.Retry = DEFAULT_RETRY,
11851223
timeout: TimeoutType = DEFAULT_TIMEOUT,
1224+
update_mode: Optional[UpdateMode] = None,
11861225
) -> Dataset:
11871226
"""Change some fields of a dataset.
11881227
@@ -1222,6 +1261,20 @@ def update_dataset(
12221261
timeout (Optional[float]):
12231262
The number of seconds to wait for the underlying HTTP transport
12241263
before using ``retry``.
1264+
update_mode (Optional[google.cloud.bigquery.enums.UpdateMode]):
1265+
Specifies the kind of information to update in a dataset.
1266+
By default, dataset metadata (e.g. friendlyName, description,
1267+
labels, etc) and ACL information are updated. This argument can
1268+
take on the following possible enum values.
1269+
1270+
* :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_MODE_UNSPECIFIED`:
1271+
The default value. Behavior defaults to UPDATE_FULL.
1272+
* :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_METADATA`:
1273+
Includes metadata information for the dataset, such as friendlyName, description, labels, etc.
1274+
* :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_ACL`:
1275+
Includes ACL information for the dataset, which defines dataset access for one or more entities.
1276+
* :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_FULL`:
1277+
Includes both dataset metadata and ACL information.
12251278
12261279
Returns:
12271280
google.cloud.bigquery.dataset.Dataset:
@@ -1235,6 +1288,11 @@ def update_dataset(
12351288
path = dataset.path
12361289
span_attributes = {"path": path, "fields": fields}
12371290

1291+
if update_mode:
1292+
query_params = {"updateMode": update_mode.value}
1293+
else:
1294+
query_params = {}
1295+
12381296
api_response = self._call_api(
12391297
retry,
12401298
span_name="BigQuery.updateDataset",
@@ -1244,6 +1302,7 @@ def update_dataset(
12441302
data=partial,
12451303
headers=headers,
12461304
timeout=timeout,
1305+
query_params=query_params,
12471306
)
12481307
return Dataset.from_api_repr(api_response)
12491308

@@ -1986,6 +2045,7 @@ def _get_query_results(
19862045
location: Optional[str] = None,
19872046
timeout: TimeoutType = DEFAULT_TIMEOUT,
19882047
page_size: int = 0,
2048+
start_index: Optional[int] = None,
19892049
) -> _QueryResults:
19902050
"""Get the query results object for a query job.
19912051
@@ -2004,9 +2064,12 @@ def _get_query_results(
20042064
before using ``retry``. If set, this connection timeout may be
20052065
increased to a minimum value. This prevents retries on what
20062066
would otherwise be a successful response.
2007-
page_size (int):
2067+
page_size (Optional[int]):
20082068
Maximum number of rows in a single response. See maxResults in
20092069
the jobs.getQueryResults REST API.
2070+
start_index (Optional[int]):
2071+
Zero-based index of the starting row. See startIndex in the
2072+
jobs.getQueryResults REST API.
20102073
20112074
Returns:
20122075
google.cloud.bigquery.query._QueryResults:
@@ -2036,6 +2099,9 @@ def _get_query_results(
20362099
if location is not None:
20372100
extra_params["location"] = location
20382101

2102+
if start_index is not None:
2103+
extra_params["startIndex"] = start_index
2104+
20392105
path = "/projects/{}/queries/{}".format(project, job_id)
20402106

20412107
# This call is typically made in a polling loop that checks whether the
@@ -3532,13 +3598,6 @@ def query_and_wait(
35323598
) -> RowIterator:
35333599
"""Run the query, wait for it to finish, and return the results.
35343600
3535-
While ``jobCreationMode=JOB_CREATION_OPTIONAL`` is in preview in the
3536-
``jobs.query`` REST API, use the default ``jobCreationMode`` unless
3537-
the environment variable ``QUERY_PREVIEW_ENABLED=true``. After
3538-
``jobCreationMode`` is GA, this method will always use
3539-
``jobCreationMode=JOB_CREATION_OPTIONAL``. See:
3540-
https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query
3541-
35423601
Args:
35433602
query (str):
35443603
SQL query to be executed. Defaults to the standard SQL

google/cloud/bigquery/enums.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,24 @@ class CreateDisposition(object):
8080
returned in the job result."""
8181

8282

83+
class DatasetView(enum.Enum):
84+
"""DatasetView specifies which dataset information is returned."""
85+
86+
DATASET_VIEW_UNSPECIFIED = "DATASET_VIEW_UNSPECIFIED"
87+
"""The default value. Currently maps to the FULL view."""
88+
89+
METADATA = "METADATA"
90+
"""View metadata information for the dataset, such as friendlyName,
91+
description, labels, etc."""
92+
93+
ACL = "ACL"
94+
"""View ACL information for the dataset, which defines dataset access
95+
for one or more entities."""
96+
97+
FULL = "FULL"
98+
"""View both dataset metadata and ACL information."""
99+
100+
83101
class DefaultPandasDTypes(enum.Enum):
84102
"""Default Pandas DataFrame DTypes to convert BigQuery data. These
85103
Sentinel values are used instead of None to maintain backward compatibility,
@@ -407,3 +425,40 @@ class BigLakeTableFormat(object):
407425

408426
ICEBERG = "ICEBERG"
409427
"""Apache Iceberg format."""
428+
429+
430+
class UpdateMode(enum.Enum):
431+
"""Specifies the kind of information to update in a dataset."""
432+
433+
UPDATE_MODE_UNSPECIFIED = "UPDATE_MODE_UNSPECIFIED"
434+
"""The default value. Behavior defaults to UPDATE_FULL."""
435+
436+
UPDATE_METADATA = "UPDATE_METADATA"
437+
"""Includes metadata information for the dataset, such as friendlyName,
438+
description, labels, etc."""
439+
440+
UPDATE_ACL = "UPDATE_ACL"
441+
"""Includes ACL information for the dataset, which defines dataset access
442+
for one or more entities."""
443+
444+
UPDATE_FULL = "UPDATE_FULL"
445+
"""Includes both dataset metadata and ACL information."""
446+
447+
448+
class JobCreationMode(object):
449+
"""Documented values for Job Creation Mode."""
450+
451+
JOB_CREATION_MODE_UNSPECIFIED = "JOB_CREATION_MODE_UNSPECIFIED"
452+
"""Job creation mode is unspecified."""
453+
454+
JOB_CREATION_REQUIRED = "JOB_CREATION_REQUIRED"
455+
"""Job creation is always required."""
456+
457+
JOB_CREATION_OPTIONAL = "JOB_CREATION_OPTIONAL"
458+
"""Job creation is optional.
459+
460+
Returning immediate results is prioritized.
461+
BigQuery will automatically determine if a Job needs to be created.
462+
The conditions under which BigQuery can decide to not create a Job are
463+
subject to change.
464+
"""

google/cloud/bigquery/external_config.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
import base64
2424
import copy
25+
import typing
2526
from typing import Any, Dict, FrozenSet, Iterable, Optional, Union
2627

2728
from google.cloud.bigquery._helpers import _to_bytes
@@ -835,10 +836,10 @@ def schema(self):
835836
See
836837
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.schema
837838
"""
838-
# TODO: The typehinting for this needs work. Setting this pragma to temporarily
839-
# manage a pytype issue that came up in another PR. See Issue: #2132
840-
prop = self._properties.get("schema", {}) # type: ignore
841-
return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] # type: ignore
839+
prop: Dict[str, Any] = typing.cast(
840+
Dict[str, Any], self._properties.get("schema", {})
841+
)
842+
return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])]
842843

843844
@schema.setter
844845
def schema(self, value):

google/cloud/bigquery/job/base.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -435,9 +435,7 @@ def __init__(self, job_id, client):
435435
@property
436436
def configuration(self) -> _JobConfig:
437437
"""Job-type specific configuration."""
438-
# TODO: The typehinting for this needs work. Setting this pragma to temporarily
439-
# manage a pytype issue that came up in another PR. See Issue: #2132
440-
configuration = self._CONFIG_CLASS() # pytype: disable=not-callable
438+
configuration: _JobConfig = self._CONFIG_CLASS() # pytype: disable=not-callable
441439
configuration._properties = self._properties.setdefault("configuration", {})
442440
return configuration
443441

google/cloud/bigquery/job/query.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1409,6 +1409,7 @@ def _reload_query_results(
14091409
retry: "retries.Retry" = DEFAULT_RETRY,
14101410
timeout: Optional[float] = None,
14111411
page_size: int = 0,
1412+
start_index: Optional[int] = None,
14121413
):
14131414
"""Refresh the cached query results unless already cached and complete.
14141415
@@ -1421,6 +1422,9 @@ def _reload_query_results(
14211422
page_size (int):
14221423
Maximum number of rows in a single response. See maxResults in
14231424
the jobs.getQueryResults REST API.
1425+
start_index (Optional[int]):
1426+
Zero-based index of the starting row. See startIndex in the
1427+
jobs.getQueryResults REST API.
14241428
"""
14251429
# Optimization: avoid a call to jobs.getQueryResults if it's already
14261430
# been fetched, e.g. from jobs.query first page of results.
@@ -1468,6 +1472,7 @@ def _reload_query_results(
14681472
location=self.location,
14691473
timeout=transport_timeout,
14701474
page_size=page_size,
1475+
start_index=start_index,
14711476
)
14721477

14731478
def result( # type: ignore # (incompatible with supertype)
@@ -1570,6 +1575,9 @@ def result( # type: ignore # (incompatible with supertype)
15701575
if page_size is not None:
15711576
reload_query_results_kwargs["page_size"] = page_size
15721577

1578+
if start_index is not None:
1579+
reload_query_results_kwargs["start_index"] = start_index
1580+
15731581
try:
15741582
retry_do_query = getattr(self, "_retry_do_query", None)
15751583
if retry_do_query is not None:

0 commit comments

Comments
 (0)