Skip to content

Commit 3220439

Browse files
committed
add tests and list_rows() support
1 parent f4c9b6a commit 3220439

File tree

11 files changed

+149
-9
lines changed

11 files changed

+149
-9
lines changed

google/cloud/bigquery/_helpers.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
from google.cloud._helpers import _RFC3339_MICROS
3333
from google.cloud._helpers import _RFC3339_NO_FRACTION
3434
from google.cloud._helpers import _to_bytes
35+
from google.cloud.bigquery import enums
36+
3537
from google.auth import credentials as ga_credentials # type: ignore
3638
from google.api_core import client_options as client_options_lib
3739

@@ -253,7 +255,10 @@ def bytes_to_py(self, value, field):
253255
return base64.standard_b64decode(_to_bytes(value))
254256

255257
def timestamp_to_py(self, value, field):
256-
"""Coerce 'value' to a datetime, if set or not nullable."""
258+
"""Coerce 'value' to a datetime, if set or not nullable. If the timestamp
259+
is of picosecond precision, preserve the string format."""
260+
if field.timestamp_precision == enums.TimestampPrecision.PICOSECOND:
261+
return value
257262
if _not_null(value, field):
258263
# value will be a integer in seconds, to microsecond precision, in UTC.
259264
return _datetime_from_microseconds(int(value))

google/cloud/bigquery/client.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4062,6 +4062,8 @@ def list_rows(
40624062
page_size: Optional[int] = None,
40634063
retry: retries.Retry = DEFAULT_RETRY,
40644064
timeout: TimeoutType = DEFAULT_TIMEOUT,
4065+
*,
4066+
timestamp_precision: Union[enums.TimestampPrecision, int, None] = None,
40654067
) -> RowIterator:
40664068
"""List the rows of the table.
40674069
@@ -4110,6 +4112,11 @@ def list_rows(
41104112
before using ``retry``.
41114113
If multiple requests are made under the hood, ``timeout``
41124114
applies to each individual request.
4115+
timestamp_precision (Optional[enums.TimestampPrecision]):
4116+
[Private Preview] If set to `enums.TimestampPrecision.PICOSECOND`,
4117+
timestamp columns of picosecond precision will be returned with
4118+
full precision. Otherwise, timestamp values will be truncated to microsecond
4119+
precision.
41134120
41144121
Returns:
41154122
google.cloud.bigquery.table.RowIterator:
@@ -4144,6 +4151,13 @@ def list_rows(
41444151
params["startIndex"] = start_index
41454152

41464153
params["formatOptions.useInt64Timestamp"] = True
4154+
4155+
if timestamp_precision == enums.TimestampPrecision.PICOSECOND:
4156+
# Cannot specify both use_int64_timestamp and timestamp_output_format.
4157+
del params["formatOptions.useInt64Timestamp"]
4158+
4159+
params["formatOptions.timestampOutputFormat"] = "ISO8601_STRING"
4160+
41474161
row_iterator = RowIterator(
41484162
client=self,
41494163
api_request=functools.partial(self._call_api, retry, timeout=timeout),

google/cloud/bigquery/job/load.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -759,7 +759,6 @@ def column_name_character_map(self, value: Optional[str]):
759759
value = ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED
760760
self._set_sub_prop("columnNameCharacterMap", value)
761761

762-
763762
@property
764763
def timestamp_target_precision(self) -> Optional[list[int]]:
765764
"""Optional[list[int]]: [Private Preview] Precisions (maximum number of
@@ -776,7 +775,7 @@ def timestamp_target_precision(self) -> Optional[list[int]]:
776775
detected TIMESTAMP columns that have more than 6 digits of
777776
subseconds.
778777
[12]: timestamp(12) for all auto detected TIMESTAMP columns.
779-
778+
780779
The order of the elements in this array is ignored. Inputs that have
781780
higher precision than the highest target precision in this array will
782781
be truncated.

tests/data/pico.csv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
2025-01-01T00:00:00.123456789012Z
2+
2025-01-02T00:00:00.123456789012Z
3+
2025-01-03T00:00:00.123456789012Z

tests/data/pico_schema.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
[
2+
{
3+
"name": "pico_col",
4+
"type": "TIMESTAMP",
5+
"mode": "NULLABLE",
6+
"timestampPrecision": "12"
7+
}
8+
]

tests/system/conftest.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,12 +98,14 @@ def load_scalars_table(
9898
data_path: str = "scalars.jsonl",
9999
source_format=enums.SourceFormat.NEWLINE_DELIMITED_JSON,
100100
schema_source="scalars_schema.json",
101+
timestamp_target_precision=None,
101102
) -> str:
102103
schema = bigquery_client.schema_from_json(DATA_DIR / schema_source)
103104
table_id = data_path.replace(".", "_") + hex(random.randrange(1000000))
104105
job_config = bigquery.LoadJobConfig()
105106
job_config.schema = schema
106107
job_config.source_format = source_format
108+
job_config.timestamp_target_precision = timestamp_target_precision
107109
full_table_id = f"{project_id}.{dataset_id}.{table_id}"
108110
with open(DATA_DIR / data_path, "rb") as data_file:
109111
job = bigquery_client.load_table_from_file(
@@ -169,6 +171,23 @@ def scalars_table_csv(
169171
bigquery_client.delete_table(full_table_id, not_found_ok=True)
170172

171173

174+
@pytest.fixture(scope="session")
175+
def scalars_table_pico(
176+
bigquery_client: bigquery.Client, project_id: str, dataset_id: str
177+
):
178+
full_table_id = load_scalars_table(
179+
bigquery_client,
180+
project_id,
181+
dataset_id,
182+
data_path="pico.csv",
183+
source_format=enums.SourceFormat.CSV,
184+
schema_source="pico_schema.json",
185+
timestamp_target_precision=[12],
186+
)
187+
yield full_table_id
188+
bigquery_client.delete_table(full_table_id, not_found_ok=True)
189+
190+
172191
@pytest.fixture
173192
def test_table_name(request, replace_non_anum=re.compile(r"[^a-zA-Z0-9_]").sub):
174193
return replace_non_anum("_", request.node.name)

tests/system/test_client.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1295,6 +1295,29 @@ def test_load_table_from_json_schema_autodetect_table_exists(self):
12951295
self.assertEqual(tuple(table.schema), table_schema)
12961296
self.assertEqual(table.num_rows, 2)
12971297

1298+
def test_load_table_from_csv_w_picosecond_timestamp(self):
1299+
dataset_id = _make_dataset_id("bq_system_test")
1300+
self.temp_dataset(dataset_id)
1301+
table_id = "{}.{}.load_table_from_json_basic_use".format(
1302+
Config.CLIENT.project, dataset_id
1303+
)
1304+
1305+
table_schema = Config.CLIENT.schema_from_json(DATA_PATH / "pico_schema.json")
1306+
# create the table before loading so that the column order is predictable
1307+
table = helpers.retry_403(Config.CLIENT.create_table)(
1308+
Table(table_id, schema=table_schema)
1309+
)
1310+
self.to_delete.insert(0, table)
1311+
1312+
# do not pass an explicit job config to trigger automatic schema detection
1313+
with open(DATA_PATH / "pico.csv", "rb") as f:
1314+
load_job = Config.CLIENT.load_table_from_file(f, table_id)
1315+
load_job.result()
1316+
1317+
table = Config.CLIENT.get_table(table)
1318+
self.assertEqual(list(table.schema), table_schema)
1319+
self.assertEqual(table.num_rows, 3)
1320+
12981321
def test_load_avro_from_uri_then_dump_table(self):
12991322
from google.cloud.bigquery.job import CreateDisposition
13001323
from google.cloud.bigquery.job import SourceFormat

tests/system/test_list_rows.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,3 +132,23 @@ def test_list_rows_range(bigquery_client: bigquery.Client, scalars_table_csv: st
132132

133133
row_null = rows[1]
134134
assert row_null["range_date"] is None
135+
136+
137+
def test_list_rows_pico(bigquery_client: bigquery.Client, scalars_table_pico: str):
138+
rows = bigquery_client.list_rows(
139+
scalars_table_pico, timestamp_precision=enums.TimestampPrecision.PICOSECOND
140+
)
141+
rows = list(rows)
142+
row = rows[0]
143+
assert row["pico_col"] == "2025-01-01T00:00:00.123456789012Z"
144+
145+
146+
def test_list_rows_pico_truncate(
147+
bigquery_client: bigquery.Client, scalars_table_pico: str
148+
):
149+
# For a picosecond timestamp column, if the user does not explicitly set
150+
# timestamp_precision, values are returned truncated to microsecond precision.
151+
rows = bigquery_client.list_rows(scalars_table_pico)
152+
rows = list(rows)
153+
row = rows[0]
154+
assert row["pico_col"] == "1735689600123456"

tests/unit/_helpers/test_cell_data_parser.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -290,17 +290,26 @@ def test_bytes_to_py_w_base64_encoded_text(object_under_test):
290290
def test_timestamp_to_py_w_string_int_value(object_under_test):
291291
from google.cloud._helpers import _EPOCH
292292

293-
coerced = object_under_test.timestamp_to_py("1234567", object())
293+
coerced = object_under_test.timestamp_to_py("1234567", create_field())
294294
assert coerced == _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)
295295

296296

297297
def test_timestamp_to_py_w_int_value(object_under_test):
298298
from google.cloud._helpers import _EPOCH
299299

300-
coerced = object_under_test.timestamp_to_py(1234567, object())
300+
coerced = object_under_test.timestamp_to_py(1234567, create_field())
301301
assert coerced == _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)
302302

303303

304+
def test_timestamp_to_py_w_picosecond_precision(object_under_test):
305+
from google.cloud.bigquery import enums
306+
307+
pico_schema = create_field(timestamp_precision=enums.TimestampPrecision.PICOSECOND)
308+
pico_timestamp = "2025-01-01T00:00:00.123456789012Z"
309+
coerced = object_under_test.timestamp_to_py(pico_timestamp, pico_schema)
310+
assert coerced == pico_timestamp
311+
312+
304313
def test_datetime_to_py_w_string_value(object_under_test):
305314
coerced = object_under_test.datetime_to_py("2016-12-02T18:51:33", object())
306315
assert coerced == datetime.datetime(2016, 12, 2, 18, 51, 33)

tests/unit/job/test_load_config.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1072,19 +1072,26 @@ def test_timestamp_target_precision_missing(self):
10721072
def test_timestamp_target_precision_hit(self):
10731073
timestamp_target_precision = [6, 12]
10741074
config = self._get_target_class()()
1075-
config._properties["load"]["timestampTargetPrecision"] = timestamp_target_precision
1075+
config._properties["load"][
1076+
"timestampTargetPrecision"
1077+
] = timestamp_target_precision
10761078
self.assertEqual(config.timestamp_target_precision, timestamp_target_precision)
10771079

10781080
def test_timestamp_target_precision_setter(self):
10791081
timestamp_target_precision = [6, 12]
10801082
config = self._get_target_class()()
10811083
config.timestamp_target_precision = timestamp_target_precision
1082-
self.assertEqual(config._properties["load"]["timestampTargetPrecision"], timestamp_target_precision)
1084+
self.assertEqual(
1085+
config._properties["load"]["timestampTargetPrecision"],
1086+
timestamp_target_precision,
1087+
)
10831088

10841089
def test_timestamp_target_precision_setter_w_none(self):
10851090
timestamp_target_precision = [6, 12]
10861091
config = self._get_target_class()()
1087-
config._properties["load"]["timestampTargetPrecision"] = timestamp_target_precision
1092+
config._properties["load"][
1093+
"timestampTargetPrecision"
1094+
] = timestamp_target_precision
10881095
config.timestamp_target_precision = None
10891096
self.assertFalse("timestampTargetPrecision" in config._properties["load"])
10901097

0 commit comments

Comments
 (0)