Skip to content

Commit 3220439

Browse files
committed
add tests and list_rows() support
1 parent f4c9b6a commit 3220439

File tree

11 files changed

+149
-9
lines changed

11 files changed

+149
-9
lines changed

google/cloud/bigquery/_helpers.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
from google.cloud._helpers import _RFC3339_MICROS
3333
from google.cloud._helpers import _RFC3339_NO_FRACTION
3434
from google.cloud._helpers import _to_bytes
35+
from google.cloud.bigquery import enums
36+
3537
from google.auth import credentials as ga_credentials # type: ignore
3638
from google.api_core import client_options as client_options_lib
3739

@@ -253,7 +255,10 @@ def bytes_to_py(self, value, field):
253255
return base64.standard_b64decode(_to_bytes(value))
254256

255257
def timestamp_to_py(self, value, field):
256-
"""Coerce 'value' to a datetime, if set or not nullable."""
258+
"""Coerce 'value' to a datetime, if set or not nullable. If the timestamp
259+
is of picosecond precision, preserve the string format."""
260+
if field.timestamp_precision == enums.TimestampPrecision.PICOSECOND:
261+
return value
257262
if _not_null(value, field):
258263
# value will be a integer in seconds, to microsecond precision, in UTC.
259264
return _datetime_from_microseconds(int(value))

google/cloud/bigquery/client.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4062,6 +4062,8 @@ def list_rows(
40624062
page_size: Optional[int] = None,
40634063
retry: retries.Retry = DEFAULT_RETRY,
40644064
timeout: TimeoutType = DEFAULT_TIMEOUT,
4065+
*,
4066+
timestamp_precision: Union[enums.TimestampPrecision, int, None] = None,
40654067
) -> RowIterator:
40664068
"""List the rows of the table.
40674069
@@ -4110,6 +4112,11 @@ def list_rows(
41104112
before using ``retry``.
41114113
If multiple requests are made under the hood, ``timeout``
41124114
applies to each individual request.
4115+
timestamp_precision (Optional[enums.TimestampPrecision]):
4116+
[Private Preview] If set to `enums.TimestampPrecision.PICOSECOND`,
4117+
timestamp columns of picosecond precision will be returned with
4118+
full precision. Otherwise, timestamp values will be truncated to microsecond
4119+
precision.
41134120
41144121
Returns:
41154122
google.cloud.bigquery.table.RowIterator:
@@ -4144,6 +4151,13 @@ def list_rows(
41444151
params["startIndex"] = start_index
41454152

41464153
params["formatOptions.useInt64Timestamp"] = True
4154+
4155+
if timestamp_precision == enums.TimestampPrecision.PICOSECOND:
4156+
# Cannot specify both use_int64_timestamp and timestamp_output_format.
4157+
del params["formatOptions.useInt64Timestamp"]
4158+
4159+
params["formatOptions.timestampOutputFormat"] = "ISO8601_STRING"
4160+
41474161
row_iterator = RowIterator(
41484162
client=self,
41494163
api_request=functools.partial(self._call_api, retry, timeout=timeout),

google/cloud/bigquery/job/load.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -759,7 +759,6 @@ def column_name_character_map(self, value: Optional[str]):
759759
value = ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED
760760
self._set_sub_prop("columnNameCharacterMap", value)
761761

762-
763762
@property
764763
def timestamp_target_precision(self) -> Optional[list[int]]:
765764
"""Optional[list[int]]: [Private Preview] Precisions (maximum number of
@@ -776,7 +775,7 @@ def timestamp_target_precision(self) -> Optional[list[int]]:
776775
detected TIMESTAMP columns that have more than 6 digits of
777776
subseconds.
778777
[12]: timestamp(12) for all auto detected TIMESTAMP columns.
779-
778+
780779
The order of the elements in this array is ignored. Inputs that have
781780
higher precision than the highest target precision in this array will
782781
be truncated.

tests/data/pico.csv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
2025-01-01T00:00:00.123456789012Z
2+
2025-01-02T00:00:00.123456789012Z
3+
2025-01-03T00:00:00.123456789012Z

tests/data/pico_schema.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
[
2+
{
3+
"name": "pico_col",
4+
"type": "TIMESTAMP",
5+
"mode": "NULLABLE",
6+
"timestampPrecision": "12"
7+
}
8+
]

tests/system/conftest.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,12 +98,14 @@ def load_scalars_table(
9898
data_path: str = "scalars.jsonl",
9999
source_format=enums.SourceFormat.NEWLINE_DELIMITED_JSON,
100100
schema_source="scalars_schema.json",
101+
timestamp_target_precision=None,
101102
) -> str:
102103
schema = bigquery_client.schema_from_json(DATA_DIR / schema_source)
103104
table_id = data_path.replace(".", "_") + hex(random.randrange(1000000))
104105
job_config = bigquery.LoadJobConfig()
105106
job_config.schema = schema
106107
job_config.source_format = source_format
108+
job_config.timestamp_target_precision = timestamp_target_precision
107109
full_table_id = f"{project_id}.{dataset_id}.{table_id}"
108110
with open(DATA_DIR / data_path, "rb") as data_file:
109111
job = bigquery_client.load_table_from_file(
@@ -169,6 +171,23 @@ def scalars_table_csv(
169171
bigquery_client.delete_table(full_table_id, not_found_ok=True)
170172

171173

174+
@pytest.fixture(scope="session")
175+
def scalars_table_pico(
176+
bigquery_client: bigquery.Client, project_id: str, dataset_id: str
177+
):
178+
full_table_id = load_scalars_table(
179+
bigquery_client,
180+
project_id,
181+
dataset_id,
182+
data_path="pico.csv",
183+
source_format=enums.SourceFormat.CSV,
184+
schema_source="pico_schema.json",
185+
timestamp_target_precision=[12],
186+
)
187+
yield full_table_id
188+
bigquery_client.delete_table(full_table_id, not_found_ok=True)
189+
190+
172191
@pytest.fixture
173192
def test_table_name(request, replace_non_anum=re.compile(r"[^a-zA-Z0-9_]").sub):
174193
return replace_non_anum("_", request.node.name)

tests/system/test_client.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1295,6 +1295,29 @@ def test_load_table_from_json_schema_autodetect_table_exists(self):
12951295
self.assertEqual(tuple(table.schema), table_schema)
12961296
self.assertEqual(table.num_rows, 2)
12971297

1298+
def test_load_table_from_csv_w_picosecond_timestamp(self):
1299+
dataset_id = _make_dataset_id("bq_system_test")
1300+
self.temp_dataset(dataset_id)
1301+
table_id = "{}.{}.load_table_from_json_basic_use".format(
1302+
Config.CLIENT.project, dataset_id
1303+
)
1304+
1305+
table_schema = Config.CLIENT.schema_from_json(DATA_PATH / "pico_schema.json")
1306+
# create the table before loading so that the column order is predictable
1307+
table = helpers.retry_403(Config.CLIENT.create_table)(
1308+
Table(table_id, schema=table_schema)
1309+
)
1310+
self.to_delete.insert(0, table)
1311+
1312+
# do not pass an explicit job config to trigger automatic schema detection
1313+
with open(DATA_PATH / "pico.csv", "rb") as f:
1314+
load_job = Config.CLIENT.load_table_from_file(f, table_id)
1315+
load_job.result()
1316+
1317+
table = Config.CLIENT.get_table(table)
1318+
self.assertEqual(list(table.schema), table_schema)
1319+
self.assertEqual(table.num_rows, 3)
1320+
12981321
def test_load_avro_from_uri_then_dump_table(self):
12991322
from google.cloud.bigquery.job import CreateDisposition
13001323
from google.cloud.bigquery.job import SourceFormat

tests/system/test_list_rows.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,3 +132,23 @@ def test_list_rows_range(bigquery_client: bigquery.Client, scalars_table_csv: st
132132

133133
row_null = rows[1]
134134
assert row_null["range_date"] is None
135+
136+
137+
def test_list_rows_pico(bigquery_client: bigquery.Client, scalars_table_pico: str):
138+
rows = bigquery_client.list_rows(
139+
scalars_table_pico, timestamp_precision=enums.TimestampPrecision.PICOSECOND
140+
)
141+
rows = list(rows)
142+
row = rows[0]
143+
assert row["pico_col"] == "2025-01-01T00:00:00.123456789012Z"
144+
145+
146+
def test_list_rows_pico_truncate(
147+
bigquery_client: bigquery.Client, scalars_table_pico: str
148+
):
149+
# For a picosecond timestamp column, if the user does not explicitly set
150+
# timestamp_precision, values are returned truncated to microsecond precision.
151+
rows = bigquery_client.list_rows(scalars_table_pico)
152+
rows = list(rows)
153+
row = rows[0]
154+
assert row["pico_col"] == "1735689600123456"

tests/unit/_helpers/test_cell_data_parser.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -290,17 +290,26 @@ def test_bytes_to_py_w_base64_encoded_text(object_under_test):
290290
def test_timestamp_to_py_w_string_int_value(object_under_test):
291291
from google.cloud._helpers import _EPOCH
292292

293-
coerced = object_under_test.timestamp_to_py("1234567", object())
293+
coerced = object_under_test.timestamp_to_py("1234567", create_field())
294294
assert coerced == _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)
295295

296296

297297
def test_timestamp_to_py_w_int_value(object_under_test):
298298
from google.cloud._helpers import _EPOCH
299299

300-
coerced = object_under_test.timestamp_to_py(1234567, object())
300+
coerced = object_under_test.timestamp_to_py(1234567, create_field())
301301
assert coerced == _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)
302302

303303

304+
def test_timestamp_to_py_w_picosecond_precision(object_under_test):
305+
from google.cloud.bigquery import enums
306+
307+
pico_schema = create_field(timestamp_precision=enums.TimestampPrecision.PICOSECOND)
308+
pico_timestamp = "2025-01-01T00:00:00.123456789012Z"
309+
coerced = object_under_test.timestamp_to_py(pico_timestamp, pico_schema)
310+
assert coerced == pico_timestamp
311+
312+
304313
def test_datetime_to_py_w_string_value(object_under_test):
305314
coerced = object_under_test.datetime_to_py("2016-12-02T18:51:33", object())
306315
assert coerced == datetime.datetime(2016, 12, 2, 18, 51, 33)

tests/unit/job/test_load_config.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1072,19 +1072,26 @@ def test_timestamp_target_precision_missing(self):
10721072
def test_timestamp_target_precision_hit(self):
10731073
timestamp_target_precision = [6, 12]
10741074
config = self._get_target_class()()
1075-
config._properties["load"]["timestampTargetPrecision"] = timestamp_target_precision
1075+
config._properties["load"][
1076+
"timestampTargetPrecision"
1077+
] = timestamp_target_precision
10761078
self.assertEqual(config.timestamp_target_precision, timestamp_target_precision)
10771079

10781080
def test_timestamp_target_precision_setter(self):
10791081
timestamp_target_precision = [6, 12]
10801082
config = self._get_target_class()()
10811083
config.timestamp_target_precision = timestamp_target_precision
1082-
self.assertEqual(config._properties["load"]["timestampTargetPrecision"], timestamp_target_precision)
1084+
self.assertEqual(
1085+
config._properties["load"]["timestampTargetPrecision"],
1086+
timestamp_target_precision,
1087+
)
10831088

10841089
def test_timestamp_target_precision_setter_w_none(self):
10851090
timestamp_target_precision = [6, 12]
10861091
config = self._get_target_class()()
1087-
config._properties["load"]["timestampTargetPrecision"] = timestamp_target_precision
1092+
config._properties["load"][
1093+
"timestampTargetPrecision"
1094+
] = timestamp_target_precision
10881095
config.timestamp_target_precision = None
10891096
self.assertFalse("timestampTargetPrecision" in config._properties["load"])
10901097

0 commit comments

Comments
 (0)