From da90fcbcd4b762bf4d547cface5953b09c34d5ba Mon Sep 17 00:00:00 2001 From: HCA97 Date: Mon, 7 Aug 2023 22:03:04 +0200 Subject: [PATCH 1/2] feat: we can pass extra json.dumps args when using load_table_from_json --- google/cloud/bigquery/client.py | 12 +++++-- tests/unit/test_client.py | 59 +++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 11cceea42..258479cb1 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -2759,6 +2759,7 @@ def load_table_from_json( project: Optional[str] = None, job_config: Optional[LoadJobConfig] = None, timeout: ResumableTimeoutType = DEFAULT_TIMEOUT, + json_dumps_kwargs: Optional[Dict[str, Any]] = None, ) -> job.LoadJob: """Upload the contents of a table from a JSON string or dict. @@ -2811,7 +2812,8 @@ def load_table_from_json( Can also be passed as a tuple (connect_timeout, read_timeout). See :meth:`requests.Session.request` documentation for details. - + json_dumps_kwargs: + Extra keyword arguments for ``json.dumps``. Useful when you want to serialize datetime objects. Returns: google.cloud.bigquery.job.LoadJob: A new load job. 
@@ -2842,7 +2844,13 @@ def load_table_from_json( destination = _table_arg_to_table_ref(destination, default_project=self.project) - data_str = "\n".join(json.dumps(item, ensure_ascii=False) for item in json_rows) + if json_dumps_kwargs is None: + json_dumps_kwargs = {} + + data_str = "\n".join( + json.dumps(item, ensure_ascii=False, **json_dumps_kwargs) + for item in json_rows + ) encoded_str = data_str.encode() data_file = io.BytesIO(encoded_str) return self.load_table_from_file( diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index cf0aa4028..dd0b0aa47 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -8665,6 +8665,65 @@ def test_load_table_from_dataframe_with_csv_source_format(self): sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.CSV + def test_load_table_from_json_basic_use_with_json_dumps_kwargs(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + + def json_serial(obj): + """JSON serializer for objects not serializable by default json code + + Ref: https://stackoverflow.com/a/22238613 + """ + + if isinstance(obj, (datetime.datetime, datetime.date)): + return obj.isoformat() + raise TypeError("Type %s not serializable" % type(obj)) + + client = self._make_client() + + json_rows = [ + { + "name": "One", + "age": 11, + "birthday": datetime.date(2008, 9, 10), + "adult": False, + }, + { + "name": "Two", + "age": 22, + "birthday": datetime.date(1997, 8, 9), + "adult": True, + }, + ] + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with load_patch as load_table_from_file: + client.load_table_from_json( + json_rows, self.TABLE_REF, json_dumps_kwargs={"default": json_serial} + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + size=mock.ANY, + num_retries=_DEFAULT_NUM_RETRIES, + 
job_id=mock.ANY, + job_id_prefix=None, + location=client.location, + project=client.project, + job_config=mock.ANY, + timeout=DEFAULT_TIMEOUT, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON + assert sent_config.schema is None + assert sent_config.autodetect + def test_load_table_from_json_basic_use(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job From ca3c28371383c229a2b88ee2166a0e9b92e00d40 Mon Sep 17 00:00:00 2001 From: HCA97 Date: Tue, 8 Aug 2023 21:21:49 +0200 Subject: [PATCH 2/2] test: making code coverage 100 percent --- tests/unit/test_client.py | 47 ++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index dd0b0aa47..e61384236 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -110,6 +110,17 @@ def _make_list_partitons_meta_info(project, dataset_id, table_id, num_rows=0): } +def _json_serial_date_only(obj): + """JSON serializer for objects not serializable by default json code + + Ref: https://stackoverflow.com/a/22238613 + """ + + if isinstance(obj, (datetime.datetime, datetime.date)): + return obj.isoformat() + raise TypeError("Type %s not serializable" % type(obj)) + + class TestClient(unittest.TestCase): PROJECT = "PROJECT" @@ -8665,19 +8676,31 @@ def test_load_table_from_dataframe_with_csv_source_format(self): sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.CSV - def test_load_table_from_json_basic_use_with_json_dumps_kwargs(self): - from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES - from google.cloud.bigquery import job + def test_load_table_from_json_basic_use_with_json_dumps_kwargs_fail(self): + client = self._make_client() - def json_serial(obj): - """JSON serializer for objects 
not serializable by default json code + class Fail: + _fail = "This will fail" - Ref: https://stackoverflow.com/a/22238613 - """ + json_rows = [ + { + "name": "One", + "age": 11, + "birthday": datetime.date(2008, 9, 10), + "adult": Fail(), + } + ] + + with pytest.raises(TypeError): + client.load_table_from_json( + json_rows, + self.TABLE_REF, + json_dumps_kwargs={"default": _json_serial_date_only}, + ) - if isinstance(obj, (datetime.datetime, datetime.date)): - return obj.isoformat() - raise TypeError("Type %s not serializable" % type(obj)) + def test_load_table_from_json_basic_use_with_json_dumps_kwargs(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job client = self._make_client() @@ -8702,7 +8725,9 @@ def json_serial(obj): with load_patch as load_table_from_file: client.load_table_from_json( - json_rows, self.TABLE_REF, json_dumps_kwargs={"default": json_serial} + json_rows, + self.TABLE_REF, + json_dumps_kwargs={"default": _json_serial_date_only}, ) load_table_from_file.assert_called_once_with(