Commit 2d8198c

we can pass extra json.dumps args when using load_table_from_json
1 parent 0686848 commit 2d8198c

File tree

  google/cloud/bigquery/client.py
  tests/unit/test_client.py

2 files changed: +69 -2 lines changed

google/cloud/bigquery/client.py

Lines changed: 10 additions & 2 deletions
@@ -2759,6 +2759,7 @@ def load_table_from_json(
         project: Optional[str] = None,
         job_config: Optional[LoadJobConfig] = None,
         timeout: ResumableTimeoutType = DEFAULT_TIMEOUT,
+        json_dumps_kwargs: Optional[Dict[str, Any]] = None,
     ) -> job.LoadJob:
         """Upload the contents of a table from a JSON string or dict.

@@ -2811,7 +2812,8 @@ def load_table_from_json(
                 Can also be passed as a tuple (connect_timeout, read_timeout).
                 See :meth:`requests.Session.request` documentation for details.
-
+            json_dumps_kwargs:
+                Extra keyword arguments for ``json.dumps``. Useful when you want to serialize datetime objects.

         Returns:
             google.cloud.bigquery.job.LoadJob: A new load job.

@@ -2842,7 +2844,13 @@ def load_table_from_json(
         destination = _table_arg_to_table_ref(destination, default_project=self.project)

-        data_str = "\n".join(json.dumps(item, ensure_ascii=False) for item in json_rows)
+        if json_dumps_kwargs is None:
+            json_dumps_kwargs = {}
+
+        data_str = "\n".join(
+            json.dumps(item, ensure_ascii=False, **json_dumps_kwargs)
+            for item in json_rows
+        )
         encoded_str = data_str.encode()
         data_file = io.BytesIO(encoded_str)
         return self.load_table_from_file(
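
For context, a minimal usage sketch of the new parameter once this commit is applied; the destination table ID "my-project.my_dataset.my_table" is hypothetical, and json_serial mirrors the helper used in the test below:

    import datetime

    from google.cloud import bigquery

    client = bigquery.Client()

    def json_serial(obj):
        # Fallback serializer: json.dumps calls this for objects it cannot
        # encode natively, e.g. datetime.date / datetime.datetime.
        if isinstance(obj, (datetime.datetime, datetime.date)):
            return obj.isoformat()
        raise TypeError("Type %s not serializable" % type(obj))

    rows = [{"name": "One", "birthday": datetime.date(2008, 9, 10)}]

    # json_dumps_kwargs is forwarded to json.dumps for every row.
    load_job = client.load_table_from_json(
        rows,
        "my-project.my_dataset.my_table",  # hypothetical destination
        json_dumps_kwargs={"default": json_serial},
    )
    load_job.result()  # wait for the load job to finish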

tests/unit/test_client.py

Lines changed: 59 additions & 0 deletions
@@ -8665,6 +8665,65 @@ def test_load_table_from_dataframe_with_csv_source_format(self):
         sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
         assert sent_config.source_format == job.SourceFormat.CSV

+    def test_load_table_from_json_basic_use_with_json_dumps_kwargs(self):
+        from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+        from google.cloud.bigquery import job
+
+        def json_serial(obj):
+            """JSON serializer for objects not serializable by default json code
+
+            Ref: https://stackoverflow.com/a/22238613
+            """
+
+            if isinstance(obj, (datetime.datetime, datetime.date)):
+                return obj.isoformat()
+            raise TypeError("Type %s not serializable" % type(obj))
+
+        client = self._make_client()
+
+        json_rows = [
+            {
+                "name": "One",
+                "age": 11,
+                "birthday": datetime.date(2008, 9, 10),
+                "adult": False,
+            },
+            {
+                "name": "Two",
+                "age": 22,
+                "birthday": datetime.date(1997, 8, 9),
+                "adult": True,
+            },
+        ]
+
+        load_patch = mock.patch(
+            "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+        )
+
+        with load_patch as load_table_from_file:
+            client.load_table_from_json(
+                json_rows, self.TABLE_REF, json_dumps_kwargs={"default": json_serial}
+            )
+
+        load_table_from_file.assert_called_once_with(
+            client,
+            mock.ANY,
+            self.TABLE_REF,
+            size=mock.ANY,
+            num_retries=_DEFAULT_NUM_RETRIES,
+            job_id=mock.ANY,
+            job_id_prefix=None,
+            location=client.location,
+            project=client.project,
+            job_config=mock.ANY,
+            timeout=DEFAULT_TIMEOUT,
+        )
+
+        sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+        assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON
+        assert sent_config.schema is None
+        assert sent_config.autodetect
+
     def test_load_table_from_json_basic_use(self):
         from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
         from google.cloud.bigquery import job
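
The test relies on the standard default hook of json.dumps rather than anything BigQuery-specific; a stdlib-only sketch of what the forwarded kwargs do to each row:

    import datetime
    import json

    def json_serial(obj):
        if isinstance(obj, (datetime.datetime, datetime.date)):
            return obj.isoformat()
        raise TypeError("Type %s not serializable" % type(obj))

    row = {"name": "One", "age": 11, "birthday": datetime.date(2008, 9, 10)}

    # Without default=, json.dumps raises TypeError on the date object;
    # with it, each row serializes to one newline-delimited JSON line.
    print(json.dumps(row, ensure_ascii=False, default=json_serial))
    # {"name": "One", "age": 11, "birthday": "2008-09-10"}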
