Skip to content

Commit 501337e

Browse files
committed
Update json_normalize to accept JSON str/bytes input (directly or as Series elements) and add tests
1 parent c1e57c9 commit 501337e

File tree

2 files changed

+94
-8
lines changed

2 files changed

+94
-8
lines changed

pandas/io/json/_normalize.py

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ def _simple_json_normalize(
267267

268268

269269
def json_normalize(
270-
data: dict | list[dict] | Series,
270+
data: dict | list[dict] | Series | str | bytes,
271271
record_path: str | list | None = None,
272272
meta: str | list[str | list[str]] | None = None,
273273
meta_prefix: str | None = None,
@@ -285,8 +285,8 @@ def json_normalize(
285285
286286
Parameters
287287
----------
288-
data : dict, list of dicts, or Series of dicts
289-
Unserialized JSON objects.
288+
data : dict, list of dicts, Series of dicts/JSON strings/bytes, or JSON string/bytes
289+
Unserialized JSON objects or JSON strings/bytes.
290290
record_path : str or list of str, default None
291291
Path in each object to list of records. If not passed, data will be
292292
assumed to be an array of records.
@@ -434,7 +434,28 @@ def json_normalize(
434434
1 2
435435
436436
Returns normalized data with columns prefixed with the given string.
437+
438+
>>> # JSON string input
439+
>>> json_str = '{"id": 1, "name": {"first": "John", "last": "Doe"}}'
440+
>>> pd.json_normalize(json_str)
441+
id name.first name.last
442+
0 1 John Doe
437443
"""
444+
if isinstance(data, (str, bytes)):
445+
import json
446+
data = json.loads(data)
447+
448+
if isinstance(data, Series):
449+
if data.empty:
450+
return DataFrame()
451+
452+
sample = data.iloc[0]
453+
if isinstance(sample, (str, bytes)):
454+
import json
455+
data = data.apply(json.loads)
456+
index = data.index
457+
else:
458+
index = None
438459

439460
def _pull_field(
440461
js: dict[str, Any], spec: list | str, extract_record: bool = False
@@ -485,11 +506,6 @@ def _pull_records(js: dict[str, Any], spec: list | str) -> list:
485506
)
486507
return result
487508

488-
if isinstance(data, Series):
489-
index = data.index
490-
else:
491-
index = None
492-
493509
if isinstance(data, list) and not data:
494510
return DataFrame()
495511
elif isinstance(data, dict):

pandas/tests/io/json/test_normalize.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,76 @@ def test_series_index(self, state_data):
569569
result = json_normalize(series, "counties")
570570
tm.assert_index_equal(result.index, idx.repeat([3, 2]))
571571

572+
def test_json_string_input(self):
573+
# GH61006: a JSON document passed as a str is accepted; a single JSON
# object yields one row, with nested keys flattened into dotted column
# names ("name.first", "name.last").
574+
json_str = '{"id": 1, "name": {"first": "John", "last": "Doe"}}'
575+
result = json_normalize(json_str)
576+
expected = DataFrame({
577+
"id": [1],
578+
"name.first": ["John"],
579+
"name.last": ["Doe"]
580+
})
581+
tm.assert_frame_equal(result, expected)
582+
583+
# A str holding a JSON array of objects yields one row per array element.
json_array_str = '''[
584+
{"id": 1, "name": {"first": "John", "last": "Doe"}},
585+
{"id": 2, "name": {"first": "Jane", "last": "Smith"}}
586+
]'''
587+
result = json_normalize(json_array_str)
588+
expected = DataFrame({
589+
"id": [1, 2],
590+
"name.first": ["John", "Jane"],
591+
"name.last": ["Doe", "Smith"]
592+
})
593+
tm.assert_frame_equal(result, expected)
594+
595+
def test_json_bytes_input(self):
596+
# GH61006: a JSON object passed as bytes is accepted and normalized to a
# one-row frame, with nested keys flattened into dotted column names.
597+
json_bytes = b'{"id": 1, "name": {"first": "John", "last": "Doe"}}'
598+
result = json_normalize(json_bytes)
599+
expected = DataFrame({
600+
"id": [1],
601+
"name.first": ["John"],
602+
"name.last": ["Doe"]
603+
})
604+
tm.assert_frame_equal(result, expected)
605+
606+
def test_series_json_string(self):
607+
# GH61006: a Series whose elements are JSON strings is accepted; each
# element is expected to become one row of the normalized frame.
608+
s = Series([
609+
'{"value": 0.0}',
610+
'{"value": 0.5}',
611+
'{"value": 1.0}'
612+
])
613+
result = json_normalize(s)
614+
expected = DataFrame({
615+
"value": [0.0, 0.5, 1.0]
616+
})
617+
tm.assert_frame_equal(result, expected)
618+
619+
def test_series_json_string_with_index(self):
620+
# GH61006: when the Series of JSON strings has its own index labels
# ('a', 'b'), the normalized frame is expected to carry the same labels.
621+
s = Series(
622+
['{"value": 0.0}', '{"value": 0.5}'],
623+
index=['a', 'b']
624+
)
625+
result = json_normalize(s)
626+
expected = DataFrame(
627+
{"value": [0.0, 0.5]},
628+
index=['a', 'b']
629+
)
630+
tm.assert_frame_equal(result, expected)
631+
632+
def test_invalid_json_string(self):
# Truncated JSON (the closing braces are missing) is expected to raise
# json.JSONDecodeError from json_normalize.
# NOTE(review): this relies on `json` being imported at the top of the test
# module; no such import is visible in this hunk -- confirm.
633+
invalid_json = '{"id": 1, "name": {"first": "John", "last": "Doe"'
634+
with pytest.raises(json.JSONDecodeError):
635+
json_normalize(invalid_json)
636+
637+
def test_non_json_string(self):
# A plain string that is not JSON at all is expected to raise
# json.JSONDecodeError from json_normalize.
# NOTE(review): this relies on `json` being imported at the top of the test
# module; no such import is visible in this hunk -- confirm.
637+
non_json = "Hello World"
638+
with pytest.raises(json.JSONDecodeError):
639+
json_normalize(non_json)
641+
572642

573643
class TestNestedToRecord:
574644
def test_flat_stays_flat(self):

0 commit comments

Comments
 (0)