Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1175,6 +1175,7 @@ I/O
- Fix bug in ``on_bad_lines`` callable when returning too many fields: now emits
``ParserWarning`` and truncates extra fields regardless of ``index_col`` (:issue:`61837`)
- Bug in :func:`pandas.json_normalize` inconsistently handling non-dict items in ``data`` when ``max_level`` was set. The function will now raise a ``TypeError`` if ``data`` is a list containing non-dict items (:issue:`62829`)
- Bug in :func:`pandas.json_normalize` inconsistently handling non-string keys (e.g., ``int``) in ``meta``: a ``TypeError`` was raised only when ``record_path`` was specified. A ``TypeError`` is now raised regardless of whether ``record_path`` is ``None`` (:issue:`63019`)
- Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.columns` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`)
- Bug in :meth:`.io.common.is_fsspec_url` not recognizing chained fsspec URLs (:issue:`48978`)
- Bug in :meth:`DataFrame._repr_html_` which ignored the ``"display.float_format"`` option (:issue:`59876`)
Expand Down
33 changes: 33 additions & 0 deletions pandas/io/json/_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,37 @@ def _simple_json_normalize(
return normalized_json_object


def _validate_meta(meta: str | list[str | list[str]]) -> None:
"""
Validate that meta parameter contains only strings or lists of strings.

Parameters
----------
meta : str or list of str or list of list of str
The meta parameter to validate.

Raises
------
TypeError
If meta contains elements that are not strings or lists of strings.
"""
if isinstance(meta, str):
return
for item in meta:
if isinstance(item, list):
for subitem in item:
if not isinstance(subitem, str):
raise TypeError(
"All elements in nested meta paths must be strings. "
f"Found {type(subitem).__name__}: {subitem!r}"
)
elif not isinstance(item, str):
raise TypeError(
"All elements in 'meta' must be strings or lists of strings. "
f"Found {type(item).__name__}: {item!r}"
)


@set_module("pandas")
def json_normalize(
data: dict | list[dict] | Series,
Expand Down Expand Up @@ -437,6 +468,8 @@ def json_normalize(

Returns normalized data with columns prefixed with the given string.
"""
if meta is not None:
_validate_meta(meta)

def _pull_field(
js: dict[str, Any], spec: list | str, extract_record: bool = False
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/io/json/test_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -927,3 +927,21 @@ def test_series_non_zero_index(self):
index=[1, 2, 3],
)
tm.assert_frame_equal(result, expected)

def test_json_normalize_meta_string_validation(self):
    # GH 63019
    # One record carrying both a string key and an integer key, plus a
    # nested list that can serve as a record path.
    records = [{"a": 1, 12: "meta_value", "nested": [{"b": 2}]}]

    # Exercise each scenario without and then with ``record_path``.
    call_variants = ({}, {"record_path": ["nested"]})

    # A non-string meta key must be rejected the same way in both variants.
    for extra_kwargs in call_variants:
        with pytest.raises(TypeError, match="must be strings"):
            json_normalize(records, meta=[12], **extra_kwargs)

    # A string meta key is accepted and shows up as a column in both variants.
    for extra_kwargs in call_variants:
        frame = json_normalize(records, meta=["a"], **extra_kwargs)
        assert "a" in frame.columns
Loading