diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 4ab20623cc561..b8b729c7bff8d 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1182,6 +1182,7 @@ I/O
 
 - Fix bug in ``on_bad_lines`` callable when returning too many fields: now emits ``ParserWarning`` and truncates extra fields regardless of ``index_col`` (:issue:`61837`)
 - Bug in :func:`pandas.json_normalize` inconsistently handling non-dict items in ``data`` when ``max_level`` was set. The function will now raise a ``TypeError`` if ``data`` is a list containing non-dict items (:issue:`62829`)
+- Bug in :func:`pandas.json_normalize` inconsistently handling non-string keys (e.g., ``int``) in ``meta``: a ``TypeError`` was raised when ``record_path`` was specified but not when ``record_path`` was ``None``. A ``TypeError`` with a descriptive message is now raised in both cases (:issue:`63019`)
 - Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.column` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`)
 - Bug in :meth:`.io.common.is_fsspec_url` not recognizing chained fsspec URLs (:issue:`48978`)
 - Bug in :meth:`DataFrame._repr_html_` which ignored the ``"display.float_format"`` option (:issue:`59876`)
diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py
index 6194e699c12a8..53f8764ee4c82 100644
--- a/pandas/io/json/_normalize.py
+++ b/pandas/io/json/_normalize.py
@@ -267,6 +267,48 @@ def _simple_json_normalize(
     return normalized_json_object
 
 
+def _validate_meta(meta: str | list[str | list[str]]) -> None:
+    """
+    Validate that meta parameter contains only strings or lists of strings.
+
+    Parameters
+    ----------
+    meta : str or list of str or list of list of str
+        The meta parameter to validate.
+
+    Raises
+    ------
+    TypeError
+        If meta is neither a string nor an iterable, or if it contains
+        elements that are not strings or lists of strings.
+    """
+    if isinstance(meta, str):
+        return
+    try:
+        items = iter(meta)
+    except TypeError:
+        # A bare scalar such as ``meta=12`` is not iterable; report it with the
+        # same message as a bad list entry instead of "object is not iterable".
+        raise TypeError(
+            "All elements in 'meta' must be strings or lists of strings. "
+            f"Found {type(meta).__name__}: {meta!r}"
+        ) from None
+    for item in items:
+        if isinstance(item, str):
+            continue
+        if not isinstance(item, list):
+            raise TypeError(
+                "All elements in 'meta' must be strings or lists of strings. "
+                f"Found {type(item).__name__}: {item!r}"
+            )
+        for subitem in item:
+            if not isinstance(subitem, str):
+                raise TypeError(
+                    "All elements in nested meta paths must be strings. "
+                    f"Found {type(subitem).__name__}: {subitem!r}"
+                )
+
+
 @set_module("pandas")
 def json_normalize(
     data: dict | list[dict] | Series,
@@ -437,6 +479,8 @@ def json_normalize(
 
     Returns normalized data with columns prefixed with the given string.
     """
+    if meta is not None:
+        _validate_meta(meta)
 
     def _pull_field(
         js: dict[str, Any], spec: list | str, extract_record: bool = False
diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
index f03fd235fef85..545820879e651 100644
--- a/pandas/tests/io/json/test_normalize.py
+++ b/pandas/tests/io/json/test_normalize.py
@@ -927,3 +927,22 @@ def test_series_non_zero_index(self):
             index=[1, 2, 3],
         )
         tm.assert_frame_equal(result, expected)
+
+    def test_json_normalize_meta_string_validation(self):
+        # GH 63019
+        data = [{"a": 1, 12: "meta_value", "nested": [{"b": 2}]}]
+
+        # Test non-string meta raises TypeError consistently
+        with pytest.raises(TypeError, match="must be strings"):
+            json_normalize(data, meta=[12])
+
+        with pytest.raises(TypeError, match="must be strings"):
+            json_normalize(data, record_path=["nested"], meta=[12])
+
+        # Non-string keys inside a nested meta path are rejected as well
+        with pytest.raises(TypeError, match="must be strings"):
+            json_normalize(data, record_path=["nested"], meta=[["a", 12]])
+
+        # A bare non-string scalar is reported the same way as a list entry
+        with pytest.raises(TypeError, match="must be strings"):
+            json_normalize(data, record_path=["nested"], meta=12)