1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -629,6 +629,7 @@ I/O
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
 - Bug in :meth:`read_csv` where the order of ``na_values`` caused an inconsistency when ``na_values`` is a list of non-string values. (:issue:`59303`)
 - Bug in :meth:`read_excel` raising ``ValueError`` when passing an array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
+- Bug in :meth:`read_json` ignoring the given ``dtype`` when ``engine="pyarrow"`` (:issue:`59516`)
 - Bug in :meth:`read_json` not validating that the ``typ`` argument is exactly ``"frame"`` or ``"series"`` (:issue:`59124`)
 - Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
 - Bug in :meth:`read_stata` where extreme value integers were incorrectly interpreted as missing for format versions 111 and prior (:issue:`58130`)
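Illustrative sketch (not part of the diff) of the user-visible effect of the new read_json entry above; the in-memory buffer and dtype mapping are hypothetical, and engine="pyarrow" requires line-delimited JSON with lines=True:

# Hypothetical example: before the fix, the requested dtypes were silently
# ignored by the pyarrow engine; with it, they are honored.
import io

import pandas as pd

data = io.BytesIO(b'{"a": 1, "b": 2}\n{"a": 3, "b": 4}\n')
df = pd.read_json(
    data,
    dtype={"a": "int32[pyarrow]", "b": "int64[pyarrow]"},
    lines=True,
    engine="pyarrow",
    dtype_backend="pyarrow",
)
print(df.dtypes)  # a: int32[pyarrow], b: int64[pyarrow]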
15 changes: 14 additions & 1 deletion pandas/io/json/_json.py
@@ -32,6 +32,7 @@
 from pandas.core.dtypes.common import (
     ensure_str,
     is_string_dtype,
+    pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import PeriodDtype
 
@@ -939,7 +940,19 @@ def read(self) -> DataFrame | Series:
         with self:
             if self.engine == "pyarrow":
                 pyarrow_json = import_optional_dependency("pyarrow.json")
-                pa_table = pyarrow_json.read_json(self.data)
+                if isinstance(self.dtype, dict):
+                    pa = import_optional_dependency("pyarrow")
+                    fields = [
+                        (field, pandas_dtype(dtype).pyarrow_dtype)
+                        for field, dtype in self.dtype.items()
+                    ]
+                    schema = pa.schema(fields)
+                    pa_table = pyarrow_json.read_json(
+                        self.data,
+                        parse_options=pyarrow_json.ParseOptions(explicit_schema=schema),
+                    )
+                else:
+                    pa_table = pyarrow_json.read_json(self.data)
 
             mapping: type[ArrowDtype] | None | Callable
             if self.dtype_backend == "pyarrow":
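For context, a minimal standalone sketch (not part of the diff) of the mechanism the hunk above relies on: turning pandas ArrowDtype strings into pyarrow types and passing them to pyarrow.json.read_json as an explicit schema, so the parser honors the requested types instead of inferring them. The buffer contents and column names below are hypothetical; pyarrow must be installed.

import io

import pyarrow as pa
import pyarrow.json as pa_json

from pandas.api.types import pandas_dtype

data = io.BytesIO(b'{"a": 1, "b": 2}\n{"a": 3, "b": 4}\n')
dtype = {"a": "int32[pyarrow]", "b": "int64[pyarrow]"}

# Extract the underlying pyarrow type from each pandas ArrowDtype string.
schema = pa.schema(
    [(name, pandas_dtype(spec).pyarrow_dtype) for name, spec in dtype.items()]
)

# Without explicit_schema, pyarrow would infer int64 for both columns.
table = pa_json.read_json(
    data, parse_options=pa_json.ParseOptions(explicit_schema=schema)
)
print(table.schema)  # a: int32, b: int64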
25 changes: 24 additions & 1 deletion pandas/tests/io/json/test_pandas.py
@@ -18,6 +18,7 @@
 import pandas as pd
 from pandas import (
     NA,
+    ArrowDtype,
     DataFrame,
     DatetimeIndex,
     Index,
@@ -2163,7 +2164,7 @@ def test_read_json_dtype_backend(
 
     if dtype_backend == "pyarrow":
         pa = pytest.importorskip("pyarrow")
-        string_dtype = pd.ArrowDtype(pa.string())
+        string_dtype = ArrowDtype(pa.string())
     else:
         string_dtype = pd.StringDtype(string_storage)
 
@@ -2286,3 +2287,25 @@ def test_read_json_lines_rangeindex():
     result = read_json(StringIO(data), lines=True).index
     expected = RangeIndex(2)
     tm.assert_index_equal(result, expected, exact=True)
+
+
+def test_read_json_pyarrow_dtype(datapath):
+    dtype = {"a": "int32[pyarrow]", "b": "int64[pyarrow]"}
+
+    df = read_json(
+        datapath("io", "json", "data", "line_delimited.json"),
+        dtype=dtype,
+        lines=True,
+        engine="pyarrow",
+        dtype_backend="pyarrow",
+    )
+
+    result = df.dtypes
+    expected = Series(
+        [
+            ArrowDtype.construct_from_string("int32[pyarrow]"),
+            ArrowDtype.construct_from_string("int64[pyarrow]"),
+        ],
+        index=["a", "b"],
+    )
+    tm.assert_series_equal(result, expected)