1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -629,6 +629,7 @@ I/O
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
 - Bug in :meth:`read_csv` where the order of ``na_values`` caused an inconsistency when ``na_values`` is a list of non-string values. (:issue:`59303`)
 - Bug in :meth:`read_excel` raising ``ValueError`` when passing an array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
+- Bug in :meth:`read_json` ignoring the given ``dtype`` when ``engine="pyarrow"`` (:issue:`59516`)
 - Bug in :meth:`read_json` not validating that the ``typ`` argument is exactly ``"frame"`` or ``"series"`` (:issue:`59124`)
 - Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`)
 - Bug in :meth:`read_stata` where extreme value integers were incorrectly interpreted as missing for format versions 111 and prior (:issue:`58130`)
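Illustrative sketch (not part of the diff) of the user-visible effect of the new read_json entry above; the in-memory buffer and dtype mapping are hypothetical, and engine="pyarrow" requires line-delimited JSON with lines=True:

# Hypothetical example: before the fix, the requested dtypes were silently
# ignored by the pyarrow engine; with it, they are honored.
import io

import pandas as pd

data = io.BytesIO(b'{"a": 1, "b": 2}\n{"a": 3, "b": 4}\n')
df = pd.read_json(
    data,
    dtype={"a": "int32[pyarrow]", "b": "int64[pyarrow]"},
    lines=True,
    engine="pyarrow",
    dtype_backend="pyarrow",
)
print(df.dtypes)  # a: int32[pyarrow], b: int64[pyarrow]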
15 changes: 14 additions & 1 deletion pandas/io/json/_json.py
@@ -32,6 +32,7 @@
 from pandas.core.dtypes.common import (
     ensure_str,
     is_string_dtype,
+    pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import PeriodDtype
 
@@ -939,7 +940,19 @@ def read(self) -> DataFrame | Series:
         with self:
             if self.engine == "pyarrow":
                 pyarrow_json = import_optional_dependency("pyarrow.json")
-                pa_table = pyarrow_json.read_json(self.data)
+                if isinstance(self.dtype, dict):
+                    pa = import_optional_dependency("pyarrow")
+                    fields = [
+                        (field, pandas_dtype(dtype).pyarrow_dtype)
+                        for field, dtype in self.dtype.items()
+                    ]
+                    schema = pa.schema(fields)
+                    pa_table = pyarrow_json.read_json(
+                        self.data,
+                        parse_options=pyarrow_json.ParseOptions(explicit_schema=schema),
+                    )
+                else:
+                    pa_table = pyarrow_json.read_json(self.data)
 
             mapping: type[ArrowDtype] | None | Callable
             if self.dtype_backend == "pyarrow":
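For context, a minimal standalone sketch (not part of the diff) of the mechanism the hunk above relies on: turning pandas ArrowDtype strings into pyarrow types and passing them to pyarrow.json.read_json as an explicit schema, so the parser honors the requested types instead of inferring them. The buffer contents and column names below are hypothetical; pyarrow must be installed.

import io

import pyarrow as pa
import pyarrow.json as pa_json

from pandas.api.types import pandas_dtype

data = io.BytesIO(b'{"a": 1, "b": 2}\n{"a": 3, "b": 4}\n')
dtype = {"a": "int32[pyarrow]", "b": "int64[pyarrow]"}

# Extract the underlying pyarrow type from each pandas ArrowDtype string.
schema = pa.schema(
    [(name, pandas_dtype(spec).pyarrow_dtype) for name, spec in dtype.items()]
)

# Without explicit_schema, pyarrow would infer int64 for both columns.
table = pa_json.read_json(
    data, parse_options=pa_json.ParseOptions(explicit_schema=schema)
)
print(table.schema)  # a: int32, b: int64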
25 changes: 24 additions & 1 deletion pandas/tests/io/json/test_pandas.py
@@ -18,6 +18,7 @@
 import pandas as pd
 from pandas import (
     NA,
+    ArrowDtype,
     DataFrame,
     DatetimeIndex,
     Index,
@@ -2163,7 +2164,7 @@ def test_read_json_dtype_backend(
 
     if dtype_backend == "pyarrow":
         pa = pytest.importorskip("pyarrow")
-        string_dtype = pd.ArrowDtype(pa.string())
+        string_dtype = ArrowDtype(pa.string())
     else:
         string_dtype = pd.StringDtype(string_storage)
 
@@ -2286,3 +2287,25 @@ def test_read_json_lines_rangeindex():
     result = read_json(StringIO(data), lines=True).index
     expected = RangeIndex(2)
     tm.assert_index_equal(result, expected, exact=True)
+
+
+def test_read_json_pyarrow_dtype(datapath):
+    dtype = {"a": "int32[pyarrow]", "b": "int64[pyarrow]"}
+
+    df = read_json(
+        datapath("io", "json", "data", "line_delimited.json"),
+        dtype=dtype,
+        lines=True,
+        engine="pyarrow",
+        dtype_backend="pyarrow",
+    )
+
+    result = df.dtypes
+    expected = Series(
+        [
+            ArrowDtype.construct_from_string("int32[pyarrow]"),
+            ArrowDtype.construct_from_string("int64[pyarrow]"),
+        ],
+        index=["a", "b"],
+    )
+    tm.assert_series_equal(result, expected)