|
32 | 32 | from pandas.core.dtypes.common import (
|
33 | 33 | ensure_str,
|
34 | 34 | is_string_dtype,
|
| 35 | + pandas_dtype, |
35 | 36 | )
|
36 | 37 | from pandas.core.dtypes.dtypes import PeriodDtype
|
37 | 38 |
|
|
43 | 44 | isna,
|
44 | 45 | notna,
|
45 | 46 | to_datetime,
|
| 47 | + ArrowDtype, |
46 | 48 | )
|
47 | 49 | from pandas.core.reshape.concat import concat
|
48 | 50 | from pandas.core.shared_docs import _shared_docs
|
@@ -942,29 +944,56 @@ def read(self) -> DataFrame | Series:
|
942 | 944 | obj: DataFrame | Series
|
943 | 945 | with self:
|
944 | 946 | if self.engine == "pyarrow":
|
945 |
| - pyarrow_json = import_optional_dependency("pyarrow.json") |
946 |
| - pa_table = pyarrow_json.read_json(self.data) |
947 |
| - return arrow_table_to_pandas(pa_table, dtype_backend=self.dtype_backend) |
| 947 | + obj = self._read_pyarrow() |
948 | 948 | elif self.engine == "ujson":
|
949 |
| - if self.lines: |
950 |
| - if self.chunksize: |
951 |
| - obj = concat(self) |
952 |
| - elif self.nrows: |
953 |
| - lines = list(islice(self.data, self.nrows)) |
954 |
| - lines_json = self._combine_lines(lines) |
955 |
| - obj = self._get_object_parser(lines_json) |
956 |
| - else: |
957 |
| - data = ensure_str(self.data) |
958 |
| - data_lines = data.split("\n") |
959 |
| - obj = self._get_object_parser(self._combine_lines(data_lines)) |
960 |
| - else: |
961 |
| - obj = self._get_object_parser(self.data) |
962 |
| - if self.dtype_backend is not lib.no_default: |
963 |
| - return obj.convert_dtypes( |
964 |
| - infer_objects=False, dtype_backend=self.dtype_backend |
965 |
| - ) |
966 |
| - else: |
967 |
| - return obj |
| 949 | + obj = self._read_ujson() |
| 950 | + |
| 951 | + return obj |
| 952 | + |
def _read_pyarrow(self) -> DataFrame:
    """
    Read JSON using the pyarrow engine.

    When ``self.dtype`` is a dict, every entry whose dtype resolves to an
    ``ArrowDtype`` is collected into an explicit pyarrow schema that is
    handed to the pyarrow JSON reader through ``ParseOptions``. Entries
    that resolve to any other dtype are not included in that schema.

    Returns
    -------
    DataFrame
        The result of converting the parsed pyarrow table with
        ``arrow_table_to_pandas`` using ``self.dtype_backend``.
    """
    pyarrow_json = import_optional_dependency("pyarrow.json")
    options = None

    if isinstance(self.dtype, dict):
        pa = import_optional_dependency("pyarrow")
        fields = []
        for field, dtype in self.dtype.items():
            # Resolve each requested dtype once and reuse the result for
            # both the ArrowDtype check and the pyarrow type lookup.
            pd_dtype = pandas_dtype(dtype)
            if isinstance(pd_dtype, ArrowDtype):
                fields.append((field, pd_dtype.pyarrow_dtype))

        schema = pa.schema(fields)
        options = pyarrow_json.ParseOptions(explicit_schema=schema)

    pa_table = pyarrow_json.read_json(self.data, parse_options=options)
    return arrow_table_to_pandas(pa_table, dtype_backend=self.dtype_backend)
| 973 | + |
| 974 | + def _read_ujson(self) -> DataFrame | Series: |
| 975 | + """ |
| 976 | + Read JSON using the ujson engine. |
| 977 | + """ |
| 978 | + if self.lines: |
| 979 | + if self.chunksize: |
| 980 | + obj = concat(self) |
| 981 | + elif self.nrows: |
| 982 | + lines = list(islice(self.data, self.nrows)) |
| 983 | + lines_json = self._combine_lines(lines) |
| 984 | + obj = self._get_object_parser(lines_json) |
| 985 | + else: |
| 986 | + data = ensure_str(self.data) |
| 987 | + data_lines = data.split("\n") |
| 988 | + obj = self._get_object_parser(self._combine_lines(data_lines)) |
| 989 | + else: |
| 990 | + obj = self._get_object_parser(self.data) |
| 991 | + if self.dtype_backend is not lib.no_default: |
| 992 | + return obj.convert_dtypes( |
| 993 | + infer_objects=False, dtype_backend=self.dtype_backend |
| 994 | + ) |
| 995 | + else: |
| 996 | + return obj |
968 | 997 |
|
969 | 998 | def _get_object_parser(self, json: str) -> DataFrame | Series:
|
970 | 999 | """
|
|
0 commit comments