
Commit de0841e

REF: Make read_json less stateful

1 parent 42082a8 commit de0841e

File tree

3 files changed: +71 -93 lines changed

pandas/io/json/_json.py

Lines changed: 68 additions & 90 deletions
@@ -969,7 +969,7 @@ def read(self) -> DataFrame | Series:
             else:
                 return obj

-    def _get_object_parser(self, json) -> DataFrame | Series:
+    def _get_object_parser(self, json: str) -> DataFrame | Series:
         """
         Parses a json document into a pandas object.
         """
@@ -985,16 +985,14 @@ def _get_object_parser(self, json) -> DataFrame | Series:
             "date_unit": self.date_unit,
             "dtype_backend": self.dtype_backend,
         }
-        obj = None
         if typ == "frame":
-            obj = FrameParser(json, **kwargs).parse()
-
-        if typ == "series" or obj is None:
+            return FrameParser(json, **kwargs).parse()
+        elif typ == "series":
             if not isinstance(dtype, bool):
                 kwargs["dtype"] = dtype
-            obj = SeriesParser(json, **kwargs).parse()
-
-        return obj
+            return SeriesParser(json, **kwargs).parse()
+        else:
+            raise ValueError(f"{typ=} must be 'frame' or 'series'.")

     def close(self) -> None:
         """
@@ -1107,7 +1105,6 @@ def __init__(
         self.convert_dates = convert_dates
         self.date_unit = date_unit
         self.keep_default_dates = keep_default_dates
-        self.obj: DataFrame | Series | None = None
         self.dtype_backend = dtype_backend

     @final
@@ -1121,26 +1118,22 @@ def check_keys_split(self, decoded: dict) -> None:
             raise ValueError(f"JSON data had unexpected key(s): {bad_keys_joined}")

     @final
-    def parse(self):
-        self._parse()
+    def parse(self) -> DataFrame | Series:
+        obj = self._parse()

-        if self.obj is None:
-            return None
         if self.convert_axes:
-            self._convert_axes()
-        self._try_convert_types()
-        return self.obj
+            obj = self._convert_axes(obj)
+        obj = self._try_convert_types(obj)
+        return obj

-    def _parse(self) -> None:
+    def _parse(self) -> DataFrame | Series:
         raise AbstractMethodError(self)

     @final
-    def _convert_axes(self) -> None:
+    def _convert_axes(self, obj: DataFrame | Series) -> DataFrame | Series:
         """
         Try to convert axes.
         """
-        obj = self.obj
-        assert obj is not None  # for mypy
         for axis_name in obj._AXIS_ORDERS:
             ax = obj._get_axis(axis_name)
             ser = Series(ax, dtype=ax.dtype, copy=False)
@@ -1153,9 +1146,10 @@ def _convert_axes(self) -> None:
             )
             if result:
                 new_axis = Index(new_ser, dtype=new_ser.dtype, copy=False)
-                setattr(self.obj, axis_name, new_axis)
+                setattr(obj, axis_name, new_axis)
+        return obj

-    def _try_convert_types(self) -> None:
+    def _try_convert_types(self, obj: DataFrame | Series) -> DataFrame | Series:
         raise AbstractMethodError(self)

     @final
@@ -1292,42 +1286,31 @@ def _try_convert_to_date(self, data: Series) -> tuple[Series, bool]:
 class SeriesParser(Parser):
     _default_orient = "index"
     _split_keys = ("name", "index", "data")
-    obj: Series | None

-    def _parse(self) -> None:
+    def _parse(self) -> Series:
         data = ujson_loads(self.json, precise_float=self.precise_float)

         if self.orient == "split":
             decoded = {str(k): v for k, v in data.items()}
             self.check_keys_split(decoded)
-            self.obj = Series(**decoded)
+            return Series(**decoded)
         else:
-            self.obj = Series(data)
+            return Series(data)

-    def _try_convert_types(self) -> None:
-        if self.obj is None:
-            return
-        obj, result = self._try_convert_data(
-            "data", self.obj, convert_dates=self.convert_dates
-        )
-        if result:
-            self.obj = obj
+    def _try_convert_types(self, obj: Series) -> Series:
+        obj, _ = self._try_convert_data("data", obj, convert_dates=self.convert_dates)
+        return obj


 class FrameParser(Parser):
     _default_orient = "columns"
     _split_keys = ("columns", "index", "data")
-    obj: DataFrame | None

-    def _parse(self) -> None:
+    def _parse(self) -> DataFrame:
         json = self.json
         orient = self.orient

-        if orient == "columns":
-            self.obj = DataFrame(
-                ujson_loads(json, precise_float=self.precise_float), dtype=None
-            )
-        elif orient == "split":
+        if orient == "split":
             decoded = {
                 str(k): v
                 for k, v in ujson_loads(json, precise_float=self.precise_float).items()
@@ -1341,34 +1324,34 @@ def _parse(self) -> None:
                 orig_names,
                 is_potential_multi_index(orig_names, None),
             )
-            self.obj = DataFrame(dtype=None, **decoded)
+            return DataFrame(dtype=None, **decoded)
         elif orient == "index":
-            self.obj = DataFrame.from_dict(
+            return DataFrame.from_dict(
                 ujson_loads(json, precise_float=self.precise_float),
                 dtype=None,
                 orient="index",
             )
         elif orient == "table":
-            self.obj = parse_table_schema(json, precise_float=self.precise_float)
+            return parse_table_schema(json, precise_float=self.precise_float)
         else:
-            self.obj = DataFrame(
+            # includes orient == "columns"
+            return DataFrame(
                 ujson_loads(json, precise_float=self.precise_float), dtype=None
             )

+    @final
     def _process_converter(
         self,
+        obj: DataFrame,
         f: Callable[[Hashable, Series], tuple[Series, bool]],
         filt: Callable[[Hashable], bool] | None = None,
-    ) -> None:
+    ) -> DataFrame:
         """
         Take a conversion function and possibly recreate the frame.
         """
         if filt is None:
             filt = lambda col: True

-        obj = self.obj
-        assert obj is not None  # for mypy
-
         needs_new_obj = False
         new_obj = {}
         for i, (col, c) in enumerate(obj.items()):
@@ -1383,48 +1366,43 @@ def _process_converter(
             # possibly handle dup columns
             new_frame = DataFrame(new_obj, index=obj.index)
             new_frame.columns = obj.columns
-            self.obj = new_frame
+            return new_frame
+        return obj

-    def _try_convert_types(self) -> None:
-        if self.obj is None:
-            return
+    def _try_convert_types(self, obj: DataFrame) -> DataFrame:
         if self.convert_dates:
-            self._try_convert_dates()
-
-        self._process_converter(
-            lambda col, c: self._try_convert_data(col, c, convert_dates=False)
-        )
-
-    def _try_convert_dates(self) -> None:
-        if self.obj is None:
-            return
-
-        # our columns to parse
-        convert_dates_list_bool = self.convert_dates
-        if isinstance(convert_dates_list_bool, bool):
-            convert_dates_list_bool = []
-        convert_dates = set(convert_dates_list_bool)
-
-        def is_ok(col) -> bool:
-            """
-            Return if this col is ok to try for a date parse.
-            """
-            if col in convert_dates:
-                return True
-            if not self.keep_default_dates:
-                return False
-            if not isinstance(col, str):
+            # our columns to parse
+            convert_dates_list_bool = self.convert_dates
+            if isinstance(convert_dates_list_bool, bool):
+                convert_dates_list_bool = []
+            convert_dates = set(convert_dates_list_bool)
+
+            def is_ok(col) -> bool:
+                """
+                Return if this col is ok to try for a date parse.
+                """
+                if col in convert_dates:
+                    return True
+                if not self.keep_default_dates:
+                    return False
+                if not isinstance(col, str):
+                    return False
+
+                col_lower = col.lower()
+                if (
+                    col_lower.endswith(("_at", "_time"))
+                    or col_lower == "modified"
+                    or col_lower == "date"
+                    or col_lower == "datetime"
+                    or col_lower.startswith("timestamp")
+                ):
+                    return True
                 return False

-            col_lower = col.lower()
-            if (
-                col_lower.endswith(("_at", "_time"))
-                or col_lower == "modified"
-                or col_lower == "date"
-                or col_lower == "datetime"
-                or col_lower.startswith("timestamp")
-            ):
-                return True
-            return False
-
-        self._process_converter(lambda col, c: self._try_convert_to_date(c), filt=is_ok)
+            obj = self._process_converter(
+                obj, lambda col, c: self._try_convert_to_date(c), filt=is_ok
+            )
+
+        return self._process_converter(
+            obj, lambda col, c: self._try_convert_data(col, c, convert_dates=False)
+        )
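The thrust of the refactor above is that parse, _parse, _convert_axes, _try_convert_types, and _process_converter now pass the parsed object around explicitly instead of stashing it on self.obj and re-checking for None at every step. A minimal toy sketch of the before/after shape, in plain Python rather than pandas internals:

from __future__ import annotations

import json


class StatefulParser:
    """Old shape: parse() mutates self.obj, and every step re-checks it."""

    def __init__(self, text: str) -> None:
        self.text = text
        self.obj: dict | None = None

    def parse(self) -> dict | None:
        self.obj = json.loads(self.text)  # result stashed on the instance
        if self.obj is None:
            return None
        self._convert_types()  # mutates self.obj again
        return self.obj

    def _convert_types(self) -> None:
        assert self.obj is not None  # needed only to satisfy type checkers
        self.obj = {k.lower(): v for k, v in self.obj.items()}


class StatelessParser:
    """New shape: each step takes the object and returns the next one."""

    def __init__(self, text: str) -> None:
        self.text = text

    def parse(self) -> dict:
        obj = self._parse()
        return self._convert_types(obj)

    def _parse(self) -> dict:
        return json.loads(self.text)

    def _convert_types(self, obj: dict) -> dict:
        return {k.lower(): v for k, v in obj.items()}


print(StatelessParser('{"A": 1}').parse())  # {'a': 1}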

pandas/tests/io/json/test_pandas.py

Lines changed: 1 addition & 1 deletion
@@ -792,7 +792,7 @@ def test_frame_from_json_precise_float(self):

     def test_typ(self):
         s = Series(range(6), index=["a", "b", "c", "d", "e", "f"], dtype="int64")
-        result = read_json(StringIO(s.to_json()), typ=None)
+        result = read_json(StringIO(s.to_json()), typ="series")
         tm.assert_series_equal(result, s)

     def test_reconstruction_index(self):
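The test change reflects the new validation: with the old fall-through branch (if typ == "series" or obj is None) gone, typ values other than "frame" or "series" should now raise rather than silently use the Series parser. A usage sketch, assuming typ is not validated anywhere earlier in read_json:

from io import StringIO

import pandas as pd

s = pd.Series(range(3), index=["a", "b", "c"], dtype="int64")
payload = s.to_json()

# explicit typ="series" still round-trips the Series
result = pd.read_json(StringIO(payload), typ="series")
print(result.equals(s))  # True

# typ=None used to fall through to the Series parser; after this change it
# should reach the new `else` branch and raise
try:
    pd.read_json(StringIO(payload), typ=None)
except ValueError as err:
    print(err)  # typ=None must be 'frame' or 'series'.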

pandas/tests/io/json/test_readlines.py

Lines changed: 2 additions & 2 deletions
@@ -165,11 +165,11 @@ def test_readjson_chunks_series(request, engine):
     s = pd.Series({"A": 1, "B": 2})

     strio = StringIO(s.to_json(lines=True, orient="records"))
-    unchunked = read_json(strio, lines=True, typ="Series", engine=engine)
+    unchunked = read_json(strio, lines=True, typ="series", engine=engine)

     strio = StringIO(s.to_json(lines=True, orient="records"))
     with read_json(
-        strio, lines=True, typ="Series", chunksize=1, engine=engine
+        strio, lines=True, typ="series", chunksize=1, engine=engine
     ) as reader:
         chunked = pd.concat(reader)

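The readlines tests get the same treatment: typ is now compared exactly against "frame" and "series", so the capitalized "Series" spelling that the old fall-through happened to accept would now raise. A small sketch of the chunked, line-delimited round trip (default ujson engine assumed, engine argument omitted):

from io import StringIO

import pandas as pd

s = pd.Series({"A": 1, "B": 2})
payload = s.to_json(lines=True, orient="records")

# unchunked read of line-delimited JSON as a Series
unchunked = pd.read_json(StringIO(payload), lines=True, typ="series")

# chunked read: the reader yields one piece per chunk, concatenated back
with pd.read_json(
    StringIO(payload), lines=True, typ="series", chunksize=1
) as reader:
    chunked = pd.concat(reader)

print(unchunked.tolist() == chunked.tolist())  # True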