Skip to content

Commit 5bca4b1

Browse files
Difference between calamine and openpyxl readers fixed
1 parent dcb5494 commit 5bca4b1

File tree

1 file changed

+18
-14
lines changed

1 file changed

+18
-14
lines changed

pandas/io/excel/_calamine.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,10 @@
     TYPE_CHECKING,
     Any,
     Union,
+    cast,
 )
 
+from pandas._typing import Scalar
 from pandas.compat._optional import import_optional_dependency
 from pandas.util._decorators import doc
 
@@ -28,13 +30,11 @@
 
     from pandas._typing import (
         FilePath,
-        NaTType,
         ReadBuffer,
-        Scalar,
         StorageOptions,
     )
 
-_CellValue = Union[int, float, str, bool, time, date, datetime, timedelta]
+_CellValueT = Union[int, float, str, bool, time, date, datetime, timedelta]
 
 
 class CalamineReader(BaseExcelReader["CalamineWorkbook"]):
@@ -75,8 +75,7 @@ def load_workbook(
         from python_calamine import load_workbook
 
         return load_workbook(
-            filepath_or_buffer,
-            **engine_kwargs,
+            filepath_or_buffer, **engine_kwargs # type: ignore[arg-type]
         )
 
     @property
9998

10099
def get_sheet_data(
101100
self, sheet: CalamineSheet, file_rows_needed: int | None = None
102-
) -> list[list[Scalar | NaTType | time]]:
103-
def _convert_cell(value: _CellValue) -> Scalar | NaTType | time:
101+
) -> list[list[Scalar]]:
102+
def _convert_cell(value: _CellValueT) -> Scalar:
103+
# Avoid explicit conversion to pd.Timestamp and pd.Timedelta
104104
if isinstance(value, float):
105105
val = int(value)
106106
if val == value:
107107
return val
108108
else:
109109
return value
110110
elif isinstance(value, date):
111-
return pd.Timestamp(value)
111+
return value
112112
elif isinstance(value, timedelta):
113-
return pd.Timedelta(value)
114-
elif isinstance(value, time):
115113
return value
114+
elif isinstance(value, time):
115+
# cast needed here because Scalar doesn't include datetime.time
116+
return cast(Scalar, value)
116117

117118
return value
118119

119-
rows: list[list[_CellValue]] = sheet.to_python(
120-
skip_empty_area=False, nrows=file_rows_needed
121-
)
122-
data = [[_convert_cell(cell) for cell in row] for row in rows]
120+
rows: list[list[_CellValueT]] = sheet.to_python(skip_empty_area=False)
121+
data: list[list[Scalar]] = []
122+
123+
for row in rows:
124+
data.append([_convert_cell(cell) for cell in row])
125+
if file_rows_needed is not None and len(data) >= file_rows_needed:
126+
break
123127

124128
return data

0 commit comments

Comments
 (0)