-
-
Notifications
You must be signed in to change notification settings - Fork 19.1k
Typ parts of python parser #45015
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Typ parts of python parser #45015
Changes from all commits
d0bebe6
99b1285
c1da4a9
a3f674b
bfb096f
b019ff7
74421ed
0995482
ee2f77a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ | |
DefaultDict, | ||
Hashable, | ||
Iterator, | ||
List, | ||
Literal, | ||
Mapping, | ||
Sequence, | ||
|
@@ -37,6 +38,11 @@ | |
from pandas.core.dtypes.common import is_integer | ||
from pandas.core.dtypes.inference import is_dict_like | ||
|
||
from pandas import ( | ||
Index, | ||
MultiIndex, | ||
) | ||
|
||
from pandas.io.parsers.base_parser import ( | ||
ParserBase, | ||
parser_defaults, | ||
|
@@ -167,7 +173,7 @@ def __init__(self, f: ReadCsvBuffer[str] | list, **kwds): | |
) | ||
self.num = re.compile(regex) | ||
|
||
def _make_reader(self, f) -> None: | ||
def _make_reader(self, f: IO[str] | ReadCsvBuffer[str]) -> None: | ||
sep = self.delimiter | ||
|
||
if sep is None or len(sep) == 1: | ||
|
@@ -198,10 +204,11 @@ class MyDialect(csv.Dialect): | |
self.pos += 1 | ||
line = f.readline() | ||
lines = self._check_comments([[line]])[0] | ||
lines_str = cast(List[str], lines) | ||
|
||
# since `line` was a string, lines will be a list containing | ||
# only a single string | ||
line = lines[0] | ||
line = lines_str[0] | ||
|
||
self.pos += 1 | ||
self.line_pos += 1 | ||
|
@@ -233,7 +240,11 @@ def _read(): | |
# TextIOWrapper, mmap, None]") | ||
self.data = reader # type: ignore[assignment] | ||
|
||
def read(self, rows: int | None = None): | ||
def read( | ||
self, rows: int | None = None | ||
) -> tuple[ | ||
Index | None, Sequence[Hashable] | MultiIndex, Mapping[Hashable, ArrayLike] | ||
]: | ||
try: | ||
content = self._get_lines(rows) | ||
except StopIteration: | ||
|
@@ -273,9 +284,11 @@ def read(self, rows: int | None = None): | |
conv_data = self._convert_data(data) | ||
columns, conv_data = self._do_date_conversions(columns, conv_data) | ||
|
||
index, columns = self._make_index(conv_data, alldata, columns, indexnamerow) | ||
index, result_columns = self._make_index( | ||
conv_data, alldata, columns, indexnamerow | ||
) | ||
|
||
return index, columns, conv_data | ||
return index, result_columns, conv_data | ||
|
||
def _exclude_implicit_index( | ||
self, | ||
|
@@ -586,7 +599,7 @@ def _handle_usecols( | |
self._col_indices = sorted(col_indices) | ||
return columns | ||
|
||
def _buffered_line(self): | ||
def _buffered_line(self) -> list[Scalar]: | ||
""" | ||
Return a line from buffer, filling buffer if required. | ||
""" | ||
|
@@ -876,7 +889,9 @@ def _clear_buffer(self) -> None: | |
|
||
_implicit_index = False | ||
|
||
def _get_index_name(self, columns: list[Hashable]): | ||
def _get_index_name( | ||
self, columns: list[Hashable] | ||
) -> tuple[list[Hashable] | None, list[Hashable], list[Hashable]]: | ||
""" | ||
Try several cases to get lines: | ||
|
||
|
@@ -941,8 +956,8 @@ def _get_index_name(self, columns: list[Hashable]): | |
|
||
else: | ||
# Case 2 | ||
(index_name, columns_, self.index_col) = self._clean_index_names( | ||
columns, self.index_col, self.unnamed_cols | ||
(index_name, _, self.index_col) = self._clean_index_names( | ||
columns, self.index_col | ||
) | ||
|
||
return index_name, orig_names, columns | ||
|
@@ -1034,7 +1049,7 @@ def _rows_to_cols(self, content: list[list[Scalar]]) -> list[np.ndarray]: | |
] | ||
return zipped_content | ||
|
||
def _get_lines(self, rows: int | None = None): | ||
def _get_lines(self, rows: int | None = None) -> list[list[Scalar]]: | ||
lines = self.buf | ||
new_rows = None | ||
|
||
|
@@ -1131,7 +1146,7 @@ class FixedWidthReader(abc.Iterator): | |
|
||
def __init__( | ||
self, | ||
f: IO[str], | ||
f: IO[str] | ReadCsvBuffer[str], | ||
colspecs: list[tuple[int, int]] | Literal["infer"], | ||
delimiter: str | None, | ||
comment: str | None, | ||
|
@@ -1228,14 +1243,16 @@ def detect_colspecs( | |
return edge_pairs | ||
|
||
def __next__(self) -> list[str]: | ||
# Argument 1 to "next" has incompatible type "Union[IO[str], | ||
# ReadCsvBuffer[str]]"; expected "SupportsNext[str]" | ||
if self.buffer is not None: | ||
try: | ||
line = next(self.buffer) | ||
except StopIteration: | ||
self.buffer = None | ||
line = next(self.f) | ||
line = next(self.f) # type: ignore[arg-type] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @twoertwein Does ReadCsvBuffer support `__next__` in addition to `__iter__`? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As far as I understand: As long as There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hm weird, mypy complains about There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm sorry, you are right! I think only from io import StringIO
import pandas as pd
class Test:
    """Minimal file-like wrapper around an in-memory ``StringIO``.

    The class forwards a small set of file methods to the wrapped buffer
    and defines ``__iter__`` but no ``__next__``, so it can be used to
    check whether a consumer needs more than iteration support.
    """

    def __init__(self):
        # Two CSV rows: a header line and one data line.
        self.buffer = StringIO("a,b,c\n0,1,2")

    @property
    def mode(self):
        """Forward ``mode`` to the wrapped buffer."""
        return self.buffer.mode

    def fileno(self):
        """Forward ``fileno()`` to the wrapped buffer."""
        return self.buffer.fileno()

    def seek(self, __offset, __whence=-1):
        """Move the stream position of the wrapped buffer.

        ``__whence`` defaults to the sentinel ``-1``, meaning "not given";
        in that case only the offset is forwarded so the buffer applies
        its own default. Returns whatever the buffer's ``seek`` returns.
        """
        # Delegate to the wrapped buffer; calling ``self.seek`` here would
        # recurse without ever terminating.
        if __whence == -1:
            return self.buffer.seek(__offset)
        return self.buffer.seek(__offset, __whence)

    def seekable(self):
        """Forward ``seekable()`` to the wrapped buffer."""
        return self.buffer.seekable()

    def tell(self):
        """Return the current stream position of the wrapped buffer."""
        return self.buffer.tell()

    def read(self, __n=None):
        """Read up to ``__n`` characters (everything when ``None``) and return them."""
        return self.buffer.read(__n)

    def __iter__(self):
        """Return the wrapped buffer's line iterator."""
        return self.buffer.__iter__()

    def readline(self):
        """Read and return a single line from the wrapped buffer."""
        return self.buffer.readline()

    @property
    def closed(self):
        """Whether the wrapped buffer is closed."""
        # ``closed`` is an attribute on the buffer, not a method.
        return self.buffer.closed
print(pd.read_csv(Test(), engine="python")) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep, I think this is only called from read_fwf. Can we add `__next__` to ReadCsvBuffer without breaking anything (from a correctness standpoint)? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Would be fine with adding it or (probably better) creating Also happy to have the ignores for now and address this in a later PR. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Let's go with the ignore for now then, until we have typed more. Can probably make a better decision then. |
||
else: | ||
line = next(self.f) | ||
line = next(self.f) # type: ignore[arg-type] | ||
# Note: 'colspecs' is a sequence of half-open intervals. | ||
return [line[fromm:to].strip(self.delimiter) for (fromm, to) in self.colspecs] | ||
|
||
|
@@ -1252,7 +1269,7 @@ def __init__(self, f: ReadCsvBuffer[str], **kwds) -> None: | |
self.infer_nrows = kwds.pop("infer_nrows") | ||
PythonParser.__init__(self, f, **kwds) | ||
|
||
def _make_reader(self, f: IO[str]) -> None: | ||
def _make_reader(self, f: IO[str] | ReadCsvBuffer[str]) -> None: | ||
self.data = FixedWidthReader( | ||
f, | ||
self.colspecs, | ||
|
Uh oh!
There was an error while loading. Please reload this page.