Skip to content

Commit 4d9494e

Browse files
committed
check for stringIO type, format file
1 parent 4e7ebae commit 4d9494e

File tree

1 file changed

+34
-13
lines changed

1 file changed

+34
-13
lines changed

hdxms_datasets/reader.py

Lines changed: 34 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,9 @@ def get_backend():
4747
except ImportError:
4848
pass
4949

50-
raise ImportError("No suitable backend found. Please install pandas, polars, pyarrow or modin.")
50+
raise ImportError(
51+
"No suitable backend found. Please install pandas, polars, pyarrow or modin."
52+
)
5153

5254

5355
BACKEND = get_backend()
@@ -125,7 +127,9 @@ def read_hdexaminer_peptide_pool(source: Path | StringIO) -> nw.DataFrame:
125127
header_line = source.readline()
126128

127129
else:
128-
df = nw.read_csv(source.as_posix(), backend=BACKEND, skip_rows=1, has_header=True)
130+
df = nw.read_csv(
131+
source.as_posix(), backend=BACKEND, skip_rows=1, has_header=True
132+
)
129133
with open(source, "r") as fh:
130134
exposure_line = fh.readline()
131135
header_line = fh.readline()
@@ -135,7 +139,9 @@ def read_hdexaminer_peptide_pool(source: Path | StringIO) -> nw.DataFrame:
135139

136140
found_schema = df[:, 0:8].schema
137141
if found_schema != HDEXAMINER_PEPTIDE_POOL_INITIAL_SCHEMA:
138-
raise ValueError("HDX-Examiner peptide pool file has an unexpected columns schema.")
142+
raise ValueError(
143+
"HDX-Examiner peptide pool file has an unexpected columns schema."
144+
)
139145

140146
# find indices of exposure markers in header
141147
has_entry_with_end = [i for i, col in enumerate(exposure_columns) if col] + [
@@ -181,7 +187,7 @@ def read_hdexaminer_peptide_pool(source: Path | StringIO) -> nw.DataFrame:
181187
return final_output
182188

183189

184-
def read_csv(source: Path | IO | bytes, **kwargs) -> nw.DataFrame:
190+
def read_csv(source: Path | StringIO | bytes, **kwargs) -> nw.DataFrame:
185191
"""
186192
Read a CSV file and return a Narwhals DataFrame.
187193
@@ -199,7 +205,7 @@ def read_csv(source: Path | IO | bytes, **kwargs) -> nw.DataFrame:
199205
import polars as pl
200206

201207
return nw.from_native(pl.read_csv(source), **kwargs)
202-
elif isinstance(source, IO):
208+
elif isinstance(source, StringIO):
203209
try:
204210
import polars as pl
205211

@@ -213,7 +219,9 @@ def read_csv(source: Path | IO | bytes, **kwargs) -> nw.DataFrame:
213219
except ImportError:
214220
raise ValueError("No suitable backend found for reading file-like objects")
215221
else:
216-
raise TypeError("source must be a Path, bytes, or file-like object")
222+
raise TypeError(
223+
f"Source must be a Path, bytes, or file-like object, got: {type(source)}"
224+
)
217225

218226

219227
def hxms_line_generator(source: Path) -> Iterator[str]:
@@ -311,7 +319,9 @@ def _parse_hxms_TP_lines(lines: Iterable[str], sequence: str) -> nw.DataFrame:
311319

312320
used_columns = list(HXMS_SCHEMA)[: len(content)]
313321
data_dict: dict[str, Any] = dict(zip(used_columns, content))
314-
data_dict["sequence"] = sequence[int(data_dict["START"]) - 1 : int(data_dict["END"])]
322+
data_dict["sequence"] = sequence[
323+
int(data_dict["START"]) - 1 : int(data_dict["END"])
324+
]
315325
data_dict = _cast_envelope(data_dict)
316326
dicts.append(data_dict)
317327

@@ -321,11 +331,15 @@ def _parse_hxms_TP_lines(lines: Iterable[str], sequence: str) -> nw.DataFrame:
321331
continue
322332
content = _line_content(line)
323333
data_dict = dict(zip(used_columns, content))
324-
data_dict["sequence"] = sequence[int(data_dict["START"]) - 1 : int(data_dict["END"])]
334+
data_dict["sequence"] = sequence[
335+
int(data_dict["START"]) - 1 : int(data_dict["END"])
336+
]
325337
data_dict = _cast_envelope(data_dict)
326338
dicts.append(data_dict)
327339

328-
schema = nw.Schema({col: HXMS_SCHEMA[col] for col in used_columns} | {"sequence": nw.String()})
340+
schema = nw.Schema(
341+
{col: HXMS_SCHEMA[col] for col in used_columns} | {"sequence": nw.String()}
342+
)
329343
df = nw.from_dicts(dicts, schema=schema, backend=BACKEND)
330344

331345
return df
@@ -384,7 +398,9 @@ def parse_hxms_lines(lines: Iterable[str], read_content: bool = True) -> HXMSRes
384398
break
385399

386400
# the rest of the lines are data lines
387-
df = _parse_hxms_TP_lines(line_iter, sequence=result["METADATA"]["PROTEIN_SEQUENCE"])
401+
df = _parse_hxms_TP_lines(
402+
line_iter, sequence=result["METADATA"]["PROTEIN_SEQUENCE"]
403+
)
388404
result["DATA"] = df
389405

390406
# check read columns against expected columns
@@ -400,15 +416,20 @@ def parse_hxms_lines(lines: Iterable[str], read_content: bool = True) -> HXMSRes
400416

401417

402418
@overload
403-
def read_hxms(source: Path | IO | bytes, returns: Literal["HXMSResult"]) -> HXMSResult: ...
419+
def read_hxms(
420+
source: Path | IO | bytes, returns: Literal["HXMSResult"]
421+
) -> HXMSResult: ...
404422

405423

406424
@overload
407-
def read_hxms(source: Path | IO | bytes, returns: Literal["DataFrame"]) -> nw.DataFrame: ...
425+
def read_hxms(
426+
source: Path | IO | bytes, returns: Literal["DataFrame"]
427+
) -> nw.DataFrame: ...
408428

409429

410430
def read_hxms(
411-
source: Path | IO | bytes, returns: Literal["HXMSResult", "DataFrame"] = "HXMSResult"
431+
source: Path | IO | bytes,
432+
returns: Literal["HXMSResult", "DataFrame"] = "HXMSResult",
412433
) -> HXMSResult | nw.DataFrame:
413434
"""
414435
Read an HXMS file and return a HXMSResult or Narwhals DataFrame.

0 commit comments

Comments
 (0)