Skip to content

Commit 24dad24

Browse files
nathan-chappell (Nathan Chappell), MthwRobinson
authored
chore: changed type IO to IO[bytes] (#878)
Co-authored-by: Nathan Chappell <[email protected]> Co-authored-by: Matt Robinson <[email protected]>
1 parent dc6d7d7 commit 24dad24

File tree

26 files changed

+37
-37
lines changed

26 files changed

+37
-37
lines changed

unstructured/file_utils/encoding.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ def format_encoding_str(encoding: str) -> str:
5151

5252
def detect_file_encoding(
5353
filename: str = "",
54-
file: Optional[Union[bytes, IO]] = None,
54+
file: Optional[Union[bytes, IO[bytes]]] = None,
5555
) -> Tuple[str, str]:
5656
if filename:
5757
with open(filename, "rb") as f:
@@ -98,7 +98,7 @@ def detect_file_encoding(
9898

9999
def read_txt_file(
100100
filename: str = "",
101-
file: Optional[Union[bytes, IO]] = None,
101+
file: Optional[Union[bytes, IO[bytes]]] = None,
102102
encoding: Optional[str] = None,
103103
) -> Tuple[str, str]:
104104
"""Extracts document metadata from a plain text document."""

unstructured/file_utils/file_conversion.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ def convert_file_to_text(filename: str, source_format: str, target_format: str)
2525
def convert_file_to_html_text(
2626
source_format: str,
2727
filename: Optional[str] = None,
28-
file: Optional[IO] = None,
28+
file: Optional[IO[bytes]] = None,
2929
) -> str:
3030
"""Converts a document to HTML raw text. Enables the document to be
3131
processed using the partition_html function."""

unstructured/file_utils/filetype.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -207,7 +207,7 @@ def _resolve_symlink(file_path):
207207
def detect_filetype(
208208
filename: Optional[str] = None,
209209
content_type: Optional[str] = None,
210-
file: Optional[IO] = None,
210+
file: Optional[IO[bytes]] = None,
211211
file_filename: Optional[str] = None,
212212
encoding: Optional[str] = "utf-8",
213213
) -> Optional[FileType]:
@@ -370,7 +370,7 @@ def _detect_filetype_from_octet_stream(file: IO) -> FileType:
370370

371371
def _read_file_start_for_type_check(
372372
filename: Optional[str] = None,
373-
file: Optional[IO] = None,
373+
file: Optional[IO[bytes]] = None,
374374
encoding: Optional[str] = "utf-8",
375375
) -> str:
376376
"""Reads the start of the file and returns the text content."""
@@ -396,7 +396,7 @@ def _read_file_start_for_type_check(
396396

397397
def _is_text_file_a_json(
398398
filename: Optional[str] = None,
399-
file: Optional[IO] = None,
399+
file: Optional[IO[bytes]] = None,
400400
encoding: Optional[str] = "utf-8",
401401
):
402402
"""Detects if a file that has a text/plain MIME type is a JSON file."""
@@ -413,7 +413,7 @@ def _count_commas(text: str):
413413

414414
def _is_text_file_a_csv(
415415
filename: Optional[str] = None,
416-
file: Optional[IO] = None,
416+
file: Optional[IO[bytes]] = None,
417417
encoding: Optional[str] = "utf-8",
418418
):
419419
"""Detects if a file that has a text/plain MIME type is a CSV file."""

unstructured/file_utils/metadata.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ def to_dict(self):
4141

4242
def get_docx_metadata(
4343
filename: str = "",
44-
file: Optional[IO] = None,
44+
file: Optional[IO[bytes]] = None,
4545
) -> Metadata:
4646
"""Extracts document metadata from a Microsoft .docx document."""
4747
if filename:
@@ -74,7 +74,7 @@ def get_docx_metadata(
7474

7575
def get_xlsx_metadata(
7676
filename: str = "",
77-
file: Optional[IO] = None,
77+
file: Optional[IO[bytes]] = None,
7878
) -> Metadata:
7979
"""Extracts document metadata from a Microsoft .xlsx document."""
8080
if filename:
@@ -108,7 +108,7 @@ def get_xlsx_metadata(
108108

109109
def get_jpg_metadata(
110110
filename: str = "",
111-
file: Optional[IO] = None,
111+
file: Optional[IO[bytes]] = None,
112112
) -> Metadata:
113113
"""Extracts metadata from a JPG image, including EXIF metadata."""
114114
if filename:

unstructured/partition/api.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
def partition_via_api(
1616
filename: Optional[str] = None,
1717
content_type: Optional[str] = None,
18-
file: Optional[IO] = None,
18+
file: Optional[IO[bytes]] = None,
1919
file_filename: Optional[str] = None,
2020
api_url: str = "https://api.unstructured.io/general/v0/general",
2121
api_key: str = "",

unstructured/partition/auto.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@
3838
def partition(
3939
filename: Optional[str] = None,
4040
content_type: Optional[str] = None,
41-
file: Optional[IO] = None,
41+
file: Optional[IO[bytes]] = None,
4242
file_filename: Optional[str] = None,
4343
url: Optional[str] = None,
4444
include_page_breaks: bool = False,

unstructured/partition/common.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -218,7 +218,7 @@ def spooled_to_bytes_io_if_needed(
218218

219219

220220
def convert_to_bytes(
221-
file: Optional[Union[bytes, SpooledTemporaryFile, IO]] = None,
221+
file: Optional[Union[bytes, SpooledTemporaryFile, IO[bytes]]] = None,
222222
) -> bytes:
223223
if isinstance(file, bytes):
224224
f_bytes = file

unstructured/partition/csv.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
@add_metadata_with_filetype(FileType.CSV)
1919
def partition_csv(
2020
filename: Optional[str] = None,
21-
file: Optional[Union[IO, SpooledTemporaryFile]] = None,
21+
file: Optional[Union[IO[bytes], SpooledTemporaryFile]] = None,
2222
metadata_filename: Optional[str] = None,
2323
include_metadata: bool = True,
2424
**kwargs,

unstructured/partition/doc.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
1212
@add_metadata_with_filetype(FileType.DOC)
1313
def partition_doc(
1414
filename: Optional[str] = None,
15-
file: Optional[IO] = None,
15+
file: Optional[IO[bytes]] = None,
1616
include_page_breaks: bool = True,
1717
include_metadata: bool = True,
1818
metadata_filename: Optional[str] = None,

unstructured/partition/docx.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -107,7 +107,7 @@ def _get_runs(node, parent):
107107
@add_metadata_with_filetype(FileType.DOCX)
108108
def partition_docx(
109109
filename: Optional[str] = None,
110-
file: Optional[Union[IO, SpooledTemporaryFile]] = None,
110+
file: Optional[Union[IO[bytes], SpooledTemporaryFile]] = None,
111111
metadata_filename: Optional[str] = None,
112112
include_page_breaks: bool = True,
113113
include_metadata: bool = True,
@@ -288,7 +288,7 @@ def _get_headers_and_footers(
288288
def convert_and_partition_docx(
289289
source_format: str,
290290
filename: Optional[str] = None,
291-
file: Optional[IO] = None,
291+
file: Optional[IO[bytes]] = None,
292292
include_metadata: bool = True,
293293
metadata_filename: Optional[str] = None,
294294
) -> List[Element]:

0 commit comments

Comments
 (0)