|
2 | 2 | # Copyright (c) 2023 Airbyte, Inc., all rights reserved. |
3 | 3 | # |
4 | 4 | import logging |
| 5 | +import mimetypes |
5 | 6 | import os |
6 | 7 | import traceback |
7 | 8 | from datetime import datetime |
|
10 | 11 |
|
11 | 12 | import backoff |
12 | 13 | import dpath |
13 | | -import mimetypes |
14 | 14 | import nltk |
15 | 15 | import requests |
16 | 16 | from unstructured.file_utils.filetype import FileType, detect_filetype |
@@ -330,15 +330,16 @@ def _read_file_remotely( |
330 | 330 |
|
331 | 331 | data = self._params_to_dict(format.parameters, strategy) |
332 | 332 |
|
333 | | - mime_type = mimetypes.guess_type(f"file.{filetype.name.lower()}")[0] if filetype else "application/octet-stream" |
334 | | - |
| 333 | + mime_type = ( |
| 334 | + mimetypes.guess_type(f"file.{filetype.name.lower()}")[0] |
| 335 | + if filetype |
| 336 | + else "application/octet-stream" |
| 337 | + ) |
| 338 | + |
335 | 339 | files = cast(Any, {"files": ("filename", file_handle, mime_type)}) |
336 | | - |
| 340 | + |
337 | 341 | response = requests.post( |
338 | | - f"{format.api_url}/general/v0/general", |
339 | | - headers=headers, |
340 | | - data=data, |
341 | | - files=files |
| 342 | + f"{format.api_url}/general/v0/general", headers=headers, data=data, files=files |
342 | 343 | ) |
343 | 344 |
|
344 | 345 | if response.status_code == 422: |
@@ -407,7 +408,10 @@ def _get_filetype(self, file: IOBase, remote_file: RemoteFile) -> Optional[FileT |
407 | 408 | """ |
408 | 409 | if remote_file.mime_type: |
409 | 410 | for file_type in FileType: |
410 | | - if mimetypes.guess_type(f"file.{file_type.name.lower()}")[0] == remote_file.mime_type: |
| 411 | + if ( |
| 412 | + mimetypes.guess_type(f"file.{file_type.name.lower()}")[0] |
| 413 | + == remote_file.mime_type |
| 414 | + ): |
411 | 415 | return file_type |
412 | 416 |
|
413 | 417 | # set name to none, otherwise unstructured will try to get the modified date from the local file system |
|
0 commit comments