Skip to content

Commit 3dbb175

Browse files
fix: improve markdown file detection for unstructured parser
Co-Authored-By: Aaron <AJ> Steers <[email protected]>
1 parent d2bb776 commit 3dbb175

File tree

1 file changed

+8
-4
lines changed

1 file changed

+8
-4
lines changed

airbyte_cdk/sources/file_based/file_types/unstructured_parser.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -452,10 +452,14 @@ def _get_filetype(self, file: IOBase, remote_file: RemoteFile) -> Optional[FileT
452452
if type_based_on_content and type_based_on_content != FileType.UNK:
453453
return type_based_on_content
454454

455-
extension = "." + remote_file.uri.split(".")[-1].lower()
456-
for file_type in FileType:
457-
if file_type.name.lower() == extension[1:].lower():
458-
return file_type
455+
if "." in remote_file.uri:
456+
extension = "." + remote_file.uri.split(".")[-1].lower()
457+
for file_type in FileType:
458+
if file_type.name.lower() == extension[1:].lower():
459+
return file_type
460+
461+
if remote_file.uri.endswith(".md") or remote_file.mime_type == "text/markdown":
462+
return FileType.MD
459463

460464
return None
461465

0 commit comments

Comments
 (0)