@@ -77,7 +77,6 @@ def test_it_detects_correct_file_type_for_CFB_and_ZIP_subtypes_detected_by_direc
7777 (FileType .HEIC , "img/DA-1p.heic" , "image/heic" ),
7878 (FileType .HTML , "example-10k-1p.html" , "text/html" ),
7979 (FileType .JPG , "img/example.jpg" , "image/jpeg" ),
80- (FileType .JSON , "spring-weather.html.json" , "application/json" ),
8180 (FileType .MD , "README.md" , "text/markdown" ),
8281 (FileType .ORG , "README.org" , "text/org" ),
8382 (FileType .PDF , "pdf/layout-parser-paper-fast.pdf" , "application/pdf" ),
@@ -116,7 +115,6 @@ def test_it_detects_correct_file_type_from_file_path_with_correct_asserted_conte
116115 (FileType .HEIC , "img/DA-1p.heic" , "image/heic" ),
117116 (FileType .HTML , "example-10k-1p.html" , "text/html" ),
118117 (FileType .JPG , "img/example.jpg" , "image/jpeg" ),
119- (FileType .JSON , "spring-weather.html.json" , "application/json" ),
120118 (FileType .MD , "README.md" , "text/markdown" ),
121119 (FileType .ORG , "README.org" , "text/org" ),
122120 (FileType .PDF , "pdf/layout-parser-paper-fast.pdf" , "application/pdf" ),
@@ -154,10 +152,10 @@ def test_it_identifies_NDJSON_for_file_like_object_with_no_name_but_NDJSON_conte
154152 assert detect_filetype (file = file , content_type = FileType .NDJSON .mime_type ) == FileType .NDJSON
155153
156154
157- # TODO: ideally this test should pass, currently fails
158- # def test_it_identifies_NDJSON_for_file_with_ndjson_extension_but_JSON_content_type():
159- # file_path = example_doc_path("simple.ndjson")
160- # assert detect_filetype(file_path, content_type=FileType.JSON.mime_type) == FileType.NDJSON
155+ def test_it_identifies_NDJSON_for_file_with_ndjson_extension_but_JSON_content_type ():  # previously a commented-out TODO; now enabled
156+ file_path = example_doc_path ( "simple.ndjson" )  # fixture whose name ends in `.ndjson`
157+ assert detect_filetype ( file_path , content_type = FileType . JSON . mime_type ) == FileType . NDJSON  # asserted JSON content-type must still yield NDJSON
158+
161159
162160# ================================================================================================
163161# STRATEGY #3 - GUESS MIME-TYPE WITH LIBMAGIC/FILETYPE LIBRARY
@@ -268,7 +266,6 @@ def test_it_detects_most_file_types_using_mime_guessing_when_libmagic_guesses_mi
268266 (FileType .UNK , "stanley-cups.csv" ),
269267 (FileType .UNK , "eml/fake-email.eml" ),
270268 (FileType .UNK , "example-10k-1p.html" ),
271- (FileType .UNK , "spring-weather.html.json" ),
272269 (FileType .UNK , "README.md" ),
273270 (FileType .UNK , "README.org" ),
274271 (FileType .UNK , "README.rst" ),
@@ -333,6 +330,7 @@ def test_detect_filetype_from_file_warns_when_libmagic_is_not_installed(
333330 (FileType .TXT , "norwich-city.txt" ),
334331 (FileType .WAV , "CantinaBand3.wav" ),
335332 (FileType .XML , "factbook.xml" ),
333+ (FileType .NDJSON , "simple.ndjson" ),
336334 ],
337335)
338336def test_it_detects_correct_file_type_from_extension_when_that_maps_to_a_file_type (
@@ -395,6 +393,27 @@ def test_it_detects_HTML_from_guessed_mime_type_ending_with_xml_and_html_extensi
395393 assert file_type is FileType .HTML
396394
397395
396+ @pytest .mark .parametrize (
397+ ("expected_value" , "file_name" ),
398+ [(FileType .NDJSON , "simple.ndjson" ), (FileType .JSON , "spring-weather.html.json" )],  # one NDJSON fixture and one JSON fixture
399+ )
400+ def test_it_detects_correct_json_type_without_extension (expected_value : FileType , file_name : str ):  # detection from a file-like object only
401+ with open (example_doc_path (file_name ), "rb" ) as f :
402+ file = io .BytesIO (f .read ())  # in-memory copy of the fixture bytes; the path is not handed to the detector
403+
404+ filetype = detect_filetype (file = file )  # only `file=` is supplied: no file_path and no content_type
405+ assert filetype == expected_value
406+
407+
408+ @pytest .mark .parametrize (
409+ ("expected_value" , "file_name" ),
410+ [(FileType .NDJSON , "simple.ndjson" ), (FileType .JSON , "spring-weather.html.json" )],  # one NDJSON fixture and one JSON fixture
411+ )
412+ def test_it_detects_correct_json_type_with_extension (expected_value : FileType , file_name : str ):  # detection from a file path only
413+ filetype = detect_filetype (file_path = example_doc_path (file_name ))  # only `file_path=` is supplied: no content_type
414+ assert filetype == expected_value
415+
416+
398417@pytest .mark .parametrize (
399418 ("mime_type" , "file_name" ),
400419 [
0 commit comments