99Acknowledgements
1010Gary C. Kessler
1111 For use of his File Signature Tables, available at:
12- http://www.garykessler.net/library/file_sigs.html
12+ https://filesig.search.org/
1313"""
1414
15- from __future__ import annotations
16-
1715import json
1816import os
1917from binascii import unhexlify
2220from pathlib import Path
2321
2422if os .getenv ("PUREMAGIC_DEEPSCAN" ) != "0" :
25- from puremagic .scanners import zip_scanner , pdf_scanner , text_scanner
23+ from puremagic .scanners import zip_scanner , pdf_scanner , text_scanner , json_scanner , python_scanner
2624
2725__author__ = "Chris Griffith"
28- __version__ = "2.0.0b1 "
26+ __version__ = "2.0.0b2 "
2927__all__ = [
3028 "magic_file" ,
3129 "magic_string" ,
@@ -262,7 +260,7 @@ def ext_from_filename(filename: os.PathLike | str) -> str:
262260 all_exts = [x .extension for x in chain (magic_header_array , magic_footer_array )]
263261
264262 if base [- 4 :].startswith ("." ):
265- # For double extensions like like .tar.gz
263+ # For double extensions like .tar.gz
266264 long_ext = base [- 4 :] + ext
267265 if long_ext in all_exts :
268266 return long_ext
@@ -320,7 +318,7 @@ def from_stream(stream, mime: bool = False, filename: os.PathLike | str | None =
320318def magic_file (filename : os .PathLike | str ) -> list [PureMagicWithConfidence ]:
321319 """
322320 Returns list of (num_of_matches, array_of_matches)
323- arranged highest confidence match first.
321+ arranged by highest confidence match first.
324322
325323 :param filename: path to file
326324 :return: list of possible matches, highest confidence first
@@ -341,7 +339,7 @@ def magic_file(filename: os.PathLike | str) -> list[PureMagicWithConfidence]:
341339def magic_string (string , filename : os .PathLike | str | None = None ) -> list [PureMagicWithConfidence ]:
342340 """
343341 Returns tuple of (num_of_matches, array_of_matches)
344- arranged highest confidence match first
342+ arranged by highest confidence match first
345343 If filename is provided it will be used in the computation.
346344
347345 :param string: string representation to check
@@ -362,7 +360,7 @@ def magic_stream(
362360 filename : os .PathLike | None = None ,
363361) -> list [PureMagicWithConfidence ]:
364362 """Returns tuple of (num_of_matches, array_of_matches)
365- arranged highest confidence match first
363+ arranged by highest confidence match first
366364 If filename is provided it will be used in the computation.
367365
368366 :param stream: stream representation to check
@@ -393,14 +391,27 @@ def _single_deep_scan(
393391 return zip_scanner .main (filename , head , foot )
394392 case pdf_scanner .match_bytes :
395393 return pdf_scanner .main (filename , head , foot )
396- case None | b"" :
397- for scanner in (text_scanner , pdf_scanner ):
398- result = scanner .main (filename , head , foot )
399- if result :
400- return result
394+
395+ # First match wins, so text_scanner should always be last
396+ for scanner in (pdf_scanner , python_scanner , json_scanner ):
397+ result = scanner .main (filename , head , foot )
398+ if result :
399+ return result
401400 return None
402401
403402
403+ def _catch_all_deep_scan (
404+ filename : os .PathLike | str ,
405+ head = None ,
406+ foot = None ,
407+ ):
408+ if os .getenv ("PUREMAGIC_DEEPSCAN" ) == "0" :
409+ return None
410+ if not isinstance (filename , os .PathLike ):
411+ filename = Path (filename )
412+ return text_scanner .main (filename , head , foot )
413+
414+
404415def _run_deep_scan (
405416 matches : list [PureMagicWithConfidence ],
406417 filename : os .PathLike | str ,
@@ -425,10 +436,18 @@ def _run_deep_scan(
425436 name = result .name ,
426437 )
427438 ]
439+ try :
440+ result = _catch_all_deep_scan (filename , head , foot )
441+ except Exception :
442+ pass
443+ else :
444+ if result :
445+ return [result ]
428446 if raise_on_none :
429447 raise PureError ("Could not identify file" )
430448
431449 for pure_magic_match in matches :
450+ # noinspection PyBroadException
432451 try :
433452 result = _single_deep_scan (pure_magic_match .byte_match , filename , head , foot )
434453 except Exception :
@@ -484,7 +503,7 @@ def command_line_entry(*args):
484503 if i == 0 :
485504 print ("\n \t Best Match" )
486505 else :
487- print (f"\t Alertnative Match #{ i } " )
506+ print (f"\t Alternative Match #{ i } " )
488507 print (f"\t Name: { result .name } " )
489508 print (f"\t Confidence: { int (result .confidence * 100 )} %" )
490509 print (f"\t Extension: { result .extension } " )
@@ -526,14 +545,14 @@ def what(file: os.PathLike | str | None, h: bytes | None = None, imghdr_strict:
526545
527546 imghdr_strict enables bug-for-bug compatibility between imghdr.what() and puremagic.what() when the imghdr returns
528547 a match but puremagic returns None. We believe that imghdr is delivering a "false positive" in each of these
529- scenerios but we want puremagic.what()'s default behavior to match imghdr.what()'s false positives so we do not
548+ scenarios, but we want puremagic.what()'s default behavior to match imghdr.what()'s false positives so we do not
530549 break existing applications.
531550
532551 If imghdr_strict is True (the default) then a lookup will be done to deliver a matching result on all known false
533552 positives. If imghdr_strict is False then puremagic's algorithms will determine the image type. True is more
534553 compatible while False is more correct.
535554
536- NOTE: This compatibility effort only deals false positives and we are not interested to track the opposite
555+ NOTE: This compatibility effort only deals with false positives, and we are not interested in tracking the opposite
537556 situation where puremagic delivers a match while imghdr would have returned None. Also, puremagic.what() can
538557 recognize many more file types than the twelve image file types that imghdr focused on.
539558 """
0 commit comments