Skip to content

Commit fecf1f7

Browse files
cdgriffith, bollwyvl, peterekepeter, NebularNerd
authored
Version 1.27 (#98)
- Adding new verbose output to command line with `-v` or `--verbose`
- Adding #92 include py.typed in sdist (thanks to Nicholas Bollweg - bollwyvl)
- Adding #93 Improve PDF file detection, fix json description (thanks to Péter - peterekepeter)
- Fixing #96 #86 stream does not work properly on opened small files (thanks to Felipe Lema and Andy - NebularNerd)
- Removing expected invalid WinZip signature

---------

Co-authored-by: Nicholas Bollweg <[email protected]>
Co-authored-by: Péter <[email protected]>
Co-authored-by: Andy <[email protected]>
1 parent 72ee164 commit fecf1f7

File tree

8 files changed

+91
-23
lines changed

8 files changed

+91
-23
lines changed

.pre-commit-config.yaml

Lines changed: 10 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -24,28 +24,33 @@ repos:
2424
exclude: ^test/resources/
2525
- id: trailing-whitespace
2626
args: [--markdown-linebreak-ext=md]
27-
exclude: ^test/resources/
27+
exclude: |
28+
(?x)^(
29+
^test/resources/.+|
30+
^puremagic/magic_data.json
31+
)$
2832
- id: check-executables-have-shebangs
2933
- id: end-of-file-fixer
3034
exclude: ^test/resources/.+
3135

36+
3237
- repo: https://github.com/astral-sh/ruff-pre-commit
33-
rev: v0.4.6
38+
rev: v0.5.7
3439
hooks:
3540
- id: ruff
3641

3742
- repo: https://github.com/ambv/black
38-
rev: 24.4.2
43+
rev: 24.8.0
3944
hooks:
4045
- id: black
4146

4247
- repo: https://github.com/pre-commit/mirrors-mypy
43-
rev: 'v1.10.0'
48+
rev: 'v1.11.1'
4449
hooks:
4550
- id: mypy
4651

4752
- repo: https://github.com/tox-dev/pyproject-fmt
48-
rev: 2.1.3
53+
rev: 2.2.1
4954
hooks:
5055
- id: pyproject-fmt
5156

AUTHORS.rst

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,4 +20,7 @@ A big thank you to everyone that has helped!
2020
- Andy (NebularNerd)
2121
- Raphaël Vinot (Rafiot)
2222
- Sebastian Kreft (sk-)
23-
- William Bonnaventure (Aztorius)
23+
- William Bonnaventure (Aztorius)
24+
- Nicholas Bollweg (bollwyvl)
25+
- Péter (peterekepeter)
26+
- mara004

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,15 @@
11
Changelog
22
=========
33

4+
Version 1.27
5+
------------
6+
7+
- Adding new verbose output to command line with `-v` or `--verbose`
8+
- Adding #92 include py.typed in sdist (thanks to Nicholas Bollweg - bollwyvl)
9+
- Adding #93 Improve PDF file detection, fix json description (thanks to Péter - peterekepeter)
10+
- Fixing #96 #86 stream does not work properly on opened small files (thanks to Felipe Lema and Andy - NebularNerd)
11+
- Removing expected invalid WinZip signature
12+
413
Version 1.26
514
------------
615

MANIFEST.in

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
include puremagic/*.json
2+
include puremagic/py.typed
23
include LICENSE
34
include AUTHORS.rst
45
include CHANGELOG.md

README.rst

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ Disadvantages:
3535
Compatibility
3636
~~~~~~~~~~~~~
3737

38-
- Python 3.8+
38+
- Python 3.7+
3939

4040
Using github ci to run continuous integration tests on listed platforms.
4141

@@ -174,7 +174,7 @@ http://www.garykessler.net/library/file_sigs.html
174174

175175
Freedesktop.org
176176

177-
For use of their shared-mime-info file (even if they do use XML, blea), available at:
177+
For use of their shared-mime-info file, available at:
178178
https://cgit.freedesktop.org/xdg/shared-mime-info/
179179

180180
License

puremagic/magic_data.json

Lines changed: 22 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -20,14 +20,15 @@
2020
["", 0, ".crt", "text/plain", "X.509 Certificate"],
2121
["", 0, ".reg", "", "Windows Registry File"],
2222
["", 0, ".md", "text/plain", "Markdown File"],
23-
["", 0, ".json", "application/json", "Markdown File"],
23+
["", 0, ".json", "application/json", "JSON File"],
2424
["", 0, ".rst", "text/plain", "Restructured Text File"],
2525
["", 0, ".cfg", "text/plain", "Configuration File"],
2626
["", 0, ".flake8", "text/plain", "Flake 8 Configuration File"],
2727
["", 0, ".coveragerc", "text/plain", "Coverage File"],
2828
["", 0, ".c", "text/x-csrc", "C Code File"],
2929
["", 0, ".cc", "text/x-csrc", "C Code File"],
3030
["", 0, ".h", "text/x-csrc", "C Header File"],
31+
["", 0, ".pdf", "application/pdf", "Adobe Portable Document Format file"],
3132
["", 0, ".stl", "model/stl", "stereolithography CAD software"],
3233
["", 0, ".srt", "application/x-subrip", "SubRip subtitles"],
3334
["", 0, ".obj", "", "Relocatable object code"],
@@ -63,9 +64,13 @@
6364
["", 0, ".b6i", "", "BlindWrite 6 Image File"],
6465
["", 0, ".cl2", "", "Adaptec Easy CD/DVD Creator image file"],
6566
["", 0, ".cl3", "", "Adaptec Easy CD/DVD Creator image file"],
66-
["", 0, ".cl4", "", "Adaptec Easy CD/DVD Creator image file"]
67+
["", 0, ".cl4", "", "Adaptec Easy CD/DVD Creator image file"],
68+
["", 0, ".vba", "", "Visual Basic Script"],
69+
["", 0, "README", "text/plain", "README File"]
6770
],
6871
"multi-part": {
72+
"7b22": [["227d", -2, ".json", "application/json", "JSON File"]],
73+
"7b": [["22", -1, ".json", "application/json", "JSON File"]],
6974
"464f524d": [
7075
["494c424d", 8, ".iff", "image/x-ilbm", "IFF Interleaved Bitmap Image"],
7176
["38535658", 8, ".iff", "audio/x-8svx", "IFF 8-Bit Sampled Voice"],
@@ -515,7 +520,7 @@
515520
],
516521
"73696262" : [
517522
["72686c62", 8, ".uif", "", "MagicISO Disk Image (Encrypted)"]
518-
]
523+
]
519524
},
520525
"footers": [
521526
["54525545564953494f4e2d5846494c452e00", -18, ".tga", "image/tga", "Truevision Targa Graphic file"],
@@ -525,9 +530,19 @@
525530
["3c2f7376673e", -6, ".svg", "image/svg+xml", "Scalable Vector Graphics Image"],
526531
["6b6f6c79", -512, ".dmg", "application/x-apple-diskimage", "MacOS X image file"],
527532
["4e45524f", -8, ".nrg", "", "Nero Disk Image (Version 1)"],
528-
["4e455235", -12, ".nrg", "", "Nero Disk Image (Version 2)"]
533+
["4e455235", -12, ".nrg", "", "Nero Disk Image (Version 2)"]
529534
],
530535
"headers": [
536+
["595556344d504547",0, ".y4m", "video/x-yuv4mpeg", "YUV4MPEG2 video file"],
537+
["3c68746d6c", 0, ".html", "text/html", "HTML File"],
538+
["424c5545", 0, ".bvr", "", "Blue Iris Video File"],
539+
["2d2d2d2d2d424547494e20504b4353372d2d2d2d2d", 0, ".p7b", "", "PKCS 7 Certificate File" ],
540+
["7b22", 0, ".json", "application/json", "JSON File"],
541+
["7b", 0, ".json", "application/json", "JSON File"],
542+
["50755454592d557365722d4b65792d46696c65", 0, ".ppk", "", "PuTTY User Key File"],
543+
["2d2d2d2d20424547494e2053534832205055424c4943204b4559202d2d2d2d", 0, "", "", "SSH Public Key"],
544+
["2d2d2d2d424547494e", 0, "", "", "Key or Cert File"],
545+
["2d2d2d2d20424547494e", 0, "", "", "Key or Cert File"],
531546
["30313233343536373839", 0, ".puremagic_multi_footer", "text/ascii", "TESTFILE"],
532547
["ff0a", 0, ".jxl", "image/jxl", "JPEG XL image (Raw stream)"],
533548
["0000000c4a584c200d0a870a", 0, ".jxl", "image/jxl", "JPEG XL image (ISOBMFF container)"],
@@ -580,7 +595,7 @@
580595
["425a68", 0, ".bzip2", "application/x-bzip2", "BZIP2 Compressed Archive file"],
581596
["664c614300000022", 0, ".flac", "audio/flac", "Free Lossless Audio Codec file"],
582597
["434f5744", 0, ".vmdk", "application/octet-stream", "VMware Sparse Extent Image file"],
583-
["23204469736b2044657363726970746f7246696c65", 0, ".vmdk", "application/octet-stream", "VMware Image Descriptor File"],
598+
["23204469736b2044657363726970746f7246696c65", 0, ".vmdk", "application/octet-stream", "VMware Image Descriptor File"],
584599
["4b444d56", 0, ".vmdk", "application/octet-stream", "VMware Virtual Single Disk file"],
585600
["e310000100000000", 0, ".info", "", "Amiga icon"],
586601
["5468697320697320", 0, ".info", "", "GNU Info Reader file"],
@@ -745,6 +760,7 @@
745760
["000100004d534953414d204461746162617365", 0, ".mny", "application/x-msmoney", "Microsoft Money file"],
746761
["000100005374616e64617264204a6574204442", 0, ".mdb", "application/x-msaccess", "Microsoft Access file"],
747762
["25504446", 0, ".pdf", "application/pdf", "Adobe Portable Document Format file"],
763+
["0d0a25504446", 0, ".pdf", "application/pdf", "Adobe Portable Document Format file"],
748764
["a0461df0", 512, ".ppt", "application/vnd.ms-powerpoint", "Microsoft Office PowerPoint Presentation file"],
749765
["cf11e0a1b11ae100", 0, ".doc", "application/msword", "Perfect Office Document file"],
750766
["d0cf11e0a1b11ae1", 0, ".doc", "application/msword", "Microsoft Office Document file"],
@@ -926,7 +942,6 @@
926942
["564350434830", 0, ".pch", "", "Visual C PreCompiled header"],
927943
["554641c6d2c1", 0, ".ufa", "", "UFA compressed archive"],
928944
["ac9ebd8f0000", 0, ".qdf", "", "Quicken data"],
929-
["57696e5a6970", 29152, ".zip", "application/zip", "WinZip compressed archive"],
930945
["504147454455", 0, ".dmp", "", "Windows memory dump"],
931946
["4d444d5093a7", 0, ".dmp", "", "Windows dump file"],
932947
["458600000600", 0, ".qbb", "", "QuickBooks backup"],
@@ -2006,7 +2021,7 @@
20062021
["5b436c6f6e6543445d", 0, ".ccd", "", "CloneCD Control File"],
20072022
["ffffffffffffffffffffffff", 0, ".sub", "", "CloneCD Sub Channel File"],
20082023
["00ffffffffffffffffffff", 0, ".img", "", "CloneCD Image File"],
2009-
["f7fff9fffdfffbfff6fff7fff7fff5fff8fff7fff5fff0fffcfffafffafff7fff8fff6fff7fff7fff2fff2fff8", 0, ".img", "", "CloneCD Image File"],
2024+
["f7fff9fffdfffbfff6fff7fff7fff5fff8fff7fff5fff0fffcfffafffafff7fff8fff6fff7fff7fff2fff2fff8", 0, ".img", "", "CloneCD Image File"],
20102025
["ffffffffffffffffffffffff", 0, ".b5i", "", "BlindWrite 5 Image File"],
20112026
["425754352053545245414d205349474e", 0, ".b5t", "", "BlindWrite 5 Stream File"],
20122027
["425754352053545245414d205349474e", 0, ".b6t", "", "BlindWrite 6 Stream File"],

puremagic/main.py

Lines changed: 27 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
from itertools import chain
2222

2323
__author__ = "Chris Griffith"
24-
__version__ = "1.26"
24+
__version__ = "1.27"
2525
__all__ = [
2626
"magic_file",
2727
"magic_string",
@@ -114,6 +114,9 @@ def _max_lengths() -> tuple[int, int]:
114114
return max_header_length, max_footer_length
115115

116116

117+
max_head, max_foot = _max_lengths()
118+
119+
117120
def _confidence(matches, ext=None) -> list[PureMagicWithConfidence]:
118121
"""Rough confidence based on string length and file extension"""
119122
results = []
@@ -133,7 +136,7 @@ def _confidence(matches, ext=None) -> list[PureMagicWithConfidence]:
133136
if not results:
134137
raise PureError("Could not identify file")
135138

136-
return sorted(results, key=lambda x: (x.confidence, x.byte_match), reverse=True)
139+
return sorted(results, key=lambda x: (x.confidence, len(x.byte_match)), reverse=True)
137140

138141

139142
def _identify_all(header: bytes, footer: bytes, ext=None) -> list[PureMagicWithConfidence]:
@@ -205,7 +208,6 @@ def _magic(header: bytes, footer: bytes, mime: bool, ext=None) -> str:
205208

206209
def _file_details(filename: os.PathLike | str) -> tuple[bytes, bytes]:
207210
"""Grab the start and end of the file"""
208-
max_head, max_foot = _max_lengths()
209211
with open(filename, "rb") as fin:
210212
head = fin.read(max_head)
211213
try:
@@ -218,15 +220,17 @@ def _file_details(filename: os.PathLike | str) -> tuple[bytes, bytes]:
218220

219221
def _string_details(string):
220222
"""Grab the start and end of the string"""
221-
max_head, max_foot = _max_lengths()
222223
return string[:max_head], string[-max_foot:]
223224

224225

225226
def _stream_details(stream):
226227
"""Grab the start and end of the stream"""
227-
max_head, max_foot = _max_lengths()
228228
head = stream.read(max_head)
229-
stream.seek(-max_foot, os.SEEK_END)
229+
try:
230+
stream.seek(-max_foot, os.SEEK_END)
231+
except OSError:
232+
# File is smaller than the max_foot size, jump to beginning
233+
stream.seek(0)
230234
foot = stream.read()
231235
stream.seek(0)
232236
return head, foot
@@ -374,6 +378,7 @@ def command_line_entry(*args):
374378
dest="mime",
375379
help="Return the mime type instead of file type",
376380
)
381+
parser.add_argument("-v", "--v", action="store_true", dest="verbose", help="Print verbose output")
377382
parser.add_argument("files", nargs="+")
378383
args = parser.parse_args(args if args else sys.argv[1:])
379384

@@ -385,6 +390,21 @@ def command_line_entry(*args):
385390
print(f"'{fn}' : {from_file(fn, args.mime)}")
386391
except PureError:
387392
print(f"'{fn}' : could not be Identified")
393+
continue
394+
if args.verbose:
395+
matches = magic_file(fn)
396+
print(f"Total Possible Matches: {len(matches)}")
397+
for i, result in enumerate(matches):
398+
if i == 0:
399+
print("\n\tBest Match")
400+
else:
401+
print(f"\tAlertnative Match #{i}")
402+
print(f"\tName: {result.name}")
403+
print(f"\tConfidence: {int(result.confidence * 100)}%")
404+
print(f"\tExtension: {result.extension}")
405+
print(f"\tMime Type: {result.mime_type}")
406+
print(f"\tByte Match: {result.byte_match}")
407+
print(f"\tOffset: {result.offset}\n")
388408

389409

390410
imghdr_bug_for_bug = { # Special cases where imghdr is probably incorrect.
@@ -444,5 +464,5 @@ def what(file: os.PathLike | str | None, h: bytes | None = None, imghdr_strict:
444464
return imghdr_exts.get(ext, ext)
445465

446466

447-
if __name__ == "__main__":
467+
if __name__ == "__main__": # pragma: no cover
448468
command_line_entry()

test/test_common_extensions.py

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import puremagic
1010

1111
LOCAL_DIR = os.path.realpath(os.path.dirname(__file__))
12+
RESOUCE_DIR = os.path.join(LOCAL_DIR, "resources")
1213
IMAGE_DIR = os.path.join(LOCAL_DIR, "resources", "images")
1314
VIDEO_DIR = os.path.join(LOCAL_DIR, "resources", "video")
1415
AUDIO_DIR = os.path.join(LOCAL_DIR, "resources", "audio")
@@ -19,6 +20,14 @@
1920
TGA_FILE = os.path.join(IMAGE_DIR, "test.tga")
2021

2122

23+
class MockBytesIO(BytesIO):
24+
25+
def seek(self, offset, whence=0):
26+
if offset < 0:
27+
raise OSError("Invalid seek position")
28+
return super().seek(offset, whence)
29+
30+
2231
class TestMagic(unittest.TestCase):
2332
def setUp(self):
2433
self.mp4magic = b"\x00\x00\x00\x1c\x66\x74\x79\x70\x4d\x53\x4e\
@@ -127,6 +136,10 @@ def test_magic_stream(self):
127136
self.assertEqual(result[0].extension, ".tga")
128137
self.assertRaises(ValueError, puremagic.magic_stream, BytesIO(b""))
129138

139+
def test_small_stream_error(self):
140+
ext = puremagic.from_stream(MockBytesIO(b"#!/usr/bin/env python"))
141+
self.assertEqual(ext, ".py")
142+
130143
def test_mime(self):
131144
"""Identify mime type"""
132145
self.assertEqual(puremagic.from_file(TGA_FILE, True), "image/tga")
@@ -171,7 +184,9 @@ def test_cmd_options(self):
171184
"""Test CLI options"""
172185
from puremagic.main import command_line_entry
173186

174-
command_line_entry(__file__, "test.py")
187+
command_line_entry(__file__, os.path.join(AUDIO_DIR, "test.mp3"), "-v")
188+
command_line_entry(__file__, "DOES NOT EXIST FILE")
189+
command_line_entry(__file__, os.path.join(RESOUCE_DIR, "fake_file"), "-v")
175190

176191
def test_bad_magic_input(self):
177192
"""Test bad magic input"""

0 commit comments

Comments
 (0)