Skip to content

Commit ee39242

Browse files
committed
Merge pull request #31 from JuliaIO/sd/multimagic
first take on multiple magic bytes
2 parents 980b2ab + 9829adf commit ee39242

File tree

6 files changed

+162
-19
lines changed

6 files changed

+162
-19
lines changed

src/query.jl

Lines changed: 73 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ const magic_func = Array(Pair, 0) # for formats with complex magic #s
2929
`add_format(fmt, magic, extension)` registers a new `DataFormat`.
3030
For example:
3131
32+
add_format(format"TIFF", (UInt8[0x4d,0x4d,0x00,0x2b], UInt8[0x49,0x49,0x2a,0x00]), [".tiff", ".tif"])
3233
add_format(format"PNG", [0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a], ".png")
3334
add_format(format"NRRD", "NRRD", [".nrrd",".nhdr"])
3435
@@ -47,6 +48,22 @@ function add_format{sym}(fmt::Type{DataFormat{sym}}, magic::Union(Tuple,Abstract
4748
fmt
4849
end
4950

51+
# for multiple magic bytes
52+
# Register `fmt` under several alternative magic byte sequences.
#
# `magics` is a tuple of byte vectors; each one is passed through
# `canonicalize_magic` and inserted into the sorted `magic_list`, all pointing
# at the same format symbol. `extension` is handed to `add_extension` unchanged.
#
# Throws an error if the format is already registered, or if any non-empty
# magic is already registered (either previously or earlier in `magics`).
# Returns `fmt`.
function add_format{sym, T <: Vector{UInt8}, N}(fmt::Type{DataFormat{sym}}, magics::NTuple{N, T}, extension)
    haskey(sym2info, sym) && error("format ", fmt, " is already registered")
    magics = map(canonicalize_magic, magics)
    # Validate everything before mutating `magic_list`: failing half way
    # through would leave magics registered for a format that has no entry in
    # `sym2info` and therefore could never be removed with `del_format`.
    for (i, magic) in enumerate(magics)
        isempty(magic) && continue # empty magics may be shared between formats
        clash = !isempty(searchsorted(magic_list, magic, lt=magic_cmp))
        # nothing has been inserted yet, so repeats inside this call have to be
        # detected explicitly (assumes `==` agrees with `magic_cmp` -- TODO confirm)
        for j in 1:(i - 1)
            clash = clash || magics[j] == magic
        end
        clash && error("magic bytes ", magic, " are already registered")
    end
    for magic in magics
        rng = searchsorted(magic_list, magic, lt=magic_cmp)
        insert!(magic_list, first(rng), Pair(magic, sym)) # m=>sym in 0.4
    end
    sym2info[sym] = (magics, extension)
    add_extension(extension, sym)
    fmt
end
66+
5067
# For when "magic" is supplied as a function (see the HDF5 example in
5168
# registry.jl)
5269
function add_format{sym}(fmt::Type{DataFormat{sym}}, magic, extension)
@@ -62,6 +79,18 @@ end
6279
""" ->
6380
function del_format{sym}(fmt::Type{DataFormat{sym}})
    # sym2info stores (magic, extension) for every registered format
    info = sym2info[sym]
    # remove the magic entry/entries first, then the bookkeeping for the
    # format itself, and finally its extension(s)
    del_magic(info[1], sym)
    delete!(sym2info, sym)
    del_extension(info[2])
    return nothing
end
87+
88+
# Deletes multiple magic bytes
89+
# `magic` holds several alternatives: remove them one at a time by forwarding
# each element to `del_magic` again. Always returns `nothing`.
function del_magic(magic::Tuple, sym)
    for bytes in magic
        del_magic(bytes, sym)
    end
    return nothing
end
92+
# Deletes single magic bytes
93+
function del_magic{N}(magic::NTuple{N, Uint8}, sym)
6594
rng = searchsorted(magic_list, magic, lt=magic_cmp)
6695
if length(magic) == 0
6796
fullrng = rng
@@ -77,8 +106,6 @@ function del_format{sym}(fmt::Type{DataFormat{sym}})
77106
end
78107
@assert length(rng) == 1
79108
deleteat!(magic_list, first(rng))
80-
delete!(sym2info, sym)
81-
del_extension(extension)
82109
nothing
83110
end
84111

@@ -242,11 +269,37 @@ For a plain IO object, you can use `skipmagic(io, fmt)`.
242269
# Skip the magic bytes on the IO wrapped by `s` and hand `s` back to the caller.
function skipmagic{F}(s::Stream{F})
    skipmagic(stream(s), F)
    return s
end
# Look up what was registered as magic for the format and let dispatch on its
# type (function, single byte tuple, tuple of alternatives) do the skipping.
function skipmagic{sym}(io, fmt::Type{DataFormat{sym}})
    registered = sym2info[sym]
    skipmagic(io, registered[1])
    return nothing
end
275+
# When the magic is a function there are no bytes to skip; `io` is left untouched.
function skipmagic(io, magic::Function)
    return nothing
end
# A single magic byte sequence: seek `io` to the offset right behind it.
function skipmagic{N}(io, magic::NTuple{N,UInt8})
    nbytes = length(magic)
    return seek(io, nbytes)
end
277+
# Skip the magic bytes of a format that registered several alternatives
# (`magic` is a tuple with one entry per alternative).
#
# If all alternatives have the same length, `io` is simply seeked to that
# offset without looking at its content. Otherwise the beginning of `io` is
# read and compared against every alternative that fits into `io`, longest
# first, and `io` is seeked right behind the first one that matches.
#
# Throws an error when no alternative matches the beginning of `io`, which
# includes the case where `io` is shorter than every alternative.
function skipmagic(io, magic::Tuple)
    lengths = map(length, magic)
    # it doesn't matter what magic bytes get skipped as they all have the same length
    all(x -> lengths[1] == x, lengths) && return seek(io, lengths[1])
    seekend(io)
    len = position(io)
    seekstart(io)
    # throw out magic bytes that are longer than IO
    candidates = filter(x -> length(x) <= len, [magic...])
    # start with longest first, to avoid overlapping magic bytes; `by`/`rev`
    # is used because an `lt` built from `>=` is not a valid strict ordering
    sort!(candidates, by=length, rev=true)
    if !isempty(candidates)
        # the first candidate is both the longest and guaranteed to fit into
        # io, so one read covers the comparison against every candidate
        tmp = readbytes(io, length(first(candidates)))
        for m in candidates
            if magic_equal(m, tmp)
                seek(io, length(m))
                return nothing
            end
        end
    end
    error("tried to skip magic bytes of an IO that does not contain the magic bytes of the format. IO: $io")
end
295+
# Return `true` when `buffer` starts with the bytes in `magic`.
#
# `buffer` may be longer than `magic`; only the first `length(magic)` elements
# are compared. A buffer that is too short to contain `magic` is reported as
# "not equal" instead of raising a `BoundsError`.
function magic_equal(magic, buffer)
    length(buffer) < length(magic) && return false
    for (i, elem) in enumerate(magic)
        buffer[i] != elem && return false
    end
    return true
end
249301

302+
250303
# Both wrappers carry their format as the type parameter `F`; forward to the
# `unknown` method that takes the format type itself.
function unknown{F}(::File{F})
    return unknown(F)
end
function unknown{F}(::Stream{F})
    return unknown(F)
end
252305

@@ -257,13 +310,13 @@ function query(filename::AbstractString)
257310
_, ext = splitext(filename)
258311
if haskey(ext2sym, ext)
259312
sym = ext2sym[ext]
260-
len = lenmagic(sym)
261-
if length(len) == 1 && (all(x->x==0, len) || !isfile(filename)) # we only found one candidate and there is no magic bytes, or no file, trust the extension
313+
no_magic = !hasmagic(sym)
314+
if lensym(sym) == 1 && (no_magic || !isfile(filename)) # we only found one candidate and there is no magic bytes, or no file, trust the extension
262315
return File{DataFormat{sym}}(filename)
263-
elseif !isfile(filename) && length(len) > 1
316+
elseif !isfile(filename) && lensym(sym) > 1
264317
error("no file for check of magic bytes and multiple extensions possible: $sym")
265318
end
266-
if any(x->x==0, len)
319+
if no_magic && !hasfunction(sym)
267320
error("Some formats with extension ", ext, " have no magic bytes; use `File{format\"FMT\"}(filename)` to resolve the ambiguity.")
268321
end
269322
end
@@ -272,11 +325,19 @@ function query(filename::AbstractString)
272325
file!(query(open(filename), filename))
273326
end
274327

275-
lenmagic(s::Symbol) = lenm(sym2info[s][1])
276-
lenmagic(v::Vector) = map(lenmagic, v)
328+
# Number of format candidates behind an extension lookup: a lone `Symbol`
# counts as one candidate, a `Vector` counts as many as it has elements.
function lensym(s::Symbol)
    return 1
end
function lensym(v::Vector)
    return length(v)
end
330+
331+
# `hasmagic` answers whether magic bytes are available.
# For a format symbol, inspect the magic stored in `sym2info`.
function hasmagic(s::Symbol)
    info = sym2info[s]
    return hasmagic(info[1])
end
# For a vector of candidates, one candidate with magic bytes is enough.
function hasmagic(v::Vector)
    for candidate in v
        hasmagic(candidate) && return true
    end
    return false
end

# A tuple counts as magic bytes unless it is empty.
function hasmagic(t::Tuple)
    return length(t) > 0
end
# for when magic is a function
function hasmagic(::Any)
    return false
end
277336

278-
lenm(t::Tuple) = length(t)
279-
lenm(::Any) = -1 # for when magic is a function
337+
# `hasfunction` answers whether detection is done by something other than
# magic bytes. For a format symbol, inspect the magic stored in `sym2info`.
function hasfunction(s::Symbol)
    info = sym2info[s]
    return hasfunction(info[1])
end
# For a vector of candidates, one function-based candidate is enough.
function hasfunction(v::Vector)
    for candidate in v
        hasfunction(candidate) && return true
    end
    return false
end
# Anything that is not a tuple of magic bytes is treated as a function.
function hasfunction(s::Any)
    return true
end
# Tuples are magic bytes, not functions.
function hasfunction(s::Tuple)
    return false
end
280341

281342
@doc """
282343
`query(io, [filename])` returns a `Stream` object with information about the

src/registry.jl

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,8 @@ add_saver(format"PSD", :ImageMagick)
7878
add_format(format"RGB", UInt8[0x01,0xda,0x01,0x01,0x00,0x03], ".rgb")
7979
add_loader(format"RGB", :ImageMagick)
8080
add_saver(format"RGB", :ImageMagick)
81-
add_format(format"TIFF", UInt8[0x4d,0x4d,0x00,0x2b], ".tiff")
81+
82+
# TIFF is registered with three alternative magic byte sequences. Extensions
# need their leading dot because `query` looks them up with the result of
# `splitext`, so both ".tiff" and ".tif" are spelled with it.
add_format(format"TIFF", (UInt8[0x4d,0x4d,0x00,0x2a], UInt8[0x4d,0x4d,0x00,0x2b], UInt8[0x49,0x49,0x2a,0x00]), [".tiff", ".tif"])
8283
add_loader(format"TIFF", :ImageMagick)
8384
add_saver(format"TIFF", :ImageMagick)
8485
add_format(format"WMF", UInt8[0xd7,0xcd,0xc6,0x9a], ".wmf")
@@ -147,7 +148,7 @@ add_saver(format"HDF5", :HDF5)
147148

148149
function detect_stlascii(io)
149150
try
150-
position(io) != 0 && return false
151+
position(io) != 0 && (seekstart(io); return false)
151152
seekend(io)
152153
len = position(io)
153154
seekstart(io)
@@ -163,7 +164,7 @@ function detect_stlbinary(io)
163164
const size_header = 80+sizeof(Uint32)
164165
const size_triangleblock = (4*3*sizeof(Float32)) + sizeof(Uint16)
165166

166-
position(io) != 0 && return false
167+
position(io) != 0 && (seekstart(io); return false)
167168
seekend(io)
168169
len = position(io)
169170
seekstart(io)
@@ -172,12 +173,13 @@ function detect_stlbinary(io)
172173
skip(io, 80) # skip header
173174
number_of_triangle_blocks = read(io, Uint32)
174175
#1 normal, 3 vertices in Float32 + attrib count, usually 0
175-
len != (number_of_triangle_blocks*size_triangleblock)+size_header && return false
176+
len != (number_of_triangle_blocks*size_triangleblock)+size_header && (seekstart(io); return false)
176177
skip(io, number_of_triangle_blocks*size_triangleblock-sizeof(Uint16))
177178
attrib_byte_count = read(io, Uint16) # read last attrib_byte
178-
attrib_byte_count != zero(Uint16) && return false # should be zero as not used
179-
eof(io) && return true
180-
false
179+
attrib_byte_count != zero(Uint16) && (seekstart(io); return false) # should be zero as not used
180+
result = eof(io) # if end of file, we have a stl!
181+
seekstart(io)
182+
return result
181183
end
182184
add_format(format"STL_ASCII", detect_stlascii, [".stl", ".STL"])
183185
add_format(format"STL_BINARY", detect_stlbinary, [".stl", ".STL"])

test/files/magic1.tiff

65.8 KB
Binary file not shown.

test/files/magic2.tiff

64.2 KB
Binary file not shown.

test/files/tiff_licence.txt

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
2+
Michael J. Lyons, Shigeru Akamatsu, Miyuki Kamachi, Jiro Gyoba.
3+
Coding Facial Expressions with Gabor Wavelets, 3rd IEEE International Conference on Automatic Face and Gesture Recognition, pp. 200-205 (1998).
4+
5+
Information about the JAFFE database is available at:
6+
7+
http://www.kasrl.org/jaffe.html
8+
9+
Images are in .tiff format, no compression.
10+
11+
Included below, are files containing semantic rating data from
12+
psychological experiments using the images. I send it because I
13+
believe it is important to realize that expression are never pure
14+
expressions of one emotion, but always admixtures of different
15+
emotions. The expression labels on the images just represent the
16+
predominant expression in that image - the expression that the subject
17+
was asked to pose.
18+
19+
Best regards,
20+
21+
Michael J. Lyons, Ph.D.
22+
Professor of Image Arts and Sciences
23+
Ritsumeikan University
24+
Kyoto, Japan
25+
michael (dot) lyons(you know)gmail (dot) com

test/query.jl

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,44 @@ try
140140
q = query( "some_non_existant_file.mmm")
141141
@fact typeof(q) --> File{format"MAGIC"}
142142

143+
add_format(format"DOUBLE_MAGIC", (UInt8[0x4d,0x4d,0x00,0x2a], UInt8[0x4d,0x4d,0x00]), ".dd2")
144+
145+
fn = string(tempname(), ".dd2")
146+
open(fn, "w") do file
147+
write(file, UInt8[0x4d,0x4d,0x00,0x2a])
148+
write(file, randstring(19))
149+
end
150+
q = query(fn)
151+
@fact typeof(q) --> File{format"DOUBLE_MAGIC"}
152+
io = open(q)
153+
skipmagic(io)
154+
@fact position(io) --> 4
155+
close(io)
156+
rm(fn)
157+
158+
open(fn, "w") do file
159+
write(file, UInt8[0x4d,0x4d,0x00])
160+
write(file, randstring(19))
161+
end
162+
q = query(fn)
163+
@fact typeof(q) --> File{format"DOUBLE_MAGIC"}
164+
io = open(q)
165+
@fact file_extension(q) --> ".dd2"
166+
skipmagic(io)
167+
@fact position(io) --> 3
168+
close(io)
169+
open(fn, "w") do file
170+
write(file, randstring(19)) # corrupt magic bytes
171+
end
172+
open(fn, "r") do file
173+
@fact_throws skipmagic(file)
174+
end
175+
rm(fn)
176+
lene0 = length(FileIO.ext2sym)
177+
lenm0 = length(FileIO.magic_list)
178+
del_format(format"DOUBLE_MAGIC")
179+
@fact lene0 - 1 --> length(FileIO.ext2sym)
180+
@fact lenm0 - 2 --> length(FileIO.magic_list)
143181
end
144182

145183
del_format(format"JUNK") # This triggers del_extension for multiple extensions
@@ -161,10 +199,27 @@ facts("STL detection") do
161199
@fact typeof(q) --> File{format"STL_ASCII"}
162200
q = query(joinpath(file_dir, "binary_stl_from_solidworks.STL"))
163201
@fact typeof(q) --> File{format"STL_BINARY"}
202+
open(q) do io
203+
@fact position(io) --> 0
204+
skipmagic(io)
205+
@fact position(io) --> 0 # no skipping for functions
206+
end
164207
end
165208
facts("PLY detection") do
166209
q = query(joinpath(file_dir, "ascii.ply"))
167210
@fact typeof(q) --> File{format"PLY_ASCII"}
168211
q = query(joinpath(file_dir, "binary.ply"))
169212
@fact typeof(q) --> File{format"PLY_BINARY"}
213+
170214
end
215+
facts("Multiple Magic bytes") do
    # Both fixtures must be detected as TIFF, and skipping the magic bytes must
    # work for each of them: every registered TIFF magic is 4 bytes long, so
    # the stream ends up at position 4 regardless of which one the file uses.
    for name in ("magic1.tiff", "magic2.tiff")
        q = query(joinpath(file_dir, name))
        @fact typeof(q) --> File{format"TIFF"}
        open(q) do io
            @fact position(io) --> 0
            skipmagic(io)
            @fact position(io) --> 4
        end
    end
end

0 commit comments

Comments
 (0)