Skip to content

Commit 11a8921

Browse files
committed
clarifies stream seeking for querying and detection functions
1 parent a676a5c commit 11a8921

File tree

4 files changed

+38
-18
lines changed

4 files changed

+38
-18
lines changed

README.md

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ using FileIO
2525
obj = load(filename)
2626
```
2727
to read data from a formatted file. Likewise, saving might be as simple as
28-
```
28+
```jl
2929
save(filename, obj)
3030
```
3131

@@ -91,9 +91,11 @@ add_format(format"PNG", [0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a], ".png")
9191
# have one of two possible file extensions
9292
add_format(format"NRRD", "NRRD", [".nrrd",".nhdr"])
9393

94-
# A format whose magic bytes might not be at the beginning of the file,
95-
# necessitating a custom function `detecthdf5` to find them
96-
add_format(format"HDF5", detecthdf5, [".h5", ".hdf5"])
94+
# A format whose magic bytes are more complicated, necessitating a custom function
95+
# `detectwav` to find them. The function should assume that the stream is
96+
# positioned at the beginning of the file being detected, and the query
97+
# infrastructure will handle seeking to the correct position afterwards.
98+
add_format(format"WAV", detectwav, ".wav")
9799

98100
# A fictitious format that, unfortunately, provides no magic
99101
# bytes. Here we have to place our faith in the file extension.
@@ -141,7 +143,6 @@ using FileIO
141143
# See important note about scope below
142144
function load(f::File{format"PNG"})
143145
open(f) do s
144-
skipmagic(s) # skip over the magic bytes
145146
# You can just call the method below...
146147
ret = load(s)
147148
# ...or implement everything here instead
@@ -150,7 +151,7 @@ end
150151

151152
# You can support streams and add keywords:
152153
function load(s::Stream{format"PNG"}; keywords...)
153-
# s is already positioned after the magic bytes
154+
skipmagic(s) # skip over the magic bytes
154155
# Do the stuff to read a PNG file
155156
chunklength = read(s, UInt32)
156157
...
@@ -174,7 +175,7 @@ Consequently, **packages should define "private" `load` and `save` methods (also
174175

175176
If you run into a naming conflict with the `load` and `save` functions
176177
(for example, you already have another function in your package that has
177-
one of these names), you can instead name your loaders `fileio_load`,
178+
one of these names), you can instead name your loaders `fileio_load`,
178179
`fileio_save` etc. Note that you cannot mix and match these styles: either
179180
all your loaders have to be named `load`, or all of them should be called
180181
`fileio_load`, but you cannot use both conventions in one module.

src/query.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ For example:
7272
add_format(format"TIFF", (UInt8[0x4d,0x4d,0x00,0x2b], UInt8[0x49,0x49,0x2a,0x00]), [".tiff", ".tif"])
7373
add_format(format"PNG", [0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a], ".png")
7474
add_format(format"NRRD", "NRRD", [".nrrd",".nhdr"])
75+
add_format(format"WAV", detectwav, [".wav", ".WAV"])
7576
7677
Note that extensions, magic numbers, and format-identifiers are case-sensitive.
7778
"""

src/registry.jl

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ add_format(format"GZIP", [0x1f, 0x8b], ".gz", [:Libz])
88

99
# test for RD?n magic sequence at the beginning of R data input stream
1010
function detect_rdata(io)
11-
seekstart(io)
1211
read(io, UInt8) == UInt8('R') &&
1312
read(io, UInt8) == UInt8('D') &&
1413
read(io, UInt8) in (UInt8('A'), UInt8('B'), UInt8('X')) &&
@@ -19,10 +18,8 @@ end
1918
add_format(format"RData", detect_rdata, [".rda", ".RData", ".rdata"], [:RData, LOAD])
2019

2120
function detect_rdata_single(io)
22-
seekstart(io)
2321
res = read(io, UInt8) in (UInt8('A'), UInt8('B'), UInt8('X')) &&
2422
(c = read(io, UInt8); c == UInt8('\n') || (c == UInt8('\r') && read(io, UInt8) == UInt8('\n')))
25-
seekstart(io)
2623
return res
2724
end
2825

@@ -145,10 +142,9 @@ add_format(format"GSLIB", (), [".gslib",".sgems"], [:GslibIO])
145142

146143
### Audio formats
147144
function detectwav(io)
148-
seekstart(io)
149145
magic = read!(io, Vector{UInt8}(undef, 4))
150146
magic == b"RIFF" || return false
151-
seek(io, 8)
147+
skip(io, 4)
152148
submagic = read!(io, Vector{UInt8}(undef, 4))
153149

154150
submagic == b"WAVE"
@@ -198,10 +194,9 @@ skipmagic(io, ::typeof(detect_noometiff)) = seek(io, 4)
198194

199195
# AVI is a subtype of RIFF, as is WAV
200196
function detectavi(io)
201-
seekstart(io)
202197
magic = read!(io, Vector{UInt8}(undef, 4))
203198
magic == b"RIFF" || return false
204-
seek(io, 8)
199+
skip(io, 4)
205200
submagic = read!(io, Vector{UInt8}(undef, 4))
206201

207202
submagic == b"AVI "
@@ -210,6 +205,8 @@ add_format(format"AVI", detectavi, ".avi", [:ImageMagick])
210205

211206
# HDF5: the complication is that the magic bytes may start at
212207
# 0, 512, 1024, 2048, or each successive doubling of the offset thereafter
208+
# this detection function assumes that the stream start and end match the
209+
# file start and end, which is true if it's just a file on disk
213210
h5magic = (0x89,0x48,0x44,0x46,0x0d,0x0a,0x1a,0x0a)
214211
function detecthdf5(io)
215212
position(io) == 0 || return false
@@ -232,6 +229,8 @@ function detecthdf5(io)
232229
end
233230
add_format(format"HDF5", detecthdf5, [".h5", ".hdf5"], [:HDF5])
234231

232+
# the STL detection functions assume that the stream start and end match the
233+
# file start and end, which is true if it's just a file on disk
235234
function detect_stlascii(io)
236235
pos = position(io)
237236
try
@@ -283,4 +282,4 @@ add_format(format"MetaImage", "ObjectType", ".mhd", [:MetaImageFormat])
283282

284283
add_format(format"vegalite", (), [".vegalite"], [:VegaLite])
285284

286-
add_format(format"FCS", "FCS", [".fcs"], [:FCSFiles])
285+
add_format(format"FCS", "FCS", [".fcs"], [:FCSFiles])

test/query.jl

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -336,13 +336,32 @@ end
336336
q = query(joinpath(file_dir, "minimal_ascii.rds"))
337337
@test typeof(q) == File{format"RDataSingle"}
338338
open(q) do io
339-
@test position(io) == 0
340339
@test FileIO.detect_rdata_single(io)
341-
# need to seek to beginning of file where data structure starts
342-
@test position(io) == 0
343340
end
344341
end
345342
@testset "Format with function for magic bytes" begin
346343
add_format(format"FUNCTION_FOR_MAGIC_BYTES", x -> 0x00, ".wav", [:WAV])
347344
del_format(format"FUNCTION_FOR_MAGIC_BYTES")
348345
end
346+
347+
function detect_position_test(io)
348+
return read(io, 3) == b"DET"
349+
end
350+
351+
@testset "Detection function called with properly-positioned stream" begin
352+
add_format(format"DET", detect_position_test, ".det")
353+
# we need extra junk to work around issue #176
354+
junk = rand(UInt8, 35)
355+
io = IOBuffer()
356+
write(io, "DET")
357+
write(io, junk)
358+
seek(io, 0)
359+
@test query(io) isa Formatted{format"DET"}
360+
@test position(io) == 0
361+
write(io, "junkDET")
362+
write(io, junk)
363+
seek(io, 4)
364+
@test query(io) isa Formatted{format"DET"}
365+
@test position(io) == 4
366+
del_format(format"DET")
367+
end

0 commit comments

Comments
 (0)