Skip to content

Commit 2c4095d

Browse files
CiaranOMara timholy
authored and committed
Custom function to find bedgraph magic bytes (#158)
* Custom function to find bedGraph magic bytes * Convert magic bytes to a string * Bedgraph file tests * Strengthen regex - Allow white space at beginning and end of track. - Assume chrom starts with a letter (not absolutely sure about this) - Assume value data ends with a digit (not absolutely certain about this).
1 parent 08873c4 commit 2c4095d

File tree

3 files changed

+51
-2
lines changed

3 files changed

+51
-2
lines changed

src/registry.jl

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@ add_format(format"SAS", UInt8[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
2929
0xcf, 0xbd, 0x92, 0x08, 0x00, 0x09, 0xc7, 0x31, 0x8c, 0x18, 0x1f,
3030
0x10, 0x11], [".sas7bdat"], [:StatFiles, LOAD])
3131

32-
add_format(format"bedGraph", UInt8[0x74, 0x79, 0x70, 0x65, 0x3D, 0x62, 0x65, 0x64, 0x47, 0x72, 0x61, 0x70, 0x68], [".bedgraph"], [:BedgraphFiles])
33-
3432
# Image formats
3533
add_format(format"PBMBinary", b"P4", ".pbm", [:ImageMagick])
3634
add_format(format"PGMBinary", b"P5", ".pgm", [:Netpbm])
@@ -150,6 +148,26 @@ add_format(format"FLAC","fLaC",".flac",[:FLAC])
150148

151149
### Complex cases
152150

151+
# bedGraph: the complication is that the magic bytes may start at any location within an indeterminate header.
152+
# ASCII bytes of the text "type=bedGraph", which detect_bedgraph searches for line by line.
const bedgraph_magic = UInt8[0x74, 0x79, 0x70, 0x65, 0x3D, 0x62, 0x65, 0x64, 0x47, 0x72, 0x61, 0x70, 0x68]
153+
# Detect a bedGraph file by scanning its header lines for the text "type=bedGraph".
#
# Arguments:
#   io - a readable stream; detection is only attempted when it is at position 0.
#
# Returns `true` as soon as a line containing the magic text is read. Returns `false`
# when the stream is not at position 0, when end-of-file is reached, or when a line
# that looks like a bedGraph data record (chrom, start, end, value) was read before
# any line carrying the magic text.
#
# This function does not rewind `io`; the stream is left wherever reading stopped.
function detect_bedgraph(io)
    position(io) == 0 || return false

    # Build the search string once, outside the loop. `String(::Vector{UInt8})` takes
    # ownership of its argument (newer Julia versions even empty the vector), so
    # convert a copy and leave the shared `bedgraph_magic` constant untouched.
    magic = String(copy(bedgraph_magic)) # "type=bedGraph"

    # A data record: chrom (starts with a letter), two integer coordinates, and a
    # value ending in a digit, with optional surrounding whitespace. Reading a data
    # record means the header is over, which bounds the search.
    data_record = r"^\s*([A-Za-z]+\S*)\s+(\d+)\s+(\d+)\s+(\S*\d)\s*$"

    line = ""
    while !eof(io) && !ismatch(data_record, line)
        line = readline(io, chomp=false)
        contains(line, magic) && return true
    end

    return false
end
169+
# Register bedGraph with a detection function (detect_bedgraph) in place of a fixed magic-byte vector.
add_format(format"bedGraph", detect_bedgraph, [".bedgraph"], [:BedgraphFiles])
170+
153171
# Handle OME-TIFFs, which are identical to normal TIFFs with the primary difference being the filename and embedded XML metadata
154172
const tiff_magic = (UInt8[0x4d,0x4d,0x00,0x2a], UInt8[0x4d,0x4d,0x00,0x2b], UInt8[0x49,0x49,0x2a,0x00],UInt8[0x49,0x49,0x2b,0x00])
155173
function detecttiff(io)

test/files/file.bedgraph

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
browser position chr19:49302001-49304701
2+
browser hide all
3+
browser pack refGene encodeRegions
4+
browser full altGraph
5+
# 300 base wide bar graph, autoScale is on by default == graphing
6+
# limits will dynamically change to always show full range of data
7+
# in viewing window, priority = 20 positions this as the second graph
8+
# Note, zero-relative, half-open coordinate system in use for bedGraph format
9+
track type=bedGraph name="BedGraph Format" description="BedGraph format" visibility=full color=200,100,0 altColor=0,100,200 priority=20
10+
chr19 49302000 49302300 -1.0
11+
chr19 49302300 49302600 -0.75
12+
chr19 49302600 49302900 -0.50
13+
chr19 49302900 49303200 -0.25
14+
chr19 49303200 49303500 0.0
15+
chr19 49303500 49303800 0.25
16+
chr19 49303800 49304100 0.50
17+
chr19 49304100 49304400 0.75
18+
chr19 49304400 49304700 1.00

test/query.jl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,19 @@ finally
267267
end
268268

269269
file_dir = joinpath(dirname(@__FILE__), "files")
270+
@testset "bedGraph" begin
    bedgraph_path = joinpath(file_dir, "file.bedgraph")

    # query should recognise the sample file as bedGraph.
    q = query(bedgraph_path)
    @test typeof(q) == File{format"bedGraph"}

    # Opening the queried File: skipmagic must leave the position unchanged,
    # because the format is detected by a function rather than leading magic bytes.
    open(q) do stream
        @test position(stream) == 0
        skipmagic(stream)
        @test position(stream) == 0 # no skipping for functions
        # Not tested here: FileIO.detect_bedgraph(stream) raises
        # MethodError: no method matching
        # readline(::FileIO.Stream{FileIO.DataFormat{:bedGraph},IOStream}; chomp=false)
    end

    # The detector works when given a plain IO handle on the same file.
    open(bedgraph_path) do raw
        @test FileIO.detect_bedgraph(raw)
    end
end
270283
@testset "STL detection" begin
271284
q = query(joinpath(file_dir, "ascii.stl"))
272285
@test typeof(q) == File{format"STL_ASCII"}

0 commit comments

Comments
 (0)