Skip to content

Commit dc0cafb

Browse files
committed
Initial commit of DBF file reading code, ported from JuliaData/DataFrames.jl#666 by @wildart
1 parent 6e0d074 commit dc0cafb

File tree

4 files changed

+158
-3
lines changed

4 files changed

+158
-3
lines changed

REQUIRE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
11
julia 0.6
2+
Nulls
3+
DataFrames

src/DBFTables.jl

Lines changed: 144 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,148 @@
11
module DBFTables
22

3-
# package code goes here
3+
using Nulls, DataFrames
4+
5+
# Read DBF files in xBase format
6+
# Files written in this format have the extension .dbf
7+
# Implemented: dBase III+ (w/o memo)
8+
9+
"""
    DBFFieldDescriptor

Description of one column of a DBF table, as read from the header.

# Fields
- `nam`: field name.
- `typ`: Julia type used to hold the values of this field.
- `len`: width of the field inside each record, in bytes.
- `dec`: decimal count stored in the field descriptor.
"""
struct DBFFieldDescriptor
    nam::String
    typ::Type
    # Both values are stored in the file as single unsigned bytes; a signed
    # `Int8` cannot represent widths above 127 and made the constructor throw
    # an `InexactError` for wide character fields.
    len::UInt8
    dec::UInt8
end
15+
16+
"""
    DBFHeader

Contents of the fixed-size header of a DBF file plus its field descriptors.

# Fields
- `version`: first byte of the file.
- `lastUpdate`: date of last update formatted as `YYYYMMDD`.
- `records`: number of records announced by the header.
- `hsize`: header size value read from offset 0x08.
- `rsize`: record size value read from offset 0x0A.
- `incomplete`: flag read from offset 0x0E.
- `encrypted`: flag read from offset 0x0F.
- `mdx`: flag read from offset 0x1C.
- `langId`: byte read from offset 0x1D.
- `fields`: descriptors of the table columns, in file order.
"""
struct DBFHeader
    # identification
    version::UInt8
    lastUpdate::String

    # sizes
    records::Int32
    hsize::Int16
    rsize::Int16

    # flags
    incomplete::Bool
    encrypted::Bool
    mdx::Bool
    langId::UInt8

    # column layout
    fields::Vector{DBFFieldDescriptor}
end
28+
29+
"""
    dbf_field_type(fld::Char, dec::UInt8)

Return the Julia type used to store values of a DBF field whose type code is
`fld` and whose decimal count is `dec`.

| code        | result                                   |
|:----------- |:---------------------------------------- |
| `'C'`, `'D'`| `String` (dates are kept as raw text)    |
| `'N'`       | `Float64` if `dec > 0`, otherwise `Int`  |
| `'F'`, `'O'`| `Float64`                                |
| `'I'`, `'+'`| `Int32`                                  |
| `'L'`       | `Bool`                                   |

Any other code emits a warning and yields `Void`.
"""
function dbf_field_type(fld::Char, dec::UInt8)
    if fld == 'C' || fld == 'D'
        return String
    elseif fld == 'N'
        return dec > 0 ? Float64 : Int
    elseif fld == 'F' || fld == 'O'
        return Float64
    elseif fld == 'I' || fld == '+'
        # A concrete type instead of the abstract `Integer`, so that the
        # resulting column has a concrete element type.
        return Int32
    elseif fld == 'L'
        return Bool
    end
    warn("Unknown record type: $(fld)")
    return Void
end
52+
53+
"""
    read_dbf_field(io::IO)

Read one 32-byte field descriptor from `io` and return it as a
`DBFFieldDescriptor`. `io` must be positioned at the start of the descriptor;
on return it is positioned right after it.
"""
function read_dbf_field(io::IO)
    raw_name = read!(io, Vector{UInt8}(11))      # 0x00
    # The name is NUL-terminated: keep only the bytes before the first NUL so
    # that leftover bytes after the terminator never end up in the name.
    n = 0
    while n < length(raw_name) && raw_name[n + 1] != 0x00
        n += 1
    end
    field_name = strip(String(raw_name[1:n]))
    # Read exactly one byte. `read(io, Char)` decodes UTF-8 and may consume
    # several bytes, which would shift every following offset.
    field_type = Char(read(io, UInt8))           # 0x0B
    read(io, Int32)                              # skip 0x0C
    field_len = read(io, UInt8)                  # 0x10
    field_dec = read(io, UInt8)                  # 0x11
    read!(io, Vector{UInt8}(14))                 # reserved
    return DBFFieldDescriptor(field_name, dbf_field_type(field_type, field_dec),
                              field_len, field_dec)
end
62+
63+
"""
    read_dbf_header(io::IO)

Read the DBF header (fixed part and field descriptors) from `io` and return a
`DBFHeader`. `io` must be seekable and positioned at the first byte of the
file data. On return `io` is positioned at the first record.
"""
function read_dbf_header(io::IO)
    start = position(io)
    ver = read(io, UInt8)                          # 0x00
    date = read!(io, Vector{UInt8}(3))             # 0x01
    last_update = @sprintf("%4d%02d%02d", date[1]+1900, date[2], date[3])
    # Multi-byte integers are stored little-endian in the file.
    records = ltoh(read(io, Int32))                # 0x04
    hsize = ltoh(read(io, Int16))                  # 0x08
    rsize = ltoh(read(io, Int16))                  # 0x0A
    read(io, Int16)                                # reserved # 0x0C
    # Treat any non-zero byte as "set": `Bool(x)` throws for values other
    # than 0 and 1, which some writers store in these flag bytes.
    incomplete = read(io, UInt8) != 0x00           # 0x0E
    encrypted = read(io, UInt8) != 0x00            # 0x0F
    read!(io, Vector{UInt8}(12))                   # reserved
    mdx = read(io, UInt8) != 0x00                  # 0x1C
    langId = read(io, UInt8)                       # 0x1D
    read!(io, Vector{UInt8}(2))                    # reserved # 0x1E

    fields = DBFFieldDescriptor[]
    # Look for the 0x0D terminator *before* each descriptor so that a table
    # without any field is handled as well.
    while !eof(io)
        here = position(io)
        read(io, UInt8) == 0x0D && break
        seek(io, here)
        push!(fields, read_dbf_field(io))
    end

    # The header may be followed by extra bytes before the first record;
    # `hsize` tells where the records really start. Only ever move forward.
    gap = start + hsize - position(io)
    gap > 0 && skip(io, gap)

    return DBFHeader(ver, last_update, records, hsize, rsize,
                     incomplete, encrypted, mdx, langId,
                     fields)
end
94+
95+
# Convert the raw bytes of a single field into a value for a column of type
# `typ`; returns `null` when the field holds no usable value.
function dbf_value(typ::Type, data::Vector{UInt8})
    if typ == Bool
        isempty(data) && return null
        logical = Char(data[1])
        if logical in ('Y', 'y', 'T', 't')
            return true
        elseif logical in ('N', 'n', 'F', 'f')
            return false
        else
            return null
        end
    elseif typ == Int || typ == Float64
        # Empty numeric fields are filled with blanks; `parse` would throw.
        txt = strip(String(data))
        return isempty(txt) ? null : parse(typ, txt)
    elseif typ == String
        return strip(String(data))
    elseif typ == Void
        return null
    else
        warn("Type $(typ) is not supported")
        # Still yield a value so that the row keeps one entry per column.
        return null
    end
end

"""
    read_dbf_records!(io::IO, df::DataFrame, header::DBFHeader; deleted=false)

Read `header.records` records from `io`, which must be positioned at the first
record, and append them to `df`. Returns `df`.

Records flagged as deleted (leading byte `0x2A`) are skipped unless
`deleted=true`. The columns of `df` must be able to hold `null`, which is
produced for blank numeric fields, unrecognised logical values and unsupported
field types.
"""
function read_dbf_records!(io::IO, df::DataFrame, header::DBFHeader; deleted=false)
    for rec in 1:header.records
        is_deleted = (read(io, UInt8) == 0x2A)
        keep = deleted || !is_deleted
        row = Any[]
        for field in header.fields
            # Always consume the bytes to stay aligned, but only decode the
            # records that are actually kept.
            fld_data = read!(io, Vector{UInt8}(Int(field.len)))
            keep && push!(row, dbf_value(field.typ, fld_data))
        end
        keep && push!(df, row)
    end
    return df
end
133+
134+
"""
    read_dbf(io::IO; deleted=false)

Read a complete DBF table from `io` and return it as a `DataFrame` with one
column per field. Records flagged as deleted are included only when
`deleted=true`.
"""
function read_dbf(io::IO; deleted=false)
    header = read_dbf_header(io)
    # The record reader emits `null` for values it cannot decode, so every
    # column has to accept it.
    coltypes = Type[Union{f.typ, Null} for f in header.fields]
    colnames = Symbol[Symbol(f.nam) for f in header.fields]
    df = DataFrame(coltypes, colnames, 0)
    read_dbf_records!(io, df, header; deleted=deleted)
    return df
end
140+
141+
"""
    read_dbf(fnm::AbstractString; deleted=false)

Open the file at path `fnm`, read it as a DBF table and return the resulting
`DataFrame`. Records flagged as deleted are included only when `deleted=true`.
"""
function read_dbf(fnm::AbstractString; deleted=false)
    # The `do` form closes the file even when reading throws.
    return open(fnm) do io
        read_dbf(io; deleted=deleted)
    end
end
4147

5148
end # module

test/runtests.jl

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
11
using DBFTables
22
using Base.Test
33

4-
# write your own tests here
5-
@test 1 == 2
4+
# The fixture lives next to this script.
df = DBFTables.read_dbf(joinpath(@__DIR__, "test.dbf"))

# Table shape
@test size(df,1) == 3 # records
@test size(df,2) == 6 # fields

# One value per column
@test df[:CHAR][2] == "John"
@test df[:DATE][1] == "19900102"
@test df[:BOOL][3] == false
@test df[:FLOAT][1] == 10.21
@test df[:NUMERIC][2] == 12.21
@test df[:INTEGER][3] == 102

test/test.dbf

391 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)