|
1 | 1 | module DBFTables |
2 | 2 |
|
3 | | -# package code goes here |
| 3 | +using Nulls, DataFrames |
| 4 | + |
| 5 | +# Read DBF files in xBase format |
| 6 | +# Files written in this format have the extension .dbf |
| 7 | +# Implemented: dBase III+ (w/o memo) |
| 8 | + |
"""
    DBFFieldDescriptor

Description of one column, built from a 32-byte field descriptor of a DBF header.

# Fields
- `nam`: field name.
- `typ`: Julia type selected for the values of this field.
- `len`: width of the field inside a record, in bytes.
- `dec`: decimal count declared for the field.
"""
struct DBFFieldDescriptor
    nam::String
    typ::Type
    # Width and decimal count are read from the file as single unsigned bytes
    # (`read(io, UInt8)`). Storing them as `Int8` made every width above 127
    # fail with an `InexactError` during construction.
    len::UInt8
    dec::UInt8
end
| 15 | + |
"""
    DBFHeader

Values collected from the fixed part of a DBF file header, together with the field
descriptors that follow it. The offsets below are the ones used when the header is read.

# Fields
- `version`: first byte of the file.
- `lastUpdate`: date of last update formatted as `YYYYMMDD` (stored year byte + 1900).
- `records`: record count (offset 0x04).
- `hsize`: 16-bit value at offset 0x08 (presumably the header size in bytes).
- `rsize`: 16-bit value at offset 0x0A (presumably the record size in bytes).
- `incomplete`: flag byte at offset 0x0E.
- `encrypted`: flag byte at offset 0x0F.
- `mdx`: flag byte at offset 0x1C.
- `langId`: byte at offset 0x1D.
- `fields`: one descriptor per column, in file order.
"""
struct DBFHeader
    # File identification
    version::UInt8
    lastUpdate::String

    # Sizes and counts
    records::Int32
    hsize::Int16
    rsize::Int16

    # Status flags
    incomplete::Bool
    encrypted::Bool
    mdx::Bool
    langId::UInt8

    # Column layout
    fields::Vector{DBFFieldDescriptor}
end
| 28 | + |
"""
    dbf_field_type(fld::Char, dec::UInt8)

Return the Julia type used to represent a DBF field with type code `fld`.

- `'C'` and `'D'` map to `String`.
- `'N'` maps to `Float64` when `dec > 0`, otherwise to `Int`.
- `'F'` and `'O'` map to `Float64`.
- `'I'` and `'+'` map to `Integer`.
- `'L'` maps to `Bool`.

Any other code emits a warning and yields `Void`.
"""
function dbf_field_type(fld::Char, dec::UInt8)
    if fld == 'C' || fld == 'D'
        return String
    elseif fld == 'N'
        # The decimal count decides between a floating-point and an integer column.
        return dec > 0 ? Float64 : Int
    elseif fld == 'F' || fld == 'O'
        return Float64
    elseif fld == 'I' || fld == '+'
        return Integer
    elseif fld == 'L'
        return Bool
    end

    warn("Unknown record type: $(fld)")
    return Void
end
| 52 | + |
"""
    read_dbf_field(io::IO)

Read one 32-byte field descriptor from `io` and return it as a `DBFFieldDescriptor`.

The 11-byte name is cut at its first NUL byte and stripped of surrounding whitespace.
The type code is taken from exactly one byte, and the Julia column type is chosen by
`dbf_field_type` from that code and the decimal count.
"""
function read_dbf_field(io::IO)
    raw_name = String(read!(io, Vector{UInt8}(11)))            # 0x00
    # The name is NUL-terminated: anything after the first NUL is padding (or leftover
    # garbage from the writer) and must not end up in the column name.
    field_name = String(strip(split(raw_name, '\0'; limit=2)[1]))
    # Read a single byte. `read(io, Char)` decodes UTF-8 and may swallow several bytes
    # when the type byte is >= 0x80, which would misalign the rest of the descriptor.
    field_type = Char(read(io, UInt8))                         # 0x0B
    read(io, Int32)                                            # skip 0x0C
    field_len = read(io, UInt8)                                # 0x10
    field_dec = read(io, UInt8)                                # 0x11
    read!(io, Vector{UInt8}(14))                               # reserved
    return DBFFieldDescriptor(field_name, dbf_field_type(field_type, field_dec),
                              field_len, field_dec)
end
| 62 | + |
"""
    read_dbf_header(io::IO)

Read the DBF header from `io` and return it as a `DBFHeader`.

The fixed 32-byte part is read first, followed by field descriptors until the 0x0D
terminator byte is found or `io` is exhausted. `io` must support `position` and `seek`,
because the terminator is detected by reading one byte ahead and rewinding.
"""
function read_dbf_header(io::IO)
    ver = read(io, UInt8)
    date = read!(io, Vector{UInt8}(3))                         # 0x01
    last_update = @sprintf("%4d%02d%02d", date[1]+1900, date[2], date[3])
    records = read(io, Int32)                                  # 0x04
    hsize = read(io, Int16)                                    # 0x08
    rsize = read(io, Int16)                                    # 0x0A
    read(io, Int16)                                            # reserved # 0x0C
    # Flag bytes are tested rather than converted: `Bool(x)` throws `InexactError`
    # for every value other than 0 and 1.
    incomplete = read(io, UInt8) != 0x00                       # 0x0E
    encrypted = read(io, UInt8) != 0x00                        # 0x0F
    read!(io, Vector{UInt8}(12))                               # reserved
    # Only the lowest bit is inspected here; some xBase dialects appear to use the
    # remaining bits of this byte for other table flags -- confirm against the spec.
    mdx = (read(io, UInt8) & 0x01) == 0x01                     # 0x1C
    langId = read(io, UInt8)                                   # 0x1D
    read!(io, Vector{UInt8}(2))                                # reserved # 0x1E
    fields = DBFFieldDescriptor[]

    while !eof(io)
        # Look for the terminator *before* reading a descriptor, so that a header
        # without any field descriptors is handled as well.
        p = position(io)
        if read(io, UInt8) == 0xD
            break
        end
        seek(io, p)
        push!(fields, read_dbf_field(io))
    end

    return DBFHeader(ver, last_update, records, hsize, rsize,
                     incomplete, encrypted, mdx, langId,
                     fields)
end
| 94 | + |
# Convert the raw bytes of one field into a value of the column type `typ`.
# Returns `null` when the field is blank or its type is not supported.
function _dbf_field_value(typ::Type, fld_data::Vector{UInt8})
    if typ == Bool
        isempty(fld_data) && return null
        logical = Char(fld_data[1])
        if logical in ('Y', 'y', 'T', 't')
            return true
        elseif logical in ('N', 'n', 'F', 'f')
            return false
        else
            return null
        end
    elseif typ == Int || typ == Float64
        # A numeric field consisting only of blanks holds no value; `parse` would throw.
        txt = strip(String(fld_data))
        return isempty(txt) ? null : parse(typ, txt)
    elseif typ == String
        return strip(String(fld_data))
    elseif typ == Void
        return null
    else
        # Still return a placeholder so the row keeps one value per column.
        warn("Type $(typ) is not supported")
        return null
    end
end

"""
    read_dbf_records!(io::IO, df::DataFrame, header::DBFHeader; deleted=false)

Read `header.records` records from `io` and append them as rows to `df`. Returns `df`.

Each record starts with a one-byte deletion marker (0x2A marks a deleted record)
followed by the fields in the order and widths given by `header.fields`. Records marked
as deleted are skipped unless `deleted` is `true`.

Blank numeric fields, unrecognised logical values and fields of unsupported type are
stored as `null`; the columns of `df` must therefore be able to hold `null`.
"""
function read_dbf_records!(io::IO, df::DataFrame, header::DBFHeader; deleted=false)
    # A counted loop also terminates for a (corrupt) negative record count.
    for rc in 1:header.records
        is_deleted = (read(io, UInt8) == 0x2A)
        r = Any[]
        for fld in header.fields
            fld_data = read!(io, Vector{UInt8}(fld.len))
            push!(r, _dbf_field_value(fld.typ, fld_data))
        end
        if !is_deleted || deleted
            push!(df, r)
        end
    end
    return df
end
| 133 | + |
"""
    read_dbf(io::IO; deleted=false)

Read a complete DBF table from `io` and return it as a `DataFrame` with one column per
field of the header. Records marked as deleted are included only when `deleted` is
`true`.

Every column has element type `Union{T, Null}`, where `T` is the type chosen for the
field, because the record reader stores `null` for values it cannot represent.
"""
function read_dbf(io::IO; deleted=false)
    header = read_dbf_header(io)
    # Typed comprehensions keep the element types well-defined even with zero fields.
    col_types = Type[Union{f.typ, Null} for f in header.fields]
    col_names = Symbol[Symbol(f.nam) for f in header.fields]
    df = DataFrame(col_types, col_names, 0)
    read_dbf_records!(io, df, header; deleted=deleted)
    return df
end
| 140 | + |
"""
    read_dbf(fnm::AbstractString; deleted=false)

Open the file `fnm`, read it as a DBF table and return the resulting `DataFrame`.
Records marked as deleted are included only when `deleted` is `true`.

The file is closed again even when reading fails.
"""
function read_dbf(fnm::AbstractString; deleted=false)
    # The do-block form of `open` closes the handle on both normal and error exit.
    return open(fnm) do io
        read_dbf(io; deleted=deleted)
    end
end
4 | 147 |
|
5 | 148 | end # module |
0 commit comments