11module DBFTables
22
3- import Printf, Tables, WeakRefStrings
3+ import Tables, WeakRefStrings
4+ using Dates
45
"""
    FieldDescriptor

Field/column descriptor, part of the Header.

# Fields
- `name`: column name.
- `type`: Julia type the column values are converted to.
- `dbf_type`: one-character DBF type code (for example `'C'`, `'D'`, `'N'`).
- `length`: width of the field inside a record.
- `ndec`: decimal-count byte of the descriptor; a nonzero value makes an `'N'` field
  map to `Float64`.
"""
struct FieldDescriptor
    name::Symbol
    type::Type
    dbf_type::Char
    length::UInt8
    ndec::UInt8
end
1214
1315" DBF header, which also holds all field definitions"
1416struct Header
1517 version:: UInt8
16- last_update:: String
18+ last_update:: Date
1719 records:: UInt32
1820 hsize:: UInt16
1921 rsize:: UInt16
@@ -45,7 +47,7 @@ function typemap(fld::Char, ndec::UInt8)
4547 if fld == ' C'
4648 rt = String
4749 elseif fld == ' D'
48- rt = String
50+ rt = Date
4951 elseif fld == ' N'
5052 if ndec > 0
5153 rt = Float64
@@ -74,16 +76,29 @@ function read_dbf_field(io::IO)
7476 field_dec = read (io, UInt8)
7577 skip (io, 14 ) # reserved
7678 jltype = typemap (field_type, field_dec)
77- return FieldDescriptor (field_name, jltype, field_len, field_dec)
79+ return FieldDescriptor (field_name, jltype, field_type, field_len, field_dec)
80+ end
81+
"Return a vector of `n` zero bytes; used to fill the reserved regions when writing."
reserved(n) = zeros(UInt8, n)
83+
"""
    Base.write(io::IO, fd::FieldDescriptor)

Write the 32-byte DBF field descriptor for `fd` to `io` and return the number of bytes
written.

Layout: bytes 0-10 field name (NUL padded), byte 11 type code, bytes 12-15 reserved,
byte 16 field length, byte 17 decimal count, bytes 18-31 reserved.

The name slot is exactly 11 bytes wide. Names longer than 10 bytes are truncated (with a
warning) so that the descriptor never grows beyond 32 bytes and the name always keeps a
terminating NUL.
"""
function Base.write(io::IO, fd::FieldDescriptor)
    name = String(fd.name)
    if ncodeunits(name) > 10
        @warn "Field name $name is longer than 10 bytes and will be truncated."
        # Drop whole characters so a multi-byte character is never split in half.
        while ncodeunits(name) > 10
            name = chop(name)
        end
    end
    # Spaces inside the name are written as NUL bytes, then the slot is NUL padded.
    namefield = replace(name, ' ' => '\0') * '\0'^(11 - ncodeunits(name))

    out = 0
    out += Base.write(io, namefield)     # 0-10
    out += Base.write(io, fd.dbf_type)   # 11
    out += Base.write(io, reserved(4))   # 12-15
    out += Base.write(io, fd.length)     # 16
    out += Base.write(io, fd.ndec)       # 17
    out += Base.write(io, reserved(14))  # 18-31
    return out
end
7994
8095" Read a DBF header from a stream"
8196function Header (io:: IO )
8297 ver = read (io, UInt8)
83- date1 = read (io, UInt8)
84- date2 = read (io, UInt8)
85- date3 = read (io, UInt8)
86- last_update = Printf . @sprintf ( " %4d%02d%02d " , date1 + 1900 , date2, date3 )
98+ yy = read (io, UInt8)
99+ mm = read (io, UInt8)
100+ dd = read (io, UInt8)
101+ last_update = Date (yy + 1900 , mm, dd )
87102 records = read (io, UInt32)
88103 hsize = read (io, UInt16)
89104 rsize = read (io, UInt16)
@@ -130,6 +145,34 @@ function Header(io::IO)
130145 )
131146end
132147
148+
149+
"""
    Base.write(io::IO, h::Header)

Write the DBF header `h` to `io` and return the number of bytes written.

The fixed part is 32 bytes: version (0), last update as year-1900/month/day (1-3),
record count (4-7), header size (8-9), record size (10-11), reserved (12-13),
incomplete flag (14), encrypted flag (15), reserved (16-27), mdx flag (28),
language id (29), reserved (30-31). It is followed by one field descriptor per entry of
`h.fields` and a single `0x0D` terminator byte.

The year is stored in one byte as an offset from 1900, so the `UInt8` conversion throws
for years outside 1900-2155.

ref: https://www.clicketyclick.dk/databases/xbase/format/dbf.html
"""
function Base.write(io::IO, h::Header)
    out = 0
    out += Base.write(io, h.version)       # 0
    yy = UInt8(year(h.last_update) - 1900)
    mm = UInt8(month(h.last_update))
    dd = UInt8(day(h.last_update))
    out += Base.write(io, yy, mm, dd)      # 1-3
    out += Base.write(io, h.records)       # 4-7
    out += Base.write(io, h.hsize)         # 8-9
    out += Base.write(io, h.rsize)         # 10-11
    out += Base.write(io, reserved(2))     # 12-13 reserved
    out += Base.write(io, h.incomplete)    # 14
    out += Base.write(io, h.encrypted)     # 15
    out += Base.write(io, reserved(12))    # 16-19, 20-27 reserved
    out += Base.write(io, h.mdx)           # 28
    out += Base.write(io, h.lang_id)       # 29
    out += Base.write(io, reserved(2))     # 30-31 reserved
    for field in h.fields
        out += Base.write(io, field)
    end
    out += Base.write(io, 0x0D)            # field descriptor array terminator
    return out
end
174+
175+
"Map `nothing` to `missing`; any other value is returned unchanged."
miss(x) = x === nothing ? missing : x
134177
"Convert a DBF entry string to a Julia value"
@@ -142,10 +185,12 @@ function dbf_value(::Type{Bool}, str::AbstractString)
142185 elseif char == ' ?'
143186 missing
144187 else
145- throw (ArgumentError (" Unknown logical $ char" ))
188+ throw (ArgumentError (" Unknown logical entry: $( repr ( char)) " ))
146189 end
147190end
148191
"Parse a `yyyymmdd` DBF date entry; an empty or all-whitespace entry yields `missing`."
function dbf_value(::Type{Date}, str::AbstractString)
    if all(isspace, str)
        return missing
    end
    return Date(str, dateformat"yyyymmdd")
end
193+
"Parse a numeric DBF entry as `T`; an entry that cannot be parsed yields `missing`."
function dbf_value(T::Union{Type{Int},Type{Float64}}, str::AbstractString)
    parsed = tryparse(T, str)
    return miss(parsed)
end
150195# String to avoid returning SubString{String}
151196function dbf_value (:: Type{String} , str:: AbstractString )
@@ -196,7 +241,7 @@ function Table(path::AbstractString)
196241 end
197242end
198243
199- " Collect all the offsets and lenghts from the header to create a StringArray"
244+ " Collect all the offsets and lengths from the header to create a StringArray"
200245function _create_stringarray (header:: Header , data:: AbstractVector )
201246 # first make the lengths and offsets for a single record
202247 lengths_record = UInt32 .(getfield .(header. fields, :length ))
@@ -308,4 +353,112 @@ function Base.getproperty(dbf::Table, name::Symbol)
308353 return colarr
309354end
310355
356+
"""
    Base.write(io::IO, dbf::Table)

Write the header of `dbf`, its raw record bytes and a trailing `0x1a` byte to `io`.
Returns the number of bytes written.
"""
function Base.write(io::IO, dbf::Table)
    # getfield is used because getproperty on a Table is overloaded for column access
    header = getfield(dbf, :header)
    data = getfield(dbf, :data)
    return Base.write(io, header, data, 0x1a)
end

"""
    Base.write(path::AbstractString, dbf::Table)

Write `dbf` to the file at `path`, opened in `"w"` mode.
"""
function Base.write(path::AbstractString, dbf::Table)
    return open(touch(path), "w") do io
        Base.write(io, dbf)
    end
end
359+
360+
"""
    write(path::AbstractString, tbl)

Generic .dbf writer for the Tables.jl interface. Opens the file at `path` in `"w"` mode
and writes `tbl` to it.
"""
function write(path::AbstractString, tbl)
    return open(touch(path), "w") do io
        write(io, tbl)
    end
end
363+
"""
    write(io::IO, tbl)

Write the Tables.jl-compatible table `tbl` to `io` as a DBF file and return the number
of bytes written.

The output consists of the header (version `0x03`, today's date as last update), one
fixed-width record per row and a final `0x1a` end-of-file marker.
"""
function write(io::IO, tbl)
    dct = Tables.dictcolumntable(tbl)
    fields, records = get_field_descriptors(dct)
    fieldcolumns = Dict{Symbol,Int}(f.name => i for (i, f) in enumerate(fields))

    # Header length as stored in the file: 32 fixed bytes, 32 bytes per field descriptor
    # and the 1-byte 0x0D terminator that closes the descriptor array.
    hsize = UInt16(32 + 32 * length(fields) + 1)
    # Record length: 1 deletion-marker byte plus the width of every field.
    rsize = UInt16(1 + sum(f -> Int(f.length), fields))

    version = 0x03
    last_update = today()
    incomplete = false
    encrypted = false
    mdx = false
    lang_id = 0x00

    h = Header(
        version,
        last_update,
        records,
        hsize,
        rsize,
        incomplete,
        encrypted,
        mdx,
        lang_id,
        fields,
        fieldcolumns,
    )
    out = Base.write(io, h)

    for row in Tables.rows(dct)
        out += write_record(io, fields, row)
    end
    out += Base.write(io, 0x1a)  # EOF marker
    return out
end
387+
"Length in characters of the longest non-missing entry of `column` printed with `string`."
function _max_text_width(column)
    width = 0
    for x in column
        # missing entries are written as blanks, so they do not need any width
        ismissing(x) && continue
        width = max(width, length(string(x)))
    end
    return width
end

"Width of a character ('C') field for `column`: at least 1, at most 254 (with a warning)."
function _char_field_width(column)
    width = _max_text_width(column)
    # TODO: support memos. Longer strings are cut to the field width when written.
    if width > 254
        @warn "Strings will be truncated to 254 characters."
        width = 254
    end
    return UInt8(max(width, 1))
end

"Cut `str` to `width` characters and pad it with spaces on the right to exactly `width`."
function _fixed_width(str::AbstractString, width::Integer)
    clipped = first(str, width)
    return clipped * ' '^(width - length(clipped))
end

"""
    get_field_descriptors(dct)

Build the DBF field descriptors for the columns of the column table `dct`.

Returns a tuple `(fields, nrows)` where `fields` is a `Vector{FieldDescriptor}` in schema
order and `nrows` is the length of the first column as a `UInt32`.

Type mapping: strings -> 'C', `Float64` without missing -> 'O', other floats -> 'F',
`Bool` -> 'L', `Date` -> 'D', other integers -> 'N'. Any other type is stored as 'C'
with a warning.
"""
function get_field_descriptors(dct)
    fields = FieldDescriptor[]
    sch = Tables.schema(dct)
    for (name, type) in zip(sch.names, sch.types)
        ndec = 0x00
        dbf_type = 'C'
        T = Base.nonmissingtype(type)
        if T <: AbstractString
            len = _char_field_width(dct[name])
        elseif type === Float64
            # Checked against `type`, not `T`, so a Float64 column that allows missing
            # falls through to 'F' below.
            dbf_type = 'O'
            len = 0x08
            ndec = 0x01
        elseif T <: AbstractFloat
            dbf_type = 'F'
            len = UInt8(20)
            ndec = 0x01
        elseif T <: Bool
            # must come before the Integer branch because Bool <: Integer
            dbf_type = 'L'
            len = 0x01
        elseif T <: Date
            dbf_type = 'D'
            len = 0x08
        elseif T <: Integer
            dbf_type = 'N'
            len = UInt8(max(_max_text_width(dct[name]), 1))
        else
            @warn "Field $name has no known matching DBF data type for $T. Data will be stored as the DBF character data type ('C')."
            len = _char_field_width(dct[name])
        end
        push!(fields, FieldDescriptor(name, type, dbf_type, len, ndec))
    end
    return fields, UInt32(length(first(dct)))
end

"""
    write_record(io::IO, fd::Vector{FieldDescriptor}, row)

Write one record to `io`: a space as deletion marker (`' '` = valid, `'*'` = deleted)
followed by the encoded value of every field. Returns the number of bytes written.
"""
function write_record(io::IO, fd::Vector{FieldDescriptor}, row)
    out = Base.write(io, ' ')
    for (field, val) in zip(fd, row)
        out += Base.write(io, _val(field, val))
    end
    return out
end

"""
    _val(field::FieldDescriptor, val)

Encode `val` for writing according to `field.dbf_type`.

Returns a `String` of the field's width for text-encoded types, or the `Float64` itself
for the binary 'O' type. Missing values become `"?"` for logical fields and blanks for
every other type. Throws an error for an unknown type code.
"""
function _val(field::FieldDescriptor, val)::Union{String, Float64}
    char = field.dbf_type
    width = Int(field.length)
    if char == 'L'
        ismissing(val) && return "?"
        return val ? "T" : "F"
    elseif ismissing(val)
        return ' '^width
    elseif char == 'C'
        # Non-ASCII characters become spaces so one character is always one byte; the
        # value is then cut or padded to the field width so the record size stays fixed.
        return _fixed_width(replace(string(val), !isascii => ' '), width)
    elseif char == 'D'
        return Dates.format(val, "yyyymmdd")
    elseif char == 'O'
        return val  # <-- the Float64 return value
    elseif char == 'F'
        # NOTE(review): a float whose printed form is longer than the field is cut off,
        # which can drop part of an exponent — confirm this is acceptable.
        return _fixed_width(string(val), width)
    elseif char == 'N'
        return _fixed_width(string(val), width)
    else
        error("Unknown DBF datatype $char.")
    end
end
463+
311464end # module
0 commit comments