@@ -12,6 +12,35 @@ struct FieldDescriptor
1212 ndec:: UInt8
1313end
1414
"""
    FieldDescriptor(name::Symbol, data::AbstractVector)

Create a `FieldDescriptor` from a column in a Tables.jl table.

The DBF type code is derived from the non-missing element type of `data` via
`dbf_type`. Field lengths are:

- `'D'`: 8
- column containing only `missing`: 1
- `'C'`: the widest value in the column, at least 1 and at most 254 (a warning is
  logged when values will be truncated)
- `'N'`: 20, with a decimal count of 1 for floating point element types
- `'L'`: 1
"""
function FieldDescriptor(name::Symbol, data::AbstractVector)
    T = Base.nonmissingtype(eltype(data))
    char = dbf_type(T)
    ndec = 0x00
    if char === 'D'
        len = 0x08
    elseif T === Union{}  # data is only missings
        len = 0x01
    elseif char === 'C'
        # Measure with `textwidth`: when writing, every non-ASCII character is replaced
        # by `textwidth` underscores, so this is the number of bytes a value occupies.
        # `init = 0` keeps an empty or all-missing column from throwing.
        width = mapreduce(x -> textwidth(string(x)), max, skipmissing(data); init = 0)
        if width > 254
            @warn "Due to DBF limitations, strings in field $name will be truncated to 254 characters."
            len = UInt8(254)
        else
            # never emit a zero-width field
            len = UInt8(max(width, 1))
        end
    elseif char === 'N'
        len = UInt8(20)
        ndec = T <: AbstractFloat ? 0x1 : 0x0
    elseif char === 'L'
        len = 0x1
    else
        error("This shouldn't be reachable. Unknown DBF type code: '$char'.")
    end
    return FieldDescriptor(name, T, char, len, ndec)
end
43+
1544" DBF header, which also holds all field definitions"
1645struct Header
1746 version:: UInt8
@@ -40,30 +69,103 @@ struct Row <: Tables.AbstractRow
4069 row:: Int
4170end
4271
43- " Convert DBF data type characters to Julia types"
44- function typemap (fld:: Char , ndec:: UInt8 )
45- # https://www.clicketyclick.dk/databases/xbase/format/data_types.html
46- rt = Nothing
47- if fld == ' C'
48- rt = String
49- elseif fld == ' D'
50- rt = Date
51- elseif fld == ' N'
52- if ndec > 0
53- rt = Float64
54- else
55- rt = Int
56- end
57- elseif fld == ' F' || fld == ' O'
58- rt = Float64
59- elseif fld == ' I' || fld == ' +'
60- rt = Int
61- elseif fld == ' L'
62- rt = Bool
63- else
64- throw (ArgumentError (" Unknown record type $fld " ))
#-----------------------------------------------------------------------# conversions: Julia-to-DBF
# Only the DBF type codes returned by `dbf_type` are used when saving data.
"""
    dbf_type(T::Type)

Get the DBF type code from the Julia type. Assumes `Base.nonmissingtype(T)` is the input.

Strings map to `'C'`, `Bool` to `'L'`, integers and floats to `'N'`, and `Date` to `'D'`.
`Union{}` (a column of only missings) maps to `'C'`. Any other type logs a warning and
maps to `'C'`.
"""
function dbf_type(::Type{T}) where {T}
    @warn "No DBF type associated with Julia type $T. Data will be saved as `string(x)`."
    return 'C'
end
dbf_type(::Type{Union{}}) = 'C'
dbf_type(::Type{<:AbstractString}) = 'C'
dbf_type(::Type{Date}) = 'D'
dbf_type(::Type{Bool}) = 'L'
dbf_type(::Type{<:Integer}) = 'N'
dbf_type(::Type{<:AbstractFloat}) = 'N'
85+
# Convert `val` to its DBF representation, dispatching on the field's DBF type code.
function dbf_value(field::FieldDescriptor, val)
    code = Val(field.dbf_type)
    return dbf_value(code, field.length, val)
end
87+
# String (or any other type that gets mapped to 'C')
# The result is always exactly `len` ASCII bytes: each non-ASCII character is replaced by
# `textwidth` underscores, then the text is truncated or space-padded to the field length.
function dbf_value(::Val{'C'}, len::UInt8, x)
    ascii = replace(string(x), !isascii => c -> '_'^textwidth(c))
    width = Int(len)
    nbytes = ncodeunits(ascii)
    # `ascii` holds only one-byte characters, so byte indexing is safe here
    nbytes > width && return ascii[1:width]
    return ascii * ' '^(width - nbytes)
end
dbf_value(::Val{'C'}, len::UInt8, ::Missing) = ' '^len
94+
# Bool: 'T' for true, 'F' for false, '?' for a missing value
dbf_value(::Val{'L'}, ::UInt8, ::Missing) = '?'
function dbf_value(::Val{'L'}, ::UInt8, x::Bool)
    return ifelse(x, 'T', 'F')
end
98+
# Integer & AbstractFloat
# Numbers are written as text, left-aligned and space-padded to exactly 20 characters.
# Values that cannot be represented in 20 characters are clamped (with a warning), and
# floats whose printed form is too long are rounded to fewer significant digits.
function dbf_value(::Val{'N'}, ::UInt8, x::Union{AbstractFloat, Integer})
    width = 20
    hi = 99999999999999999999
    # A leading '-' uses one of the 20 characters, so negative integers get one digit fewer.
    lo = x isa Integer ? -9999999999999999999 : -hi
    if x > hi || x < lo
        @warn "Due to DBF limitations, a number will be clamped to fit in 20 characters."
    end
    val = clamp(x, lo, hi)
    str = string(val)
    if val isa AbstractFloat && length(str) > width
        # drop significant digits until the printed form fits
        for digits in 17:-1:1
            str = string(round(val; sigdigits = digits))
            length(str) <= width && break
        end
    end
    # last resort, so a record is never longer than its declared size
    length(str) > width && (str = first(str, width))
    return rpad(str, width)
end
dbf_value(::Val{'N'}, ::UInt8, ::Missing) = ' '^20
106+
# Date: eight characters in `yyyymmdd` form, or eight spaces for a missing value
dbf_value(::Val{'D'}, ::UInt8, ::Missing) = repeat(' ', 8)
function dbf_value(::Val{'D'}, ::UInt8, x::Date)
    return Dates.format(x, dateformat"yyyymmdd")
end
110+
# Fallback: no conversion exists for this combination of DBF type code and value.
function dbf_value(::Val, ::UInt8, x)
    return error("This should be unreachable. No known conversion from Julia to DBF: $x.")
end
112+
#-----------------------------------------------------------------------# conversions: DBF-to-Julia
"""
    julia_type(::Val{code}, ndec::UInt8)

Get the Julia type from the DBF type code and the decimal count.

`'N'` fields are `Float64` when `ndec > 0` and `Int` otherwise. Unknown type codes log a
warning and are treated as `String`.
"""
julia_type(::Val{'C'}, ::UInt8) = String
julia_type(::Val{'D'}, ::UInt8) = Date
julia_type(::Val{'N'}, ndec::UInt8) = ndec > 0 ? Float64 : Int
julia_type(::Val{'F'}, ::UInt8) = Float64
julia_type(::Val{'O'}, ::UInt8) = Float64
julia_type(::Val{'I'}, ::UInt8) = Int32
julia_type(::Val{'+'}, ::UInt8) = Int64
julia_type(::Val{'L'}, ::UInt8) = Bool
function julia_type(::Val{T}, ::UInt8) where {T}
    @warn "Unknown DBF type code '$T'. Data will be loaded as `String`."
    return String
end
127+
128+
# Convert a raw DBF entry to a Julia value, dispatching on the field's Julia type and
# DBF type code.
function julia_value(o::FieldDescriptor, s::AbstractString)
    code = Val(o.dbf_type)
    return julia_value(o.type, code, s)
end
130+
# Text fields. Accepts any type code, not just 'C', because `julia_type` maps unknown
# DBF type codes to `String`; restricting this to `Val{'C'}` would make reading such a
# field fail with a `MethodError`.
# Surrounding whitespace is stripped; an entry that is empty afterwards is `missing`.
# The result is converted to `String` to avoid returning a `SubString`.
function julia_value(::Type{String}, ::Val, s::AbstractString)
    s2 = strip(s)
    return isempty(s2) ? missing : String(s2)
end
# Date fields are stored as `yyyymmdd`; an all-whitespace entry is `missing`.
function julia_value(::Type{Date}, ::Val{'D'}, s::AbstractString)
    if all(isspace, s)
        return missing
    end
    return Date(s, dateformat"yyyymmdd")
end
# Numbers stored as text: an entry that does not parse is returned as `missing`.
function julia_value(::Type{Int}, ::Val{'N'}, s::AbstractString)
    return something(tryparse(Int, s), missing)
end
function julia_value(::Type{Float64}, ::Union{Val{'N'},Val{'F'}}, s::AbstractString)
    return something(tryparse(Float64, s), missing)
end
# 'O', 'I', and '+' do not use string representations.
# The entry's bytes are reinterpreted as a single value of type `T`; if that fails for
# any reason, the entry is `missing`.
function _reinterpret_entry(::Type{T}, s::AbstractString) where {T}
    try
        return only(reinterpret(T, Vector{UInt8}(s)))
    catch
        return missing
    end
end
julia_value(::Type{Float64}, ::Val{'O'}, s::AbstractString) = _reinterpret_entry(Float64, s)
julia_value(::Type{Int32}, ::Val{'I'}, s::AbstractString) = _reinterpret_entry(Int32, s)
julia_value(::Type{Int64}, ::Val{'+'}, s::AbstractString) = _reinterpret_entry(Int64, s)
# Logical fields hold a single character: '?' is missing, one of "YyTt" is true and one
# of "NnFf" is false. Anything else is an error.
function julia_value(::Type{Bool}, ::Val{'L'}, s::AbstractString)
    flag = only(s)
    flag === '?' && return missing
    flag in "YyTt" && return true
    flag in "NnFf" && return false
    return error("Unknown logical entry for dbf type code 'L': '$flag'.")
end
68170
69171" Read a field descriptor from the stream, and create a FieldDescriptor struct"
@@ -75,7 +177,7 @@ function read_dbf_field(io::IO)
75177 field_len = read (io, UInt8)
76178 field_dec = read (io, UInt8)
77179 skip (io, 14 ) # reserved
78- jltype = typemap ( field_type, field_dec)
180+ jltype = julia_type ( Val ( field_type) , field_dec)
79181 return FieldDescriptor (field_name, jltype, field_type, field_len, field_dec)
80182end
81183
175277
# Map `nothing` (e.g. a failed `tryparse`) to `missing`; return anything else unchanged.
miss(x) = x === nothing ? missing : x
177279
178- " Concert a DBF entry string to a Julia value"
179- function dbf_value (:: Type{Bool} , str:: AbstractString )
180- char = first (str)
181- if char in " YyTt"
182- true
183- elseif char in " NnFf"
184- false
185- elseif char == ' ?'
186- missing
187- else
188- throw (ArgumentError (" Unknown logical entry: $(repr (char)) " ))
189- end
190- end
191-
192- dbf_value (:: Type{Date} , str:: AbstractString ) = all (isspace, str) ? missing : Date (str, dateformat " yyyymmdd" )
193-
194- dbf_value (T:: Union{Type{Int},Type{Float64}} , str:: AbstractString ) = miss (tryparse (T, str))
195- # String to avoid returning SubString{String}
196- function dbf_value (:: Type{String} , str:: AbstractString )
197- stripped = rstrip (str)
198- if isempty (stripped)
199- # return missing rather than ""
200- return missing
201- else
202- return String (stripped)
203- end
204- end
205- dbf_value (:: Type{Nothing} , :: AbstractString ) = missing
206-
# define get functions using getfield since we overload getproperty
"Access the header of a DBF Table"
function getheader(dbf::Table)
    return getfield(dbf, :header)
end
@@ -265,8 +338,7 @@ function Base.NamedTuple(row::Row)
265338 ncol = length (fields)
266339 rowidx = getrow (row)
267340 @inbounds record = @view str[:, rowidx]
268- @inbounds prs =
269- (fields[col]. name => dbf_value (fields[col]. type, record[col]) for col = 1 : ncol)
341+ @inbounds prs = (fields[col]. name => julia_value (fields[col], record[col]) for col = 1 : ncol)
270342 return (; prs... )
271343end
272344
@@ -310,17 +382,17 @@ function Tables.getcolumn(row::Row, name::Symbol)
310382 str = getstrings (dbf)
311383 colidx = get (header. fieldcolumns, name, nothing )
312384 colidx === nothing && throw (ArgumentError (" Column not present: $name " ))
313- type = @inbounds getfields (dbf)[colidx]. type
385+ field = @inbounds getfields (dbf)[colidx]
314386 rowidx = getrow (row)
315- return @inbounds dbf_value (type , str[colidx, rowidx])
387+ return @inbounds julia_value (field , str[colidx, rowidx])
316388end
317389
# Return the value in column `i` of `row`, converted from its stored string form to a
# Julia value by `julia_value`.
function Tables.getcolumn(row::Row, i::Int)
    table = gettable(row)
    strings = getstrings(table)
    # this lookup is bounds-checked, so an invalid `i` fails here
    descriptor = getfields(table)[i]
    record = getrow(row)
    return @inbounds julia_value(descriptor, strings[i, record])
end
325397
326398Tables. istable (:: Type{Table} ) = true
@@ -347,9 +419,9 @@ function Base.getproperty(dbf::Table, name::Symbol)
347419 col = get (header. fieldcolumns, name, nothing )
348420 col === nothing && throw (ArgumentError (" Column not present: $name " ))
349421 nrow = header. records
350- @inbounds type = getfields (dbf)[col]. type
422+ @inbounds field = getfields (dbf)[col]
351423 str = getstrings (dbf)
352- @inbounds colarr = [dbf_value (type , str[col, i]) for i = 1 : nrow]
424+ @inbounds colarr = [julia_value (field , str[col, i]) for i = 1 : nrow]
353425 return colarr
354426end
355427
@@ -363,7 +435,8 @@ write(path::AbstractString, tbl) = open(io -> write(io, tbl), touch(path), "w")
363435
364436function write (io:: IO , tbl)
365437 dct = Tables. dictcolumntable (tbl)
366- fields, records = get_field_descriptors (dct)
438+ fields = [FieldDescriptor (k, v) for (k,v) in pairs (getfield (dct, :values ))]
439+ records = UInt32 (length (first (dct)))
367440 fieldcolumns = Dict {Symbol,Int} (f. name => i for (i,f) in enumerate (fields))
368441 hsize = UInt16 (length (fields) * 32 + 32 )
369442 rsize = UInt16 (sum (x -> x. length, fields)) + 1
@@ -385,80 +458,15 @@ function write(io::IO, tbl)
385458 return out
386459end
387460
388- function get_field_descriptors (dct)
389- fields = FieldDescriptor[]
390- sch = Tables. schema (dct)
391- for (name, type) in zip (sch. names, sch. types)
392- ndec = 0x0
393- len = 0x0
394- dbf_type = ' C'
395- T = Base. nonmissingtype (type)
396- if T isa Date
397- dbf_type = ' D'
398- len = 0x08
399- elseif T <: AbstractString
400- # TODO : support memos. Currently strings > 254 bytes will error
401- len = UInt8 (maximum (x -> length (string (x)), dct[name]))
402- if len > 254
403- @warn " Strings will be truncated to 254 characters."
404- len = 254
405- end
406- dbf_type = ' C'
407- elseif type === Float64
408- dbf_type = ' O'
409- len = 0x08
410- ndec = 0x01
411- elseif T <: AbstractFloat
412- dbf_type = ' F'
413- len = UInt8 (20 )
414- ndec = 0x01
415- elseif T <: Bool
416- dbf_type = ' L'
417- len = 0x1
418- elseif T <: Date
419- dbf_type = ' D'
420- len = 0x8
421- elseif T <: Integer
422- dbf_type = ' N'
423- len = UInt8 (maximum (x -> length (string (x)), dct[name]))
424- else
425- @warn " Field $name has no known matching DBF data type for $T . Data will be stored as the DBF character data type ('C')."
426- len = UInt8 (maximum (x -> length (string (x)), dct[name]))
427- end
428- push! (fields, FieldDescriptor (name, type, dbf_type, len, ndec))
429- end
430- fields, UInt32 (length (first (dct)))
431- end
432-
# Write one record to `io`: a deletion marker followed by the DBF representation of each
# value in `row`. Returns the sum of the values returned by the `Base.write` calls.
function write_record(io::IO, fd::Vector{FieldDescriptor}, row)
    # deletion marker ' '=valid, '*'=deleted
    nbytes = 0 + Base.write(io, ' ')
    for (descriptor, value) in zip(fd, row)
        nbytes += Base.write(io, dbf_value(descriptor, value))
    end
    return nbytes
end
441469
442- function _val (field:: FieldDescriptor , val):: Union{String, Float64}
443- char = field. dbf_type
444- if char == ' L'
445- ismissing (val) && return " ?"
446- val ? " T" : " F"
447- elseif ismissing (val)
448- ' ' ^ field. length
449- elseif char == ' C'
450- replace (rpad (val, field. length), ! isascii => ' ' )
451- elseif char == ' D'
452- Dates. format (val, " yyyymmdd" )
453- elseif char == ' O'
454- val # <-- the Float64 return value
455- elseif char == ' F'
456- rpad (val, 20 )[1 : 20 ]
457- elseif char == ' N'
458- rpad (val, field. length)
459- else
460- error (" Unknown DBF datatype $char ." )
461- end
462- end
470+
463471
464472end # module
0 commit comments