-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathdbf_to_sql.py
More file actions
155 lines (123 loc) · 4.98 KB
/
dbf_to_sql.py
File metadata and controls
155 lines (123 loc) · 4.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
"""
:mod: `dbf_to_sql` - Converts FoxPro Tables to SQL
============================
.. module:: dbf_to_sql
:synopsis: Processes FoxPro Tables into SQL Tables.
:original_date: 15 Jul 2013
.. moduleauthor:: James Wang <james@j-wang.net>
This module essentially glues together dbfpy and sqlalchemy to convert FoxPro
tables into SQL tables (specifically, sqlite -- although this should be easy to
adapt and might work out of the box for dialects that sqlalchemy supports, it
has not been tested for those cases).
Requires dbf and sqlalchemy (both of which are available through pip install)
"""
import dbf
import sqlalchemy
import os
import datetime
class Converter(object):
"""
Converts a FoxPro database (directory of .dbf files) to an sqlite3
database stored on disk (will overwrite if already exists).
Theoretically can work for any sql dialect that sqlalchemy supports, but in
reality has just been tested for sqlite (and is expected to break in subtle
to not-so-subtle ways for other dialects). Use for other dialects at your
own risk.
Example:
::
fp2sql = Converter('sqldata.db')
fp2sql.convert_dbfs('foxpro_app/data/')
Alternatively:
::
fp2.sql.convert_dbf('clients.dbf')
"""
sa_types = {'M': sqlalchemy.String,
'N': sqlalchemy.Integer,
'T': sqlalchemy.DateTime,
'L': sqlalchemy.Boolean}
def __init__(self, sqldb, sql='sqlite'):
self.engine = sqlalchemy.create_engine('{0}:///{1}'.format(sql, sqldb),
echo=True)
self.meta = sqlalchemy.MetaData()
def convert_dbf(self, foxpro_source):
"""
Reads .dbf file and converts to specified sql database, preserving the
schema of the foxpro database (as best it can).
"""
conn = self.engine.connect()
dbf_table = dbf.Table(foxpro_source)
schema = dbf_table.field_names
# translate foxpro db schema to sqlite
cols = []
col_types = {}
for field in schema:
col_type = self.convert_type(dbf_table.field_info(field))
cols.append(sqlalchemy.Column(field, col_type))
col_types[field] = col_type
# split out dir, take filename, truncate .dbf
table_name = foxpro_source.split('/')[-1][:-4]
sql_table = sqlalchemy.Table(table_name, self.meta, *cols)
self.meta.create_all(self.engine)
dbf_table.open()
# deposit records into sqlite database
for record in dbf_table:
rec_data = {col: self.fix_record(record[col], col_types[col])
for col in schema}
conn.execute(sql_table.insert().values(**rec_data))
dbf_table.close()
def convert_dbfs(self, foxpro_dir):
"""
Reads FoxPro database (given as a directory containing .dbf files).
"""
dir_contents = [f.upper() for f in os.listdir(foxpro_dir)]
dbf_files = filter(lambda x: x.find('.DBF') != -1, dir_contents)
for dfile in dbf_files:
self.convert_dbf(foxpro_dir + dfile)
@classmethod
def convert_type(cls, typ_tuple):
"""
Takes a field_info tuple (called from dbf.Table.field_info) and
converts dbfpy type information into sqlalchemy type information.
"""
typ = typ_tuple[0]
length = typ_tuple[1]
length2 = typ_tuple[2]
if typ == 'N' and length2 != 0:
return sqlalchemy.Float
elif typ == 'C':
return sqlalchemy.String(length)
else:
return cls.sa_types[typ]
@classmethod
def fix_record(self, string, col_type):
"""
Takes record data as a read_string from dbf and an sqlalchemy datatype
(e.g. sqlalchemy.Integer). Performs simple typecasting depending on
type, and tries to handle bytestrings.
Needless to say, this won't save you from corrupted/badly mangled data.
"""
if col_type == sqlalchemy.Integer:
return int(string)
elif col_type == sqlalchemy.Boolean:
return float(string)
elif col_type == sqlalchemy.DateTime:
if type(string) == datetime.datetime:
return string
else:
raise TypeError("DateTime is in wrong format.")
else:
# Because sqlalchemy otherwise complains about bytestrings...
# A bit dangerous, but handles case of bytestrings being used in
# domain specific applications
try:
return unicode(string)
except UnicodeDecodeError: # bytestring (... usually)
bytestr = str(string).encode('string_escape')
return unicode(bytestr)
def main():
if 'test__.db' in os.listdir('.'):
os.remove('test__.db')
fp2sql = Converter('test__.db') # name-mangled for (hoped-for) safety
fp2sql.convert_dbfs('test_data/')
if __name__ == '__main__':
main()