@@ -2,7 +2,7 @@ module Sqlite
2
2
3
3
using DataFrames
4
4
5
- export sqlitedb
5
+ export sqlitedb, readdlmsql
6
6
7
7
include (" Sqlite_consts.jl" )
8
8
include (" Sqlite_api.jl" )
@@ -33,6 +33,9 @@ typealias TableInput Union(DataFrame,String)
33
33
const null_resultset = DataFrame (0 )
34
34
const null_SqliteDB = SqliteDB (" " ,C_NULL ,null_resultset)
35
35
sqlitedb = null_SqliteDB # Create default connection = null
36
+ const INTrx = r" ^\d +$"
37
+ const STRINGrx = r" [^eE0-9\.\-\+ ]" i
38
+ const FLOATrx = r" ^[+-]?([0-9]+(\. [0-9]*)?|\. [0-9]+)([eE][+-]?[0-9]+)?$"
36
39
37
40
# Core Functions
38
41
function connect (file:: String )
@@ -92,7 +95,7 @@ function query(q::String,conn::SqliteDB=sqlitedb)
92
95
# retrieve resultset
93
96
while true
94
97
for i = 1 : ncols
95
- t = sqlite3_column_type (stmt,i- 1 )
98
+ t = Sqlite . sqlite3_column_type (stmt,i- 1 )
96
99
if t == SQLITE3_TEXT
97
100
r = bytestring ( sqlite3_column_text (stmt,i- 1 ) )
98
101
elseif t == SQLITE_FLOAT
@@ -129,15 +132,15 @@ function query(q::String,conn::SqliteDB=sqlitedb)
129
132
sqlite3_finalize (stmt)
130
133
return (conn. resultset = DataFrame (resultset,Index (colnames)))
131
134
end
132
- function createtable (input:: TableInput ,conn:: SqliteDB = sqlitedb;name:: String = " " )
135
+ function createtable (input:: TableInput ,conn:: SqliteDB = sqlitedb;name:: String = " " ,delim :: Char = ' \0 ' ,header :: Bool = true ,types :: Array{DataType,1} = DataType[],infer :: Bool = true )
133
136
conn == null_SqliteDB && error (" [sqlite]: A valid SqliteDB was not specified (and no valid default SqliteDB exists)" )
134
137
# these 2 calls are for performance
135
138
internal_query (conn," PRAGMA synchronous = OFF" )
136
139
137
140
if typeof (input) == DataFrame
138
141
r = df2table (input,conn,name)
139
142
else
140
- r = 0 # dlm2table(input,conn,name)
143
+ r = dlm2table (input,conn,name,delim,header,types,infer )
141
144
end
142
145
internal_query (conn," PRAGMA synchronous = ON" )
143
146
return r
@@ -161,7 +164,6 @@ function df2table(df::DataFrame,conn::SqliteDB,name::String)
161
164
# prepare insert table with parameters for column values
162
165
params = chop (repeat (" ?," ,ncols))
163
166
stmt, r = internal_query (conn," insert into $dfname values ($params )" ,false ,false )
164
- sqlite3_reset (stmt)
165
167
# bind, step, reset loop for inserting values
166
168
for row = 1 : nrow (df)
167
169
for col = 1 : ncols
@@ -191,13 +193,151 @@ function droptable(table::String,conn::SqliteDB=sqlitedb)
191
193
return
192
194
end
193
195
# read raw file direct to sqlite table
194
- # function csv2table()
195
-
196
- # end
197
- # read raw file to sqlite table (call csv2table), then run sql statement on table to return df (call to query)
198
- # function readcsvsql()
199
-
200
- # end
196
# Load the delimited text file `file` into a newly created table on `conn`.
#
# Arguments:
#   file   - path of the delimited file to read
#   conn   - SqliteDB connection the table is created on
#   name   - table name; when "" it is taken from `file` (the first run of
#            word characters that is followed by a '.')
#   delim  - field delimiter; when '\0' it is chosen from the file name
#            (csv => ',', tsv => '\t', wsv => ' ')
#   header - true when the first line of the file holds the column names;
#            otherwise columns are named x1, x2, ...
#   types  - explicit column types; only consulted when `infer` is false
#   infer  - when true, scan the data to pick INT/REAL/TEXT for each column
#
# Every value is bound as text; the declared column type is left to SQLite.
# Raises an error when the table name or the delimiter cannot be determined.
# Returns nothing.
function dlm2table(file::String,conn::SqliteDB,name::String,delim::Char,header::Bool,types::Array{DataType,1},infer::Bool)
    # determine tablename and delimiter
    tablename = name
    if tablename == ""
        m = match(r"\w+(?=\.)",file)
        m === nothing && error("Unable to determine table name from $file")
        tablename = m.match
    end
    delimiter = delim
    if delimiter == '\0'
        delimiter = ismatch(r"csv$", file) ? ',' :
                    ismatch(r"tsv$", file) ? '\t' :
                    ismatch(r"wsv$", file) ? ' ' :
                    error("Unable to determine separator used in $file")
    end
    f = open(file)
    try
        # get column names/types: colnames, ncols
        # The first line is split with the same quote-aware splitter as the
        # data rows so that the column count agrees with them.
        firstrow = split_quoted(chomp(readline(f)),delimiter)
        ncols = length(firstrow)
        if header
            colnames = firstrow
        else
            colnames = String["x$i" for i = 1:ncols]
            seekstart(f)
        end
        if infer
            # check[i] becomes true once column i has been given a type
            check = falses(ncols)
            for line in eachline(f)
                row = split_quoted(chomp(line),delimiter)
                for i = 1:ncols
                    if !check[i]
                        if row[i] == "" # null/missing value, decide on a later row
                            continue
                        elseif ismatch(INTrx,row[i]) # match a plain integer first
                            colnames[i] *= " INT"; check[i] = true
                        elseif ismatch(STRINGrx,row[i]) # then check if it's stringy
                            colnames[i] *= " TEXT"; check[i] = true
                        elseif ismatch(FLOATrx,row[i]) # not integer or string, so try float
                            colnames[i] *= " REAL"; check[i] = true
                        else # if it's still not a float, just make it a string
                            colnames[i] *= " TEXT"; check[i] = true
                        end
                    end
                end
                sum(check) == ncols && break
            end
            # columns that never showed a non-empty value fall back to TEXT
            for i = 1:ncols
                if !check[i]
                    colnames[i] *= " TEXT"
                end
            end
            # rewind to the first data row for the insert pass
            seekstart(f)
            header && readline(f)
        elseif length(types) > 0
            length(types) < ncols && error("[sqlite]: $(length(types)) column types were given for $ncols columns")
            for i = 1:ncols
                colnames[i] *= types[i] <: Integer ? " INT" : types[i] <: FloatingPoint ? " REAL" : " TEXT"
            end
        end
        coldefs = join(colnames,',')
        internal_query(conn,"create table $tablename ($coldefs)")
        internal_query(conn,"BEGIN TRANSACTION")
        # prepare insert table with parameters for column values
        params = chop(repeat("?,",ncols))
        stmt, r = internal_query(conn,"insert into $tablename values ($params)",false,false)
        # bind, step, reset loop for inserting values
        for line in eachline(f)
            row = Sqlite.split_quoted(chomp(line),delimiter)
            for col = 1:ncols
                d = row[col]
                # the length argument is a byte count, not a character count
                Sqlite.sqlite3_bind_text(stmt,col,d,sizeof(d),C_NULL)
            end
            Sqlite.sqlite3_step(stmt)
            Sqlite.sqlite3_reset(stmt)
        end
        sqlite3_finalize(stmt)
        internal_query(conn,"COMMIT")
    finally
        # release the file handle even when a query or a malformed row throws
        close(f)
    end
    return
end
279
+ # read raw file to sqlite table (call dlm2table), then run sql statement on table to return df (call to query)
280
# Load the delimited file `input` into a table via `createtable`, then run
# `sql` on the same connection and return what `query` returns.
#
# When `conn` is the null connection, a database is opened on a fresh
# temporary file (from `tempname()`) and used for this call.
#
# Keywords:
#   name   - name of the table to create (default "file")
#   sql    - statement to run after loading; defaults to selecting everything
#            from `name`, so it follows a caller-supplied table name.
#            Pass `sql` explicitly when `name` is "".
#   delim, header, types, infer - handed to `createtable` unchanged
function readdlmsql(input::String,conn::SqliteDB=sqlitedb;name::String="file",sql::String="select * from $name",delim::Char='\0',header::Bool=true,types::Array{DataType,1}=DataType[],infer::Bool=true)
    if conn == null_SqliteDB
        # no usable connection: open a database backed by a temporary file
        handle = Array(Ptr{Void},1)
        file = tempname()
        Sqlite.sqlite3_open(file,handle)
        conn = Sqlite.SqliteDB(file,handle[1],Sqlite.null_resultset)
    end
    createtable(input,conn;name=name,delim=delim,header=header,types=types,infer=infer)
    return query(sql,conn)
end
290
# Return the index of the first occurrence of `c` in `s`, searching from
# index `i`, that does not lie between a pair of double quotes.
# Returns 0 when there is no such occurrence, including when a quote that is
# opened is never closed. Throws a BoundsError when `i` is less than 1.
function search_quoted(s::String, c::Char, i::Integer)
    i < 1 && throw(BoundsError())
    i = nextind(s,i-1)
    while !done(s,i)
        d, j = next(s,i)
        if d == '"'
            # skip ahead to the matching closing quote
            while true
                # unterminated quote: the rest of the string is quoted
                done(s,j) && return 0
                i = j
                d, j = next(s,i)
                d == '"' && break
            end
        end
        if d == c
            return i
        end
        i = j
    end
    return 0
end
# Search from the beginning of `s`.
search_quoted(s::String, c::Char) = search_quoted(s,c,start(s))
316
# Split `str` at every occurrence of `splitter` that `search_quoted` reports,
# i.e. delimiters inside double quotes do not split. Quote characters are kept
# in the returned pieces.
#
#   limit      - stop splitting once `limit - 1` pieces have been collected;
#                the remainder of the string becomes the last piece
#   keep_empty - when false, empty pieces are dropped
#
# Returns the pieces as a String array.
function split_quoted(str::String, splitter, limit::Integer, keep_empty::Bool)
    pieces = String[]
    pos = start(str)      # start of the piece currently being collected
    stop = endof(str)
    hit = search_quoted(str,splitter,pos)
    delim_at, resume = first(hit), last(hit)+1
    while 0 < delim_at <= stop && length(pieces) != limit-1
        if pos < resume
            if keep_empty || pos < delim_at
                push!(pieces, str[pos:delim_at-1])
            end
            pos = resume
        end
        if resume <= delim_at
            resume = nextind(str,delim_at)
        end
        hit = search_quoted(str,splitter,resume)
        delim_at, resume = first(hit), last(hit)+1
    end
    # whatever follows the last delimiter is the final piece
    if keep_empty || !done(str,pos)
        push!(pieces, str[pos:end])
    end
    return pieces
end
# Convenience methods: no limit is expressed as 0, empty pieces are kept by default.
split_quoted(s::String, spl, n::Integer) = split_quoted(s, spl, n, true)
split_quoted(s::String, spl, keep::Bool) = split_quoted(s, spl, 0, keep)
split_quoted(s::String, spl) = split_quoted(s, spl, 0, true)
201
341
end # sqlite module
202
342
203
343
function sqldf (q:: String )
0 commit comments