Skip to content

Commit 64920a7

Browse files
committed
Merge pull request #96 from amellnik/esc_str
Add a new method for removeduplicates!
2 parents f211e7f + a40f167 commit 64920a7

File tree

2 files changed

+37
-1
lines changed

2 files changed

+37
-1
lines changed

src/SQLite.jl

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,15 +273,30 @@ function createindex!{S<:AbstractString}(db::DB,table::AbstractString,index::Abs
273273
return
274274
end
275275

276-
"removes duplicate rows from `table` based on the values in `cols` which may be a single column or comma-delimited list of columns"
276+
"""
    removeduplicates!(db, table::AbstractString, cols::AbstractArray{T})

Remove duplicate rows from `table`. Rows are grouped by the values of the columns
named in `cols` (an array of column names); within each group only the row with the
largest `_ROWID_` is kept. `ANALYZE` is run on the table afterwards.

Throws an `ArgumentError` when `cols` is empty, since an empty `GROUP BY` list would
otherwise produce a malformed SQL statement. Returns `nothing`.
"""
function removeduplicates!{T <: AbstractString}(db,table::AbstractString,cols::AbstractArray{T})
    isempty(cols) && throw(ArgumentError("cols must contain at least one column name"))
    # Escape every column name individually, then join; no trailing comma to strip.
    colsstr = join(map(esc_id, cols), ",")
    tbl = esc_id(table)
    transaction(db) do
        execute!(db,"DELETE FROM $(tbl) WHERE _ROWID_ NOT IN (SELECT max(_ROWID_) from $(tbl) GROUP BY $(colsstr));")
    end
    # Use the escaped table name here as well, consistent with the DELETE above.
    execute!(db,"ANALYZE $(tbl)")
    return
end
289+
277290
"""
    removeduplicates!(db, table::AbstractString, cols::AbstractString)

Deprecated method. Removes duplicate rows from `table`, grouping by `cols` and keeping
the row with the largest `_ROWID_` in each group, then runs `ANALYZE` on the table.
A deprecation warning is emitted on every call; pass the column names as an array of
strings instead. Returns `nothing`.
"""
function removeduplicates!(db,table::AbstractString,cols::AbstractString)
    warn("This method is deprecated, please provide the column names as an array of column names rather than a single string.")
    tbl = esc_id(table)
    transaction(db) do
        # NOTE(review): `cols` is passed to esc_id as a single string, so a comma-delimited
        # list is presumably treated as one identifier -- confirm against esc_id.
        execute!(db,"DELETE FROM $(tbl) WHERE _ROWID_ NOT IN (SELECT max(_ROWID_) from $(tbl) GROUP BY $(esc_id(cols)));")
    end
    # Use the escaped table name here as well, consistent with the DELETE above.
    execute!(db,"ANALYZE $(tbl)")
    return
end
284298

299+
285300
"""
286301
`SQLite.Source` implements the `DataStreams` framework for interacting with SQLite databases
287302
"""

test/runtests.jl

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,3 +290,24 @@ dt2 = SQLite.query(db, "Select * from temp")
290290
#There might be a better way to check this
291291
@test dt.data[1][1].value==dt2.data[1][1].value
292292
@test dt.data[1][2].isnull==dt2.data[1][2].isnull
293+
294+
#Test removeduplicates!
# Fixture rows: (1,"A"), (1,"A"), (2,"B"), (2,"C"), (3,"C").
# Only the first two rows agree on both columns, so deduplicating on
# ["ints","strs"] must leave exactly four rows: (1,"A"), (2,"B"), (2,"C"), (3,"C").
db = SQLite.DB() #In case the order of tests is changed
ints = Int64[1,1,2,2,3]
strs = UTF8String["A", "A", "B", "C", "C"]
nvInts = NullableArrays.NullableArray(ints)
nvStrs = NullableArrays.NullableArray(strs)
schema = DataStreams.Data.Schema(["ints", "strs"], [Int64, UTF8String],5)
d = NullableArrays.NullableVector[nvInts, nvStrs]
dt = DataStreams.Data.Table(schema, d,0)
SQLite.drop!(db, "temp", ifexists=true)
sink = SQLite.Sink(dt, db, "temp")
Data.stream!(dt, sink)
SQLite.removeduplicates!(db, "temp", ["ints","strs"]) #New format
dt3 = SQLite.query(db, "Select * from temp")
@test get(dt3[1,1]) == 1
@test get(dt3[1,2]) == "A"
@test get(dt3[2,1]) == 2
@test get(dt3[2,2]) == "B"
@test get(dt3[3,1]) == 2
@test get(dt3[3,2]) == "C"
# The last surviving row must still be present; guards against over-deletion.
@test get(dt3[4,1]) == 3
@test get(dt3[4,2]) == "C"

0 commit comments

Comments
 (0)