Skip to content

Commit ca37a70

Browse files
authored
Import/using refactor (#18)
1 parent 91576e1 commit ca37a70

12 files changed

+182
-155
lines changed

src/DTables.jl

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,58 @@
11
module DTables
22

3-
import Dagger
3+
############################################################################################
4+
# Using
5+
############################################################################################
6+
7+
using Dagger: Dagger
8+
using DataAPI: BroadcastedSelector
9+
using DataFrames: AsTable, ByRow, ColumnIndex, MultiColumnIndex, normalize_selection, Index
10+
using InvertedIndices: BroadcastedInvertedIndex
11+
using SentinelArrays: ChainedVector
12+
using TableOperations: TableOperations
13+
using Tables:
14+
columnindex,
15+
columnnames,
16+
columns,
17+
columntable,
18+
getcolumn,
19+
materializer,
20+
partitioner,
21+
rows,
22+
schema,
23+
Schema
24+
25+
############################################################################################
26+
# Extend
27+
############################################################################################
28+
29+
import Base:
30+
fetch,
31+
filter,
32+
getindex,
33+
getproperty,
34+
isready,
35+
iterate,
36+
keys,
37+
length,
38+
map,
39+
mapreduce,
40+
names,
41+
propertynames,
42+
reduce,
43+
show,
44+
wait
45+
import DataAPI: leftjoin, ncol, nrow, innerjoin
46+
import Tables:
47+
columnaccess, columnnames, columns, getcolumn, istable, partitions, rowaccess, rows, schema
48+
49+
############################################################################################
50+
# Export
51+
############################################################################################
52+
53+
export DTable, DTableColumn, innerjoin, leftjoin, tabletype, tabletype!, trim, trim!
54+
55+
############################################################################################
456

557
include("table/dtable.jl")
658
include("table/gdtable.jl")

src/table/dataframes_interface.jl

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
1-
import DataAPI: All, Between, BroadcastedSelector, Cols
2-
import DataFrames: AsTable, ByRow, ColumnIndex, MultiColumnIndex, normalize_selection
3-
import InvertedIndices: BroadcastedInvertedIndex
4-
51
# Rebuild a `Pair` by applying `make_pair_concrete` to each side in turn
# (first, then second) and pairing the two results again.
function make_pair_concrete(@nospecialize(x::Pair))
    lhs = make_pair_concrete(x.first)
    rhs = make_pair_concrete(x.second)
    return lhs => rhs
end
@@ -225,8 +221,8 @@ function manipulate(
225221
dt::DTable, args::AbstractVector{Int}; copycols::Bool, keeprows::Bool, renamecols::Bool
226222
)
227223
colidx = first(args)
228-
colname = Tables.columnnames(Tables.columns(dt))[colidx]
229-
return map(r -> (; colname => Tables.getcolumn(r, colidx)), dt)
224+
colname = columnnames(columns(dt))[colidx]
225+
return map(r -> (; colname => getcolumn(r, colidx)), dt)
230226
end
231227

232228
# Copied as is from DataFrames.jl

src/table/dataframes_interface_utils.jl

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import DataAPI: ncol
2-
import DataFrames: Index, ByRow, AsTable
31

42
function select_rowfunction(row, mappable_part_of_normalized_cs, colresults)
53
_cs = [
@@ -9,12 +7,12 @@ function select_rowfunction(row, mappable_part_of_normalized_cs, colresults)
97
args = if colidx isa AsTable
108
(;
119
[
12-
k => Tables.getcolumn(row, k) for
13-
k in getindex.(Ref(Tables.columnnames(row)), colidx.cols)
10+
k => getcolumn(row, k) for
11+
k in getindex.(Ref(columnnames(row)), colidx.cols)
1412
]...
1513
)
1614
else
17-
Tables.getcolumn.(Ref(row), colidx)
15+
getcolumn.(Ref(row), colidx)
1816
end
1917

2018
if f isa ByRow && !(colidx isa AsTable) && length(colidx) == 0
@@ -54,31 +52,26 @@ function fillcolumns(
5452
index = something(indexin(csymbols, [sym])...)
5553
col_vecs_fetched[index]
5654
else
57-
Tables.getcolumn(ch, sym)
55+
getcolumn(ch, sym)
5856
end
5957
push!(colnames, sym)
6058
push!(cols, col)
6159
elseif sym === AsTable
6260
i = findfirst(x -> x === AsTable, csymbols[(last_astable + 1):end])
6361
if i === nothing
64-
c = Tables.getcolumn(ch, Symbol("AsTable$(idx)"))
62+
c = getcolumn(ch, Symbol("AsTable$(idx)"))
6563
else
6664
last_astable = i
6765
c = col_vecs_fetched[i]
6866
end
6967

70-
push!.(Ref(colnames), Tables.columnnames(Tables.columns(c)))
71-
push!.(
72-
Ref(cols),
73-
Tables.getcolumn.(
74-
Ref(Tables.columns(c)), Tables.columnnames(Tables.columns(c))
75-
),
76-
)
68+
push!.(Ref(colnames), columnnames(columns(c)))
69+
push!.(Ref(cols), getcolumn.(Ref(columns(c)), columnnames(columns(c))))
7770
else
7871
throw(ErrorException("something is off"))
7972
end
8073
end
81-
Tables.materializer(ch)(
74+
materializer(ch)(
8275
merge(NamedTuple(), (; [e[1] => e[2] for e in zip(colnames, cols)]...))
8376
)
8477
end
@@ -99,6 +92,3 @@ function fillcolumns(
9992

10093
return DTable(chunks, dt.tabletype)
10194
end
102-
103-
ncol(d::DTable) = length(Tables.columns(d))
104-
index(df::DTable) = Index(columnnames_svector(df))

src/table/dtable.jl

Lines changed: 29 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,3 @@
1-
import Base:
2-
collect, eltype, fetch, getproperty, isready, iterate, length, names, propertynames, show, wait
3-
import SentinelArrays
4-
import TableOperations
5-
import Tables
6-
7-
export DTable, tabletype, tabletype!, trim, trim!, leftjoin, innerjoin, DTableColumn
8-
91
const VTYPE = Vector{Union{Dagger.Chunk,Dagger.EagerThunk}}
102

113
"""
@@ -20,7 +12,7 @@ the underlying partitions was applied to it (currently only `filter`).
2012
mutable struct DTable
2113
chunks::VTYPE
2214
tabletype
23-
schema::Union{Nothing,Tables.Schema}
15+
schema::Union{Nothing,Schema}
2416
end
2517

2618
DTable(chunks::Vector, tabletype) = DTable(VTYPE(chunks), tabletype, nothing)
@@ -30,15 +22,15 @@ DTable(chunks::Vector, tabletype, schema) = DTable(VTYPE(chunks), tabletype, sch
3022
DTable(table; tabletype=nothing) -> DTable
3123
3224
Constructs a `DTable` using a `Tables.jl`-compatible input `table`.
33-
Calls `Tables.partitions` on `table` and assumes the provided partitioning.
25+
Calls `Tables.partitions` on `table` and assumes the provided partitioning.
3426
"""
3527
function DTable(table; tabletype=nothing)
3628
chunks = Vector{Dagger.Chunk}()
3729
type = nothing
3830
sink = nothing
39-
for partition in Tables.partitions(table)
31+
for partition in partitions(table)
4032
if sink === nothing
41-
sink = Tables.materializer(tabletype !== nothing ? tabletype() : partition)
33+
sink = materializer(tabletype !== nothing ? tabletype() : partition)
4234
end
4335

4436
tpart = sink(partition)
@@ -73,25 +65,25 @@ function DTable(table, chunksize::Integer; tabletype=nothing, interpartition_mer
7365
leftovers = nothing
7466
leftovers_length = 0
7567

76-
for partition in Tables.partitions(table)
68+
for partition in partitions(table)
7769
if sink === nothing
78-
sink = Tables.materializer(tabletype !== nothing ? tabletype() : partition)
70+
sink = materializer(tabletype !== nothing ? tabletype() : partition)
7971
end
8072

8173
if interpartition_merges && leftovers !== nothing
82-
inner_partitions = Tables.partitions(
74+
inner_partitions = partitions(
8375
TableOperations.makepartitions(sink(partition), chunksize - leftovers_length)
8476
)
8577

8678
merged_data = sink(
8779
TableOperations.joinpartitions(
88-
Tables.partitioner(identity, [leftovers, sink(first(inner_partitions))])
80+
partitioner(identity, [leftovers, sink(first(inner_partitions))])
8981
),
9082
)
9183

9284
if length(inner_partitions) == 1
9385
leftovers = merged_data
94-
leftovers_length = Tables.length(Tables.rows(leftovers))
86+
leftovers_length = length(rows(leftovers))
9587
if leftovers_length == chunksize
9688
# sometimes the next partition will be exactly the size of
9789
# the chunksize - leftovers_length, so perfect match
@@ -105,28 +97,26 @@ function DTable(table, chunksize::Integer; tabletype=nothing, interpartition_mer
10597
leftovers = nothing
10698
leftovers_length = 0
10799
partition = TableOperations.joinpartitions(
108-
Tables.partitioner(identity, Iterators.drop(inner_partitions, 1))
100+
partitioner(identity, Iterators.drop(inner_partitions, 1))
109101
)
110102
end
111103
end
112104

113-
inner_partitions = Tables.partitions(
114-
TableOperations.makepartitions(sink(partition), chunksize)
115-
)
105+
inner_partitions = partitions(TableOperations.makepartitions(sink(partition), chunksize))
116106

117107
for inner_partition in inner_partitions
118108
chunk_data = sink(inner_partition)
119-
chunk_data_rows = Tables.rows(chunk_data)
109+
chunk_data_rows = rows(chunk_data)
120110

121111
if (
122112
interpartition_merges &&
123113
Base.haslength(chunk_data_rows) &&
124-
Tables.length(chunk_data_rows) < chunksize
114+
length(chunk_data_rows) < chunksize
125115
)
126116
# this is the last chunk with fewer than requested records
127117
# merge it with the first of the next partition
128118
leftovers = chunk_data
129-
leftovers_length = Tables.length(chunk_data_rows)
119+
leftovers_length = length(chunk_data_rows)
130120
else
131121
push!(chunks, Dagger.tochunk(chunk_data))
132122
end
@@ -158,7 +148,7 @@ end
158148

159149
function _file_load(filename::AbstractString, loader_function::Function, tabletype::Any)
160150
part = loader_function(filename)
161-
sink = Tables.materializer(tabletype === nothing ? part : tabletype())
151+
sink = materializer(tabletype === nothing ? part : tabletype())
162152
tpart = sink(part)
163153
return tpart
164154
end
@@ -172,7 +162,7 @@ instance of the underlying table type.
172162
Fetching an empty DTable results in returning an empty `NamedTuple` regardless of the underlying `tabletype`.
173163
"""
174164
function fetch(d::DTable)
175-
sink = Tables.materializer(tabletype(d)())
165+
sink = materializer(tabletype(d)())
176166
return sink(retrieve_partitions(d))
177167
end
178168

@@ -187,7 +177,7 @@ fetch(d::DTable, sink) = sink(retrieve_partitions(d))
187177
function retrieve_partitions(d::DTable)
188178
d2 = trim(d)
189179
return if nchunks(d2) > 0
190-
TableOperations.joinpartitions(Tables.partitioner(retrieve, d2.chunks))
180+
TableOperations.joinpartitions(partitioner(retrieve, d2.chunks))
191181
else
192182
NamedTuple()
193183
end
@@ -229,7 +219,7 @@ function resolve_tabletype(d::DTable)
229219
end
230220

231221
function isnonempty(chunk)
232-
return length(Tables.rows(chunk)) > 0 && length(Tables.columnnames(chunk)) > 0
222+
return length(rows(chunk)) > 0 && length(columnnames(chunk)) > 0
233223
end
234224

235225
"""
@@ -260,7 +250,7 @@ function show(io::IO, ::MIME"text/plain", d::DTable)
260250
end
261251

262252
function chunk_lengths(table::DTable)
263-
f = x -> length(Tables.rows(x))
253+
f = x -> length(rows(x))
264254
return fetch.([Dagger.@spawn f(c) for c in table.chunks])
265255
end
266256

@@ -276,27 +266,31 @@ end
276266
# Number of entries held in the `chunks` field of `d`.
@inline function nchunks(d::DTable)
    return length(d.chunks)
end
277267

278268
function merge_chunks(sink, chunks)
279-
return sink(TableOperations.joinpartitions(Tables.partitioner(retrieve, chunks)))
269+
return sink(TableOperations.joinpartitions(partitioner(retrieve, chunks)))
280270
end
281271

282-
Base.names(dt::DTable) = string.(columnnames_svector(dt))
283-
Base.propertynames(dt::DTable) = columnnames_svector(dt)
272+
# Column names of `dt` as strings: each element produced by
# `columnnames_svector(dt)` is converted with `string`.
function names(dt::DTable)
    return map(string, columnnames_svector(dt))
end
273+
# Property names of a `DTable` are whatever `columnnames_svector(dt)` returns.
function propertynames(dt::DTable)
    return columnnames_svector(dt)
end
284274

285-
function Base.wait(dt::DTable)
275+
# Block until every entry of `dt.chunks` that is not already a `Dagger.Chunk`
# has been waited on. Always returns `nothing`.
function wait(dt::DTable)
    for chunk in dt.chunks
        # `Dagger.Chunk` entries are skipped; only the remaining entries get `wait`.
        chunk isa Dagger.Chunk && continue
        wait(chunk)
    end
    return nothing
end
291281

292-
function Base.isready(dt::DTable)
282+
# Report whether all chunks of `dt` are ready.
#
# Entries of `dt.chunks` that are a `Dagger.Chunk` count as ready. Every other
# entry is queried with `isready`, and its answer is propagated. The previous
# implementation evaluated `(isready(ch); true)`, which threw that answer away
# and therefore reported `true` even when a chunk was still pending.
#
# NOTE(review): assumes `isready` on the non-`Chunk` entries returns a `Bool`
# — confirm against Dagger's `EagerThunk` implementation.
function isready(dt::DTable)
    # `all` with a predicate stops at the first entry that is not ready.
    return all(ch -> ch isa Dagger.Chunk || isready(ch), dt.chunks)
end
295285

296-
function Base.getproperty(dt::DTable, s::Symbol)
286+
# Property access on a `DTable`: names that are real struct fields return the
# stored field; any other symbol is handed to `DTableColumn(dt, s)`.
function getproperty(dt::DTable, s::Symbol)
    is_field = s in fieldnames(DTable)
    return is_field ? getfield(dt, s) : DTableColumn(dt, s)
end
293+
294+
# Column count of `d`, taken as the length of `columns(d)`.
function ncol(d::DTable)
    return length(columns(d))
end
295+
# Row count of `d`; defers to `length(d)`.
function nrow(d::DTable)
    return length(d)
end
296+
# Wrap the result of `columnnames_svector(df)` in an `Index`.
function index(df::DTable)
    return Index(columnnames_svector(df))
end

src/table/dtable_column.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@ mutable struct DTableColumn{T,TT}
99
end
1010

1111
function getcolumn_chunk(chunk_contents, col::Int)
12-
return Tables.getcolumn(Tables.columns(chunk_contents), col)
12+
return getcolumn(columns(chunk_contents), col)
1313
end
1414

1515
function DTableColumn(d::DTable, col::Int)
16-
column_eltype = Tables.schema(Tables.columns(d)).types[col]
16+
column_eltype = schema(columns(d)).types[col]
1717

1818
iterator_type = Nothing
1919
c_idx = 1

src/table/gdtable.jl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
import Base: keys, iterate, length, getindex
2-
31
"""
42
GDTable
53

0 commit comments

Comments
 (0)