1- import Base:
2- collect, eltype, fetch, getproperty, isready, iterate, length, names, propertynames, show, wait
3- import SentinelArrays
4- import TableOperations
5- import Tables
6-
7- export DTable, tabletype, tabletype!, trim, trim!, leftjoin, innerjoin, DTableColumn
8-
# Container type used for a `DTable`'s partitions: every element is either a
# `Dagger.Chunk` or a `Dagger.EagerThunk`.
const VTYPE = Vector{Union{Dagger.Chunk,Dagger.EagerThunk}}
102
113"""
@@ -20,7 +12,7 @@ the underlying partitions was applied to it (currently only `filter`).
mutable struct DTable
    # Partitions of the table; each entry is a `Dagger.Chunk` or `Dagger.EagerThunk`.
    chunks::VTYPE
    # Deliberately unconstrained (`Any`).
    # NOTE(review): presumably the table type used when materializing - confirm.
    tabletype::Any
    # `Schema` of the table, or `nothing` when none has been stored.
    schema::Union{Nothing,Schema}
end
2517
# Convenience constructor: converts `chunks` to `VTYPE` and stores `nothing`
# as the schema.
function DTable(chunks::Vector, tabletype)
    return DTable(VTYPE(chunks), tabletype, nothing)
end
@@ -30,15 +22,15 @@ DTable(chunks::Vector, tabletype, schema) = DTable(VTYPE(chunks), tabletype, sch
3022 DTable(table; tabletype=nothing) -> DTable
3123
3224Constructs a `DTable` using a `Tables.jl`-compatible input `table`.
33- Calls `Tables. partitions` on `table` and assumes the provided partitioning.
25+ Calls `partitions` on `table` and assumes the provided partitioning.
3426"""
3527function DTable (table; tabletype= nothing )
3628 chunks = Vector {Dagger.Chunk} ()
3729 type = nothing
3830 sink = nothing
39- for partition in Tables . partitions (table)
31+ for partition in partitions (table)
4032 if sink === nothing
41- sink = Tables . materializer (tabletype != = nothing ? tabletype () : partition)
33+ sink = materializer (tabletype != = nothing ? tabletype () : partition)
4234 end
4335
4436 tpart = sink (partition)
@@ -73,25 +65,25 @@ function DTable(table, chunksize::Integer; tabletype=nothing, interpartition_mer
7365 leftovers = nothing
7466 leftovers_length = 0
7567
76- for partition in Tables . partitions (table)
68+ for partition in partitions (table)
7769 if sink === nothing
78- sink = Tables . materializer (tabletype != = nothing ? tabletype () : partition)
70+ sink = materializer (tabletype != = nothing ? tabletype () : partition)
7971 end
8072
8173 if interpartition_merges && leftovers != = nothing
82- inner_partitions = Tables . partitions (
74+ inner_partitions = partitions (
8375 TableOperations. makepartitions (sink (partition), chunksize - leftovers_length)
8476 )
8577
8678 merged_data = sink (
8779 TableOperations. joinpartitions (
88- Tables . partitioner (identity, [leftovers, sink (first (inner_partitions))])
80+ partitioner (identity, [leftovers, sink (first (inner_partitions))])
8981 ),
9082 )
9183
9284 if length (inner_partitions) == 1
9385 leftovers = merged_data
94- leftovers_length = Tables . length (Tables . rows (leftovers))
86+ leftovers_length = length (rows (leftovers))
9587 if leftovers_length == chunksize
9688 # sometimes the next partition will be exactly the size of
9789 # the chunksize - leftovers_length, so perfect match
@@ -105,28 +97,26 @@ function DTable(table, chunksize::Integer; tabletype=nothing, interpartition_mer
10597 leftovers = nothing
10698 leftovers_length = 0
10799 partition = TableOperations. joinpartitions (
108- Tables . partitioner (identity, Iterators. drop (inner_partitions, 1 ))
100+ partitioner (identity, Iterators. drop (inner_partitions, 1 ))
109101 )
110102 end
111103 end
112104
113- inner_partitions = Tables. partitions (
114- TableOperations. makepartitions (sink (partition), chunksize)
115- )
105+ inner_partitions = partitions (TableOperations. makepartitions (sink (partition), chunksize))
116106
117107 for inner_partition in inner_partitions
118108 chunk_data = sink (inner_partition)
119- chunk_data_rows = Tables . rows (chunk_data)
109+ chunk_data_rows = rows (chunk_data)
120110
121111 if (
122112 interpartition_merges &&
123113 Base. haslength (chunk_data_rows) &&
124- Tables . length (chunk_data_rows) < chunksize
114+ length (chunk_data_rows) < chunksize
125115 )
126116 # this is the last chunk with fewer than requested records
127117 # merge it with the first of the next partition
128118 leftovers = chunk_data
129- leftovers_length = Tables . length (chunk_data_rows)
119+ leftovers_length = length (chunk_data_rows)
130120 else
131121 push! (chunks, Dagger. tochunk (chunk_data))
132122 end
158148
# Load `filename` with `loader_function` and pass the result through a sink.
# The sink comes from `materializer`, applied to the loaded object itself when
# `tabletype === nothing`, otherwise to a zero-argument instance `tabletype()`.
function _file_load(filename::AbstractString, loader_function::Function, tabletype::Any)
    loaded = loader_function(filename)
    sink_source = if tabletype === nothing
        loaded
    else
        tabletype()
    end
    to_table = materializer(sink_source)
    return to_table(loaded)
end
@@ -172,7 +162,7 @@ instance of the underlying table type.
Fetching an empty `DTable` returns an empty `NamedTuple` regardless of the underlying `tabletype`.
173163"""
function fetch(d::DTable)
    # A zero-argument instance of the table type selects the sink function.
    to_table = materializer(tabletype(d)())
    # Collect the partitions of `d`, then hand them to the sink in one call.
    joined = retrieve_partitions(d)
    return to_table(joined)
end
178168
@@ -187,7 +177,7 @@ fetch(d::DTable, sink) = sink(retrieve_partitions(d))
187177function retrieve_partitions (d:: DTable )
188178 d2 = trim (d)
189179 return if nchunks (d2) > 0
190- TableOperations. joinpartitions (Tables . partitioner (retrieve, d2. chunks))
180+ TableOperations. joinpartitions (partitioner (retrieve, d2. chunks))
191181 else
192182 NamedTuple ()
193183 end
@@ -229,7 +219,7 @@ function resolve_tabletype(d::DTable)
229219end
230220
# Return `true` when `chunk` has at least one row and at least one column name.
function isnonempty(chunk)
    # Guard first so column names are only inspected when rows exist,
    # mirroring the short-circuit of a single `&&` expression.
    length(rows(chunk)) > 0 || return false
    return length(columnnames(chunk)) > 0
end
234224
235225"""
@@ -260,7 +250,7 @@ function show(io::IO, ::MIME"text/plain", d::DTable)
260250end
261251
# Return the number of rows of every chunk of `table`, in chunk order.
# One task is spawned per chunk through `Dagger.@spawn`; the results are then
# fetched element-wise.
function chunk_lengths(table::DTable)
    row_count = part -> length(rows(part))
    tasks = [Dagger.@spawn row_count(chunk) for chunk in table.chunks]
    return fetch.(tasks)
end
266256
@@ -276,27 +266,31 @@ end
@inline function nchunks(d::DTable)
    # Number of entries in the `chunks` vector of `d`.
    return length(d.chunks)
end
277267
# Join `chunks` into a single table and materialize it with `sink`.
# Each chunk is passed through `retrieve` via `partitioner` before the
# partitions are joined.
function merge_chunks(sink, chunks)
    parts = partitioner(retrieve, chunks)
    joined = TableOperations.joinpartitions(parts)
    return sink(joined)
end
281271
# Column names of `dt` converted to strings.
function names(dt::DTable)
    colnames = columnnames_svector(dt)
    return string.(colnames)
end

# Column names of `dt` exactly as returned by `columnnames_svector`.
function propertynames(dt::DTable)
    return columnnames_svector(dt)
end
284274
# Block until every chunk of `dt` that is not a `Dagger.Chunk` has been waited
# on. Always returns `nothing`.
function wait(dt::DTable)
    for part in dt.chunks
        # `Dagger.Chunk` entries are skipped; everything else is waited on.
        part isa Dagger.Chunk && continue
        wait(part)
    end
    return nothing
end
291281
"""
    isready(dt::DTable) -> Bool

Return `true` only when every chunk of `dt` is ready.

Entries that are a `Dagger.Chunk` are treated as ready; every other entry is
queried with `isready`. A `DTable` without chunks is reported as ready.
"""
function isready(dt::DTable)
    # The result of `isready(ch)` must be used: discarding it and yielding a
    # constant `true` would report the table as ready unconditionally.
    # `all` with a predicate stops at the first chunk that is not ready and
    # avoids building a temporary array.
    return all(ch -> ch isa Dagger.Chunk || isready(ch), dt.chunks)
end
295285
# Property access for `DTable`: names of real struct fields return the stored
# field value; any other symbol yields a `DTableColumn` for that name.
function getproperty(dt::DTable, s::Symbol)
    is_struct_field = s in fieldnames(DTable)
    return is_struct_field ? getfield(dt, s) : DTableColumn(dt, s)
end
293+
# Number of columns, computed as the length of `columns(d)`.
function ncol(d::DTable)
    return length(columns(d))
end

# Number of rows, delegating to `length(d)`.
function nrow(d::DTable)
    return length(d)
end

# `Index` built from the column names of `df`.
function index(df::DTable)
    colnames = columnnames_svector(df)
    return Index(colnames)
end
0 commit comments