|
1 |
| - |
2 | 1 | mutable struct DTableColumn{T,TT}
|
3 | 2 | dtable::DTable
|
4 |
| - current_chunk::Int |
5 | 3 | col::Int
|
6 | 4 | colname::Symbol
|
7 | 5 | chunk_lengths::Vector{Int}
|
8 |
| - current_iterator::Union{Nothing,TT} |
9 |
| - chunkstore::Union{Nothing,Vector{T}} |
| 6 | + _chunk::Int |
| 7 | + _iter::Union{Nothing,TT} |
| 8 | + _chunkstore::Union{Nothing,Vector{T}} |
10 | 9 | end
|
11 | 10 |
|
12 |
| -__ff = (ch, col) -> Tables.getcolumn(Tables.columns(ch), col) |
| 11 | +function getcolumn_chunk(chunk_contents, col::Int) |
| 12 | + return Tables.getcolumn(Tables.columns(chunk_contents), col) |
| 13 | +end |
13 | 14 |
|
14 |
| -function DTableColumn(dtable::DTable, col::Int) |
15 |
| - column_eltype = Tables.schema(Tables.columns(dtable)).types[col] |
16 |
| - iterator_type = fetch(Dagger.spawn((ch, _col) -> typeof(iterate(__ff(ch, _col))), dtable.chunks[1], col)) |
| 15 | +function DTableColumn(d::DTable, col::Int) |
| 16 | + column_eltype = Tables.schema(Tables.columns(d)).types[col] |
| 17 | + |
| 18 | + iterator_type = Nothing |
| 19 | + c_idx = 1 |
| 20 | + while iterator_type === Nothing && c_idx <= nchunks(d) |
| 21 | + iterator_type = fetch(Dagger.spawn( |
| 22 | + (ch, _col) -> typeof(iterate(getcolumn_chunk(ch, _col))), |
| 23 | + d.chunks[c_idx], |
| 24 | + col |
| 25 | + )) |
| 26 | + c_idx += 1 |
| 27 | + end |
17 | 28 |
|
18 | 29 | DTableColumn{column_eltype,iterator_type}(
|
19 |
| - dtable, |
20 |
| - 0, |
| 30 | + d, |
21 | 31 | col,
|
22 |
| - _columnnames_svector(dtable)[col], |
23 |
| - chunk_lengths(dtable), |
| 32 | + _columnnames_svector(d)[col], |
| 33 | + chunk_lengths(d), |
| 34 | + 0, |
24 | 35 | nothing,
|
25 | 36 | nothing,
|
26 | 37 | )
|
27 | 38 | end
|
28 | 39 |
|
29 | 40 |
|
30 |
| -function getindex(dtablecolumn::DTableColumn, idx::Int) |
31 |
| - chunk_idx = 0 |
32 |
| - s = 1 |
33 |
| - for (i, e) in enumerate(dtablecolumn.chunk_lengths) |
34 |
| - if s <= idx < s + e |
35 |
| - chunk_idx = i |
36 |
| - break |
37 |
| - end |
38 |
| - s = s + e |
39 |
| - end |
40 |
| - chunk_idx == 0 && throw(BoundsError()) |
41 |
| - offset = idx - s + 1 |
42 |
| - chunk = fetch(Dagger.spawn(__ff, dtablecolumn.dtable.chunks[chunk_idx], dtablecolumn.col)) |
43 |
| - |
44 |
| - row, iter = iterate(Tables.rows(chunk)) |
45 |
| - for _ in 1:(offset-1) |
46 |
| - row, iter = iterate(Tables.rows(chunk), iter) |
47 |
| - end |
48 |
| - Tables.getcolumn(row, dtablecolumn.col) |
49 |
| -end |
| 41 | +DTableColumn(d::DTable, col::String) = |
| 42 | + DTableColumn(d, only(indexin([col], string.(_columnnames_svector(d))))) |
| 43 | +DTableColumn(d::DTable, col::Symbol) = DTableColumn(d, string(col)) |
50 | 44 |
|
51 |
| -length(dtablecolumn::DTableColumn) = sum(dtablecolumn.chunk_lengths) |
| 45 | +length(dtc::DTableColumn) = sum(dtc.chunk_lengths) |
52 | 46 |
|
53 | 47 |
|
54 |
| -function pull_next_chunk(dtablecolumn::DTableColumn, chunkidx::Int) |
55 |
| - while dtablecolumn.current_iterator === nothing |
56 |
| - chunkidx += 1 |
57 |
| - if chunkidx <= length(dtablecolumn.dtable.chunks) |
58 |
| - dtablecolumn.chunkstore = |
59 |
| - fetch(Dagger.spawn(__ff, dtablecolumn.dtable.chunks[chunkidx], dtablecolumn.col)) |
| 48 | +function pull_next_chunk!(dtc::DTableColumn) |
| 49 | + # find first non-empty chunk |
| 50 | + while dtc._iter === nothing |
| 51 | + dtc._chunk += 1 |
| 52 | + if dtc._chunk <= nchunks(dtc.dtable) |
| 53 | + dtc._chunkstore = fetch(Dagger.spawn( |
| 54 | + getcolumn_chunk, |
| 55 | + dtc.dtable.chunks[dtc._chunk], |
| 56 | + dtc.col |
| 57 | + )) |
60 | 58 | else
|
61 |
| - return chunkidx |
| 59 | + return nothing |
62 | 60 | end
|
63 |
| - dtablecolumn.current_iterator = iterate(dtablecolumn.chunkstore) |
| 61 | + # iterate in case this chunk is empty |
| 62 | + dtc._iter = iterate(dtc._chunkstore) |
64 | 63 | end
|
65 |
| - return chunkidx |
| 64 | + return nothing |
66 | 65 | end
|
67 | 66 |
|
68 | 67 |
|
69 |
| -function iterate(dtablecolumn::DTableColumn) |
70 |
| - if length(dtablecolumn) == 0 |
71 |
| - return nothing |
72 |
| - end |
73 |
| - dtablecolumn.chunkstore = nothing |
74 |
| - dtablecolumn.current_iterator = nothing |
75 |
| - chunkidx = pull_next_chunk(dtablecolumn, 0) |
76 |
| - ci = dtablecolumn.current_iterator |
77 |
| - if ci === nothing |
78 |
| - return nothing |
79 |
| - else |
80 |
| - return (ci[1], (chunkidx, ci[2])) |
81 |
| - end |
| 68 | +function iterate(dtc::DTableColumn) |
| 69 | + length(dtc) == 0 && return nothing |
| 70 | + |
| 71 | + # on every iteration start reset the cache |
| 72 | + dtc._chunkstore = nothing |
| 73 | + dtc._iter = nothing |
| 74 | + dtc._chunk = 0 |
| 75 | + |
| 76 | + # pull the first chunk |
| 77 | + pull_next_chunk!(dtc) |
| 78 | + |
| 79 | + return dtc._iter |
82 | 80 | end
|
83 | 81 |
|
84 |
| -function iterate(dtablecolumn::DTableColumn, iter) |
85 |
| - (chunkidx, i) = iter |
86 |
| - cs = dtablecolumn.chunkstore |
87 |
| - ci = nothing |
88 |
| - if cs !== nothing |
89 |
| - ci = iterate(cs, i) |
90 |
| - else |
91 |
| - return nothing |
92 |
| - end |
93 |
| - dtablecolumn.current_iterator = ci |
94 |
| - chunkidx = pull_next_chunk(dtablecolumn, chunkidx) |
95 |
| - ci = dtablecolumn.current_iterator |
96 |
| - if ci === nothing |
97 |
| - return nothing |
98 |
| - else |
99 |
| - return (ci[1], (chunkidx, ci[2])) |
100 |
| - end |
| 82 | +function iterate(dtc::DTableColumn, iter) |
| 83 | + dtc._chunkstore === nothing && return nothing |
| 84 | + dtc._iter = iterate(dtc._chunkstore, iter) |
| 85 | + pull_next_chunk!(dtc) |
| 86 | + return dtc._iter |
101 | 87 | end
|
0 commit comments