From 90d41733bd21853e66a141c7001d731ee37f5c81 Mon Sep 17 00:00:00 2001 From: Julian P Samaroo Date: Wed, 26 Jul 2023 21:43:17 -0400 Subject: [PATCH 1/4] Add Dagger.File loading support --- src/table/dtable.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/table/dtable.jl b/src/table/dtable.jl index ce96fe7..cdf25bf 100644 --- a/src/table/dtable.jl +++ b/src/table/dtable.jl @@ -1,4 +1,5 @@ -const VTYPE = Vector{Union{Dagger.Chunk,Dagger.EagerThunk}} +const ELTYPE = Union{Dagger.Chunk,Dagger.EagerThunk} +const VTYPE = Vector{ELTYPE} """ DTable @@ -153,6 +154,11 @@ function _file_load(filename::AbstractString, loader_function::Function, tablety return tpart end +function DTable(files::Vector{Dagger.File}; tabletype=nothing) + chunks = ELTYPE[file.chunk for file in files] + return DTable(chunks, tabletype) +end + """ fetch(d::DTable) From 5ae261c14b0958cbbd179ff4fdd0f594ac5369fa Mon Sep 17 00:00:00 2001 From: Julian P Samaroo Date: Thu, 27 Jul 2023 09:13:38 -0400 Subject: [PATCH 2/4] map: Use tabletype --- src/operations/operations.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/operations/operations.jl b/src/operations/operations.jl index 76e6c92..dbbe76b 100644 --- a/src/operations/operations.jl +++ b/src/operations/operations.jl @@ -24,15 +24,15 @@ julia> fetch(m) ``` """ function map(f, d::DTable) - chunk_wrap = (_chunk, _f) -> begin + chunk_wrap = (_chunk, _f, tabletype) -> begin return if isnonempty(_chunk) - sink = materializer(_chunk) + sink = materializer(tabletype === nothing ? _chunk : tabletype()) sink(TableOperations.map(_f, _chunk)) else _chunk end end - chunks = map(c -> Dagger.spawn(chunk_wrap, c, f), d.chunks) + chunks = map(c -> Dagger.spawn(chunk_wrap, c, f, d.tabletype), d.chunks) return DTable(chunks, d.tabletype) end From 1425a23e8b7629ca3faf0549098506bb2ffa9189 Mon Sep 17 00:00:00 2001 From: Julian P Samaroo Date: Thu, 27 Jul 2023 09:14:06 -0400 Subject: [PATCH 3/4] trim!: Remove some allocations --- src/table/dtable.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/table/dtable.jl b/src/table/dtable.jl index cdf25bf..1edfcf4 100644 --- a/src/table/dtable.jl +++ b/src/table/dtable.jl @@ -235,7 +235,11 @@ Removes empty chunks from `d`. """ function trim!(d::DTable) check_result = [Dagger.@spawn isnonempty(c) for c in d.chunks] - d.chunks = getindex.(filter(x -> fetch(check_result[x[1]]), collect(enumerate(d.chunks))), 2) + for idx in length(d.chunks):-1:1 + if !fetch(check_result[idx]) + deleteat!(d.chunks, idx) + end + end return d end From bdb80f1ad72927e008b2aad325aaba6c38728dc0 Mon Sep 17 00:00:00 2001 From: Julian P Samaroo Date: Thu, 27 Jul 2023 09:14:25 -0400 Subject: [PATCH 4/4] Improve colnames determination --- src/table/tables.jl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/table/tables.jl b/src/table/tables.jl index cb2907c..8536b67 100644 --- a/src/table/tables.jl +++ b/src/table/tables.jl @@ -43,7 +43,14 @@ end function determine_columnnames(table::DTable) s = determine_schema(table) - return s === nothing ? nothing : s.names + if s !== nothing + return s.names + end + if length(table.chunks) == 0 + return () + end + c = first(table.chunks) + return fetch(Dagger.@spawn columnnames(c)) end function _getcolumn(table::DTable, col::Union{Symbol,Int})