-
Notifications
You must be signed in to change notification settings - Fork 2
Open
Description
`map` and `reduce` both support `GDTable`, but `mapreduce` does not.
using Distributed
# add two further julia processes which could run on other machines
addprocs(2, exeflags="--threads=2")
# Distributed.@everywhere execute code on all machines
@everywhere using Dagger
# Dagger uses both Threads and Machines as processes
Dagger.all_processors()
using DTables, DataFrames, CSV, OnlineStats
url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv"
files = [url, url, url, url, url]
d = DTable(DataFrame ∘ CSV.File ∘ download, files)
g = DTables.groupby(d, :species)
fetch(reduce(fit!, map(r -> (r.sepal_width,), g), init=Mean())) # works
fetch(mapreduce(r -> (r.sepal_width,), fit!, g, init= Mean())) # fails
The error is below:
julia> fetch(mapreduce(r -> (r.sepal_width,), fit!, g, init= Mean()))
ERROR: type Pair has no field sepal_width
Stacktrace:
[1] getproperty
@ ./Base.jl:37 [inlined]
[2] (::var"#23#24")(r::Pair{String15, DTable})
@ Main ./REPL[44]:1
[3] MappingRF
@ ./reduce.jl:100 [inlined]
[4] _foldl_impl
@ ./reduce.jl:58 [inlined]
[5] foldl_impl
@ ./reduce.jl:48 [inlined]
[6] mapfoldl_impl(f::var"#23#24", op::typeof(fit!), nt::Mean{Float64, EqualWeight}, itr::DTables.GDTable)
@ Base ./reduce.jl:44
[7] mapfoldl(f::Function, op::Function, itr::DTables.GDTable; init::Mean{Float64, EqualWeight})
@ Base ./reduce.jl:175
[8] mapfoldl
@ ./reduce.jl:175 [inlined]
[9] #mapreduce#302
@ ./reduce.jl:307 [inlined]
[10] top-level scope
@ REPL[44]:1
As the documentation recommends using `mapreduce` over `map` + `reduce`, it would be really good if `mapreduce` also supported `GDTable`s.
Metadata
Metadata
Assignees
Labels
No labels