Skip to content

Commit f0fe6d3

Browse files
Gord Stephen authored and timholy committed
merge and join speedups (#75)
1 parent decc774 commit f0fe6d3

File tree

1 file changed

+75
-56
lines changed

1 file changed

+75
-56
lines changed

src/combine.jl

Lines changed: 75 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -25,46 +25,68 @@ function Base.cat{T}(n::Integer, As::AxisArray{T}...)
2525
end #if
2626
end #Base.cat
2727

28-
combineaxes{T,N,D,Ax}(As::AxisArray{T,N,D,Ax}...) = combineaxes(:outer, As...)
29-
30-
function combineaxes{T,N,D,Ax}(method::Symbol, As::AxisArray{T,N,D,Ax}...)
31-
axisnamesvalues = zip(axisnames(As[1]), zip(map(axisvalues, As)...)) |> collect
32-
33-
resultaxes = Array{Axis}(N)
34-
resultaxeslengths = Array{Int}(N)
35-
axismaps = Array{NTuple{2,NTuple{2,Vector{Int64}}}}(N)
36-
37-
for i in 1:N
38-
name, valueslists = axisnamesvalues[i]
39-
mergedaxisvalues = mergevalues(valueslists, method)
40-
isa(axistrait(mergedaxisvalues), Dimensional) && sort!(mergedaxisvalues)
41-
resultaxes[i] = Axis{name}(mergedaxisvalues)
42-
resultaxeslengths[i] = length(mergedaxisvalues)
43-
axismaps[i] = map(valueslists) do vals
44-
keepers = intersect(vals, mergedaxisvalues)
45-
return findin(vals, keepers), findin(mergedaxisvalues, keepers)
46-
end #do
47-
end
48-
49-
axismaps = map(zip(axismaps...)) do mps
50-
map(idxs->collect(product(idxs...)), zip(mps...))
51-
end #do
28+
function axismerge{name,T}(method::Symbol, axes::Axis{name,T}...)
5229

53-
return resultaxes, resultaxeslengths, axismaps
54-
end #combineaxes
55-
56-
function mergevalues{T}(values::Tuple{Vararg{AbstractVector{T}}}, method::Symbol)
57-
if method == :inner
58-
intersect(values...)
30+
axisvals = if method == :inner
31+
intersect(axisvalues(axes...)...)
5932
elseif method == :left
60-
values[1]
33+
axisvalues(axes[1])[1]
6134
elseif method == :right
62-
values[end]
35+
axisvalues(axes[end])[1]
6336
elseif method == :outer
64-
vcat(values...) |> unique
37+
union(axisvalues(axes...)...)
6538
else
6639
error("Join method must be one of :inner, :left, :right, :outer")
6740
end #if
41+
42+
isa(axistrait(axisvals), Dimensional) && sort!(axisvals)
43+
44+
return Axis{name}(collect(axisvals))
45+
46+
end
47+
48+
"""
    indexmappings(oldaxes, newaxes)

Compute the index mapping for every pair of corresponding axes at once.

`oldaxes` and `newaxes` are equal-length tuples of `Axis` objects. The values of
each axis pair are handed to `indexmapping`, and the per-axis
`(before, after)` results are regrouped with `zip`: the first element of the
returned vector is the tuple of all "before" index vectors (one per axis), the
second is the tuple of all "after" index vectors.
"""
function indexmappings{N}(oldaxes::NTuple{N,Axis}, newaxes::NTuple{N,Axis})
    # One (before, after) pair per axis, in axis order.
    peraxis = map(indexmapping, axisvalues(oldaxes...), axisvalues(newaxes...))
    # Transpose "per axis (before, after)" into "(all befores), (all afters)".
    return collect(zip(peraxis...))
end
53+
54+
"""
    indexmapping(old::AbstractVector, new::AbstractVector)

Find the values that `old` and `new` have in common and return `(before, after)`,
two `Vector{Int}` of equal length such that `old[before[k]]` and `new[after[k]]`
are the same value for every `k`.

Neither input needs to be sorted: both are traversed through their sort
permutations in a single merge-style pass. Matches are reported in ascending
order of the matched value. Repeated values are paired one-to-one in sorted
order; surplus repeats on either side are left unmatched.

Values are compared with `isequal`/`isless`, the same ordering `sortperm` uses
by default, so `NaN` matches `NaN` and `-0.0` is kept distinct from `0.0`.
"""
function indexmapping(old::AbstractVector, new::AbstractVector)

    before = Int[]
    after = Int[]

    # Sort permutations let both vectors be walked in ascending order without
    # copying or reordering the inputs.
    oldperm = sortperm(old)
    newperm = sortperm(new)

    oldlength = length(oldperm)
    newlength = length(newperm)

    oi = ni = 1

    while oi <= oldlength && ni <= newlength

        oldval = old[oldperm[oi]]
        newval = new[newperm[ni]]

        # The comparisons must agree with the ordering produced by `sortperm`
        # (`isless`); using `==`/`<` here mis-pairs 0.0 with -0.0 and never
        # matches NaN.
        if isequal(oldval, newval)
            push!(before, oldperm[oi])
            push!(after, newperm[ni])
            oi += 1
            ni += 1
        elseif isless(oldval, newval)
            # `oldval` has no partner in `new`; skip it.
            oi += 1
        else
            # `newval` has no partner in `old`; skip it.
            ni += 1
        end

    end

    return before, after

end
6991

7092
"""
@@ -74,16 +96,14 @@ Combines AxisArrays with matching axis names into a single AxisArray spanning al
7496
"""
7597
function Base.merge{T,N,D,Ax}(As::AxisArray{T,N,D,Ax}...; fillvalue::T=zero(T))
7698

77-
resultaxes, resultaxeslengths, indexmaps = combineaxes(As...)
78-
result = AxisArray(fill(fillvalue, resultaxeslengths...), resultaxes...)
99+
resultaxes = map(as -> axismerge(:outer, as...), map(tuple, axes.(As)...))
100+
resultdata = fill(fillvalue, length.(resultaxes)...)
101+
result = AxisArray(resultdata, resultaxes...)
79102

80-
for i in 1:length(As)
81-
A = As[i]
82-
Aidxs, resultidxs = indexmaps[i]
83-
for j in eachindex(Aidxs)
84-
result[resultidxs[j]...] = A[Aidxs[j]...]
85-
end #for
86-
end #for
103+
for A in As
104+
before_idxs, after_idxs = indexmappings(A.axes, result.axes)
105+
result.data[after_idxs...] = A.data[before_idxs...]
106+
end
87107

88108
return result
89109

@@ -101,20 +121,19 @@ Combines AxisArrays with matching axis names into a single AxisArray. Unlike `me
101121
102122
If an array value in the output array is not defined in any of the input arrays (i.e. in the case of a left, right, or outer join), it takes the value of the optional `fillvalue` keyword argument (default zero).
103123
"""
104-
function Base.join{T,N,D,Ax}(As::AxisArray{T,N,D,Ax}...; fillvalue::T=zero(T), newaxis::Axis=Axis{_defaultdimname(N+1)}(1:length(As)), method::Symbol=:outer)
105-
106-
M = length(As)
107-
resultaxes, resultaxeslengths, indexmaps = combineaxes(method, As...)
108-
push!(resultaxes, newaxis)
109-
push!(resultaxeslengths, M)
110-
result = AxisArray(fill(fillvalue, resultaxeslengths...), resultaxes...)
111-
112-
for i in 1:M
113-
A = As[i]
114-
Aidxs, resultidxs = indexmaps[i]
115-
for j in eachindex(Aidxs)
116-
result[[resultidxs[j]...; i]...] = A[Aidxs[j]...]
117-
end #for
124+
function Base.join{T,N,D,Ax}(As::AxisArray{T,N,D,Ax}...; fillvalue::T=zero(T),
125+
newaxis::Axis=_nextaxistype(As[1].data, As[1].axes)(1:length(As)),
126+
method::Symbol=:outer)
127+
128+
prejoin_resultaxes = map(as -> axismerge(method, as...), map(tuple, axes.(As)...))
129+
130+
resultaxes = (prejoin_resultaxes..., newaxis)
131+
resultdata = fill(fillvalue, length.(resultaxes)...)
132+
result = AxisArray(resultdata, resultaxes...)
133+
134+
for (i, A) in enumerate(As)
135+
before_idxs, after_idxs = indexmappings(A.axes, prejoin_resultaxes)
136+
result.data[(after_idxs..., i)...] = A.data[before_idxs...]
118137
end #for
119138

120139
return result

0 commit comments

Comments
 (0)