@@ -9,6 +9,7 @@ mutable struct Index{T, V} <: AbstractIndex{T, V}
99 longestprobe:: V
1010 initiallongestprobe:: V
1111 deletions:: V
12+ duplicates:: V
1213
1314 function Index {T} (nelements:: Integer ) where {T}
1415 size = ceil (nelements / 0.9 ) # 0.9 load factor
@@ -18,9 +19,10 @@ mutable struct Index{T, V} <: AbstractIndex{T, V}
1819 Vector {T} (undef, nelements),
1920 zeros (mintype, mintype (size)),
2021 zeros (mintype, mintype (size)),
21- 0 ,
22- 0 ,
23- 0 ,
22+ 0x0 ,
23+ 0x0 ,
24+ 0x0 ,
25+ 0x0 ,
2426 )
2527 end
2628end
@@ -33,9 +35,14 @@ function _setindex!(idx::Index{T, V}, elem::T, position::Unsigned) where {T, V}
3335 k = position
3436 location = hash (elem) % _length (idx) + 0x1
3537 prevlongestprobe = idx. longestprobe
38+ is_duplicate = false
3639 @inbounds while k != 0x0
3740 probeposition += 0x1
3841 recordpos = idx. probepositions[location]
42+ if ! is_duplicate && recordpos > 0 && isequal (idx. vals[idx. indices[location]], elem)
43+ idx. duplicates += 0x1
44+ is_duplicate = true
45+ end
3946 if probeposition > recordpos
4047 k, idx. indices[location] = idx. indices[location], k
4148 idx. probepositions[location] = probeposition
@@ -45,7 +52,7 @@ function _setindex!(idx::Index{T, V}, elem::T, position::Unsigned) where {T, V}
4552 location = location == _length (idx) ? UInt (0x1 ) : location + 0x1
4653 end
4754
48- # reset longestprobe after some deletions deletions , this should maintain stable performance for _getindex and _getindex_array
55+ # reset longestprobe after some deletions, this should maintain stable performance for _getindex and _getindex_array
4956 if idx. longestprobe == prevlongestprobe &&
5057 idx. longestprobe > idx. initiallongestprobe &&
5158 idx. deletions >= length (idx) ÷ 2
@@ -54,24 +61,28 @@ function _setindex!(idx::Index{T, V}, elem::T, position::Unsigned) where {T, V}
5461 end
5562end
5663
57- function _getindex (idx:: Index{T} , elem:: T ) where {T}
64+ function _getindex (idx:: Index{T, V } , elem:: T ) where {T, V }
5865 ilength = _length (idx)
5966 if ilength > 0x0
60- location = hash (elem) % ilength + 0x1
67+ location:: V = hash (elem) % ilength + 0x1
6168 @inbounds for probeposition ∈ 0x1 : (idx. longestprobe)
6269 pos = idx. indices[location]
6370 if pos > 0x0 && isequal (idx. vals[pos], elem)
6471 return location
6572 elseif pos == 0x0
66- return 0x0
73+ return V ( 0x0 )
6774 end
6875 location = location == ilength ? UInt (0x1 ) : location + 0x1
6976 end
7077 end
71- return 0x0
78+ return V ( 0x0 )
7279end
7380
7481function _getindex_array (idx:: Index{T, V} , elem:: T ) where {T, V}
82+ if allunique (idx)
83+ ret = _getindex (idx, elem)
84+ return ret > 0x0 ? [ret] : V[]
85+ end
7586 locations = Vector {V} ()
7687 ilength = _length (idx)
7788 if ilength > 0x0
@@ -89,45 +100,68 @@ function _getindex_array(idx::Index{T, V}, elem::T) where {T, V}
89100 return locations
90101end
91102
92- function _getindex_byposition (idx:: Index{T} , i:: Integer ) where {T}
103+ function _getindex_byposition (idx:: Index{T, V } , i:: Integer ) where {T, V }
93104 elem = idx. vals[i]
94105 ilength = _length (idx)
106+ is_duplicate = 0x0
95107 if ilength > 0x0
96- location = hash (elem) % ilength + 0x1
108+ location:: V = hash (elem) % ilength + 0x1
97109 @inbounds for probeposition ∈ 0x1 : (idx. longestprobe)
98110 pos = idx. indices[location]
99- if pos > 0x0 && pos == i
100- return location
111+ if pos > 0x0
112+ if is_duplicate < 0x2
113+ is_duplicate += isequal (elem, idx. vals[pos])
114+ end
115+ if pos == i
116+ return location, is_duplicate > 0x1
117+ end
101118 elseif pos == 0x0
102119 break
103120 end
104- location = location == ilength ? UInt64 (0x1 ) : location + 0x1
121+ location = location == ilength ? V (0x1 ) : location + 0x1
105122 end
106123 end
107124 error (" Element not found. This should never happen." )
108125end
109126
110- function _delete! (idx:: Index , oldkeyindex:: Integer )
127+ function _delete! (idx:: Index , oldkeyindex:: Integer , is_duplicate :: Bool = false )
111128 lastidx = findnext (≤ (0x1 ), idx. probepositions, oldkeyindex + 0x1 )
129+ has_duplicates = ! allunique (idx)
112130 @inbounds if ! isnothing (lastidx)
113131 lastidx -= 0x1
132+
133+ if ! is_duplicate && has_duplicates
134+ subvals = @view idx. vals[idx. indices[oldkeyindex: lastidx]]
135+ is_duplicate = ! isnothing (findnext (isequal (subvals[1 ]), subvals, 2 ))
136+ end
137+
114138 idx. indices[oldkeyindex: (lastidx - 0x1 )] .= @view idx. indices[(oldkeyindex + 0x1 ): lastidx]
115139 idx. probepositions[oldkeyindex: (lastidx - 0x1 )] .= @view (idx. probepositions[(oldkeyindex + 0x1 ): lastidx]) .- 0x1
116140 idx. indices[lastidx] = idx. probepositions[lastidx] = 0x0
117141 else
118142 if oldkeyindex < _length (idx)
143+ if ! is_duplicate && has_duplicates
144+ subvals = @view idx. vals[idx. indices[oldkeyindex: end ]]
145+ is_duplicate = ! isnothing (findnext (isequal (subvals[1 ]), subvals, 2 ))
146+ end
147+
119148 idx. indices[oldkeyindex: (end - 0x1 )] .= @view idx. indices[(oldkeyindex + 0x1 ): end ]
120149 idx. probepositions[oldkeyindex: (end - 0x1 )] .= @view (idx. probepositions[(oldkeyindex + 0x1 ): end ]) .- 0x1
121150 end
122151 if idx. probepositions[1 ] > 0x1
123- idx. indices[end ] = idx. indices[1 ]
124- idx. probepositions[end ] = idx. probepositions[1 ] - 0x1
125-
126152 lastidx = findnext (≤ (0x1 ), idx. probepositions, 2 )
127153 if isnothing (lastidx)
128154 lastidx = oldkeyindex
129155 end
130156 lastidx -= 0x1
157+
158+ if ! is_duplicate && has_duplicates
159+ subvals = @view idx. vals[idx. indices[1 : lastidx]]
160+ is_duplicate = ! isnothing (findnext (isequal (subvals[1 ]), subvals, 2 ))
161+ end
162+
163+ idx. indices[end ] = idx. indices[1 ]
164+ idx. probepositions[end ] = idx. probepositions[1 ] - 0x1
131165 idx. indices[0x1 : (lastidx - 0x1 )] .= @view idx. indices[0x2 : lastidx]
132166 idx. probepositions[0x1 : (lastidx - 0x1 )] .= @view (idx. probepositions[0x2 : lastidx]) .- 0x1
133167 idx. indices[lastidx] = idx. probepositions[lastidx] = 0x0
@@ -136,6 +170,7 @@ function _delete!(idx::Index, oldkeyindex::Integer)
136170 end
137171 end
138172
173+ idx. duplicates -= is_duplicate
139174 idx. deletions += 0x1
140175end
141176
@@ -157,6 +192,7 @@ Base.getindex(idx::AbstractIndex{T, V}, elems::AbstractVector{T}, x::Union{Val{t
157192 isempty (elems) ? V[] : reduce (vcat, (getindex (idx, elem, x) for elem ∈ elems))
158193Base. in (elem:: T , idx:: AbstractIndex{T} ) where {T} = getindex (idx, elem, Val (false ), Val (false )) != 0x0
159194
195+ Base. allunique (idx:: Index ) = idx. duplicates == 0x0
160196Base. getindex (idx:: Index{T} , elem:: T , :: Val{true} , :: Val{false} ) where {T} =
161197 @inbounds idx. indices[_getindex_array (idx, elem)] # exceptions may be undesirable in high-performance scenarios
162198function Base. getindex (idx:: Index{T} , elem:: T , :: Val{true} ) where {T}
185221
186222@inline function Base. setindex! (idx:: Index{T} , newval:: T , i:: Integer ) where {T}
187223 @boundscheck checkbounds (idx. vals, i)
188- oldkeyindex = _getindex_byposition (idx, i)
224+ oldkeyindex, is_duplicate = _getindex_byposition (idx, i)
189225 validx = idx. indices[oldkeyindex]
190- _delete! (idx, oldkeyindex)
226+ _delete! (idx, oldkeyindex, is_duplicate )
191227 _setindex! (idx, newval, validx)
192228 return idx
193229end
@@ -198,7 +234,7 @@ function Base.setindex!(idx::Index{T}, newval::T, oldval::T) where {T}
198234 throw (KeyError (oldval))
199235 end
200236 validx = idx. indices[oldkeyindex]
201- _delete! (idx, oldkeyindex)
237+ _delete! (idx, oldkeyindex, false )
202238 _setindex! (idx, newval, validx)
203239 return idx
204240end
@@ -209,7 +245,7 @@ Base.values(idx::Index) = idx.vals
209245Base. convert (:: Type{<:Index} , x:: AbstractArray ) = Index (x)
210246Base. convert (:: Type{<:Index} , x:: Index ) = x
211247
212- struct SubIndex{T, V, I} <: AbstractIndex{T, V}
248+ mutable struct SubIndex{T, V, I} <: AbstractIndex{T, V}
213249 parent:: Index{T, V}
214250 indices:: I
215251 revmapping:: Union{Nothing, Index}
224260 @boundscheck checkbounds (idx, I)
225261 return SubIndex (idx, I, Index (V .(I)))
226262end
227- @inline function Base. view (idx:: Index{T, V} , I:: AbstractArray{Bool} ) where {T, V}
263+ @inline function Base. view (idx:: Index , I:: AbstractArray{Bool} )
228264 @boundscheck checkbounds (idx, I)
229- return SubIndex (idx, I, Index ( V .( findall (I)) ))
265+ return @inbounds view (idx, findall (I))
230266end
231267@inline function Base. view (idx:: Index , I:: Integer )
232268 @boundscheck checkbounds (idx, I)
@@ -242,9 +278,17 @@ Base.parent(si::SubIndex) = si.parent
242278Base. parentindices (si:: SubIndex ) = si. indices
243279Base. length (si:: SubIndex ) = length (parentindices (si))
244280Base. length (si:: SubIndex{T, V, Colon} ) where {T, V} = length (parent (si))
245- Base. length (si:: SubIndex{T, V, I} ) where {T, V, I <: AbstractArray{Bool} } = length (si. revmapping)
246281Base. size (si:: SubIndex ) = (length (si),)
247- Base. values (si:: SubIndex ) = parent (si)[parentindices (si)]
282+ Base. values (si:: SubIndex ) = @view parent (si)[parentindices (si)]
283+
284+ Base. allunique (si:: SubIndex{T, V, Colon} ) where {T, V} = allunique (parent (si))
285+ # it is impossible to accurately keep track of duplicates for views: The parent object may be changed by another view or by itself
286+ # thus we really need to always recheck for duplicates in the view if the parent has any
287+ function Base. allunique (si:: SubIndex )
288+ return allunique (parent (si)) ? true :
289+ length (parentindices (si)) - length (unique (view (parent (si). vals, parentindices (si)))) == 0x0
290+ end
291+
248292function Base. getindex (si:: SubIndex{T} , elem:: T , :: Val{true} , :: Val{false} ) where {T}
249293 res = getindex (parent (si), elem, Val (true ), Val (false ))
250294 i = 0
@@ -260,15 +304,6 @@ function Base.getindex(si::SubIndex{T}, elem::T, ::Val{true}, ::Val{false}) wher
260304end
261305Base. getindex (si:: SubIndex{T, V, Colon} , elem:: T , :: Val{true} , :: Val{false} ) where {T, V} =
262306 getindex (parent (si), elem, Val (true ), Val (false ))
263- function Base. getindex (si:: SubIndex{T, V, I} , elem:: T , :: Val{true} , :: Val{false} ) where {T, V, I <: AbstractArray{Bool} }
264- res = getindex (parent (si), elem, Val (true ), Val (false ))
265- res = res[parentindices (si)[res]]
266- @inbounds for (i, r) ∈ enumerate (res)
267- res[i] = si. revmapping[r, false , false ]
268- end
269-
270- return res
271- end
272307function Base. getindex (
273308 si:: SubIndex{T, V, I} ,
274309 elem:: T ,
@@ -313,18 +348,6 @@ function Base.getindex(si::SubIndex{T, V}, elem::T, ::Val{false}, ::Val{false})
313348end
314349Base. getindex (si:: SubIndex{T, V, Colon} , elem:: T , :: Val{false} , :: Val{false} ) where {T, V} =
315350 getindex (parent (si), elem, Val (false ), Val (false ))
316- function Base. getindex (
317- si:: SubIndex{T, V, I} ,
318- elem:: T ,
319- :: Val{false} ,
320- :: Val{false} ,
321- ) where {T, V, I <: AbstractArray{Bool} }
322- res = getindex (parent (si, elem, Val (false ), Val (false )))
323- if res == 0x0 || res > 0x0 && ! (@inbounds parentindices (si)[res])
324- return V (0x0 )
325- end
326- return si. revmapping[res, false , false ]
327- end
328351function Base. getindex (
329352 si:: SubIndex{T, V, I} ,
330353 elem:: T ,
@@ -369,7 +392,7 @@ Base.@propagate_inbounds function Base.setindex!(si::SubIndex{T}, newval::T, i::
369392end
370393Base. @propagate_inbounds function Base. setindex! (si:: SubIndex{T, V, Colon} , newval:: T , i:: Integer ) where {T, V}
371394 @boundscheck checkbounds (si, i)
372- setindex! (parent (si), newval, i)
395+ return setindex! (parent (si), newval, i)
373396end
374397Base. @propagate_inbounds function Base. setindex! (si:: SubIndex{T} , newval:: T , oldval:: T ) where {T}
375398 oldidx = parent (si)[oldval, true ]
@@ -380,19 +403,6 @@ Base.@propagate_inbounds function Base.setindex!(si::SubIndex{T}, newval::T, old
380403 parent (si)[oldidx[foldidx]] = newval
381404 return si
382405end
383- Base. @propagate_inbounds function Base. setindex! (
384- si:: SubIndex{T, V, I} ,
385- newval:: T ,
386- oldval:: T ,
387- ) where {T, V, I <: AbstractArray{Bool} }
388- oldidx = parent (si)[oldval, true ]
389- oldidx = oldidx[parentindices (si)[oldix]]
390- if length (oldidx) == 0
391- throw (KeyError (oldval))
392- end
393- parent (si)[oldidx[1 ]] = newval
394- return si
395- end
396406Base. @propagate_inbounds function Base. setindex! (
397407 si:: SubIndex{T, V, I} ,
398408 newval:: T ,
0 commit comments