Skip to content

Commit 0d9cb40

Browse files
authored
Merge pull request #618 from sbromberger/sbromberger/dibitvectors
dibit_vector
2 parents 082b283 + e53ac6a commit 0d9cb40

File tree

9 files changed

+199
-3
lines changed

9 files changed

+199
-3
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name = "DataStructures"
22
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
3-
version = "0.17.16"
3+
version = "0.17.17"
44

55
[deps]
66
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ This package implements a variety of data structures, including
2828
- Sorted Dict, Sorted Multi-Dict and Sorted Set
2929
- DataStructures.IntSet
3030
- SparseIntSet
31+
- DiBitVector (in which each element can store two bits)
3132

3233
Resources
3334
---------

docs/make.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ makedocs(
2424
"mutable_linked_list.md",
2525
"intset.md",
2626
"sorted_containers.md",
27+
"dibit_vector.md",
2728
],
2829
modules = [DataStructures],
2930
format = Documenter.HTML()

docs/src/dibit_vector.md

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
```@meta
2+
DocTestSetup = :(using DataStructures)
3+
```
4+
5+
# DiBitVector
6+
7+
`DiBitVector` provides a memory-efficient vector in which each element is two bits wide and holds one of the four values `0` through `3`. Its performance and memory characteristics are comparable to those of a `BitVector`.
8+
9+
Examples:
10+
11+
```jldoctest
12+
julia> v = DiBitVector(4, 0)
13+
4-element DiBitVector:
14+
0x00
15+
0x00
16+
0x00
17+
0x00
18+
19+
julia> w = DiBitVector(4, 2)
20+
4-element DiBitVector:
21+
0x02
22+
0x02
23+
0x02
24+
0x02
25+
26+
julia> v[1] = 2
27+
2
28+
29+
julia> v[2:4] .= 2
30+
3-element view(::DiBitVector, 2:4) with eltype UInt8:
31+
0x02
32+
0x02
33+
0x02
34+
35+
julia> v == w
36+
true
37+
38+
julia> pop!(v)
39+
0x02
40+
41+
julia> length(v)
42+
3
43+
```
44+
45+
```@meta
46+
DocTestSetup = nothing
47+
```

docs/src/index.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ This package implements a variety of data structures, including
2121
- Sorted Dict, Sorted Multi-Dict and Sorted Set
2222
- DataStructures.IntSet
2323
- SparseIntSet
24+
- DiBitVector
2425

2526
## Contents
2627

@@ -44,6 +45,7 @@ Pages = [
4445
"mutable_linked_list.md",
4546
"intset.md",
4647
"sorted_containers.md",
47-
"sparse_int_set.md"
48+
"sparse_int_set.md",
49+
"dibit_vector.md"
4850
]
4951
```

src/DataStructures.jl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ module DataStructures
1111
ReverseOrdering, Reverse, Lt,
1212
isless, union, intersect, symdiff, setdiff, issubset,
1313
searchsortedfirst, searchsortedlast, in,
14-
eachindex, keytype, valtype, minimum, maximum, size
14+
eachindex, keytype, valtype, minimum, maximum, size,
15+
zero, checkbounds
16+
1517

1618
using OrderedCollections
1719
import OrderedCollections: filter, filter!, isordered
@@ -57,6 +59,8 @@ module DataStructures
5759
export MultiDict, enumerateall
5860
export RobinDict
5961

62+
export DiBitVector
63+
6064
export findkey
6165

6266
include("delegate.jl")
@@ -107,5 +111,6 @@ module DataStructures
107111
include("sparse_int_set.jl")
108112
export SparseIntSet
109113

114+
include("dibit_vector.jl")
110115
include("deprecations.jl")
111116
end

src/dibit_vector.jl

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
"""
2+
DiBitVector(n::Integer, v::Integer)
3+
4+
Create a `DiBitVector` with `n` elements preinitialized to a value `v`
5+
from `0` to `3`, inclusive.
6+
7+
A `DiBitVector` is a vector whose elements are two bits wide, allowing
8+
storage of integer values between 0 and 3. This structure is optimized for
9+
performance and memory savings for large numbers of elements.
10+
"""
11+
mutable struct DiBitVector <: AbstractVector{UInt8}
    # Packed storage: each `UInt64` word holds 32 two-bit elements.
    data::Vector{UInt64}
    # Number of elements in use; may be less than `32 * length(data)`.
    len::UInt

    # Build a vector of `n` elements, every one equal to `v`.
    # Throws `ArgumentError` when `n` is negative or `v` is outside `0:3`.
    function DiBitVector(n::Integer, v::Integer)
        if Int(n) < 0
            throw(ArgumentError("n ($n) must be greater than or equal to zero"))
        end
        if !(Int(v) in 0:3)
            throw(ArgumentError("v ($v) must be in 0:3"))
        end
        # Word pattern that repeats the two-bit value `v` in all 32 slots.
        fv = (0x0000000000000000, 0x5555555555555555,
              0xaaaaaaaaaaaaaaaa, 0xffffffffffffffff)[v + 1]
        # One word per 32 elements, rounded up; unused slots in the last word
        # carry the same fill pattern.
        words = fill(fv, cld(n, 32))
        return new(words, n % UInt64)
    end
end
29+
30+
# Bounds check for a single index. Valid positions run from 1 through the
# logical length `D.len`, not the allocated capacity (`length(D.data) << 5`),
# so unused slots in a partially filled last word are rejected.
# Evaluates to `true` when `n` is in bounds; throws `BoundsError` otherwise.
@inline function Base.checkbounds(D::DiBitVector, n::Integer)
    return 0 < n <= D.len || throw(BoundsError(D, n))
end
31+
32+
"""
33+
DiBitVector(n::Integer)
34+
35+
Create a [`DiBitVector`](@ref) with `n` elements set to `0`.
36+
"""
37+
function DiBitVector(n::Integer)
    # Delegate to the two-argument constructor with a fill value of 0.
    return DiBitVector(n, 0)
end
38+
# Zero-argument form: a vector with no elements.
function DiBitVector()
    return DiBitVector(0, 0)
end
39+
40+
# Element count, taken from the unsigned `len` field and reinterpreted as `Int`.
@inline function Base.length(x::DiBitVector)
    return x.len % Int
end
41+
# Shape of the vector: a 1-tuple containing the element count.
@inline function Base.size(x::DiBitVector)
    return (length(x),)
end
42+
43+
# 1-based position of the storage word that holds element `n`.
# Shifting by 5 divides by 32, the number of two-bit elements per `UInt64`.
@inline function index(n::Integer)
    word = (n - 1) >>> 5
    return word + 1
end
44+
# Bit offset (0, 2, ..., 62) of element `n` inside its storage word.
@inline function offset(n::Integer)
    slot = UInt64(n) - 1
    # Two bits per element; masking with 63 wraps the shift within one word.
    return (slot << 1) & 63
end
45+
46+
# Read element `i`: fetch the word that holds it, shift the element's two bits
# down to the low end, mask them off and return them as a `UInt8`.
@inline function Base.getindex(x::DiBitVector, i::Int)
    @boundscheck checkbounds(x, i)
    word = @inbounds x.data[index(i)]
    return UInt8((word >>> offset(i)) & 3)
end
50+
51+
# Write the two-bit value `v` at position `i` with no bounds or range checks.
# The function's value is the updated storage word.
@inline function unsafe_setindex!(x::DiBitVector, v::UInt64, i::Int)
    w = index(i)
    shift = offset(i)
    # Clear the element's two bits, then OR in the new value.
    cleared = (@inbounds x.data[w]) & ~(UInt64(3) << shift)
    @inbounds x.data[w] = cleared | (v << shift)
end
57+
58+
# Store `v` at position `i` and return `x`.
# Throws `DomainError` when `v` is outside `0:3` and `BoundsError` when `i` is
# out of range.
@inline function Base.setindex!(x::DiBitVector, v::Integer, i::Int)
    # `DomainError` takes the offending value first and the explanation second.
    v & 3 == v || throw(DomainError(v, "DiBitVector elements must be in 0:3"))
    @boundscheck checkbounds(x, i)
    unsafe_setindex!(x, convert(UInt64, v), i)
    # Return the collection rather than the raw storage word.
    return x
end
63+
64+
# Append `v` to the end of `x` and return `x`.
# Throws `InexactError` for negative `v` and `DomainError` for `v > 3`; in both
# cases `x` is left unmodified.
@inline function Base.push!(x::DiBitVector, v::Integer)
    # Validate before touching any state, so a rejected value cannot leave the
    # vector one element longer than before.
    val = convert(UInt64, v)
    val <= 3 || throw(DomainError(v, "DiBitVector elements must be in 0:3"))

    len = length(x)
    # All allocated slots are in use: add one more word.
    len == length(x.data) << 5 && push!(x.data, zero(UInt64))
    x.len = (len + 1) % UInt64
    # The slot was range-checked above and is inside the allocated storage.
    unsafe_setindex!(x, val, len + 1)
    return x
end
71+
72+
# Remove the last element and return it. The final storage word is released
# once the remaining elements no longer reach into it.
# Throws `ArgumentError` when the vector is empty.
@inline function Base.pop!(x::DiBitVector)
    if x.len == 0
        throw(ArgumentError("array must be non-empty"))
    end
    last_value = x[end]
    x.len = (x.len - 1) % UInt64
    if x.len == UInt64(length(x.data) - 1) << 5
        pop!(x.data)
    end
    return last_value
end
79+
80+
# All-zero `DiBitVector` with the same number of elements as `x`.
@inline function Base.zero(x::DiBitVector)
    return DiBitVector(x.len, 0)
end
81+

test/runtests.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ tests = ["int_set",
3030
"priority_queue",
3131
"fenwick",
3232
"robin_dict",
33+
"dibit_vector",
3334
]
3435

3536
if length(ARGS) > 0

test/test_dibit_vector.jl

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
@testset "DiBitVectors" begin
    @testset "construction" begin
        # Fill values outside 0:3 and negative lengths are rejected.
        @test_throws ArgumentError DiBitVector(5, 4)
        @test_throws ArgumentError DiBitVector(5, -1)
        @test_throws ArgumentError DiBitVector(-5)
        @test_throws ArgumentError DiBitVector(-5, 1)

        d1 = DiBitVector(10)
        d2 = DiBitVector(10, 0)
        @test length(d1) == length(d2) == 10
        @test d1 == d2
        @test all(d1 .== 0)
        @test all(d2 .== 0)
        @test size(d1) == size(d2) == (10,)
    end

    @testset "empty vector" begin
        d0 = DiBitVector()
        @test length(d0) == 0
        @test isempty(d0)
        @test_throws ArgumentError pop!(d0)

        push!(d0, 1)
        @test length(d0) == 1
        @test pop!(d0) == 1
        @test length(d0) == 0
        @test_throws ArgumentError pop!(d0)
    end

    @testset "push!, pop! and storage words" begin
        d3 = DiBitVector(30, 3)
        @test all(d3 .== 3)
        @test d3[1] == d3[end] == 3

        # (value pushed, expected length, expected number of storage words)
        for (val, len, words) in ((0, 31, 1), (1, 32, 1), (2, 33, 2), (3, 34, 2))
            push!(d3, val)
            @test length(d3) == len && length(d3.data) == words
        end

        # (value expected from pop!, expected length, expected storage words)
        for (val, len, words) in
            ((3, 33, 2), (2, 32, 1), (1, 31, 1), (0, 30, 1), (3, 29, 1))
            @test pop!(d3) == val
            @test length(d3) == len && length(d3.data) == words
        end

        @test zero(d3) == DiBitVector(length(d3))

        @test_throws BoundsError d3[0]
        @test_throws BoundsError d3[-1]
        @test_throws BoundsError d3[99991]
    end
end
58+

0 commit comments

Comments
 (0)