Skip to content

Commit daab0fd

Browse files
authored
Merge pull request #501 from rogpeppe-contrib/004-new-unsetbits-pr
more efficient UnsetIterator implementation
2 parents 45a0bac + 2538288 commit daab0fd

File tree

8 files changed

+617
-68
lines changed

8 files changed

+617
-68
lines changed

arraycontainer.go

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,10 @@ func (ac *arrayContainer) getManyIterator() manyIterable {
6262
return &shortIterator{ac.content, 0}
6363
}
6464

65+
func (ac *arrayContainer) getUnsetIterator() shortPeekable {
66+
return newArrayContainerUnsetIterator(ac.content)
67+
}
68+
6569
// minimum returns the first element of content. The container is assumed to
// be non-empty; no length check is performed before indexing.
// NOTE(review): this relies on content being kept in ascending order — confirm.
func (ac *arrayContainer) minimum() uint16 {
	first := ac.content[0] // assume not empty
	return first
}

benchmark_test.go

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -600,6 +600,22 @@ func BenchmarkIterateRoaring(b *testing.B) {
600600
})
601601
}
602602
})
603+
b.Run("unsetIterator", func(b *testing.B) {
604+
b.ReportAllocs()
605+
606+
s := Flip(newBitmap(), 0, 0x100000000)
607+
608+
b.ResetTimer()
609+
610+
for j := 0; j < b.N; j++ {
611+
c9 = uint(0)
612+
i := s.UnsetIterator(0, 0xffffffff)
613+
for i.HasNext() {
614+
i.Next()
615+
c9++
616+
}
617+
}
618+
})
603619
}
604620

605621
// go test -bench BenchmarkSparseIterate -run -

bitmapcontainer.go

Lines changed: 56 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -262,6 +262,39 @@ func (bc *bitmapContainer) getManyIterator() manyIterable {
262262
return newBitmapContainerManyIterator(bc)
263263
}
264264

265+
type bitmapContainerUnsetIterator struct {
266+
ptr *bitmapContainer
267+
i int
268+
}
269+
270+
func (bcui *bitmapContainerUnsetIterator) next() uint16 {
271+
j := bcui.i
272+
bcui.i = bcui.ptr.NextUnsetBit(uint(bcui.i) + 1)
273+
return uint16(j)
274+
}
275+
276+
func (bcui *bitmapContainerUnsetIterator) hasNext() bool {
277+
return bcui.i >= 0 && bcui.i < 65536
278+
}
279+
280+
func (bcui *bitmapContainerUnsetIterator) peekNext() uint16 {
281+
return uint16(bcui.i)
282+
}
283+
284+
func (bcui *bitmapContainerUnsetIterator) advanceIfNeeded(minval uint16) {
285+
if bcui.hasNext() && bcui.peekNext() < minval {
286+
bcui.i = bcui.ptr.NextUnsetBit(uint(minval))
287+
}
288+
}
289+
290+
func newBitmapContainerUnsetIterator(a *bitmapContainer) *bitmapContainerUnsetIterator {
291+
return &bitmapContainerUnsetIterator{a, a.NextUnsetBit(0)}
292+
}
293+
294+
func (bc *bitmapContainer) getUnsetIterator() shortPeekable {
295+
return newBitmapContainerUnsetIterator(bc)
296+
}
297+
265298
// getSizeInBytes reports eight bytes for every word held in bc.bitmap.
func (bc *bitmapContainer) getSizeInBytes() int {
	const bytesPerWord = 8
	return bytesPerWord * len(bc.bitmap)
}
@@ -1113,6 +1146,29 @@ func (bc *bitmapContainer) NextSetBit(i uint) int {
11131146
return -1
11141147
}
11151148

1149+
func (bc *bitmapContainer) NextUnsetBit(i uint) int {
1150+
var (
1151+
x = i / 64
1152+
length = uint(len(bc.bitmap))
1153+
)
1154+
if x >= length {
1155+
return int(i)
1156+
}
1157+
w := bc.bitmap[x]
1158+
w = w >> uint(i%64)
1159+
w = ^w
1160+
if w != 0 {
1161+
return int(i) + countTrailingZeros(w)
1162+
}
1163+
x++
1164+
for ; x < length; x++ {
1165+
if bc.bitmap[x] != 0xFFFFFFFFFFFFFFFF {
1166+
return int(x*64) + countTrailingZeros(^bc.bitmap[x])
1167+
}
1168+
}
1169+
return int(length * 64)
1170+
}
1171+
11161172
// PrevSetBit returns the previous set bit, i.e. the previous int packed into the bitmap array
11171173
func (bc *bitmapContainer) PrevSetBit(i int) int {
11181174
if i < 0 {

property_test.go

Lines changed: 287 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,287 @@
1+
package roaring
2+
3+
import (
4+
"fmt"
5+
"math/rand"
6+
"testing"
7+
)
8+
9+
// TestBitmapProperties runs all invariants against all bitmaps in the corpus
10+
func TestBitmapProperties(t *testing.T) {
11+
corpus := getBitmapCorpus()
12+
invariants := getInvariants()
13+
14+
for _, gen := range corpus {
15+
for _, inv := range invariants {
16+
t.Run(fmt.Sprintf("%s/%s", gen.name, inv.name), func(t *testing.T) {
17+
b := gen.gen()
18+
inv.test(t, b)
19+
})
20+
}
21+
}
22+
}
23+
24+
// TestBitmapPropertiesWithRunOptimize tests all invariants on RunOptimize'd bitmaps
25+
func TestBitmapPropertiesWithRunOptimize(t *testing.T) {
26+
corpus := getBitmapCorpus()
27+
invariants := getInvariants()
28+
29+
for _, gen := range corpus {
30+
for _, inv := range invariants {
31+
t.Run(fmt.Sprintf("%s/%s_optimized", gen.name, inv.name), func(t *testing.T) {
32+
b := gen.gen()
33+
b.RunOptimize()
34+
inv.test(t, b)
35+
})
36+
}
37+
}
38+
}
39+
40+
// bitmapGenerator is a function that creates a test bitmap
41+
type bitmapGenerator struct {
42+
name string
43+
gen func() *Bitmap
44+
}
45+
46+
// invariant is a property that should hold for all bitmaps
47+
type invariant struct {
48+
name string
49+
test func(t *testing.T, b *Bitmap)
50+
}
51+
52+
// getInvariants returns all property invariants to test
53+
func getInvariants() []invariant {
54+
return []invariant{
55+
{name: "doubleflip", test: doubleFlipInvariant},
56+
{name: "iteratorbits", test: iteratorBitsInvariant},
57+
{name: "unsetiteratorbits", test: unsetIteratorBitsInvariant},
58+
}
59+
}
60+
61+
// doubleFlipInvariant checks that flip(flip(b)) == b
62+
func doubleFlipInvariant(t *testing.T, b *Bitmap) {
63+
original := b.Clone()
64+
65+
// Find the range to flip (slightly larger than the bitmap extent)
66+
var maxVal uint64
67+
if b.IsEmpty() {
68+
maxVal = 1000
69+
} else {
70+
maxVal = uint64(b.Maximum()) + 1000
71+
}
72+
73+
// Flip twice
74+
b.Flip(0, maxVal)
75+
b.Flip(0, maxVal)
76+
77+
// Should be equal to original
78+
if !original.Equals(b) {
79+
t.Errorf("double flip should restore original bitmap, original card=%d, result card=%d",
80+
original.GetCardinality(), b.GetCardinality())
81+
}
82+
}
83+
84+
// iteratorBitsInvariant checks that creating a bitmap from iterator bits gives the same bitmap
85+
func iteratorBitsInvariant(t *testing.T, b *Bitmap) {
86+
original := b.Clone()
87+
88+
// Create new bitmap from iterator
89+
result := NewBitmap()
90+
iter := original.Iterator()
91+
for iter.HasNext() {
92+
result.Add(iter.Next())
93+
}
94+
95+
// Should be equal to original
96+
if !original.Equals(result) {
97+
t.Errorf("bitmap reconstructed from iterator should equal original, original card=%d, result card=%d",
98+
original.GetCardinality(), result.GetCardinality())
99+
}
100+
}
101+
102+
// unsetIteratorBitsInvariant checks that creating a bitmap from unset iterator, then flipping, gives the same bitmap
103+
func unsetIteratorBitsInvariant(t *testing.T, b *Bitmap) {
104+
original := b.Clone()
105+
106+
numUnset := 0x100000000 - b.GetCardinality()
107+
if numUnset > 1000000 {
108+
t.Skip("too many iterations")
109+
}
110+
111+
// Create bitmap from unset bits
112+
result := NewBitmap()
113+
iter := original.UnsetIterator(0, 0x100000000)
114+
i := 0
115+
for iter.HasNext() {
116+
i++
117+
result.Add(iter.Next())
118+
}
119+
120+
// Flip the result in the same range
121+
result.Flip(0, 0x100000000)
122+
123+
// Should be equal to original
124+
if !original.Equals(result) {
125+
t.Errorf("bitmap reconstructed from unset iterator + flip should equal original, original card=%d, result card=%d",
126+
original.GetCardinality(), result.GetCardinality())
127+
}
128+
}
129+
130+
// getBitmapCorpus returns a diverse set of bitmaps for property testing
131+
func getBitmapCorpus() []bitmapGenerator {
132+
return []bitmapGenerator{
133+
{
134+
name: "empty",
135+
gen: func() *Bitmap {
136+
return NewBitmap()
137+
},
138+
},
139+
{
140+
name: "single_bit",
141+
gen: func() *Bitmap {
142+
b := NewBitmap()
143+
b.Add(42)
144+
return b
145+
},
146+
},
147+
{
148+
name: "sparse_small",
149+
gen: func() *Bitmap {
150+
b := NewBitmap()
151+
for i := 0; i < 100; i++ {
152+
b.Add(uint32(i * 1000))
153+
}
154+
return b
155+
},
156+
},
157+
{
158+
name: "sparse_random",
159+
gen: func() *Bitmap {
160+
b := NewBitmap()
161+
r := rand.New(rand.NewSource(12345))
162+
domain := 100000000
163+
count := 10000
164+
for j := 0; j < count; j++ {
165+
v := uint32(r.Intn(domain))
166+
b.Add(v)
167+
}
168+
return b
169+
},
170+
},
171+
{
172+
name: "dense_small",
173+
gen: func() *Bitmap {
174+
b := NewBitmap()
175+
for i := 0; i < 10000; i++ {
176+
b.Add(uint32(i))
177+
}
178+
return b
179+
},
180+
},
181+
{
182+
name: "dense_range",
183+
gen: func() *Bitmap {
184+
b := NewBitmap()
185+
b.AddRange(0, 100000)
186+
return b
187+
},
188+
},
189+
{
190+
name: "sequential_ranges",
191+
gen: func() *Bitmap {
192+
b := NewBitmap()
193+
b.AddRange(0, 1000)
194+
b.AddRange(10000, 11000)
195+
b.AddRange(100000, 101000)
196+
return b
197+
},
198+
},
199+
{
200+
name: "mixed_containers",
201+
gen: func() *Bitmap {
202+
b := NewBitmap()
203+
// Sparse in first container
204+
for i := 0; i < 100; i++ {
205+
b.Add(uint32(i * 100))
206+
}
207+
// Dense in second container
208+
for i := 0; i < 60000; i++ {
209+
b.Add(uint32(65536 + i))
210+
}
211+
// Sparse in third container
212+
for i := 0; i < 50; i++ {
213+
b.Add(uint32(131072 + i*1000))
214+
}
215+
return b
216+
},
217+
},
218+
{
219+
name: "alternating_bits",
220+
gen: func() *Bitmap {
221+
b := NewBitmap()
222+
for i := 0; i < 100000; i += 2 {
223+
b.Add(uint32(i))
224+
}
225+
return b
226+
},
227+
},
228+
{
229+
name: "high_values",
230+
gen: func() *Bitmap {
231+
b := NewBitmap()
232+
r := rand.New(rand.NewSource(54321))
233+
for i := 0; i < 1000; i++ {
234+
v := uint32(r.Intn(0x70000000) + 0x7fffffff)
235+
b.Add(v)
236+
}
237+
return b
238+
},
239+
},
240+
{
241+
name: "iterator_benchmark_sparse",
242+
gen: func() *Bitmap {
243+
// Based on BenchmarkIteratorAlloc
244+
b := NewBitmap()
245+
r := rand.New(rand.NewSource(0))
246+
sz := 1000000
247+
initsize := 50000
248+
for i := 0; i < initsize; i++ {
249+
b.Add(uint32(r.Intn(sz)))
250+
}
251+
return b
252+
},
253+
},
254+
{
255+
name: "iterator_benchmark_dense",
256+
gen: func() *Bitmap {
257+
// Based on BenchmarkNexts
258+
b := NewBitmap()
259+
for i := 0; i < 200000; i++ {
260+
b.Add(uint32(i))
261+
}
262+
return b
263+
},
264+
},
265+
{
266+
name: "iterator_benchmark_rle",
267+
gen: func() *Bitmap {
268+
// Based on BenchmarkNextsRLE
269+
b := NewBitmap()
270+
b.AddRange(0, 1000000)
271+
b.RunOptimize()
272+
return b
273+
},
274+
},
275+
{
276+
name: "gaps_and_runs",
277+
gen: func() *Bitmap {
278+
b := NewBitmap()
279+
for i := 0; i < 10; i++ {
280+
start := uint64(i * 100000)
281+
b.AddRange(start, start+10000)
282+
}
283+
return b
284+
},
285+
},
286+
}
287+
}

0 commit comments

Comments
 (0)