Skip to content

Commit c82f60c

Browse files
authored
Merge pull request #249 from kevinconaway/kevinconaway/add-iterate
Add Iterate() method for iterating the bitmap without an iterator
2 parents 239247b + 7c87cea commit c82f60c

File tree

7 files changed

+223
-16
lines changed

7 files changed

+223
-16
lines changed

arraycontainer.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,18 @@ func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uin
2424
}
2525
}
2626

27+
func (ac *arrayContainer) iterate(cb func(x uint16) bool) bool {
28+
iterator := shortIterator{ac.content, 0}
29+
30+
for iterator.hasNext() {
31+
if !cb(iterator.next()) {
32+
return false
33+
}
34+
}
35+
36+
return true
37+
}
38+
2739
// getShortIterator builds a new shortIterator from ac.content and 0 and
// returns a pointer to it as a shortPeekable.
func (ac *arrayContainer) getShortIterator() shortPeekable {
	it := shortIterator{ac.content, 0}
	return &it
}

benchmark_test.go

Lines changed: 76 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -363,23 +363,84 @@ func BenchmarkCountBitset(b *testing.B) {
363363

364364
// go test -bench BenchmarkIterate -run -
365365
func BenchmarkIterateRoaring(b *testing.B) {
366-
b.StopTimer()
367-
r := rand.New(rand.NewSource(0))
368-
s := NewBitmap()
369-
sz := 150000
370-
initsize := 65000
371-
for i := 0; i < initsize; i++ {
372-
s.Add(uint32(r.Int31n(int32(sz))))
373-
}
374-
b.StartTimer()
375-
for j := 0; j < b.N; j++ {
376-
c9 = uint(0)
377-
i := s.Iterator()
378-
for i.HasNext() {
379-
i.Next()
380-
c9++
366+
newBitmap := func() *Bitmap {
367+
r := rand.New(rand.NewSource(0))
368+
s := NewBitmap()
369+
sz := 150000
370+
initsize := 65000
371+
for i := 0; i < initsize; i++ {
372+
s.Add(uint32(r.Int31n(int32(sz))))
381373
}
374+
return s
382375
}
376+
377+
b.Run("iterator-compressed", func(b *testing.B) {
378+
b.ReportAllocs()
379+
380+
s := newBitmap()
381+
s.RunOptimize()
382+
383+
b.ResetTimer()
384+
385+
for j := 0; j < b.N; j++ {
386+
c9 = uint(0)
387+
i := s.Iterator()
388+
for i.HasNext() {
389+
i.Next()
390+
c9++
391+
}
392+
}
393+
})
394+
395+
b.Run("iterator", func(b *testing.B) {
396+
b.ReportAllocs()
397+
398+
s := newBitmap()
399+
400+
b.ResetTimer()
401+
402+
for j := 0; j < b.N; j++ {
403+
c9 = uint(0)
404+
i := s.Iterator()
405+
for i.HasNext() {
406+
i.Next()
407+
c9++
408+
}
409+
}
410+
})
411+
412+
b.Run("iterate-compressed", func(b *testing.B) {
413+
b.ReportAllocs()
414+
415+
s := newBitmap()
416+
s.RunOptimize()
417+
418+
b.ResetTimer()
419+
420+
for j := 0; j < b.N; j++ {
421+
c9 = uint(0)
422+
s.Iterate(func(x uint32) bool {
423+
c9++
424+
return true
425+
})
426+
}
427+
})
428+
429+
b.Run("iterate", func(b *testing.B) {
430+
b.ReportAllocs()
431+
432+
s := newBitmap()
433+
434+
b.ResetTimer()
435+
436+
for j := 0; j < b.N; j++ {
437+
c9 = uint(0)
438+
s.Iterate(func(x uint32) bool {
439+
c9++
440+
return true
441+
})
442+
}
443+
})
383444
}
384445

385446
// go test -bench BenchmarkSparseIterate -run -

bitmapcontainer.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,18 @@ func (bc *bitmapContainer) maximum() uint16 {
9696
return uint16(0)
9797
}
9898

99+
func (bc *bitmapContainer) iterate(cb func(x uint16) bool) bool {
100+
iterator := bitmapContainerShortIterator{bc, bc.NextSetBit(0)}
101+
102+
for iterator.hasNext() {
103+
if !cb(iterator.next()) {
104+
return false
105+
}
106+
}
107+
108+
return true
109+
}
110+
99111
type bitmapContainerShortIterator struct {
100112
ptr *bitmapContainer
101113
i int

roaring.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,38 @@ func (rb *Bitmap) String() string {
416416
return buffer.String()
417417
}
418418

419+
// Iterate iterates over the bitmap, calling the given callback with each value in the bitmap. If the callback returns
420+
// false, the iteration is halted.
421+
// The iteration results are undefined if the bitmap is modified (e.g., with Add or Remove).
422+
// There is no guarantee as to what order the values will be iterated
423+
func (rb *Bitmap) Iterate(cb func(x uint32) bool) {
424+
for i := 0; i < rb.highlowcontainer.size(); i++ {
425+
hs := uint32(rb.highlowcontainer.getKeyAtIndex(i)) << 16
426+
c := rb.highlowcontainer.getContainerAtIndex(i)
427+
428+
var shouldContinue bool
429+
// This is hacky but it avoids allocations from invoking an interface method with a closure
430+
switch t := c.(type) {
431+
case *arrayContainer:
432+
shouldContinue = t.iterate(func(x uint16) bool {
433+
return cb(uint32(x) | hs)
434+
})
435+
case *runContainer16:
436+
shouldContinue = t.iterate(func(x uint16) bool {
437+
return cb(uint32(x) | hs)
438+
})
439+
case *bitmapContainer:
440+
shouldContinue = t.iterate(func(x uint16) bool {
441+
return cb(uint32(x) | hs)
442+
})
443+
}
444+
445+
if !shouldContinue {
446+
break
447+
}
448+
}
449+
}
450+
419451
// Iterator creates a new IntPeekable to iterate over the integers contained in the bitmap, in sorted order;
420452
// the iterator becomes invalid if the bitmap is modified (e.g., with Add or Remove).
421453
func (rb *Bitmap) Iterator() IntPeekable {

roaring_test.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2308,3 +2308,79 @@ func TestBitmapFlipMaxRangeEnd(t *testing.T) {
23082308

23092309
assert.EqualValues(t, MaxRange, bm.GetCardinality())
23102310
}
2311+
2312+
func TestIterate(t *testing.T) {
2313+
rb := NewBitmap()
2314+
2315+
for i := 0; i < 300; i++ {
2316+
rb.Add(uint32(i))
2317+
}
2318+
2319+
var values []uint32
2320+
rb.Iterate(func(x uint32) bool {
2321+
values = append(values, x)
2322+
return true
2323+
})
2324+
2325+
assert.Equal(t, rb.ToArray(), values)
2326+
}
2327+
2328+
func TestIterateCompressed(t *testing.T) {
2329+
rb := NewBitmap()
2330+
2331+
for i := 0; i < 300; i++ {
2332+
rb.Add(uint32(i))
2333+
}
2334+
2335+
rb.RunOptimize()
2336+
2337+
var values []uint32
2338+
rb.Iterate(func(x uint32) bool {
2339+
values = append(values, x)
2340+
return true
2341+
})
2342+
2343+
assert.Equal(t, rb.ToArray(), values)
2344+
}
2345+
2346+
func TestIterateLargeValues(t *testing.T) {
2347+
rb := NewBitmap()
2348+
2349+
// This range of values ensures that all different types of containers will be used
2350+
for i := 150000; i < 450000; i++ {
2351+
rb.Add(uint32(i))
2352+
}
2353+
2354+
var values []uint32
2355+
rb.Iterate(func(x uint32) bool {
2356+
values = append(values, x)
2357+
return true
2358+
})
2359+
2360+
assert.Equal(t, rb.ToArray(), values)
2361+
}
2362+
2363+
func TestIterateHalt(t *testing.T) {
2364+
rb := NewBitmap()
2365+
2366+
// This range of values ensures that all different types of containers will be used
2367+
for i := 150000; i < 450000; i++ {
2368+
rb.Add(uint32(i))
2369+
}
2370+
2371+
var values []uint32
2372+
count := uint64(0)
2373+
stopAt := rb.GetCardinality() - 1
2374+
rb.Iterate(func(x uint32) bool {
2375+
values = append(values, x)
2376+
count++
2377+
if count == stopAt {
2378+
return false
2379+
}
2380+
return true
2381+
})
2382+
2383+
expected := rb.ToArray()
2384+
expected = expected[0 : len(expected)-1]
2385+
assert.Equal(t, expected, values)
2386+
}

roaringarray.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,10 @@ import (
44
"bytes"
55
"encoding/binary"
66
"fmt"
7+
"io"
8+
79
snappy "github.com/glycerine/go-unsnap-stream"
810
"github.com/tinylib/msgp/msgp"
9-
"io"
1011
)
1112

1213
//go:generate msgp -unexported
@@ -38,6 +39,7 @@ type container interface {
3839
inot(firstOfRange, endx int) container // i stands for inplace, range is [firstOfRange,endx)
3940
xor(r container) container
4041
getShortIterator() shortPeekable
42+
iterate(cb func(x uint16) bool) bool
4143
getReverseIterator() shortIterable
4244
getManyIterator() manyIterable
4345
contains(i uint16) bool

runcontainer.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1162,6 +1162,18 @@ func (rc *runContainer16) newRunIterator16() *runIterator16 {
11621162
return &runIterator16{rc: rc, curIndex: 0, curPosInIndex: 0}
11631163
}
11641164

1165+
func (rc *runContainer16) iterate(cb func(x uint16) bool) bool {
1166+
iterator := runIterator16{rc, 0, 0}
1167+
1168+
for iterator.hasNext() {
1169+
if !cb(iterator.next()) {
1170+
return false
1171+
}
1172+
}
1173+
1174+
return true
1175+
}
1176+
11651177
// hasNext returns false if calling next will panic. It
11661178
// returns true when there is at least one more value
11671179
// available in the iteration sequence.

0 commit comments

Comments
 (0)