Skip to content

Commit b5aa429

Browse files
authored
Merge pull request #225 from alldroll/byte_input
Unification of bitmap unserialization by using auxiliary byteInput approach
2 parents d017a98 + 65f42a3 commit b5aa429

12 files changed

+440
-342
lines changed

byte_input.go

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
package roaring
2+
3+
import (
4+
"encoding/binary"
5+
"io"
6+
)
7+
8+
// byteInput is the minimal sequential-read abstraction used when decoding a
// serialized bitmap. It lets the same decoding code consume either an
// in-memory byte slice or an io.Reader.
type byteInput interface {
	// next hands back the following n bytes of the input and moves the read
	// position past them, exactly as if they had been delivered by Read.
	next(n int) ([]byte, error)

	// readUInt32 decodes the following four bytes as a little-endian uint32.
	readUInt32() (uint32, error)

	// readUInt16 decodes the following two bytes as a little-endian uint16.
	readUInt16() (uint16, error)

	// getReadBytes reports how many bytes have been read so far.
	getReadBytes() int64

	// skipBytes moves the read position forward by exactly n bytes.
	skipBytes(n int) error
}
21+
22+
func newByteInputFromReader(reader io.Reader) byteInput {
23+
return &byteInputAdapter{
24+
r: reader,
25+
readBytes: 0,
26+
}
27+
}
28+
29+
func newByteInput(buf []byte) byteInput {
30+
return &byteBuffer{
31+
buf: buf,
32+
off: 0,
33+
}
34+
}
35+
36+
// byteBuffer is a byteInput backed by an in-memory byte slice.
// buf is the data being consumed and off is the index of the first unread byte.
type byteBuffer struct {
	buf []byte
	off int
}

// next returns a slice containing the next n bytes of the buffer and advances
// the read offset past them. The result is a sub-slice of the underlying
// buffer, not a copy.
//
// io.ErrUnexpectedEOF is returned, and the offset is left untouched, when
// fewer than n bytes remain. A negative n is rejected the same way instead of
// panicking on the slice expression: it can only come from a corrupt or
// overflowed length field.
func (b *byteBuffer) next(n int) ([]byte, error) {
	if n < 0 || n > len(b.buf)-b.off {
		return nil, io.ErrUnexpectedEOF
	}

	data := b.buf[b.off : b.off+n]
	b.off += n

	return data, nil
}

// readUInt32 decodes the next four bytes as a little-endian uint32 and
// advances past them. It returns io.ErrUnexpectedEOF, without advancing, when
// fewer than four bytes remain.
func (b *byteBuffer) readUInt32() (uint32, error) {
	if len(b.buf)-b.off < 4 {
		return 0, io.ErrUnexpectedEOF
	}

	v := binary.LittleEndian.Uint32(b.buf[b.off:])
	b.off += 4

	return v, nil
}

// readUInt16 decodes the next two bytes as a little-endian uint16 and
// advances past them. It returns io.ErrUnexpectedEOF, without advancing, when
// fewer than two bytes remain.
func (b *byteBuffer) readUInt16() (uint16, error) {
	if len(b.buf)-b.off < 2 {
		return 0, io.ErrUnexpectedEOF
	}

	v := binary.LittleEndian.Uint16(b.buf[b.off:])
	b.off += 2

	return v, nil
}

// getReadBytes returns the number of bytes consumed so far, i.e. the current
// read offset.
func (b *byteBuffer) getReadBytes() int64 {
	return int64(b.off)
}

// skipBytes advances the read offset by exactly n bytes.
//
// io.ErrUnexpectedEOF is returned, and the offset is left untouched, when
// fewer than n bytes remain. A negative n is rejected as well; otherwise it
// would move the offset backwards (possibly below zero) and make a later
// read panic.
func (b *byteBuffer) skipBytes(n int) error {
	if n < 0 || n > len(b.buf)-b.off {
		return io.ErrUnexpectedEOF
	}

	b.off += n

	return nil
}

// reset points the buffer at a new byte slice and rewinds the read offset to
// the start of it.
func (b *byteBuffer) reset(buf []byte) {
	b.buf = buf
	b.off = 0
}
103+
104+
// byteInputAdapter is a byteInput backed by an io.Reader.
// r is the stream being consumed and readBytes counts the bytes taken from it
// since construction or the last reset.
type byteInputAdapter struct {
	r         io.Reader
	readBytes int
}

// next reads exactly n bytes from the stream into a freshly allocated slice
// and returns it.
//
// If the stream cannot supply n bytes the slice is discarded and the error is
// returned: io.EOF when nothing could be read, io.ErrUnexpectedEOF when the
// stream ended part-way, or the reader's own error. Bytes consumed by a
// failed read are still added to the read counter. A negative n is rejected
// with io.ErrUnexpectedEOF instead of panicking in make: it can only come
// from a corrupt or overflowed length field.
func (b *byteInputAdapter) next(n int) ([]byte, error) {
	if n < 0 {
		return nil, io.ErrUnexpectedEOF
	}

	buf := make([]byte, n)
	m, err := io.ReadFull(b.r, buf)
	b.readBytes += m

	if err != nil {
		return nil, err
	}

	return buf, nil
}

// readUInt32 reads four bytes from the stream and decodes them as a
// little-endian uint32.
func (b *byteInputAdapter) readUInt32() (uint32, error) {
	buf, err := b.next(4)
	if err != nil {
		return 0, err
	}

	return binary.LittleEndian.Uint32(buf), nil
}

// readUInt16 reads two bytes from the stream and decodes them as a
// little-endian uint16.
func (b *byteInputAdapter) readUInt16() (uint16, error) {
	buf, err := b.next(2)
	if err != nil {
		return 0, err
	}

	return binary.LittleEndian.Uint16(buf), nil
}

// getReadBytes returns the number of bytes consumed from the stream so far.
func (b *byteInputAdapter) getReadBytes() int64 {
	return int64(b.readBytes)
}

// skipBytes consumes and discards exactly n bytes from the stream.
//
// The bytes are drained through a small bounded scratch buffer rather than
// an n-byte allocation. Errors match next: io.EOF when not a single byte
// could be skipped, io.ErrUnexpectedEOF when the stream ended after some but
// not all of the n bytes, or the reader's own error. A negative n is rejected
// with io.ErrUnexpectedEOF.
func (b *byteInputAdapter) skipBytes(n int) error {
	if n < 0 {
		return io.ErrUnexpectedEOF
	}

	// Upper bound on the scratch allocation, independent of n.
	const maxChunk = 4096

	size := n
	if size > maxChunk {
		size = maxChunk
	}
	scratch := make([]byte, size)

	skipped := 0
	for skipped < n {
		want := n - skipped
		if want > len(scratch) {
			want = len(scratch)
		}

		m, err := io.ReadFull(b.r, scratch[:want])
		b.readBytes += m
		skipped += m

		if err != nil {
			// A clean EOF on a later chunk still means the overall
			// request was cut short.
			if err == io.EOF && skipped > 0 {
				err = io.ErrUnexpectedEOF
			}
			return err
		}
	}

	return nil
}

// reset points the adapter at a new stream and zeroes the read counter.
func (b *byteInputAdapter) reset(stream io.Reader) {
	b.r = stream
	b.readBytes = 0
}

byte_input_test.go

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
package roaring
2+
3+
import (
4+
"bytes"
5+
"testing"
6+
7+
. "github.com/smartystreets/goconvey/convey"
8+
)
9+
10+
func TestByteInputFlow(t *testing.T) {
11+
Convey("Test should be an error on empty data", t, func() {
12+
buf := bytes.NewBuffer([]byte{})
13+
14+
instances := []byteInput{
15+
newByteInput(buf.Bytes()),
16+
newByteInputFromReader(buf),
17+
}
18+
19+
for _, input := range instances {
20+
n, err := input.readUInt16()
21+
So(n, ShouldEqual, 0)
22+
So(err, ShouldBeError)
23+
24+
p, err := input.readUInt32()
25+
So(p, ShouldEqual, 0)
26+
So(err, ShouldBeError)
27+
28+
b, err := input.next(10)
29+
So(b, ShouldEqual, nil)
30+
So(err, ShouldBeError)
31+
32+
err = input.skipBytes(10)
33+
So(err, ShouldBeError)
34+
}
35+
})
36+
37+
Convey("Test not empty data", t, func() {
38+
buf := bytes.NewBuffer(uint16SliceAsByteSlice([]uint16{1, 10, 32, 66, 23}))
39+
40+
instances := []byteInput{
41+
newByteInput(buf.Bytes()),
42+
newByteInputFromReader(buf),
43+
}
44+
45+
for _, input := range instances {
46+
n, err := input.readUInt16()
47+
So(n, ShouldEqual, 1)
48+
So(err, ShouldBeNil)
49+
50+
p, err := input.readUInt32()
51+
So(p, ShouldEqual, 2097162) // 32 << 16 | 10
52+
So(err, ShouldBeNil)
53+
54+
b, err := input.next(2)
55+
So([]byte{66, 0}, ShouldResemble, b)
56+
So(err, ShouldBeNil)
57+
58+
err = input.skipBytes(2)
59+
So(err, ShouldBeNil)
60+
61+
b, err = input.next(1)
62+
So(b, ShouldEqual, nil)
63+
So(err, ShouldBeError)
64+
}
65+
})
66+
}

roaring.go

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"fmt"
1212
"io"
1313
"strconv"
14+
"sync"
1415
)
1516

1617
// Bitmap represents a compressed bitmap where you can add integers.
@@ -66,8 +67,14 @@ func (rb *Bitmap) WriteToMsgpack(stream io.Writer) (int64, error) {
6667
// The format is compatible with other RoaringBitmap
6768
// implementations (Java, C) and is documented here:
6869
// https://github.com/RoaringBitmap/RoaringFormatSpec
69-
func (rb *Bitmap) ReadFrom(stream io.Reader) (int64, error) {
70-
return rb.highlowcontainer.readFrom(stream)
70+
func (rb *Bitmap) ReadFrom(reader io.Reader) (p int64, err error) {
71+
stream := byteInputAdapterPool.Get().(*byteInputAdapter)
72+
stream.reset(reader)
73+
74+
p, err = rb.highlowcontainer.readFrom(stream)
75+
byteInputAdapterPool.Put(stream)
76+
77+
return
7178
}
7279

7380
// FromBuffer creates a bitmap from its serialized version stored in buffer
@@ -92,10 +99,30 @@ func (rb *Bitmap) ReadFrom(stream io.Reader) (int64, error) {
9299
// also be broken. Thus, before making buf unavailable, you should
93100
// call CloneCopyOnWriteContainers on all such bitmaps.
94101
//
95-
func (rb *Bitmap) FromBuffer(buf []byte) (int64, error) {
96-
return rb.highlowcontainer.fromBuffer(buf)
102+
func (rb *Bitmap) FromBuffer(buf []byte) (p int64, err error) {
103+
stream := byteBufferPool.Get().(*byteBuffer)
104+
stream.reset(buf)
105+
106+
p, err = rb.highlowcontainer.readFrom(stream)
107+
byteBufferPool.Put(stream)
108+
109+
return
97110
}
98111

112+
var (
113+
byteBufferPool = sync.Pool{
114+
New: func() interface{} {
115+
return &byteBuffer{}
116+
},
117+
}
118+
119+
byteInputAdapterPool = sync.Pool{
120+
New: func() interface{} {
121+
return &byteInputAdapter{}
122+
},
123+
}
124+
)
125+
99126
// RunOptimize attempts to further compress the runs of consecutive values found in the bitmap
100127
func (rb *Bitmap) RunOptimize() {
101128
rb.highlowcontainer.runOptimize()

roaring_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ func TestReverseIteratorCount(t *testing.T) {
2525
count := 0
2626
for it.HasNext() {
2727
it.Next()
28-
count += 1
28+
count++
2929
}
3030
if count != testSize {
3131
t.FailNow()

0 commit comments

Comments
 (0)