@@ -47,7 +47,7 @@ import (
4747// runContainer16 does run-length encoding of sets of
4848// uint16 integers.
4949type runContainer16 struct {
50- iv []interval16
50+ iv []interval16
5151}
5252
5353// interval16 is the internal to runContainer16
@@ -849,7 +849,7 @@ func (rc *runContainer16) numIntervals() int {
849849//
850850// runContainer16.search always returns whichInterval16 < len(rc.iv).
851851//
852- // The search space is from startIndex to endxIndex. If endxIndex is set to zero, then there
852+ // The search space is from startIndex to endxIndex. If endxIndex is set to zero, then there
853853// is no upper bound.
854854//
855855func (rc * runContainer16 ) searchRange (key int , startIndex int , endxIndex int ) (whichInterval16 int , alreadyPresent bool , numCompares int ) {
@@ -968,14 +968,12 @@ func (rc *runContainer16) getCardinality() int {
968968 return n
969969}
970970
971-
972971// isEmpty returns true if the container is empty.
973972// It runs in constant time.
974973func (rc * runContainer16 ) isEmpty () bool {
975974 return len (rc .iv ) == 0
976975}
977976
978-
979977// AsSlice decompresses the contents into a []uint16 slice.
980978func (rc * runContainer16 ) AsSlice () []uint16 {
981979 s := make ([]uint16 , rc .getCardinality ())
@@ -1198,7 +1196,7 @@ func (ri *runIterator16) advanceIfNeeded(minval uint16) {
11981196// before calling next() to ensure there are contents.
11991197type runReverseIterator16 struct {
12001198 rc * runContainer16
1201- curIndex int // index into rc.iv
1199+ curIndex int // index into rc.iv
12021200 curPosInIndex uint16 // offset in rc.iv[curIndex]
12031201}
12041202
@@ -1288,7 +1286,6 @@ func (ri *runIterator16) nextMany(hs uint32, buf []uint32) int {
12881286 return n
12891287}
12901288
1291-
12921289func (ri * runIterator16 ) nextMany64 (hs uint64 , buf []uint64 ) int {
12931290 n := 0
12941291
@@ -1424,7 +1421,7 @@ func intersectWithLeftover16(astart, alast, bstart, blast int) (isOverlap, isLef
14241421 return
14251422}
14261423
1427- func (rc * runContainer16 ) findNextIntervalThatIntersectsStartingFrom (startIndex int , key int ) (index int , done bool ) {
1424+ func (rc * runContainer16 ) findNextIntervalThatIntersectsStartingFrom (startIndex int , key int ) (index int , done bool ) {
14281425 w , _ , _ := rc .searchRange (key , startIndex , 0 )
14291426 // rc.search always returns w < len(rc.iv)
14301427 if w < startIndex {
@@ -1448,7 +1445,6 @@ func sliceToString16(m []interval16) string {
14481445 return s
14491446}
14501447
1451-
14521448// helper for invert
14531449func (rc * runContainer16 ) invertlastInterval (origin uint16 , lastIdx int ) []interval16 {
14541450 cur := rc .iv [lastIdx ]
@@ -2152,9 +2148,21 @@ func (rc *runContainer16) orBitmapContainerCardinality(bc *bitmapContainer) int
21522148
21532149// orArray finds the union of rc and ac.
21542150func (rc * runContainer16 ) orArray (ac * arrayContainer ) container {
2155- bc1 := newBitmapContainerFromRun (rc )
2156- bc2 := ac .toBitmapContainer ()
2157- return bc1 .orBitmap (bc2 )
2151+ if ac .isEmpty () {
2152+ return rc .clone ()
2153+ }
2154+ if rc .isEmpty () {
2155+ return ac .clone ()
2156+ }
2157+ intervals , cardMinusOne := runArrayUnionToRuns (rc , ac )
2158+ result := newRunContainer16TakeOwnership (intervals )
2159+ if len (intervals ) >= 2048 && cardMinusOne >= arrayDefaultMaxSize {
2160+ return newBitmapContainerFromRun (result )
2161+ }
2162+ if len (intervals )* 2 > 1 + int (cardMinusOne ) {
2163+ return result .toArrayContainer ()
2164+ }
2165+ return result
21582166}
21592167
21602168// orArray finds the union of rc and ac.
@@ -2197,13 +2205,88 @@ func (rc *runContainer16) iorBitmapContainer(bc *bitmapContainer) container {
21972205}
21982206
21992207func (rc * runContainer16 ) iorArray (ac * arrayContainer ) container {
2200- it := ac .getShortIterator ()
2201- for it .hasNext () {
2202- rc .Add (it .next ())
2208+ if rc .isEmpty () {
2209+ return ac .clone ()
2210+ }
2211+ if ac .isEmpty () {
2212+ return rc
2213+ }
2214+ var cardMinusOne uint16
2215+ //TODO: perform the union algorithm in-place using rc.iv
2216+ // this can be done with methods like the in-place array container union
2217+ // but maybe lazily moving the remaining elements back.
2218+ rc .iv , cardMinusOne = runArrayUnionToRuns (rc , ac )
2219+ if len (rc .iv ) >= 2048 && cardMinusOne >= arrayDefaultMaxSize {
2220+ return newBitmapContainerFromRun (rc )
2221+ }
2222+ if len (rc .iv )* 2 > 1 + int (cardMinusOne ) {
2223+ return rc .toArrayContainer ()
22032224 }
22042225 return rc
22052226}
22062227
2228+ func runArrayUnionToRuns (rc * runContainer16 , ac * arrayContainer ) ([]interval16 , uint16 ) {
2229+ pos1 := 0
2230+ pos2 := 0
2231+ length1 := len (ac .content )
2232+ length2 := len (rc .iv )
2233+ target := make ([]interval16 , 0 , len (rc .iv ))
2234+ // have to find the first range
2235+ // options are
2236+ // 1. from array container
2237+ // 2. from run container
2238+ var previousInterval interval16
2239+ var cardMinusOne uint16
2240+ if ac .content [0 ] < rc .iv [0 ].start {
2241+ previousInterval .start = ac .content [0 ]
2242+ previousInterval .length = 0
2243+ pos1 ++
2244+ } else {
2245+ previousInterval .start = rc .iv [0 ].start
2246+ previousInterval .length = rc .iv [0 ].length
2247+ pos2 ++
2248+ }
2249+
2250+ for pos1 < length1 || pos2 < length2 {
2251+ if pos1 < length1 {
2252+ s1 := ac .content [pos1 ]
2253+ if s1 <= previousInterval .start + previousInterval .length {
2254+ pos1 ++
2255+ continue
2256+ }
2257+ if previousInterval .last () < MaxUint16 && previousInterval .last ()+ 1 == s1 {
2258+ previousInterval .length ++
2259+ pos1 ++
2260+ continue
2261+ }
2262+ }
2263+ if pos2 < length2 {
2264+ range2 := rc .iv [pos2 ]
2265+ if range2 .start <= previousInterval .last () || range2 .start > 0 && range2 .start - 1 == previousInterval .last () {
2266+ pos2 ++
2267+ if previousInterval .last () < range2 .last () {
2268+ previousInterval .length = range2 .last () - previousInterval .start
2269+ }
2270+ continue
2271+ }
2272+ }
2273+ cardMinusOne += previousInterval .length + 1
2274+ target = append (target , previousInterval )
2275+ if pos2 == length2 || pos1 < length1 && ac .content [pos1 ] < rc .iv [pos2 ].start {
2276+ previousInterval .start = ac .content [pos1 ]
2277+ previousInterval .length = 0
2278+ pos1 ++
2279+ } else {
2280+ previousInterval = rc .iv [pos2 ]
2281+ pos2 ++
2282+ }
2283+ }
2284+ cardMinusOne += previousInterval .length + 1
2285+ target = append (target , previousInterval )
2286+
2287+ return target , cardMinusOne
2288+ }
2289+
22072290// lazyIOR is described (not yet implemented) in
22082291// this nice note from @lemire on
22092292// https://github.com/RoaringBitmap/roaring/pull/70#issuecomment-263613737
0 commit comments