11package ru .yandex .clickhouse .util ;
22
3+ import java .io .ByteArrayInputStream ;
34import java .io .ByteArrayOutputStream ;
45import java .io .DataInputStream ;
56import java .io .DataOutput ;
910import java .nio .ByteBuffer ;
1011import java .nio .ByteOrder ;
1112import java .util .Objects ;
12-
1313import org .roaringbitmap .RoaringBitmap ;
1414import org .roaringbitmap .buffer .ImmutableRoaringBitmap ;
1515import org .roaringbitmap .buffer .MutableRoaringBitmap ;
1616import org .roaringbitmap .longlong .Roaring64Bitmap ;
1717import org .roaringbitmap .longlong .Roaring64NavigableMap ;
18-
1918import ru .yandex .clickhouse .domain .ClickHouseDataType ;
2019
2120public abstract class ClickHouseBitmap {
@@ -131,16 +130,24 @@ public long getLongCardinality() {
/**
 * Diff hunk for {@code serialize(ByteBuffer)}: writes the wrapped bitmap {@code rb} into the
 * given buffer. Lines whose gutter ends in {@code -} are the previous implementation, lines
 * whose gutter ends in {@code +} are the new one.
 *
 * <p>{@code rb} is first serialized into an in-memory byte array. The new code then emits
 * bytes 4..1 of that array in reverse order, four zero bytes, and everything from offset 5
 * onward; the previous code emitted only the bytes from offset 5 onward.
 *
 * <p>NOTE(review): presumably byte 0 is the signed/unsigned flag and bytes 1..4 the map size
 * written as a big-endian int, as the notes in toByteBuffer suggest - confirm against the
 * RoaringBitmap version in use.
 *
 * @param buffer destination the converted bytes are appended to
 * @throws IllegalArgumentException if {@code rb.serialize} fails with an IOException
 * @throws IllegalStateException if an IOException escapes the try-with-resources block
 *         (new code; the previous code threw IllegalArgumentException here)
 */
131130 @ Override
132131 public void serialize (ByteBuffer buffer ) {
133132 int size = serializedSizeInBytes ();
133+ // TODO use custom data output so that we can handle large byte array
134134 try (ByteArrayOutputStream bas = new ByteArrayOutputStream (size )) {
135135 DataOutput out = new DataOutputStream (bas );
136136 try {
137+ // https://github.com/RoaringBitmap/RoaringBitmap/blob/0.9.9/RoaringBitmap/src/main/java/org/roaringbitmap/longlong/Roaring64NavigableMap.java#L1105
137138 rb .serialize (out );
138139 } catch (IOException e ) {
139140 throw new IllegalArgumentException ("Failed to serialize given bitmap" , e );
140141 }
141- buffer .put (bas .toByteArray (), 5 , size - 5 );
142+
143+ byte [] bytes = bas .toByteArray ();
144+ for (int i = 4 ; i > 0 ; i --) { // emit bytes[4], bytes[3], bytes[2], bytes[1] in that order
145+ buffer .put (bytes [i ]);
146+ }
147+ buffer .putInt (0 ); // four zero bytes follow the reversed bytes
148+ buffer .put (bytes , 5 , size - 5 ); // everything after the first five serialized bytes
142149 } catch (IOException e ) {
143- throw new IllegalArgumentException ("Failed to serialize given bitmap" , e );
150+ throw new IllegalStateException ("Failed to serialize given bitmap" , e );
144151 }
145152 }
146153
@@ -253,6 +260,8 @@ public static ClickHouseBitmap wrap(Object bitmap, ClickHouseDataType innerType)
253260 }
254261
255262 public static ClickHouseBitmap deserialize (DataInputStream in , ClickHouseDataType innerType ) throws IOException {
263+ final ClickHouseBitmap rb ;
264+
256265 int byteLen = byteLength (innerType );
257266 int flag = in .readUnsignedByte ();
258267 if (flag == 0 ) {
@@ -262,20 +271,36 @@ public static ClickHouseBitmap deserialize(DataInputStream in, ClickHouseDataTyp
262271 bytes [1 ] = cardinality ;
263272 in .read (bytes , 2 , bytes .length - 2 );
264273
265- return ClickHouseBitmap .deserialize (bytes , innerType );
266- } else if ( byteLen <= 4 ) {
274+ rb = ClickHouseBitmap .deserialize (bytes , innerType );
275+ } else {
267276 int len = Utils .readVarInt (in );
268277 byte [] bytes = new byte [len ];
269- Utils .readFully (in , bytes );
270- RoaringBitmap b = new RoaringBitmap ();
271- b .deserialize (flip (newBuffer (len ).put (bytes )));
272- return ClickHouseBitmap .wrap (b , innerType );
273- } else {
274- // why? when serializing Roaring64NavigableMap, the initial 5 bytes were removed
275- // with 8 unknown bytes appended
276- throw new UnsupportedOperationException (
277- "Deserializing Roaring64NavigableMap with cardinality larger than 32 is currently not supported." );
278+
279+ if (byteLen <= 4 ) {
280+ Utils .readFully (in , bytes );
281+ RoaringBitmap b = new RoaringBitmap ();
282+ b .deserialize (flip (newBuffer (len ).put (bytes )));
283+ rb = ClickHouseBitmap .wrap (b , innerType );
284+ } else {
285+ // TODO implement a wrapper of DataInput to get rid of byte array here
286+ bytes [0 ] = (byte ) 0 ; // always unsigned
287+ // read the low 4 bytes of the map size from the stream and store them reversed in bytes[4..1]
288+ for (int i = 4 ; i > 0 ; i --) {
289+ bytes [i ] = in .readByte ();
290+ }
291+ if (in .readByte () != 0 || in .readByte () != 0 || in .readByte () != 0 || in .readByte () != 0 ) {
292+ throw new IllegalStateException (
293+ "Not able to deserialize ClickHouseBitmap for too many bitmaps(>" + 0xFFFFFFFFL + ")!" );
294+ }
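295+ // read the rest - NOTE(review): 8 map-size bytes were already consumed above, so only len - 8 bytes should remain; len - 5 looks like a 3-byte over-read, confirm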
296+ Utils .readFully (in , bytes , 5 , len - 5 );
297+ Roaring64NavigableMap b = new Roaring64NavigableMap ();
298+ b .deserialize (new DataInputStream (new ByteArrayInputStream (bytes )));
299+ rb = ClickHouseBitmap .wrap (b , innerType );
300+ }
278301 }
302+
303+ return rb ;
279304 }
280305
281306 public static ClickHouseBitmap deserialize (byte [] bytes , ClickHouseDataType innerType ) throws IOException {
@@ -287,10 +312,7 @@ public static ClickHouseBitmap deserialize(byte[] bytes, ClickHouseDataType inne
287312 }
288313
289314 int byteLen = byteLength (innerType );
290- ByteBuffer buffer = ByteBuffer .allocate (bytes .length );
291- if (buffer .order () != ByteOrder .LITTLE_ENDIAN ) {
292- buffer = buffer .slice ().order (ByteOrder .LITTLE_ENDIAN );
293- }
315+ ByteBuffer buffer = newBuffer (bytes .length );
294316 buffer = (ByteBuffer ) ((Buffer ) buffer .put (bytes )).flip ();
295317
296318 if (buffer .get () == (byte ) 0 ) { // small set
@@ -331,10 +353,29 @@ public static ClickHouseBitmap deserialize(byte[] bytes, ClickHouseDataType inne
331353 b .deserialize (buffer );
332354 rb = ClickHouseBitmap .wrap (b , innerType );
333355 } else {
334- // why? when serializing Roaring64NavigableMap, the initial 5 bytes were removed
335- // with 8 unknown bytes appended
336- throw new UnsupportedOperationException (
337- "Deserializing Roaring64NavigableMap with cardinality larger than 32 is currently not supported." );
356+ // consume map size(long in little-endian byte order)
357+ byte [] bitmaps = new byte [4 ];
358+ buffer .get (bitmaps );
359+ if (buffer .get () != 0 || buffer .get () != 0 || buffer .get () != 0 || buffer .get () != 0 ) {
360+ throw new IllegalStateException (
361+ "Not able to deserialize ClickHouseBitmap for too many bitmaps(>" + 0xFFFFFFFFL + ")!" );
362+ }
363+ // overwrite the last 5 of the 8 bytes just consumed with the flag (boolean for
364+ // signed/unsigned) followed by the map size (integer)
365+ buffer .position (buffer .position () - 5 );
366+ // always unsigned due to limit of CRoaring
367+ buffer .put ((byte ) 0 );
368+ // little-endian -> big-endian (the map size is read back through DataInputStream)
369+ for (int i = 3 ; i >= 0 ; i --) {
370+ buffer .put (bitmaps [i ]);
371+ }
372+
373+ buffer .position (buffer .position () - 5 );
374+ bitmaps = new byte [buffer .remaining ()];
375+ buffer .get (bitmaps );
376+ Roaring64NavigableMap b = new Roaring64NavigableMap ();
377+ b .deserialize (new DataInputStream (new ByteArrayInputStream (bitmaps )));
378+ rb = ClickHouseBitmap .wrap (b , innerType );
338379 }
339380 }
340381
@@ -436,15 +477,17 @@ public long[] toLongArray() {
436477 return longs ;
437478 }
438479
480+ /**
481+ * Serialize the bitmap into a flipped ByteBuffer.
482+ *
483+ * @return flipped byte buffer
484+ */
439485 public ByteBuffer toByteBuffer () {
440486 ByteBuffer buf ;
441487
442488 int cardinality = getCardinality ();
443489 if (cardinality <= 32 ) {
444- buf = ByteBuffer .allocate (2 + byteLen * cardinality );
445- if (buf .order () != ByteOrder .LITTLE_ENDIAN ) {
446- buf = buf .slice ().order (ByteOrder .LITTLE_ENDIAN );
447- }
490+ buf = newBuffer (2 + byteLen * cardinality );
448491 buf .put ((byte ) 0 );
449492 buf .put ((byte ) cardinality );
450493 if (byteLen == 1 ) {
@@ -468,28 +511,23 @@ public ByteBuffer toByteBuffer() {
468511 int size = serializedSizeInBytes ();
469512 int varIntSize = Utils .getVarIntSize (size );
470513
471- buf = ByteBuffer .allocate (1 + varIntSize + size );
472- if (buf .order () != ByteOrder .LITTLE_ENDIAN ) {
473- buf = buf .slice ().order (ByteOrder .LITTLE_ENDIAN );
474- }
514+ buf = newBuffer (1 + varIntSize + size );
475515 buf .put ((byte ) 1 );
476516 Utils .writeVarInt (size , buf );
477517 serialize (buf );
478518 } else { // 64
479- // 1) exclude the leading 5 bytes - boolean flag + map size , see below:
519+ // 1) deduct one to exclude the leading byte - boolean flag, see below:
480520 // https://github.com/RoaringBitmap/RoaringBitmap/blob/0.9.9/RoaringBitmap/src/main/java/org/roaringbitmap/longlong/Roaring64NavigableMap.java#L1107
481- // 2) not sure what's the extra 8 bytes?
482- long size = serializedSizeInBytesAsLong () - 5 + 8 ;
521+ // 2) add 4 bytes because CRoaring uses long to store count of 32-bit bitmaps,
522+ // while Java uses int - see
523+ // https://github.com/RoaringBitmap/CRoaring/blob/v0.2.66/cpp/roaring64map.hh#L597
524+ long size = serializedSizeInBytesAsLong () - 1 + 4 ;
483525 int varIntSize = Utils .getVarLongSize (size );
484526 // TODO add serialize(DataOutput) to handle more
485527 int intSize = (int ) size ;
486- buf = ByteBuffer .allocate (1 + varIntSize + intSize );
487- if (buf .order () != ByteOrder .LITTLE_ENDIAN ) {
488- buf = buf .slice ().order (ByteOrder .LITTLE_ENDIAN );
489- }
528+ buf = newBuffer (1 + varIntSize + intSize );
490529 buf .put ((byte ) 1 );
491530 Utils .writeVarInt (intSize , buf );
492- buf .putLong (1L ); // what's this?
493531 serialize (buf );
494532 }
495533
0 commit comments