Skip to content

Commit 32e374a

Browse files
committed
Compute max compressed length for SkippableIntegerCODEC
Calculating the maximum compressed length is now implemented for all schemes except:
* Kamikaze — as stated in its Javadoc, it is not intended for production use.
* VectorFastPFOR — it does not appear to be supported for production use yet, as it is not included in the release package.
1 parent e19e018 commit 32e374a

24 files changed

+263
-18
lines changed

example.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -263,10 +263,12 @@ public static void headlessDemo() {
263263
int[] uncompressed1 = {1,2,1,3,1};
264264
int[] uncompressed2 = {3,2,4,6,1};
265265

266-
int[] compressed = new int[uncompressed1.length+uncompressed2.length+1024];
267-
268266
SkippableIntegerCODEC codec = new SkippableComposition(new BinaryPacking(), new VariableByte());
269267

268+
int maxCompressedLength = codec.maxHeadlessCompressedLength(new IntWrapper(0), uncompressed1.length)
269+
+ codec.maxHeadlessCompressedLength(new IntWrapper(0), uncompressed2.length);
270+
int[] compressed = new int[maxCompressedLength];
271+
270272
// compressing
271273
IntWrapper outPos = new IntWrapper();
272274

src/main/java/me/lemire/integercompression/BinaryPacking.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,9 @@
3737
* @author Daniel Lemire
3838
*/
3939
public final class BinaryPacking implements IntegerCODEC, SkippableIntegerCODEC {
40-
final static int BLOCK_SIZE = 32;
41-
40+
public final static int BLOCK_SIZE = 32;
41+
private static final int MAX_BIT_WIDTH = Integer.SIZE;
42+
4243
@Override
4344
public void compress(int[] in, IntWrapper inpos, int inlength,
4445
int[] out, IntWrapper outpos) {
@@ -131,7 +132,16 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
131132
outpos.add(outlength);
132133
inpos.set(tmpinpos);
133134
}
134-
135+
136+
@Override
137+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
138+
int blockCount = inlength / BLOCK_SIZE;
139+
int headersSizeInInts = blockCount / Integer.BYTES + (blockCount % Integer.BYTES);
140+
int blocksSizeInInts = blockCount * MAX_BIT_WIDTH;
141+
compressedPositions.add(blockCount * BLOCK_SIZE);
142+
return headersSizeInInts + blocksSizeInInts;
143+
}
144+
135145
@Override
136146
public String toString() {
137147
return this.getClass().getSimpleName();

src/main/java/me/lemire/integercompression/FastPFOR.java

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,13 @@
4040
*/
4141
public class FastPFOR implements IntegerCODEC,SkippableIntegerCODEC {
4242
final static int OVERHEAD_OF_EACH_EXCEPT = 8;
43+
private static final int OVERHEAD_OF_EACH_PAGE_IN_INTS = 36; // 1 int for the header
44+
// 1 int for the byte array size
45+
// 1 int for the bitmap
46+
// 1 int for byte array padding (to align to 4 bytes)
47+
// 32 to have enough space to bit-pack the exceptions
48+
private static final int OVERHEAD_OF_EACH_BLOCK_IN_INTS = 1; // 1 byte for the number of bits allocated per truncated integer
49+
// 1 byte for the number of exceptions
4350
/**
4451
*
4552
*/
@@ -65,7 +72,7 @@ public class FastPFOR implements IntegerCODEC,SkippableIntegerCODEC {
6572
* @param pagesize
6673
* the desired page size (recommended value is FastPFOR.DEFAULT_PAGE_SIZE)
6774
*/
68-
private FastPFOR(int pagesize) {
75+
FastPFOR(int pagesize) {
6976
pageSize = pagesize;
7077
// Initiate arrays.
7178
byteContainer = makeBuffer(3 * pageSize
@@ -230,6 +237,18 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
230237
}
231238
}
232239

240+
@Override
241+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
242+
inlength = Util.greatestMultiple(inlength, BLOCK_SIZE);
243+
244+
int pageCount = (inlength + pageSize - 1) / pageSize;
245+
int blockCount = inlength / BLOCK_SIZE;
246+
247+
// getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers.
248+
int blockSizeInInts = OVERHEAD_OF_EACH_BLOCK_IN_INTS + BLOCK_SIZE;
249+
return OVERHEAD_OF_EACH_PAGE_IN_INTS * pageCount + blockSizeInInts * blockCount + 24;
250+
}
251+
233252
private void decodePage(int[] in, IntWrapper inpos, int[] out,
234253
IntWrapper outpos, int thissize) {
235254
final int initpos = inpos.get();

src/main/java/me/lemire/integercompression/FastPFOR128.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,13 @@
2323
*/
2424
public class FastPFOR128 implements IntegerCODEC,SkippableIntegerCODEC {
2525
final static int OVERHEAD_OF_EACH_EXCEPT = 8;
26+
private static final int OVERHEAD_OF_EACH_PAGE_IN_INTS = 36; // 1 int for the header
27+
// 1 int for the byte array size
28+
// 1 int for the bitmap
29+
// 1 int for byte array padding (to align to 4 bytes)
30+
// 32 to have enough space to bit-pack the exceptions
31+
private static final int OVERHEAD_OF_EACH_BLOCK_IN_INTS = 1; // 1 byte for the number of bits allocated per truncated integer
32+
// 1 byte for the number of exceptions
2633
/**
2734
*
2835
*/
@@ -209,6 +216,18 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
209216
}
210217
}
211218

219+
@Override
220+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
221+
inlength = Util.greatestMultiple(inlength, BLOCK_SIZE);
222+
223+
int pageCount = (inlength + pageSize - 1) / pageSize;
224+
int blockCount = inlength / BLOCK_SIZE;
225+
226+
// getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers.
227+
int blockSizeInInts = OVERHEAD_OF_EACH_BLOCK_IN_INTS + BLOCK_SIZE;
228+
return OVERHEAD_OF_EACH_PAGE_IN_INTS * pageCount + blockSizeInInts * blockCount + 24;
229+
}
230+
212231
private void decodePage(int[] in, IntWrapper inpos, int[] out,
213232
IntWrapper outpos, int thissize) {
214233
final int initpos = inpos.get();

src/main/java/me/lemire/integercompression/GroupSimple9.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3549,4 +3549,10 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o
35493549
inpos.set(tmpinpos);
35503550

35513551
}
3552-
}
3552+
3553+
@Override
3554+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
3555+
compressedPositions.add(inlength);
3556+
return inlength;
3557+
}
3558+
}

src/main/java/me/lemire/integercompression/IntCompressor.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ public IntCompressor() {
3636
* @throws UncompressibleInputException if the data is too poorly compressible
3737
*/
3838
public int[] compress(int[] input) {
39-
int[] compressed = new int[input.length + input.length / 100 + 1024];
39+
int maxCompressedLength = codec.maxHeadlessCompressedLength(new IntWrapper(0), input.length);
40+
int[] compressed = new int[maxCompressedLength + 1]; // +1 to store the length of the input
4041
// Store at index=0 the length of the input, hence enabling .headlessCompress
4142
compressed[0] = input.length;
4243
IntWrapper outpos = new IntWrapper(1);

src/main/java/me/lemire/integercompression/JustCopy.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,12 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
4242

4343
}
4444

45+
@Override
46+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
47+
compressedPositions.add(inlength);
48+
return inlength;
49+
}
50+
4551
@Override
4652
public void compress(int[] in, IntWrapper inpos, int inlength,
4753
int[] out, IntWrapper outpos) {

src/main/java/me/lemire/integercompression/Kamikaze.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,11 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o
3838
}
3939
}
4040

41+
@Override
42+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
43+
throw new UnsupportedOperationException("Calculating the max compressed length is not supported yet.");
44+
}
45+
4146
@Override
4247
public String toString() {
4348
return "Kamikaze's PForDelta";
@@ -64,4 +69,4 @@ public void uncompress(int[] in, IntWrapper inpos, int inlength, int[] out,
6469
headlessUncompress(in, inpos, inlength, out, outpos, outlength);
6570

6671
}
67-
}
72+
}

src/main/java/me/lemire/integercompression/NewPFD.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,17 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
132132
decodePage(in, inpos, out, outpos, mynvalue);
133133
}
134134

135+
@Override
136+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
137+
inlength = Util.greatestMultiple(inlength, BLOCK_SIZE);
138+
int blockCount = inlength / BLOCK_SIZE;
139+
// +1 for the header
140+
// getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers.
141+
int maxBlockSizeInInts = 1 + BLOCK_SIZE;
142+
compressedPositions.add(inlength);
143+
return maxBlockSizeInInts * blockCount;
144+
}
145+
135146
private void decodePage(int[] in, IntWrapper inpos, int[] out,
136147
IntWrapper outpos, int thissize) {
137148
int tmpoutpos = outpos.get();

src/main/java/me/lemire/integercompression/NewPFDS16.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,17 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
131131
decodePage(in, inpos, out, outpos, mynvalue);
132132
}
133133

134+
@Override
135+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
136+
inlength = Util.greatestMultiple(inlength, BLOCK_SIZE);
137+
int blockCount = inlength / BLOCK_SIZE;
138+
// +1 for the header
139+
// getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers.
140+
int maxBlockSizeInInts = 1 + BLOCK_SIZE;
141+
compressedPositions.add(inlength);
142+
return maxBlockSizeInInts * blockCount;
143+
}
144+
134145
private void decodePage(int[] in, IntWrapper inpos, int[] out,
135146
IntWrapper outpos, int thissize) {
136147
int tmpoutpos = outpos.get();

0 commit comments

Comments
 (0)