Skip to content

Commit f3c52d4

Browse files
authored
Merge pull request #60 from piotrrzysko/maxCompressedLength
Calculate max compressed length
2 parents 182c6d2 + 1d2749f commit f3c52d4

38 files changed

+435
-83
lines changed

example.java

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,7 @@ public static void basicExampleHeadless() {
104104
// be processed using variable byte
105105
SkippableIntegratedComposition codec = new SkippableIntegratedComposition(new IntegratedBinaryPacking(),
106106
new IntegratedVariableByte());
107-
// output vector should be large enough...
108-
int[] compressed = new int[data.length + 1024];
109-
// compressed might not be large enough in some cases
110-
// if you get java.lang.ArrayIndexOutOfBoundsException, try
111-
// allocating more memory
107+
int[] compressed = new int[codec.maxHeadlessCompressedLength(new IntWrapper(0), data.length)];
112108

113109
/**
114110
*
@@ -267,10 +263,12 @@ public static void headlessDemo() {
267263
int[] uncompressed1 = {1,2,1,3,1};
268264
int[] uncompressed2 = {3,2,4,6,1};
269265

270-
int[] compressed = new int[uncompressed1.length+uncompressed2.length+1024];
271-
272266
SkippableIntegerCODEC codec = new SkippableComposition(new BinaryPacking(), new VariableByte());
273267

268+
int maxCompressedLength = codec.maxHeadlessCompressedLength(new IntWrapper(0), uncompressed1.length)
269+
+ codec.maxHeadlessCompressedLength(new IntWrapper(0), uncompressed2.length);
270+
int[] compressed = new int[maxCompressedLength];
271+
274272
// compressing
275273
IntWrapper outPos = new IntWrapper();
276274

src/main/java/me/lemire/integercompression/BinaryPacking.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,9 @@
3737
* @author Daniel Lemire
3838
*/
3939
public final class BinaryPacking implements IntegerCODEC, SkippableIntegerCODEC {
40-
final static int BLOCK_SIZE = 32;
41-
40+
public final static int BLOCK_SIZE = 32;
41+
private static final int MAX_BIT_WIDTH = Integer.SIZE;
42+
4243
@Override
4344
public void compress(int[] in, IntWrapper inpos, int inlength,
4445
int[] out, IntWrapper outpos) {
@@ -131,7 +132,16 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
131132
outpos.add(outlength);
132133
inpos.set(tmpinpos);
133134
}
134-
135+
136+
@Override
137+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
138+
int blockCount = inlength / BLOCK_SIZE;
139+
int headersSizeInInts = blockCount / Integer.BYTES + (blockCount % Integer.BYTES);
140+
int blocksSizeInInts = blockCount * MAX_BIT_WIDTH;
141+
compressedPositions.add(blockCount * BLOCK_SIZE);
142+
return headersSizeInInts + blocksSizeInInts;
143+
}
144+
135145
@Override
136146
public String toString() {
137147
return this.getClass().getSimpleName();

src/main/java/me/lemire/integercompression/FastPFOR.java

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,13 @@
4040
*/
4141
public class FastPFOR implements IntegerCODEC,SkippableIntegerCODEC {
4242
final static int OVERHEAD_OF_EACH_EXCEPT = 8;
43+
private static final int OVERHEAD_OF_EACH_PAGE_IN_INTS = 36; // 1 int for the header
44+
// 1 int for the byte array size
45+
// 1 int for the bitmap
46+
// 1 int for byte array padding (to align to 4 bytes)
47+
// 32 to have enough space to bit-pack the exceptions
48+
private static final int OVERHEAD_OF_EACH_BLOCK_IN_INTS = 1; // 1 byte for the number of bits allocated per truncated integer
49+
// 1 byte for the number of exceptions
4350
/**
4451
*
4552
*/
@@ -65,7 +72,7 @@ public class FastPFOR implements IntegerCODEC,SkippableIntegerCODEC {
6572
* @param pagesize
6673
* the desired page size (recommended value is FastPFOR.DEFAULT_PAGE_SIZE)
6774
*/
68-
private FastPFOR(int pagesize) {
75+
FastPFOR(int pagesize) {
6976
pageSize = pagesize;
7077
// Initialize arrays.
7178
byteContainer = makeBuffer(3 * pageSize
@@ -230,6 +237,18 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
230237
}
231238
}
232239

240+
@Override
241+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
242+
inlength = Util.greatestMultiple(inlength, BLOCK_SIZE);
243+
244+
int pageCount = (inlength + pageSize - 1) / pageSize;
245+
int blockCount = inlength / BLOCK_SIZE;
246+
247+
// getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers.
248+
int blockSizeInInts = OVERHEAD_OF_EACH_BLOCK_IN_INTS + BLOCK_SIZE;
249+
return OVERHEAD_OF_EACH_PAGE_IN_INTS * pageCount + blockSizeInInts * blockCount + 24;
250+
}
251+
233252
private void decodePage(int[] in, IntWrapper inpos, int[] out,
234253
IntWrapper outpos, int thissize) {
235254
final int initpos = inpos.get();

src/main/java/me/lemire/integercompression/FastPFOR128.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,13 @@
2323
*/
2424
public class FastPFOR128 implements IntegerCODEC,SkippableIntegerCODEC {
2525
final static int OVERHEAD_OF_EACH_EXCEPT = 8;
26+
private static final int OVERHEAD_OF_EACH_PAGE_IN_INTS = 36; // 1 int for the header
27+
// 1 int for the byte array size
28+
// 1 int for the bitmap
29+
// 1 int for byte array padding (to align to 4 bytes)
30+
// 32 to have enough space to bit-pack the exceptions
31+
private static final int OVERHEAD_OF_EACH_BLOCK_IN_INTS = 1; // 1 byte for the number of bits allocated per truncated integer
32+
// 1 byte for the number of exceptions
2633
/**
2734
*
2835
*/
@@ -209,6 +216,18 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
209216
}
210217
}
211218

219+
@Override
220+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
221+
inlength = Util.greatestMultiple(inlength, BLOCK_SIZE);
222+
223+
int pageCount = (inlength + pageSize - 1) / pageSize;
224+
int blockCount = inlength / BLOCK_SIZE;
225+
226+
// getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers.
227+
int blockSizeInInts = OVERHEAD_OF_EACH_BLOCK_IN_INTS + BLOCK_SIZE;
228+
return OVERHEAD_OF_EACH_PAGE_IN_INTS * pageCount + blockSizeInInts * blockCount + 24;
229+
}
230+
212231
private void decodePage(int[] in, IntWrapper inpos, int[] out,
213232
IntWrapper outpos, int thissize) {
214233
final int initpos = inpos.get();

src/main/java/me/lemire/integercompression/GroupSimple9.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3549,4 +3549,10 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o
35493549
inpos.set(tmpinpos);
35503550

35513551
}
3552-
}
3552+
3553+
@Override
3554+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
3555+
compressedPositions.add(inlength);
3556+
return inlength;
3557+
}
3558+
}

src/main/java/me/lemire/integercompression/IntCompressor.java

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -33,21 +33,14 @@ public IntCompressor() {
3333
*
3434
* @param input array to be compressed
3535
* @return compressed array
36-
* @throws UncompressibleInputException if the data is too poorly compressible
3736
*/
3837
public int[] compress(int[] input) {
39-
int[] compressed = new int[input.length + input.length / 100 + 1024];
38+
int maxCompressedLength = codec.maxHeadlessCompressedLength(new IntWrapper(0), input.length);
39+
int[] compressed = new int[maxCompressedLength + 1]; // +1 to store the length of the input
4040
// Store at index=0 the length of the input, hence enabling .headlessCompress
4141
compressed[0] = input.length;
4242
IntWrapper outpos = new IntWrapper(1);
43-
try {
44-
codec.headlessCompress(input, new IntWrapper(0),
45-
input.length, compressed, outpos);
46-
} catch (IndexOutOfBoundsException ioebe) {
47-
throw new
48-
UncompressibleInputException("Your input is too poorly compressible "
49-
+ "with the current codec : "+codec);
50-
}
43+
codec.headlessCompress(input, new IntWrapper(0), input.length, compressed, outpos);
5144
compressed = Arrays.copyOf(compressed,outpos.intValue());
5245
return compressed;
5346
}

src/main/java/me/lemire/integercompression/JustCopy.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,12 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
4242

4343
}
4444

45+
@Override
46+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
47+
compressedPositions.add(inlength);
48+
return inlength;
49+
}
50+
4551
@Override
4652
public void compress(int[] in, IntWrapper inpos, int inlength,
4753
int[] out, IntWrapper outpos) {

src/main/java/me/lemire/integercompression/Kamikaze.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,11 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o
3838
}
3939
}
4040

41+
@Override
42+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
43+
throw new UnsupportedOperationException("Calculating the max compressed length is not supported yet.");
44+
}
45+
4146
@Override
4247
public String toString() {
4348
return "Kamikaze's PForDelta";
@@ -64,4 +69,4 @@ public void uncompress(int[] in, IntWrapper inpos, int inlength, int[] out,
6469
headlessUncompress(in, inpos, inlength, out, outpos, outlength);
6570

6671
}
67-
}
72+
}

src/main/java/me/lemire/integercompression/NewPFD.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,17 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
132132
decodePage(in, inpos, out, outpos, mynvalue);
133133
}
134134

135+
@Override
136+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
137+
inlength = Util.greatestMultiple(inlength, BLOCK_SIZE);
138+
int blockCount = inlength / BLOCK_SIZE;
139+
// +1 for the header
140+
// getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers.
141+
int maxBlockSizeInInts = 1 + BLOCK_SIZE;
142+
compressedPositions.add(inlength);
143+
return maxBlockSizeInInts * blockCount;
144+
}
145+
135146
private void decodePage(int[] in, IntWrapper inpos, int[] out,
136147
IntWrapper outpos, int thissize) {
137148
int tmpoutpos = outpos.get();

src/main/java/me/lemire/integercompression/NewPFDS16.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,17 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
131131
decodePage(in, inpos, out, outpos, mynvalue);
132132
}
133133

134+
@Override
135+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
136+
inlength = Util.greatestMultiple(inlength, BLOCK_SIZE);
137+
int blockCount = inlength / BLOCK_SIZE;
138+
// +1 for the header
139+
// getBestBFromData limits the memory used for exceptions so that the total size of the block does not exceed BLOCK_SIZE integers.
140+
int maxBlockSizeInInts = 1 + BLOCK_SIZE;
141+
compressedPositions.add(inlength);
142+
return maxBlockSizeInInts * blockCount;
143+
}
144+
134145
private void decodePage(int[] in, IntWrapper inpos, int[] out,
135146
IntWrapper outpos, int thissize) {
136147
int tmpoutpos = outpos.get();

0 commit comments

Comments
 (0)