Skip to content

Commit 07128ab

Browse files
committed
Compute max compressed length for SkippableIntegerCODEC
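Currently, only BinaryPacking and VariableByte provide real implementations that calculate the maximum compressed length; every other codec falls back to the default estimate defined on SkippableIntegerCODEC (inlength + inlength / 100 + 1024). This commit demonstrates how the approach might work. Once we agree that the design is correct, we can extend it to the other schemes.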
1 parent 18da31e commit 07128ab

File tree

10 files changed

+68
-11
lines changed

10 files changed

+68
-11
lines changed

example.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -263,10 +263,12 @@ public static void headlessDemo() {
263263
int[] uncompressed1 = {1,2,1,3,1};
264264
int[] uncompressed2 = {3,2,4,6,1};
265265

266-
int[] compressed = new int[uncompressed1.length+uncompressed2.length+1024];
267-
268266
SkippableIntegerCODEC codec = new SkippableComposition(new BinaryPacking(), new VariableByte());
269267

268+
int maxCompressedLength = codec.maxHeadlessCompressedLength(new IntWrapper(0), uncompressed1.length)
269+
+ codec.maxHeadlessCompressedLength(new IntWrapper(0), uncompressed2.length);
270+
int[] compressed = new int[maxCompressedLength];
271+
270272
// compressing
271273
IntWrapper outPos = new IntWrapper();
272274

src/main/java/me/lemire/integercompression/BinaryPacking.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@
3838
*/
3939
public final class BinaryPacking implements IntegerCODEC, SkippableIntegerCODEC {
4040
final static int BLOCK_SIZE = 32;
41-
41+
private static final int MAX_BIT_WIDTH = Integer.SIZE;
42+
4243
@Override
4344
public void compress(int[] in, IntWrapper inpos, int inlength,
4445
int[] out, IntWrapper outpos) {
@@ -131,7 +132,16 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
131132
outpos.add(outlength);
132133
inpos.set(tmpinpos);
133134
}
134-
135+
136+
@Override
137+
public int maxHeadlessCompressedLength(IntWrapper inpos, int inlength) {
138+
int blockCount = inlength / BLOCK_SIZE;
139+
int headersSizeInInts = blockCount / Integer.BYTES + (blockCount % Integer.BYTES);
140+
int blocksSizeInInts = blockCount * MAX_BIT_WIDTH;
141+
inpos.add(blockCount * BLOCK_SIZE);
142+
return headersSizeInInts + blocksSizeInInts;
143+
}
144+
135145
@Override
136146
public String toString() {
137147
return this.getClass().getSimpleName();

src/main/java/me/lemire/integercompression/IntCompressor.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@ public IntCompressor() {
3636
* @throws UncompressibleInputException if the data is too poorly compressible
3737
*/
3838
public int[] compress(int[] input) {
39-
int[] compressed = new int[input.length + input.length / 100 + 1024];
39+
int maxCompressedLength = codec.maxHeadlessCompressedLength(new IntWrapper(0), input.length);
40+
int[] compressed = new int[maxCompressedLength];
4041
// Store at index=0 the length of the input, hence enabling .headlessCompress
4142
compressed[0] = input.length;
4243
IntWrapper outpos = new IntWrapper(1);

src/main/java/me/lemire/integercompression/SkippableComposition.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,16 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o
6161
F2.headlessUncompress(in, inpos, inlength, out, outpos, num);
6262
}
6363

64+
@Override
65+
public int maxHeadlessCompressedLength(IntWrapper inpos, int inlength) {
66+
int init = inpos.get();
67+
int maxLength = F1.maxHeadlessCompressedLength(inpos, inlength);
68+
maxLength += 1; // Add +1 for the potential F2 header. Question: is this header actually needed in the headless version?
69+
inlength -= inpos.get() - init;
70+
maxLength += F2.maxHeadlessCompressedLength(inpos, inlength);
71+
return maxLength;
72+
}
73+
6474
@Override
6575
public String toString() {
6676
return F1.toString() + "+" + F2.toString();

src/main/java/me/lemire/integercompression/SkippableIntegerCODEC.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,4 +69,25 @@ public void headlessCompress(int[] in, IntWrapper inpos, int inlength, int[] out
6969
public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] out,
7070
IntWrapper outpos, int num);
7171

72+
/**
73+
* Compute the maximum number of integers that might be required to store
74+
* the compressed form of a given input array segment, without headers.
75+
* <p>
76+
* This is useful to pre-allocate the output buffer before calling
77+
* {@link #headlessCompress(int[], IntWrapper, int, int[], IntWrapper)}.
78+
* </p>
79+
*
80+
* @param inpos
81+
* starting position in the input array (passed by reference)
82+
* @param inlength
83+
* number of integers to be compressed
84+
* @return the maximum number of integers needed in the output array
85+
*/
86+
default int maxHeadlessCompressedLength(IntWrapper inpos, int inlength) {
87+
if (inlength == 0) {
88+
return 0;
89+
}
90+
inpos.add(inlength);
91+
return inlength + inlength / 100 + 1024;
92+
}
7293
}

src/main/java/me/lemire/integercompression/VariableByte.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
*/
2222
public class VariableByte implements IntegerCODEC, ByteIntegerCODEC, SkippableIntegerCODEC {
2323

24+
private static final int MAX_BYTES_PER_INT = 5;
25+
2426
private static byte extract7bits(int i, long val) {
2527
return (byte) ((val >> (7 * i)) & ((1 << 7) - 1));
2628
}
@@ -208,6 +210,14 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o
208210
inpos.set(p + (s!=0 ? 1 : 0));
209211
}
210212

213+
@Override
214+
public int maxHeadlessCompressedLength(IntWrapper inpos, int inlength) {
215+
int maxLengthInBytes = inlength * MAX_BYTES_PER_INT;
216+
int maxLengthInInts = (maxLengthInBytes + Integer.BYTES - 1) / Integer.BYTES;
217+
inpos.add(inlength);
218+
return maxLengthInInts;
219+
}
220+
211221
/**
212222
* Creates a new buffer of the requested size.
213223
*

src/test/java/me/lemire/integercompression/AdhocTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,10 @@ public void testIssue29() {
8686
@Test
8787
public void testIssue29b() {
8888
for(int x = 0; x < 64; x++) {
89+
SkippableIntegerCODEC codec = new SkippableComposition(new BinaryPacking(), new VariableByte());
8990
int[] a = {2, 3, 4, 5};
90-
int[] b = new int[90];
91+
int[] b = new int[x + codec.maxHeadlessCompressedLength(new IntWrapper(0), a.length)];
9192
int[] c = new int[a.length];
92-
SkippableIntegerCODEC codec = new SkippableComposition(new BinaryPacking(), new VariableByte());
9393
IntWrapper aOffset = new IntWrapper(0);
9494
IntWrapper bOffset = new IntWrapper(x);
9595
codec.headlessCompress(a, aOffset, a.length, b, bOffset);

src/test/java/me/lemire/integercompression/ExampleTest.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -276,10 +276,12 @@ public void headlessDemo() {
276276
int[] uncompressed1 = { 1, 2, 1, 3, 1 };
277277
int[] uncompressed2 = { 3, 2, 4, 6, 1 };
278278

279-
int[] compressed = new int[uncompressed1.length + uncompressed2.length + 1024];
280-
281279
SkippableIntegerCODEC codec = new SkippableComposition(new BinaryPacking(), new VariableByte());
282280

281+
int maxCompressedLength = codec.maxHeadlessCompressedLength(new IntWrapper(0), uncompressed1.length)
282+
+ codec.maxHeadlessCompressedLength(new IntWrapper(0), uncompressed2.length);
283+
int[] compressed = new int[maxCompressedLength];
284+
283285
// compressing
284286
IntWrapper outPos = new IntWrapper();
285287

src/test/java/me/lemire/integercompression/SkippableBasicTest.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,11 @@ public void consistentTest() {
4848
for (SkippableIntegerCODEC c : codecs) {
4949
System.out.println("[SkippeableBasicTest.consistentTest] codec = "
5050
+ c);
51-
int[] outBuf = new int[N + 1024];
5251
for (int n = 0; n <= N; ++n) {
5352
IntWrapper inPos = new IntWrapper();
5453
IntWrapper outPos = new IntWrapper();
54+
int[] outBuf = new int[c.maxHeadlessCompressedLength(new IntWrapper(0), n)];
55+
5556
c.headlessCompress(data, inPos, n, outBuf, outPos);
5657

5758
IntWrapper inPoso = new IntWrapper();

src/test/java/me/lemire/integercompression/TestUtils.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ protected static int[] uncompress(ByteIntegerCODEC codec, byte[] data, int len)
165165
}
166166

167167
protected static int[] compressHeadless(SkippableIntegerCODEC codec, int[] data) {
168-
int[] outBuf = new int[data.length * 4];
168+
int[] outBuf = new int[codec.maxHeadlessCompressedLength(new IntWrapper(0), data.length)];
169169
IntWrapper inPos = new IntWrapper();
170170
IntWrapper outPos = new IntWrapper();
171171
codec.headlessCompress(data, inPos, data.length, outBuf, outPos);

0 commit comments

Comments
 (0)