Skip to content

Commit e19e018

Browse files
committed
Compute max compressed length for SkippableIntegratedIntegerCODEC
1 parent 182c6d2 commit e19e018

File tree

9 files changed

+87
-15
lines changed

9 files changed

+87
-15
lines changed

example.java

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,7 @@ public static void basicExampleHeadless() {
104104
// be processed using variable byte
105105
SkippableIntegratedComposition codec = new SkippableIntegratedComposition(new IntegratedBinaryPacking(),
106106
new IntegratedVariableByte());
107-
// output vector should be large enough...
108-
int[] compressed = new int[data.length + 1024];
109-
// compressed might not be large enough in some cases
110-
// if you get java.lang.ArrayIndexOutOfBoundsException, try
111-
// allocating more memory
107+
int[] compressed = new int[codec.maxHeadlessCompressedLength(new IntWrapper(0), data.length)];
112108

113109
/**
114110
*

src/main/java/me/lemire/integercompression/differential/IntegratedBinaryPacking.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@
4949
public class IntegratedBinaryPacking implements IntegratedIntegerCODEC,
5050
SkippableIntegratedIntegerCODEC {
5151

52-
static final int BLOCK_SIZE = 32;
52+
public static final int BLOCK_SIZE = 32;
53+
private static final int MAX_BIT_WIDTH = Integer.SIZE;
5354

5455
@Override
5556
public void compress(int[] in, IntWrapper inpos, int inlength, int[] out,
@@ -170,4 +171,13 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
170171
initvalue.set(initoffset);
171172
inpos.set(tmpinpos);
172173
}
174+
175+
@Override
176+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
177+
int blockCount = inlength / BLOCK_SIZE;
178+
int headersSizeInInts = blockCount / Integer.BYTES + (blockCount % Integer.BYTES);
179+
int blocksSizeInInts = blockCount * MAX_BIT_WIDTH;
180+
compressedPositions.add(blockCount * BLOCK_SIZE);
181+
return headersSizeInInts + blocksSizeInInts;
182+
}
173183
}

src/main/java/me/lemire/integercompression/differential/IntegratedIntCompressor.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ public IntegratedIntCompressor() {
3939
* @throws UncompressibleInputException if the data is too poorly compressible
4040
*/
4141
public int[] compress(int[] input) {
42-
int [] compressed = new int[input.length + input.length / 100 + 1024];
42+
int maxCompressedLength = codec.maxHeadlessCompressedLength(new IntWrapper(0), input.length);
43+
int [] compressed = new int[maxCompressedLength + 1]; // +1 to store the length of the input
4344
compressed[0] = input.length;
4445
IntWrapper outpos = new IntWrapper(1);
4546
IntWrapper initvalue = new IntWrapper(0);

src/main/java/me/lemire/integercompression/differential/IntegratedVariableByte.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
public class IntegratedVariableByte implements IntegratedIntegerCODEC, IntegratedByteIntegerCODEC,
2525
SkippableIntegratedIntegerCODEC {
2626

27+
private static final int MAX_BYTES_PER_INT = 5;
28+
2729
private static byte extract7bits(int i, long val) {
2830
return (byte)((val >> (7 * i)) & ((1 << 7) - 1));
2931
}
@@ -257,6 +259,14 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
257259
inpos.set(p + (s!=0 ? 1 : 0));
258260
}
259261

262+
@Override
263+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
264+
int maxLengthInBytes = inlength * MAX_BYTES_PER_INT;
265+
int maxLengthInInts = (maxLengthInBytes + Integer.BYTES - 1) / Integer.BYTES;
266+
compressedPositions.add(inlength);
267+
return maxLengthInInts;
268+
}
269+
260270
/**
261271
* Creates a new buffer of the requested size.
262272
*

src/main/java/me/lemire/integercompression/differential/SkippableIntegratedComposition.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,4 +76,13 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
7676
F2.headlessUncompress(in, inpos, inlength, out, outpos,num,initvalue);
7777
}
7878

79+
@Override
80+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
81+
int init = compressedPositions.get();
82+
int maxLength = F1.maxHeadlessCompressedLength(compressedPositions, inlength);
83+
maxLength += 1; // Add +1 for the potential F2 header. Question: is this header actually needed in the headless version?
84+
inlength -= compressedPositions.get() - init;
85+
maxLength += F2.maxHeadlessCompressedLength(compressedPositions, inlength);
86+
return maxLength;
87+
}
7988
}

src/main/java/me/lemire/integercompression/differential/SkippableIntegratedIntegerCODEC.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,4 +71,21 @@ public void headlessCompress(int[] in, IntWrapper inpos, int inlength, int[] out
7171
public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] out,
7272
IntWrapper outpos, int num, IntWrapper initvalue);
7373

74+
/**
75+
* Compute the maximum number of integers that might be required to store
76+
* the compressed form of a given input array segment, without headers.
77+
* <p>
78+
* This is useful to pre-allocate the output buffer before calling
79+
* {@link #headlessCompress(int[], IntWrapper, int, int[], IntWrapper, IntWrapper)}.
80+
* </p>
81+
*
82+
* @param compressedPositions
83+
* since not all schemes compress every input integer, this wrapper
84+
* is incremented by the number of input integers that will actually be compressed.
85+
* This is useful when composing multiple schemes.
86+
* @param inlength
87+
* number of integers to be compressed
88+
* @return the maximum number of integers needed in the output array
89+
*/
90+
int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength);
7491
}

src/test/java/me/lemire/integercompression/AdhocTest.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,11 @@ public void testIssue29b() {
108108
@Test
109109
public void testIssue41() {
110110
for (int x = 0; x < 64; x++) {
111-
int[] a = { 2, 3, 4, 5 };
112-
int[] b = new int[90];
113-
int[] c = new int[a.length];
114111
SkippableIntegratedIntegerCODEC codec = new SkippableIntegratedComposition(new IntegratedBinaryPacking(),
115112
new IntegratedVariableByte());
113+
int[] a = { 2, 3, 4, 5 };
114+
int[] b = new int[x + codec.maxHeadlessCompressedLength(new IntWrapper(0), a.length)];
115+
int[] c = new int[a.length];
116116
IntWrapper aOffset = new IntWrapper(0);
117117
IntWrapper bOffset = new IntWrapper(x);
118118
IntWrapper initValue = new IntWrapper(0);

src/test/java/me/lemire/integercompression/ExampleTest.java

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -116,11 +116,7 @@ public void basicExampleHeadless() {
116116
// be processed using variable byte
117117
SkippableIntegratedComposition codec = new SkippableIntegratedComposition(new IntegratedBinaryPacking(),
118118
new IntegratedVariableByte());
119-
// output vector should be large enough...
120-
int[] compressed = new int[data.length + 1024];
121-
// compressed might not be large enough in some cases
122-
// if you get java.lang.ArrayIndexOutOfBoundsException, try
123-
// allocating more memory
119+
int[] compressed = new int[codec.maxHeadlessCompressedLength(new IntWrapper(0), data.length)];
124120

125121
/**
126122
*

src/test/java/me/lemire/integercompression/SkippableBasicTest.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,13 @@
99

1010
import java.util.Arrays;
1111

12+
import me.lemire.integercompression.differential.IntegratedBinaryPacking;
13+
import me.lemire.integercompression.differential.IntegratedVariableByte;
14+
import me.lemire.integercompression.differential.SkippableIntegratedComposition;
15+
import me.lemire.integercompression.differential.SkippableIntegratedIntegerCODEC;
1216
import org.junit.Test;
1317

18+
import static org.junit.Assert.assertTrue;
1419

1520
/**
1621
* Just some basic sanity tests.
@@ -147,5 +152,33 @@ public void varyingLengthTest2() {
147152
}
148153
}
149154

155+
@Test
156+
public void testMaxHeadlessCompressedLength() {
157+
testMaxHeadlessCompressedLength(new IntegratedBinaryPacking(), 16 * IntegratedBinaryPacking.BLOCK_SIZE);
158+
testMaxHeadlessCompressedLength(new IntegratedVariableByte(), 128);
159+
testMaxHeadlessCompressedLength(new SkippableIntegratedComposition(new IntegratedBinaryPacking(), new IntegratedVariableByte()), 16 * IntegratedBinaryPacking.BLOCK_SIZE + 10);
160+
}
161+
162+
private static void testMaxHeadlessCompressedLength(SkippableIntegratedIntegerCODEC codec, int inlengthTo) {
163+
// We test the worst-case scenario by making all deltas and the initial value negative.
164+
int delta = -1;
165+
int value = delta;
166+
167+
for (int inlength = 0; inlength < inlengthTo; ++inlength) {
168+
int[] input = new int[inlength];
169+
for (int i = 0; i < inlength; i++) {
170+
input[i] = value;
171+
value += delta;
172+
}
150173

174+
int maxOutputLength = codec.maxHeadlessCompressedLength(new IntWrapper(), inlength);
175+
int[] output = new int[maxOutputLength];
176+
IntWrapper outPos = new IntWrapper();
177+
178+
codec.headlessCompress(input, new IntWrapper(), inlength, output, outPos, new IntWrapper());
179+
// If we reach this point, no exception was thrown, which means the calculated output length was sufficient.
180+
181+
assertTrue(maxOutputLength <= outPos.get() + 1); // +1 because SkippableIntegratedComposition always adds one extra integer for the potential header
182+
}
183+
}
151184
}

0 commit comments

Comments
 (0)