Skip to content

Commit 3e1d40b

Browse files
committed
Add utility classes (Composition, Delta), Introduce LongAs2IntsCodec
1 parent 416a7d4 commit 3e1d40b

File tree

14 files changed

+1327
-3
lines changed

14 files changed

+1327
-3
lines changed

pom.xml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,12 @@
4848
<version>4.13.1</version>
4949
<scope>test</scope>
5050
</dependency>
51+
<dependency>
52+
<groupId>org.roaringbitmap</groupId>
53+
<artifactId>RoaringBitmap</artifactId>
54+
<version>0.9.35</version>
55+
<scope>test</scope>
56+
</dependency>
5157
</dependencies>
5258
<issueManagement>
5359
<system>GitHub Issue Tracking</system>
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
package me.lemire.longcompression;
2+
3+
/**
4+
* This is just like LongCODEC, except that it indicates that delta coding is
5+
* "integrated", so that you don't need a separate step for delta coding.
6+
*
7+
* @author Benoit Lacelle
8+
*/
9+
public interface IntegratedLongCODEC extends LongCODEC {
10+
11+
}
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
package me.lemire.longcompression;
2+
3+
import java.util.Arrays;
4+
5+
import me.lemire.integercompression.BinaryPacking;
6+
import me.lemire.integercompression.Composition;
7+
import me.lemire.integercompression.IntCompressor;
8+
import me.lemire.integercompression.IntWrapper;
9+
import me.lemire.integercompression.IntegerCODEC;
10+
import me.lemire.integercompression.VariableByte;
11+
12+
/**
13+
* A {@link LongCODEC} which split each long in a highpart (32 first bits) and a low part (32 last bits).
14+
*
15+
* @author Benoit Lacelle
16+
*
17+
*/
18+
public class LongAs2IntsCodec implements LongCODEC {
19+
final IntegerCODEC highPartsCodec;
20+
final IntegerCODEC lowPartsCodec;
21+
22+
public LongAs2IntsCodec(IntegerCODEC highPartsCodec, IntegerCODEC lowPartsCodec) {
23+
this.highPartsCodec = highPartsCodec;
24+
this.lowPartsCodec = lowPartsCodec;
25+
}
26+
27+
/**
28+
* By default, we expect longs to be slightly above Integer.MAX_VALUE. Hence highParts to be small and positive
29+
* integers. For lowParts, we rely on {@link IntCompressor} default IntegerCODEC
30+
*/
31+
public LongAs2IntsCodec() {
32+
this(new VariableByte(), new Composition(new BinaryPacking(), new VariableByte()));
33+
}
34+
35+
@Override
36+
public void compress(long[] in, IntWrapper inpos, int inlength, long[] out, IntWrapper outpos) {
37+
if (inlength == 0) {
38+
return;
39+
}
40+
41+
int[] highParts = new int[inlength];
42+
int[] lowParts = new int[inlength];
43+
44+
for (int i = 0; i < inlength; i++) {
45+
int inPosition = inpos.get() + i;
46+
47+
highParts[i] = RoaringIntPacking.high(in[inPosition]);
48+
lowParts[i] = RoaringIntPacking.low(in[inPosition]);
49+
}
50+
51+
// TODO What would be a relevant buffer size?
52+
int[] buffer = new int[inlength * 16];
53+
54+
int outPosition = outpos.get();
55+
56+
boolean hasLeftover;
57+
{
58+
// The first integer is reserved to hold the number of compressed ints
59+
IntWrapper highPartsOutPosition = new IntWrapper(1);
60+
61+
highPartsCodec.compress(highParts, new IntWrapper(), inlength, buffer, highPartsOutPosition);
62+
63+
// Record the compressedHighparts length
64+
buffer[0] = highPartsOutPosition.get() - 1;
65+
66+
for (int i = 0; i < highPartsOutPosition.get() / 2; i++) {
67+
long pack = RoaringIntPacking.pack(buffer[i * 2], buffer[i * 2 + 1]);
68+
out[outPosition++] = pack;
69+
}
70+
71+
if (1 == highPartsOutPosition.get() % 2) {
72+
// Shift the trailing integer as first in the buffer
73+
hasLeftover = true;
74+
buffer[0] = buffer[highPartsOutPosition.get() - 1];
75+
} else {
76+
hasLeftover = false;
77+
}
78+
}
79+
80+
{
81+
// The first integer is reserved to hold the number of compressed ints
82+
IntWrapper lowPartsOutPosition = new IntWrapper(1);
83+
if (hasLeftover) {
84+
// Keep the trailing int from highParts before the reserved int from lowParts compressed length
85+
lowPartsOutPosition.set(2);
86+
}
87+
88+
lowPartsCodec.compress(lowParts, new IntWrapper(0), inlength, buffer, lowPartsOutPosition);
89+
90+
// Record the compressedHighparts length
91+
buffer[hasLeftover ? 1 : 0] = lowPartsOutPosition.get() - (hasLeftover ? 2 : 1);
92+
93+
for (int i = 0; i < lowPartsOutPosition.get() / 2; i++) {
94+
long pack = RoaringIntPacking.pack(buffer[i * 2], buffer[i * 2 + 1]);
95+
out[outPosition++] = pack;
96+
}
97+
98+
if (1 == lowPartsOutPosition.get() % 2) {
99+
// The trailing integer is packed with a 0
100+
long pack = RoaringIntPacking.pack(buffer[lowPartsOutPosition.get() - 1], 0);
101+
out[outPosition++] = pack;
102+
}
103+
}
104+
105+
inpos.add(inlength);
106+
outpos.set(outPosition);
107+
}
108+
109+
/**
110+
* inlength is ignored by this codec. We may rely on it instead of storing the compressedLowPart length
111+
*/
112+
@Override
113+
public void uncompress(long[] in, IntWrapper inpos, int inlength, long[] out, IntWrapper outpos) {
114+
if (inlength == 0) {
115+
return;
116+
}
117+
118+
int longIndex = inpos.get();
119+
120+
int nbCompressedHighParts = RoaringIntPacking.high(in[longIndex]);
121+
int[] compressedHighParts = new int[nbCompressedHighParts];
122+
123+
// !highPart as we just read the highPart for nbCompressedHighParts
124+
boolean highPart = false;
125+
for (int i = 0; i < nbCompressedHighParts; i++) {
126+
int nextInt;
127+
if (highPart) {
128+
nextInt = RoaringIntPacking.high(in[longIndex + (i + 1) / 2]);
129+
} else {
130+
nextInt = RoaringIntPacking.low(in[longIndex + (i + 1) / 2]);
131+
}
132+
compressedHighParts[i] = nextInt;
133+
134+
highPart = !highPart;
135+
}
136+
137+
// TODO What would be a relevant buffer size?
138+
int[] buffer = new int[inlength * 16];
139+
140+
IntWrapper highPartsOutPosition = new IntWrapper();
141+
highPartsCodec.uncompress(compressedHighParts,
142+
new IntWrapper(),
143+
compressedHighParts.length,
144+
buffer,
145+
highPartsOutPosition);
146+
int[] highParts = Arrays.copyOf(buffer, highPartsOutPosition.get());
147+
148+
// +1 as we initially read nbCompressedHighParts
149+
int intIndexNbCompressedLowParts = longIndex * 2 + 1 + nbCompressedHighParts;
150+
int nbCompressedLowParts;
151+
if (highPart) {
152+
nbCompressedLowParts = RoaringIntPacking.high(in[intIndexNbCompressedLowParts / 2]);
153+
} else {
154+
nbCompressedLowParts = RoaringIntPacking.low(in[intIndexNbCompressedLowParts / 2]);
155+
}
156+
highPart = !highPart;
157+
158+
int[] compressedLowParts = new int[nbCompressedLowParts];
159+
for (int i = 0; i < nbCompressedLowParts; i++) {
160+
int nextInt;
161+
if (highPart) {
162+
nextInt = RoaringIntPacking.high(in[(intIndexNbCompressedLowParts + 1 + i) / 2]);
163+
} else {
164+
nextInt = RoaringIntPacking.low(in[(intIndexNbCompressedLowParts + 1 + i) / 2]);
165+
}
166+
compressedLowParts[i] = nextInt;
167+
168+
highPart = !highPart;
169+
}
170+
171+
IntWrapper lowPartsOutPosition = new IntWrapper();
172+
lowPartsCodec.uncompress(compressedLowParts,
173+
new IntWrapper(),
174+
compressedLowParts.length,
175+
buffer,
176+
lowPartsOutPosition);
177+
int[] lowParts = Arrays.copyOf(buffer, lowPartsOutPosition.get());
178+
assert highParts.length == lowParts.length;
179+
180+
int outposition = outpos.get();
181+
for (int i = 0; i < highParts.length; i++) {
182+
out[outposition++] = RoaringIntPacking.pack(highParts[i], lowParts[i]);
183+
}
184+
185+
inpos.add(inlength);
186+
outpos.set(outposition);
187+
}
188+
189+
}
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/**
2+
* This code is released under the
3+
* Apache License Version 2.0 http://www.apache.org/licenses/.
4+
*
5+
* (c) Daniel Lemire, http://lemire.me/en/
6+
*/
7+
package me.lemire.longcompression;
8+
9+
import me.lemire.integercompression.IntWrapper;
10+
11+
/**
12+
* Helper class to compose schemes.
13+
*
14+
* @author Benoit Lacelle
15+
*/
16+
public class LongComposition implements LongCODEC {
17+
LongCODEC F1, F2;
18+
19+
/**
20+
* Compose a scheme from a first one (f1) and a second one (f2). The
21+
* first one is called first and then the second one tries to compress
22+
* whatever remains from the first run.
23+
*
24+
* By convention, the first scheme should be such that if, during
25+
* decoding, a 32-bit zero is first encountered, then there is no
26+
* output.
27+
*
28+
* @param f1
29+
* first codec
30+
* @param f2
31+
* second codec
32+
*/
33+
public LongComposition(LongCODEC f1, LongCODEC f2) {
34+
F1 = f1;
35+
F2 = f2;
36+
}
37+
38+
@Override
39+
public void compress(long[] in, IntWrapper inpos, int inlength,
40+
long[] out, IntWrapper outpos) {
41+
if (inlength == 0) {
42+
return;
43+
}
44+
int inposInit = inpos.get();
45+
int outposInit = outpos.get();
46+
F1.compress(in, inpos, inlength, out, outpos);
47+
if (outpos.get() == outposInit) {
48+
out[outposInit] = 0;
49+
outpos.increment();
50+
}
51+
inlength -= inpos.get() - inposInit;
52+
F2.compress(in, inpos, inlength, out, outpos);
53+
}
54+
55+
@Override
56+
public void uncompress(long[] in, IntWrapper inpos, int inlength,
57+
long[] out, IntWrapper outpos) {
58+
if (inlength == 0)
59+
return;
60+
final int init = inpos.get();
61+
F1.uncompress(in, inpos, inlength, out, outpos);
62+
inlength -= inpos.get() - init;
63+
F2.uncompress(in, inpos, inlength, out, outpos);
64+
}
65+
66+
@Override
67+
public String toString() {
68+
return F1.toString() + " + " + F2.toString();
69+
}
70+
71+
}

src/main/java/me/lemire/longcompression/LongVariableByte.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ public void headlessCompress(long[] in, IntWrapper inpos, int inlength, long[] o
4141
IntWrapper outpos) {
4242
if (inlength == 0)
4343
return;
44+
// Worst case: we write 10 bytes per long, hence 2 longs for a long, hence 16 bytes per long
4445
ByteBuffer buf = makeBuffer(inlength * 16);
4546
buf.order(ByteOrder.LITTLE_ENDIAN);
4647
for (int k = inpos.get(); k < inpos.get() + inlength; ++k) {

0 commit comments

Comments
 (0)