|
| 1 | +package me.lemire.longcompression; |
| 2 | + |
| 3 | +import java.util.Arrays; |
| 4 | + |
| 5 | +import me.lemire.integercompression.BinaryPacking; |
| 6 | +import me.lemire.integercompression.Composition; |
| 7 | +import me.lemire.integercompression.IntCompressor; |
| 8 | +import me.lemire.integercompression.IntWrapper; |
| 9 | +import me.lemire.integercompression.IntegerCODEC; |
| 10 | +import me.lemire.integercompression.VariableByte; |
| 11 | + |
| 12 | +/** |
| 13 | + * A {@link LongCODEC} which split each long in a highpart (32 first bits) and a low part (32 last bits). |
| 14 | + * |
| 15 | + * @author Benoit Lacelle |
| 16 | + * |
| 17 | + */ |
| 18 | +public class LongAs2IntsCodec implements LongCODEC { |
| 19 | + final IntegerCODEC highPartsCodec; |
| 20 | + final IntegerCODEC lowPartsCodec; |
| 21 | + |
| 22 | + public LongAs2IntsCodec(IntegerCODEC highPartsCodec, IntegerCODEC lowPartsCodec) { |
| 23 | + this.highPartsCodec = highPartsCodec; |
| 24 | + this.lowPartsCodec = lowPartsCodec; |
| 25 | + } |
| 26 | + |
| 27 | + /** |
| 28 | + * By default, we expect longs to be slightly above Integer.MAX_VALUE. Hence highParts to be small and positive |
| 29 | + * integers. For lowParts, we rely on {@link IntCompressor} default IntegerCODEC |
| 30 | + */ |
| 31 | + public LongAs2IntsCodec() { |
| 32 | + this(new VariableByte(), new Composition(new BinaryPacking(), new VariableByte())); |
| 33 | + } |
| 34 | + |
| 35 | + @Override |
| 36 | + public void compress(long[] in, IntWrapper inpos, int inlength, long[] out, IntWrapper outpos) { |
| 37 | + if (inlength == 0) { |
| 38 | + return; |
| 39 | + } |
| 40 | + |
| 41 | + int[] highParts = new int[inlength]; |
| 42 | + int[] lowParts = new int[inlength]; |
| 43 | + |
| 44 | + for (int i = 0; i < inlength; i++) { |
| 45 | + int inPosition = inpos.get() + i; |
| 46 | + |
| 47 | + highParts[i] = RoaringIntPacking.high(in[inPosition]); |
| 48 | + lowParts[i] = RoaringIntPacking.low(in[inPosition]); |
| 49 | + } |
| 50 | + |
| 51 | + // TODO What would be a relevant buffer size? |
| 52 | + int[] buffer = new int[inlength * 16]; |
| 53 | + |
| 54 | + int outPosition = outpos.get(); |
| 55 | + |
| 56 | + boolean hasLeftover; |
| 57 | + { |
| 58 | + // The first integer is reserved to hold the number of compressed ints |
| 59 | + IntWrapper highPartsOutPosition = new IntWrapper(1); |
| 60 | + |
| 61 | + highPartsCodec.compress(highParts, new IntWrapper(), inlength, buffer, highPartsOutPosition); |
| 62 | + |
| 63 | + // Record the compressedHighparts length |
| 64 | + buffer[0] = highPartsOutPosition.get() - 1; |
| 65 | + |
| 66 | + for (int i = 0; i < highPartsOutPosition.get() / 2; i++) { |
| 67 | + long pack = RoaringIntPacking.pack(buffer[i * 2], buffer[i * 2 + 1]); |
| 68 | + out[outPosition++] = pack; |
| 69 | + } |
| 70 | + |
| 71 | + if (1 == highPartsOutPosition.get() % 2) { |
| 72 | + // Shift the trailing integer as first in the buffer |
| 73 | + hasLeftover = true; |
| 74 | + buffer[0] = buffer[highPartsOutPosition.get() - 1]; |
| 75 | + } else { |
| 76 | + hasLeftover = false; |
| 77 | + } |
| 78 | + } |
| 79 | + |
| 80 | + { |
| 81 | + // The first integer is reserved to hold the number of compressed ints |
| 82 | + IntWrapper lowPartsOutPosition = new IntWrapper(1); |
| 83 | + if (hasLeftover) { |
| 84 | + // Keep the trailing int from highParts before the reserved int from lowParts compressed length |
| 85 | + lowPartsOutPosition.set(2); |
| 86 | + } |
| 87 | + |
| 88 | + lowPartsCodec.compress(lowParts, new IntWrapper(0), inlength, buffer, lowPartsOutPosition); |
| 89 | + |
| 90 | + // Record the compressedHighparts length |
| 91 | + buffer[hasLeftover ? 1 : 0] = lowPartsOutPosition.get() - (hasLeftover ? 2 : 1); |
| 92 | + |
| 93 | + for (int i = 0; i < lowPartsOutPosition.get() / 2; i++) { |
| 94 | + long pack = RoaringIntPacking.pack(buffer[i * 2], buffer[i * 2 + 1]); |
| 95 | + out[outPosition++] = pack; |
| 96 | + } |
| 97 | + |
| 98 | + if (1 == lowPartsOutPosition.get() % 2) { |
| 99 | + // The trailing integer is packed with a 0 |
| 100 | + long pack = RoaringIntPacking.pack(buffer[lowPartsOutPosition.get() - 1], 0); |
| 101 | + out[outPosition++] = pack; |
| 102 | + } |
| 103 | + } |
| 104 | + |
| 105 | + inpos.add(inlength); |
| 106 | + outpos.set(outPosition); |
| 107 | + } |
| 108 | + |
| 109 | + /** |
| 110 | + * inlength is ignored by this codec. We may rely on it instead of storing the compressedLowPart length |
| 111 | + */ |
| 112 | + @Override |
| 113 | + public void uncompress(long[] in, IntWrapper inpos, int inlength, long[] out, IntWrapper outpos) { |
| 114 | + if (inlength == 0) { |
| 115 | + return; |
| 116 | + } |
| 117 | + |
| 118 | + int longIndex = inpos.get(); |
| 119 | + |
| 120 | + int nbCompressedHighParts = RoaringIntPacking.high(in[longIndex]); |
| 121 | + int[] compressedHighParts = new int[nbCompressedHighParts]; |
| 122 | + |
| 123 | + // !highPart as we just read the highPart for nbCompressedHighParts |
| 124 | + boolean highPart = false; |
| 125 | + for (int i = 0; i < nbCompressedHighParts; i++) { |
| 126 | + int nextInt; |
| 127 | + if (highPart) { |
| 128 | + nextInt = RoaringIntPacking.high(in[longIndex + (i + 1) / 2]); |
| 129 | + } else { |
| 130 | + nextInt = RoaringIntPacking.low(in[longIndex + (i + 1) / 2]); |
| 131 | + } |
| 132 | + compressedHighParts[i] = nextInt; |
| 133 | + |
| 134 | + highPart = !highPart; |
| 135 | + } |
| 136 | + |
| 137 | + // TODO What would be a relevant buffer size? |
| 138 | + int[] buffer = new int[inlength * 16]; |
| 139 | + |
| 140 | + IntWrapper highPartsOutPosition = new IntWrapper(); |
| 141 | + highPartsCodec.uncompress(compressedHighParts, |
| 142 | + new IntWrapper(), |
| 143 | + compressedHighParts.length, |
| 144 | + buffer, |
| 145 | + highPartsOutPosition); |
| 146 | + int[] highParts = Arrays.copyOf(buffer, highPartsOutPosition.get()); |
| 147 | + |
| 148 | + // +1 as we initially read nbCompressedHighParts |
| 149 | + int intIndexNbCompressedLowParts = longIndex * 2 + 1 + nbCompressedHighParts; |
| 150 | + int nbCompressedLowParts; |
| 151 | + if (highPart) { |
| 152 | + nbCompressedLowParts = RoaringIntPacking.high(in[intIndexNbCompressedLowParts / 2]); |
| 153 | + } else { |
| 154 | + nbCompressedLowParts = RoaringIntPacking.low(in[intIndexNbCompressedLowParts / 2]); |
| 155 | + } |
| 156 | + highPart = !highPart; |
| 157 | + |
| 158 | + int[] compressedLowParts = new int[nbCompressedLowParts]; |
| 159 | + for (int i = 0; i < nbCompressedLowParts; i++) { |
| 160 | + int nextInt; |
| 161 | + if (highPart) { |
| 162 | + nextInt = RoaringIntPacking.high(in[(intIndexNbCompressedLowParts + 1 + i) / 2]); |
| 163 | + } else { |
| 164 | + nextInt = RoaringIntPacking.low(in[(intIndexNbCompressedLowParts + 1 + i) / 2]); |
| 165 | + } |
| 166 | + compressedLowParts[i] = nextInt; |
| 167 | + |
| 168 | + highPart = !highPart; |
| 169 | + } |
| 170 | + |
| 171 | + IntWrapper lowPartsOutPosition = new IntWrapper(); |
| 172 | + lowPartsCodec.uncompress(compressedLowParts, |
| 173 | + new IntWrapper(), |
| 174 | + compressedLowParts.length, |
| 175 | + buffer, |
| 176 | + lowPartsOutPosition); |
| 177 | + int[] lowParts = Arrays.copyOf(buffer, lowPartsOutPosition.get()); |
| 178 | + assert highParts.length == lowParts.length; |
| 179 | + |
| 180 | + int outposition = outpos.get(); |
| 181 | + for (int i = 0; i < highParts.length; i++) { |
| 182 | + out[outposition++] = RoaringIntPacking.pack(highParts[i], lowParts[i]); |
| 183 | + } |
| 184 | + |
| 185 | + inpos.add(inlength); |
| 186 | + outpos.set(outposition); |
| 187 | + } |
| 188 | + |
| 189 | +} |
0 commit comments