Commit 3e880a3

Commit fsst against lucene lz4 impl

1 parent 82b6e45 commit 3e880a3

3 files changed, +1452 -0 lines changed
Lines changed: 114 additions & 0 deletions
@@ -0,0 +1,114 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

package org.elasticsearch.benchmark.common.compress;

import org.apache.lucene.codecs.compressing.CompressionMode;
import org.apache.lucene.codecs.compressing.Compressor;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.ByteBuffersDataInput;
import org.elasticsearch.common.compress.fsst.FSST;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.concurrent.TimeUnit;

@Fork(1)
@Warmup(iterations = 2)
@Measurement(iterations = 3)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Benchmark)
public class FSSTCompressBenchmark {

    @Param("")
    public String dataset;

    private byte[] input;
    private int[] offsets;
    private byte[] outBuf;
    private int[] outOffsets;
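
    // JMH reports the public fields of this thread-scoped state class as
    // secondary result metrics alongside the timing numbers.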
    @AuxCounters(AuxCounters.Type.EVENTS)
    @State(Scope.Thread)
    public static class CompressionMetrics {
        public double compressionRatio;
    }
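
    // The working copy of the input is padded with 8 trailing bytes, presumably
    // so the FSST compressor can load whole 8-byte words past the end of the data.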
    @Setup(Level.Trial)
    public void setup() throws IOException {
        String content = Files.readString(Path.of(dataset), StandardCharsets.UTF_8);
        byte[] bytes = FSST.toBytes(content);
        byte[] bytes2 = new byte[bytes.length + 8];
        System.arraycopy(bytes, 0, bytes2, 0, bytes.length);
        input = bytes2;
        offsets = new int[]{0, bytes.length};

        outBuf = new byte[bytes.length];
        outOffsets = new int[2];
    }

    @Benchmark
    public void makeSample(Blackhole bh) {
        List<byte[]> sample = FSST.makeSample(input, offsets);
        var symbolTable = FSST.SymbolTable.buildSymbolTable(sample);
        bh.consume(sample);
        bh.consume(symbolTable); // consume the table too, so its construction cannot be dead-code eliminated
    }

    @Benchmark
    public void compressFSST(Blackhole bh, CompressionMetrics metrics) {
        List<byte[]> sample = FSST.makeSample(input, offsets);
        var symbolTable = FSST.SymbolTable.buildSymbolTable(sample);
        symbolTable.compressBulk(1, input, offsets, outBuf, outOffsets);
        bh.consume(outBuf);
        bh.consume(outOffsets);

        int uncompressedSize = offsets[1];
        int compressedSize = outOffsets[1];
        metrics.compressionRatio = compressedSize / (double) uncompressedSize;
    }
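
    // Baselines: compress the same input through Lucene's generic Compressor
    // abstraction (CompressionMode.FAST and CompressionMode.HIGH_COMPRESSION).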
    @Benchmark
    public void compressLZ4Fast(Blackhole bh, CompressionMetrics metrics) throws IOException {
        int inputSize = offsets[1];

        var dataInput = new ByteBuffersDataInput(List.of(ByteBuffer.wrap(input)));
        var dataOutput = new ByteArrayDataOutput(outBuf);

        Compressor compressor = CompressionMode.FAST.newCompressor();
        compressor.compress(dataInput, dataOutput);

        long compressedSize = dataOutput.getPosition();
        bh.consume(dataOutput);

        metrics.compressionRatio = compressedSize / (double) inputSize;
    }

    @Benchmark
    public void compressLZ4High(Blackhole bh, CompressionMetrics metrics) throws IOException {
        int inputSize = offsets[1];

        var dataInput = new ByteBuffersDataInput(List.of(ByteBuffer.wrap(input)));
        var dataOutput = new ByteArrayDataOutput(outBuf);

        Compressor compressor = CompressionMode.HIGH_COMPRESSION.newCompressor();
        compressor.compress(dataInput, dataOutput);

        long compressedSize = dataOutput.getPosition();
        bh.consume(dataOutput);

        metrics.compressionRatio = compressedSize / (double) inputSize;
    }
}
Lines changed: 120 additions & 0 deletions
@@ -0,0 +1,120 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

package org.elasticsearch.benchmark.common.compress;

import org.apache.lucene.codecs.compressing.CompressionMode;
import org.apache.lucene.codecs.compressing.Compressor;
import org.apache.lucene.codecs.compressing.Decompressor;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.ByteBuffersDataInput;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.compress.fsst.FSST;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.concurrent.TimeUnit;

@Fork(1)
@Warmup(iterations = 2)
@Measurement(iterations = 3)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Benchmark)
public class FSSTDecompressBenchmark {

    @Param({"fsst", "lz4_high", "lz4_fast"})
    public String compressionType;

    @Param("")
    public String dataset;

    // original file
    private int originalSize;
    private byte[] input;
    private int[] offsets;

    // compressed
    private byte[] outBuf;
    private int[] outOffsets;
    private int compressedSize;

    // decompressed
    private byte[] decompressBuf;

    // fsst specific
    private FSST.SymbolTable symbolTable;
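
    // Each trial compresses the dataset once with the selected codec; the
    // benchmark method below then measures decompression only.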
    @Setup(Level.Trial)
    public void setup() throws IOException {
        String content = Files.readString(Path.of(dataset), StandardCharsets.UTF_8);
        byte[] bytes = FSST.toBytes(content);
        originalSize = bytes.length;
        input = new byte[originalSize + 8];
        offsets = new int[]{0, bytes.length};
        System.arraycopy(bytes, 0, input, 0, bytes.length);

        outBuf = new byte[input.length];
        outOffsets = new int[2];

        decompressBuf = new byte[input.length];

        if (compressionType.equals("fsst")) {
            List<byte[]> sample = FSST.makeSample(input, offsets);
            symbolTable = FSST.SymbolTable.buildSymbolTable(sample);
            symbolTable.compressBulk(1, input, offsets, outBuf, outOffsets);
            compressedSize = outOffsets[1];
        } else if (compressionType.equals("lz4_fast")) {
            var dataInput = new ByteBuffersDataInput(List.of(ByteBuffer.wrap(input, 0, originalSize)));
            var dataOutput = new ByteArrayDataOutput(outBuf);
            Compressor compressor = CompressionMode.FAST.newCompressor();
            compressor.compress(dataInput, dataOutput);
            compressedSize = dataOutput.getPosition();
        } else if (compressionType.equals("lz4_high")) {
            var dataInput = new ByteBuffersDataInput(List.of(ByteBuffer.wrap(input, 0, originalSize)));
            var dataOutput = new ByteArrayDataOutput(outBuf);
            Compressor compressor = CompressionMode.HIGH_COMPRESSION.newCompressor();
            compressor.compress(dataInput, dataOutput);
            compressedSize = dataOutput.getPosition();
        }
    }
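
    // Note that the FSST branch re-exports the symbol table and rebuilds a
    // Decoder on every invocation, so that work is included in the measured time.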
    @Benchmark
    public void decompress(Blackhole bh) throws IOException {
        if (compressionType.equals("fsst")) {
            byte[] symbolTableBytes = symbolTable.exportToBytes();
            FSST.Decoder decoder = FSST.Decoder.readFrom(symbolTableBytes);
            int decompressedLen = FSST.decompress(outBuf, 0, outOffsets[1], decoder, decompressBuf);
            // assert Arrays.equals(input, 0, originalSize, decompressBuf, 0, originalSize);
            bh.consume(decompressBuf);
            bh.consume(decompressedLen);
        } else if (compressionType.equals("lz4_fast")) {
            Decompressor decompressor = CompressionMode.FAST.newDecompressor();
            var dataInput = new ByteArrayDataInput(outBuf, 0, compressedSize);
            var outBytesRef = new BytesRef(decompressBuf);
            decompressor.decompress(dataInput, originalSize, 0, originalSize, outBytesRef);
            // assert Arrays.equals(input, 0, originalSize, outBytesRef.bytes, 0, originalSize);
            bh.consume(outBytesRef);
        } else if (compressionType.equals("lz4_high")) {
            Decompressor decompressor = CompressionMode.HIGH_COMPRESSION.newDecompressor();
            var dataInput = new ByteArrayDataInput(outBuf, 0, compressedSize);
            var outBytesRef = new BytesRef(decompressBuf);
            decompressor.decompress(dataInput, originalSize, 0, originalSize, outBytesRef);
            // assert Arrays.equals(input, 0, originalSize, outBytesRef.bytes, 0, originalSize);
            bh.consume(outBytesRef);
        }
    }
}
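
For reference, a minimal JMH launcher for these benchmarks might look like the sketch below. It is hypothetical and not part of this commit; the launcher class name, include pattern, and dataset path are made up, and only standard JMH Runner APIs are used.

import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

// Hypothetical launcher: selects both benchmark classes by name and feeds the
// "dataset" @Param with a local text file to compress and decompress.
public class FSSTBenchmarkLauncher {
    public static void main(String[] args) throws RunnerException {
        Options opts = new OptionsBuilder()
            .include("FSST(Compress|Decompress)Benchmark")
            .param("dataset", "/tmp/sample.txt") // hypothetical dataset path
            .build();
        new Runner(opts).run();
    }
}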
