Skip to content

Commit be6717c

Browse files
authored
PAX: Support LZ4 compression for table columns (#1344)
* PAX: Support LZ4 compression for table columns PAX only supports zlib and zstd compression for column values. This commit adds lz4 support for pax table columns. * map compress level to acceleration for lz4 * restrict acceleration to range [0, 3] * add macro control
1 parent 29d2a2a commit be6717c

File tree

5 files changed

+24
-3
lines changed

5 files changed

+24
-3
lines changed

contrib/pax_storage/src/cpp/access/paxc_rel_options.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ static const relopt_compress_type_mapping kSelfRelCompressMap[] = {
5050
pax::ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_ZSTD},
5151
{ColumnEncoding_Kind_COMPRESS_ZLIB_STR,
5252
pax::ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_ZLIB},
53+
#ifdef USE_LZ4
54+
{ColumnEncoding_Kind_COMPRESS_LZ4_STR,
55+
pax::ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_LZ4},
56+
#endif
5357
};
5458

5559
typedef struct {

contrib/pax_storage/src/cpp/access/paxc_rel_options.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ namespace paxc {
4141
#define ColumnEncoding_Kind_DICTIONARY_STR "dict"
4242
#define ColumnEncoding_Kind_COMPRESS_ZSTD_STR "zstd"
4343
#define ColumnEncoding_Kind_COMPRESS_ZLIB_STR "zlib"
44+
#define ColumnEncoding_Kind_COMPRESS_LZ4_STR "lz4"
4445

4546
#define STORAGE_FORMAT_TYPE_PORC "porc"
4647
#define STORAGE_FORMAT_TYPE_PORC_VEC "porc_vec"

contrib/pax_storage/src/cpp/storage/columns/pax_column_test.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -798,13 +798,19 @@ INSTANTIATE_TEST_SUITE_P(
798798
PaxColumnEncodingTestCombine, PaxColumnCompressTest,
799799
testing::Combine(testing::Values(16, 32, 64),
800800
testing::Values(ColumnEncoding_Kind_NO_ENCODED,
801+
#ifdef USE_LZ4
802+
ColumnEncoding_Kind_COMPRESS_LZ4,
803+
#endif
801804
ColumnEncoding_Kind_COMPRESS_ZSTD,
802805
ColumnEncoding_Kind_COMPRESS_ZLIB)));
803806

804807
INSTANTIATE_TEST_SUITE_P(
805808
PaxColumnEncodingTestCombine, PaxNonFixedColumnCompressTest,
806809
testing::Combine(testing::Values(16, 32, 64),
807810
testing::Values(ColumnEncoding_Kind_NO_ENCODED,
811+
#ifdef USE_LZ4
812+
ColumnEncoding_Kind_COMPRESS_LZ4,
813+
#endif
808814
ColumnEncoding_Kind_COMPRESS_ZSTD,
809815
ColumnEncoding_Kind_COMPRESS_ZLIB),
810816
testing::Values(true, false),

contrib/pax_storage/src/cpp/storage/columns/pax_compress.cc

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,12 @@ std::shared_ptr<PaxCompressor> PaxCompressor::CreateBlockCompressor(
5050
compressor = std::make_shared<PaxZlibCompressor>();
5151
break;
5252
}
53+
#ifdef USE_LZ4
54+
case ColumnEncoding_Kind::ColumnEncoding_Kind_COMPRESS_LZ4: {
55+
compressor = std::make_shared<PaxLZ4Compressor>();
56+
break;
57+
}
58+
#endif
5359
case ColumnEncoding_Kind::ColumnEncoding_Kind_DEF_ENCODED: {
5460
CBDB_RAISE(cbdb::CException::ExType::kExTypeLogicError,
5561
fmt("Invalid compress type %d",
@@ -230,9 +236,12 @@ size_t PaxLZ4Compressor::GetCompressBound(size_t src_len) {
230236
}
231237

232238
size_t PaxLZ4Compressor::Compress(void *dst_buff, size_t dst_cap,
233-
void *src_buff, size_t src_len, int /*lvl*/) {
234-
return LZ4_compress_default((char *)src_buff, (char *)dst_buff, src_len,
235-
dst_cap);
239+
void *src_buff, size_t src_len, int lvl) {
240+
// acceleration affects compression speed, the larger acceleration value,
241+
// the less compression ratio.
242+
int acceleration = (20 - lvl) / 6;
243+
return LZ4_compress_fast((char *)src_buff, (char *)dst_buff, src_len,
244+
dst_cap, acceleration);
236245
}
237246

238247
size_t PaxLZ4Compressor::Decompress(void *dst_buff, size_t dst_len,

contrib/pax_storage/src/cpp/storage/proto/pax.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ message ColumnEncoding {
3737
COMPRESS_ZLIB = 4; // use ZLIB to compress
3838

3939
DICTIONARY = 5; // use dict-encoding
40+
COMPRESS_LZ4 = 6; // use lz4 to compress
4041
}
4142

4243
optional Kind kind = 1;

0 commit comments

Comments
 (0)