diff --git a/build-tools-internal/src/main/resources/changelog-schema.json b/build-tools-internal/src/main/resources/changelog-schema.json index 593716954780b..a435305a8e3e2 100644 --- a/build-tools-internal/src/main/resources/changelog-schema.json +++ b/build-tools-internal/src/main/resources/changelog-schema.json @@ -32,6 +32,7 @@ "CRUD", "Client", "Cluster Coordination", + "Codec", "Data streams", "DLM", "Discovery-Plugins", diff --git a/docs/changelog/112665.yaml b/docs/changelog/112665.yaml new file mode 100644 index 0000000000000..ae2cf7f171f4b --- /dev/null +++ b/docs/changelog/112665.yaml @@ -0,0 +1,14 @@ +pr: 112665 +summary: Remove zstd feature flag for index codec best compression +area: Codec +type: enhancement +issues: [] +highlight: + title: Enable ZStandard compression for indices with index.codec set to best_compression + body: |- + Previously, DEFLATE compression was used to compress stored fields in indices with index.codec index setting set to + best_compression. With this change, ZStandard is used as the compression algorithm for stored fields in indices with + index.codec index setting set to best_compression. The usage of ZStandard results in less storage usage with a + similar indexing throughput depending on what options are used. Experiments with indexing logs have shown that + ZStandard offers ~12% lower storage usage and a ~14% higher indexing throughput compared to DEFLATE. + notable: true diff --git a/docs/reference/ilm/actions/ilm-forcemerge.asciidoc b/docs/reference/ilm/actions/ilm-forcemerge.asciidoc index ef74e462d4bf2..24c3c08c24668 100644 --- a/docs/reference/ilm/actions/ilm-forcemerge.asciidoc +++ b/docs/reference/ilm/actions/ilm-forcemerge.asciidoc @@ -49,7 +49,7 @@ Number of segments to merge to. To fully merge the index, set to `1`. `index_codec`:: (Optional, string) Codec used to compress the document store. 
The only accepted value is -`best_compression`, which uses {wikipedia}/DEFLATE[DEFLATE] for a higher +`best_compression`, which uses {wikipedia}/Zstd[ZSTD] for a higher compression ratio but slower stored fields performance. To use the default LZ4 codec, omit this argument. + diff --git a/docs/reference/index-modules.asciidoc b/docs/reference/index-modules.asciidoc index 7232de12c8c50..ed8cf6c1494e4 100644 --- a/docs/reference/index-modules.asciidoc +++ b/docs/reference/index-modules.asciidoc @@ -76,14 +76,16 @@ breaking change]. The +default+ value compresses stored data with LZ4 compression, but this can be set to +best_compression+ - which uses {wikipedia}/DEFLATE[DEFLATE] for a higher - compression ratio, at the expense of slower stored fields performance. + which uses {wikipedia}/Zstd[ZSTD] for a higher + compression ratio, at the expense of slower stored fields read performance. If you are updating the compression type, the new one will be applied after segments are merged. Segment merging can be forced using <<indices-forcemerge,force merge>>. Experiments with indexing log datasets - have shown that `best_compression` gives up to ~18% lower storage usage in - the most ideal scenario compared to `default` while only minimally affecting - indexing throughput (~2%). + have shown that `best_compression` gives up to ~28% lower storage usage and + similar indexing throughput (sometimes a bit slower or faster depending on other options used) compared + to `default`, while increasing get-by-id latencies by between ~10% and ~33%. The higher get-by-id + latencies are not a concern for many use cases like logging or metrics, since + these don't really rely on get-by-id functionality (Get APIs or searching by _id). 
[[index-mode-setting]] `index.mode`:: + diff --git a/server/src/main/java/org/elasticsearch/index/codec/CodecService.java b/server/src/main/java/org/elasticsearch/index/codec/CodecService.java index aa65289616ff6..df1682cd10a3e 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/CodecService.java +++ b/server/src/main/java/org/elasticsearch/index/codec/CodecService.java @@ -53,15 +53,11 @@ public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays) } codecs.put(LEGACY_DEFAULT_CODEC, legacyBestSpeedCodec); + codecs.put( + BEST_COMPRESSION_CODEC, + new PerFieldMapperCodec(Zstd814StoredFieldsFormat.Mode.BEST_COMPRESSION, mapperService, bigArrays) + ); Codec legacyBestCompressionCodec = new LegacyPerFieldMapperCodec(Lucene99Codec.Mode.BEST_COMPRESSION, mapperService, bigArrays); - if (ZSTD_STORED_FIELDS_FEATURE_FLAG.isEnabled()) { - codecs.put( - BEST_COMPRESSION_CODEC, - new PerFieldMapperCodec(Zstd814StoredFieldsFormat.Mode.BEST_COMPRESSION, mapperService, bigArrays) - ); - } else { - codecs.put(BEST_COMPRESSION_CODEC, legacyBestCompressionCodec); - } codecs.put(LEGACY_BEST_COMPRESSION_CODEC, legacyBestCompressionCodec); codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault()); diff --git a/server/src/test/java/org/elasticsearch/index/codec/CodecIntegrationTests.java b/server/src/test/java/org/elasticsearch/index/codec/CodecIntegrationTests.java index 05b9cf42e6236..38b4d077a35aa 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/CodecIntegrationTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/CodecIntegrationTests.java @@ -17,8 +17,6 @@ public class CodecIntegrationTests extends ESSingleNodeTestCase { public void testCanConfigureLegacySettings() { - assumeTrue("Only when zstd_stored_fields feature flag is enabled", CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG.isEnabled()); - createIndex("index1", Settings.builder().put("index.codec", "legacy_default").build()); var codec = 
client().admin().indices().prepareGetSettings("index1").execute().actionGet().getSetting("index1", "index.codec"); assertThat(codec, equalTo("legacy_default")); @@ -29,8 +27,6 @@ public void testCanConfigureLegacySettings() { } public void testDefaultCodecLogsdb() { - assumeTrue("Only when zstd_stored_fields feature flag is enabled", CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG.isEnabled()); - var indexService = createIndex("index1", Settings.builder().put("index.mode", "logsdb").build()); var storedFieldsFormat = (Zstd814StoredFieldsFormat) indexService.getShard(0) .getEngineOrNull() diff --git a/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java b/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java index c56ef138724d6..cb700dc9486b5 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java @@ -64,7 +64,6 @@ public void testDefault() throws Exception { } public void testBestCompression() throws Exception { - assumeTrue("Only when zstd_stored_fields feature flag is enabled", CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG.isEnabled()); Codec codec = createCodecService().codec("best_compression"); assertEquals( "Zstd814StoredFieldsFormat(compressionMode=ZSTD(level=3), chunkSize=245760, maxDocsPerChunk=2048, blockShift=10)",