Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,16 @@ In both cases, all queries return a constant score of 1.0.

## Index sorting for improved compression
The compression provided by `pattern_text` can be significantly improved if the index is sorted by the `template_id` field.
For example, a typical approach would be to sort first by `message.template_id`, then by `@timestamp`, as shown in the following example.
This sorting is not applied by default, but it can be enabled for the `message` field of LogsDB indices (assuming it is of type `pattern_text`) by setting the `index.logsdb.default_sort_on_message_template` index setting to `true`.
This will cause the index to be sorted by `host.name` (if present), then `message.template_id`, and finally by `@timestamp`.
If the index is not LogsDB or the `pattern_text` field is named something other than `message`, index sorting can still be manually applied as shown in the following example.

```console
PUT logs
{
"settings": {
"index": {
"sort.field": [ "message.template_id", "@timestamp" ],
"sort.field": [ "notice.template_id", "@timestamp" ],
"sort.order": [ "asc", "desc" ]
}
},
Expand All @@ -62,7 +64,7 @@ PUT logs
"@timestamp": {
"type": "date"
},
"message": {
"notice": {
"type": "pattern_text"
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
IndexSettings.TIME_SERIES_ES87TSDB_CODEC_ENABLED_SETTING,
IndexSettings.LOGSDB_ROUTE_ON_SORT_FIELDS,
IndexSettings.LOGSDB_SORT_ON_HOST_NAME,
IndexSettings.LOGSDB_SORT_ON_MESSAGE_TEMPLATE,
IndexSettings.LOGSDB_ADD_HOST_NAME_FIELD,
IndexSettings.PREFER_ILM_SETTING,
DataStreamFailureStoreDefinition.FAILURE_STORE_DEFINITION_VERSION_SETTING,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,14 @@ public boolean isES87TSDBCodecEnabled() {
Property.Final
);

/**
 * Opt-in flag for adding {@code message.template_id} to the default sort of a LogsDB index.
 * <p>
 * The key is {@code index.logsdb.default_sort_on_message_template}: this is the name used by the
 * {@code pattern_text} documentation and by the integration tests, so the registered key must match it.
 * The setting defaults to {@code false}, is index-scoped, private, and final (it cannot be changed after
 * index creation).
 */
public static final Setting<Boolean> LOGSDB_SORT_ON_MESSAGE_TEMPLATE = Setting.boolSetting(
    "index.logsdb.default_sort_on_message_template",
    false,
    Property.IndexScope,
    Property.PrivateIndex,
    Property.Final
);

public static final boolean DOC_VALUES_SKIPPER = new FeatureFlag("doc_values_skipper").isEnabled();
public static final Setting<Boolean> USE_DOC_VALUES_SKIPPER = Setting.boolSetting(
"index.mapping.use_doc_values_skipper",
Expand Down
35 changes: 24 additions & 11 deletions server/src/main/java/org/elasticsearch/index/IndexSortConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.sort.SortOrder;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
Expand Down Expand Up @@ -106,19 +107,21 @@ public final class IndexSortConfig {
);

public static class IndexSortConfigDefaults {
public static final FieldSortSpec[] TIME_SERIES_SORT, TIMESTAMP_SORT, HOSTNAME_TIMESTAMP_SORT, HOSTNAME_TIMESTAMP_BWC_SORT;
public static final FieldSortSpec[] TIME_SERIES_SORT, HOSTNAME_TIMESTAMP_BWC_SORT;

private static final FieldSortSpec HOSTNAME_SPEC, MESSAGE_PATTERN_SPEC, TIMESTAMP_SPEC;

static {
FieldSortSpec timeStampSpec = new FieldSortSpec(DataStreamTimestampFieldMapper.DEFAULT_PATH);
timeStampSpec.order = SortOrder.DESC;
TIME_SERIES_SORT = new FieldSortSpec[] { new FieldSortSpec(TimeSeriesIdFieldMapper.NAME), timeStampSpec };
TIMESTAMP_SORT = new FieldSortSpec[] { timeStampSpec };
TIMESTAMP_SPEC = new FieldSortSpec(DataStreamTimestampFieldMapper.DEFAULT_PATH);
TIMESTAMP_SPEC.order = SortOrder.DESC;
TIME_SERIES_SORT = new FieldSortSpec[] { new FieldSortSpec(TimeSeriesIdFieldMapper.NAME), TIMESTAMP_SPEC };

HOSTNAME_SPEC = new FieldSortSpec(IndexMode.HOST_NAME);
HOSTNAME_SPEC.order = SortOrder.ASC;
HOSTNAME_SPEC.missingValue = "_last";
HOSTNAME_SPEC.mode = MultiValueMode.MIN;

FieldSortSpec hostnameSpec = new FieldSortSpec(IndexMode.HOST_NAME);
hostnameSpec.order = SortOrder.ASC;
hostnameSpec.missingValue = "_last";
hostnameSpec.mode = MultiValueMode.MIN;
HOSTNAME_TIMESTAMP_SORT = new FieldSortSpec[] { hostnameSpec, timeStampSpec };
MESSAGE_PATTERN_SPEC = new FieldSortSpec("message.template_id");

// Older indexes use ascending ordering for host name and timestamp.
HOSTNAME_TIMESTAMP_BWC_SORT = new FieldSortSpec[] {
Expand Down Expand Up @@ -148,7 +151,17 @@ public static FieldSortSpec[] getDefaultSortSpecs(Settings settings) {
IndexVersions.LOGSB_OPTIONAL_SORTING_ON_HOST_NAME_BACKPORT,
IndexVersions.UPGRADE_TO_LUCENE_10_0_0
)) {
return (IndexSettings.LOGSDB_SORT_ON_HOST_NAME.get(settings)) ? HOSTNAME_TIMESTAMP_SORT : TIMESTAMP_SORT;

List<FieldSortSpec> sortSpecs = new ArrayList<>(3);
if (IndexSettings.LOGSDB_SORT_ON_HOST_NAME.get(settings)) {
sortSpecs.add(HOSTNAME_SPEC);
}
if (IndexSettings.LOGSDB_SORT_ON_MESSAGE_TEMPLATE.get(settings)) {
sortSpecs.add(MESSAGE_PATTERN_SPEC);
}
sortSpecs.add(TIMESTAMP_SPEC);

return sortSpecs.toArray(FieldSortSpec[]::new);
} else {
return HOSTNAME_TIMESTAMP_BWC_SORT;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,51 @@ public void testLogsdbIndexSortWithHostname() {
assertThat(config.sortSpecs[1].mode, equalTo(MultiValueMode.MAX));
}

/**
 * A logsdb index with only the message-template sort flag enabled must be sorted on
 * {@code message.template_id} first and {@code @timestamp} second.
 */
public void testLogsdbIndexSortWithMessage() {
    Settings.Builder builder = Settings.builder();
    builder.put(IndexSettings.MODE.getKey(), "logsdb");
    builder.put(IndexSettings.LOGSDB_SORT_ON_MESSAGE_TEMPLATE.getKey(), true);

    IndexSortConfig sortConfig = indexSettings(builder.build()).getIndexSortConfig();

    assertTrue(sortConfig.hasIndexSort());
    assertThat(sortConfig.sortSpecs.length, equalTo(2));

    // First sort key: the message template id.
    assertThat(sortConfig.sortSpecs[0].field, equalTo("message.template_id"));
    assertThat(sortConfig.sortSpecs[0].order, equalTo(SortOrder.ASC));
    assertThat(sortConfig.sortSpecs[0].missingValue, equalTo("_last"));
    assertThat(sortConfig.sortSpecs[0].mode, equalTo(MultiValueMode.MIN));

    // Second sort key: the timestamp.
    assertThat(sortConfig.sortSpecs[1].field, equalTo("@timestamp"));
    assertThat(sortConfig.sortSpecs[1].order, equalTo(SortOrder.DESC));
    assertThat(sortConfig.sortSpecs[1].missingValue, equalTo("_last"));
    assertThat(sortConfig.sortSpecs[1].mode, equalTo(MultiValueMode.MAX));
}

/**
 * A logsdb index with both the host-name and message-template sort flags enabled must be sorted on
 * {@code host.name}, then {@code message.template_id}, then {@code @timestamp}.
 */
public void testLogsdbIndexSortWithMessageAndHostname() {
    Settings.Builder builder = Settings.builder();
    builder.put(IndexSettings.MODE.getKey(), "logsdb");
    builder.put(IndexSettings.LOGSDB_SORT_ON_HOST_NAME.getKey(), true);
    builder.put(IndexSettings.LOGSDB_SORT_ON_MESSAGE_TEMPLATE.getKey(), true);

    IndexSortConfig sortConfig = indexSettings(builder.build()).getIndexSortConfig();

    assertTrue(sortConfig.hasIndexSort());
    assertThat(sortConfig.sortSpecs.length, equalTo(3));

    // First sort key: the host name.
    assertThat(sortConfig.sortSpecs[0].field, equalTo("host.name"));
    assertThat(sortConfig.sortSpecs[0].order, equalTo(SortOrder.ASC));
    assertThat(sortConfig.sortSpecs[0].missingValue, equalTo("_last"));
    assertThat(sortConfig.sortSpecs[0].mode, equalTo(MultiValueMode.MIN));

    // Second sort key: the message template id.
    assertThat(sortConfig.sortSpecs[1].field, equalTo("message.template_id"));
    assertThat(sortConfig.sortSpecs[1].order, equalTo(SortOrder.ASC));
    assertThat(sortConfig.sortSpecs[1].missingValue, equalTo("_last"));
    assertThat(sortConfig.sortSpecs[1].mode, equalTo(MultiValueMode.MIN));

    // Third sort key: the timestamp.
    assertThat(sortConfig.sortSpecs[2].field, equalTo("@timestamp"));
    assertThat(sortConfig.sortSpecs[2].order, equalTo(SortOrder.DESC));
    assertThat(sortConfig.sortSpecs[2].missingValue, equalTo("_last"));
    assertThat(sortConfig.sortSpecs[2].mode, equalTo(MultiValueMode.MAX));
}

public void testLogsdbIndexSortTimestampOnly() {
Settings settings = Settings.builder()
.put(IndexSettings.MODE.getKey(), "logsdb")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import java.util.Collection;
import java.util.List;
import java.util.function.Consumer;
import java.util.function.UnaryOperator;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasSize;
Expand Down Expand Up @@ -76,6 +77,89 @@ private DocWithId doc(String source) {
return new DocWithId(Integer.toString(id++), source);
}

/**
 * Creates a logsdb data stream with {@code index.logsdb.default_sort_on_message_template} enabled, indexes a
 * shuffled set of documents, and checks that the backing index is sorted by {@code host.name}, then
 * {@code message.template_id}, then {@code @timestamp}.
 *
 * @throws IOException if creating the data stream or reading back the index fails
 */
public void testHostnameMessageTimestampSortConfig() throws IOException {
    final String dataStreamName = "test-logsdb-sort-hostname-message-timestamp";

    // The timestamp field must be spelled "@timestamp" so the mapping matches the field the documents carry.
    final String mapping = """
        {
        "_doc": {
        "properties": {
        "@timestamp": {
        "type": "date"
        },
        "host.name": {
        "type": "keyword"
        },
        "message": {
        "type": "pattern_text"
        },
        "test_id": {
        "type": "text",
        "store": true
        }
        }
        }
        }
        """;

    // Documents are listed in the order the sorted index is expected to hold them: grouped by host name
    // (documents without a host name last), then by message pattern ("bar <n>" / "foo <n>", documents without a
    // message last), and within each group by descending timestamp.
    final DocWithId[] orderedDocs = {
        doc("{\"@timestamp\":\"2025-01-01T13:00:00\",\"host.name\":\"aaa\",\"message\":\"bar 5\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T12:00:00\",\"host.name\":[\"aaa\",\"bbb\"],\"message\":\"bar 7\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T11:00:00\",\"host.name\":\"aaa\",\"message\":\"bar 9\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T13:00:00\",\"host.name\":\"aaa\",\"message\":\"foo 6\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T12:00:00\",\"host.name\":\"aaa\",\"message\":\"foo 1\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T11:00:00\",\"host.name\":[\"aaa\",\"bbb\"],\"message\":\"foo 9\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T13:00:00\",\"host.name\":[\"aaa\",\"bbb\"],\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T12:00:00\",\"host.name\":[\"aaa\",\"bbb\"],\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T11:00:00\",\"host.name\":[\"aaa\",\"bbb\"],\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T13:00:00\",\"host.name\":\"bbb\",\"message\":\"bar 4\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T12:00:00\",\"host.name\":\"bbb\",\"message\":\"bar 5\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T11:00:00\",\"host.name\":\"bbb\",\"message\":\"bar 2\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T13:00:00\",\"host.name\":\"bbb\",\"message\":\"foo 7\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T12:00:00\",\"host.name\":\"bbb\",\"message\":\"foo 3\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T11:00:00\",\"host.name\":\"bbb\",\"message\":\"foo 6\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T13:00:00\",\"host.name\":\"bbb\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T12:00:00\",\"host.name\":\"bbb\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T11:00:00\",\"host.name\":\"bbb\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T13:00:00\",\"message\":\"bar 4\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T12:00:00\",\"message\":\"bar 1\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T11:00:00\",\"message\":\"bar 4\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T13:00:00\",\"message\":\"foo 1\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T12:00:00\",\"message\":\"foo 9\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T11:00:00\",\"message\":\"foo 3\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T13:00:00\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T12:00:00\",\"test_id\": \"%id%\"}"),
        doc("{\"@timestamp\":\"2025-01-01T11:00:00\",\"test_id\": \"%id%\"}") };

    createDataStream(dataStreamName, mapping, b -> b.put("index.logsdb.default_sort_on_message_template", true));

    // Index in random order so the final order can only come from the index sort.
    List<DocWithId> shuffledDocs = shuffledList(Arrays.asList(orderedDocs));
    indexDocuments(dataStreamName, shuffledDocs);

    Index backingIndex = getBackingIndex(dataStreamName);

    // The explicit sort settings are only checked when the node advertises the feature that provides them.
    var featureService = getInstanceFromNode(FeatureService.class);
    if (featureService.getNodeFeatures().containsKey("mapper.provide_index_sort_setting_defaults")) {
        assertSettings(backingIndex, settings -> {
            assertThat(
                IndexSortConfig.INDEX_SORT_FIELD_SETTING.get(settings),
                equalTo(List.of("host.name", "message.template_id", "@timestamp"))
            );
            assertThat(
                IndexSortConfig.INDEX_SORT_ORDER_SETTING.get(settings),
                equalTo(List.of(SortOrder.ASC, SortOrder.ASC, SortOrder.DESC))
            );
            assertThat(
                IndexSortConfig.INDEX_SORT_MODE_SETTING.get(settings),
                equalTo(List.of(MultiValueMode.MIN, MultiValueMode.MIN, MultiValueMode.MAX))
            );
            assertThat(IndexSortConfig.INDEX_SORT_MISSING_SETTING.get(settings), equalTo(List.of("_last", "_last", "_last")));
        });
    }

    assertOrder(backingIndex, orderedDocs);
}

public void testHostnameTimestampSortConfig() throws IOException {
final String dataStreamName = "test-logsdb-sort-hostname-timestamp";

Expand Down Expand Up @@ -181,11 +265,21 @@ public void testTimestampOnlySortConfig() throws IOException {
}

/**
 * Convenience overload: creates the data stream through the three-argument variant, passing a settings
 * customizer that returns the builder unchanged.
 *
 * @param dataStreamName name of the data stream to create
 * @param mapping        mapping source handed to the three-argument overload
 * @throws IOException propagated from the three-argument overload
 */
private void createDataStream(String dataStreamName, String mapping) throws IOException {
    createDataStream(dataStreamName, mapping, builder -> builder);
}

private void createDataStream(String dataStreamName, String mapping, UnaryOperator<Settings.Builder> settings) throws IOException {
var putTemplateRequest = new TransportPutComposableIndexTemplateAction.Request("id");
putTemplateRequest.indexTemplate(
ComposableIndexTemplate.builder()
.indexPatterns(List.of(dataStreamName + "*"))
.template(new Template(indexSettings(1, 0).put("index.mode", "logsdb").build(), new CompressedXContent(mapping), null))
.template(
new Template(
settings.apply(indexSettings(1, 0)).put("index.mode", "logsdb").build(),
new CompressedXContent(mapping),
null
)
)
.dataStreamTemplate(new ComposableIndexTemplate.DataStreamTemplate(false, false))
.build()
);
Expand Down
Loading