Skip to content

Commit d837e2e

Browse files
If patterned_text length exceeds limit put in stored fields (#134606)
SortedSetDocValues can only hold 32kb per item. This puts a length limit on the arguments and template sizes used by patterned_text. To avoid this, if a log is long we store it in a stored field rather than a doc value. If a message is over 8kb, we put it in a stored field. We still parse a prefix of the field to create a template_id for sorting.
1 parent fa9e19e commit d837e2e

File tree

12 files changed

+677
-173
lines changed

12 files changed

+677
-173
lines changed
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.logsdb.patternedtext;
9+
10+
import org.elasticsearch.client.Request;
11+
import org.elasticsearch.client.Response;
12+
import org.elasticsearch.common.settings.Settings;
13+
import org.elasticsearch.common.time.DateFormatter;
14+
import org.elasticsearch.common.time.FormatNames;
15+
import org.elasticsearch.test.cluster.ElasticsearchCluster;
16+
import org.elasticsearch.test.cluster.local.distribution.DistributionType;
17+
import org.elasticsearch.test.rest.ESRestTestCase;
18+
import org.elasticsearch.test.rest.ObjectPath;
19+
import org.hamcrest.Matchers;
20+
import org.junit.ClassRule;
21+
22+
import java.io.IOException;
23+
import java.time.Instant;
24+
import java.util.ArrayList;
25+
import java.util.HashSet;
26+
import java.util.List;
27+
import java.util.Map;
28+
29+
import static org.hamcrest.Matchers.equalTo;
30+
import static org.hamcrest.Matchers.is;
31+
32+
public class PatternedTextBasicRestIT extends ESRestTestCase {
33+
34+
@ClassRule
35+
public static ElasticsearchCluster cluster = ElasticsearchCluster.local()
36+
.distribution(DistributionType.DEFAULT)
37+
.setting("xpack.license.self_generated.type", "trial")
38+
.setting("xpack.security.enabled", "false")
39+
.build();
40+
41+
@Override
42+
protected String getTestRestCluster() {
43+
return cluster.getHttpAddresses();
44+
}
45+
46+
@SuppressWarnings("unchecked")
47+
public void testBulkInsertThenMatchAllSource() throws IOException {
48+
49+
Settings.Builder settings = Settings.builder();
50+
if (randomBoolean()) {
51+
settings.put("index.mode", "logsdb");
52+
}
53+
if (randomBoolean()) {
54+
settings.put("index.mapping.source.mode", "synthetic");
55+
}
56+
57+
String mapping = """
58+
{
59+
"properties": {
60+
"@timestamp": {
61+
"type": "date"
62+
},
63+
"message": {
64+
"type": "patterned_text"
65+
}
66+
}
67+
}
68+
""";
69+
70+
String indexName = "test-index";
71+
createIndex(indexName, settings.build(), mapping);
72+
73+
int numDocs = randomIntBetween(1, 100);
74+
List<String> messages = randomMessages(numDocs);
75+
indexDocs(indexName, messages);
76+
77+
var actualMapping = getIndexMappingAsMap(indexName);
78+
assertThat("patterned_text", equalTo(ObjectPath.evaluate(actualMapping, "properties.message.type")));
79+
80+
Request searchRequest = new Request("GET", "/" + indexName + "/_search");
81+
searchRequest.setJsonEntity("""
82+
{
83+
"query" : { "match_all" : {} },
84+
"size": 100
85+
}
86+
""");
87+
Response getResponse = client().performRequest(searchRequest);
88+
assertThat(getResponse.getStatusLine().getStatusCode(), is(200));
89+
ObjectPath objectPath = ObjectPath.createFromResponse(getResponse);
90+
91+
assertThat(objectPath.evaluate("hits.total.value"), equalTo(messages.size()));
92+
var hits = (ArrayList<Map<String, Object>>) objectPath.evaluate("hits.hits");
93+
var values = new HashSet<>(hits.stream().map(o -> {
94+
var source = (Map<String, Object>) o.get("_source");
95+
return (String) source.get("message");
96+
}).toList());
97+
98+
assertEquals(new HashSet<>(messages), values);
99+
}
100+
101+
private void indexDocs(String indexName, List<String> messages) throws IOException {
102+
var now = Instant.now();
103+
StringBuilder sb = new StringBuilder();
104+
for (int i = 0; i < messages.size(); i++) {
105+
sb.append("{ \"create\": {} }").append('\n');
106+
if (messages.get(i) == null) {
107+
sb.append("""
108+
{"@timestamp": "$now"}
109+
""".replace("$now", formatInstant(now)));
110+
} else {
111+
sb.append("""
112+
{"@timestamp": "$now", "message": "$msg"}
113+
""".replace("$now", formatInstant(now)).replace("$msg", messages.get(i)));
114+
}
115+
sb.append('\n');
116+
now = now.plusSeconds(1);
117+
}
118+
119+
var bulkRequest = new Request("POST", "/" + indexName + "/_bulk");
120+
bulkRequest.setJsonEntity(sb.toString());
121+
bulkRequest.addParameter("refresh", "true");
122+
var bulkResponse = client().performRequest(bulkRequest);
123+
var bulkResponseBody = responseAsMap(bulkResponse);
124+
assertThat(bulkResponseBody, Matchers.hasEntry("errors", false));
125+
}
126+
127+
static String formatInstant(Instant instant) {
128+
return DateFormatter.forPattern(FormatNames.STRICT_DATE_OPTIONAL_TIME.getName()).format(instant);
129+
}
130+
131+
public static List<String> randomMessages(int numDocs) {
132+
List<String> messages = new ArrayList<>();
133+
for (int i = 0; i < numDocs; i++) {
134+
String message = randomFrom(random(), () -> null, () -> randomMessage(10), () -> randomMessage(8 * 1024));
135+
messages.add(message);
136+
}
137+
return messages;
138+
}
139+
140+
static String randomMessage(int minLength) {
141+
StringBuilder sb = new StringBuilder();
142+
while (sb.length() < minLength) {
143+
var token = randomBoolean() ? randomAlphaOfLength(randomIntBetween(1, 10)) : randomInt();
144+
sb.append(token).append(" ");
145+
}
146+
return sb.toString();
147+
}
148+
}

x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextBlockLoader.java

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,10 @@
1414

1515
public class PatternedTextBlockLoader extends BlockDocValuesReader.DocValuesBlockLoader {
1616

17-
private final String templateFieldName;
18-
private final String argsFieldName;
19-
private final String argsInfoFieldName;
20-
21-
PatternedTextBlockLoader(String templateFieldName, String argsFieldName, String argsInfoFieldName) {
22-
this.templateFieldName = templateFieldName;
23-
this.argsFieldName = argsFieldName;
24-
this.argsInfoFieldName = argsInfoFieldName;
17+
private final PatternedTextFieldMapper.DocValuesSupplier docValuesSupplier;
18+
19+
PatternedTextBlockLoader(PatternedTextFieldMapper.DocValuesSupplier docValuesSupplier) {
20+
this.docValuesSupplier = docValuesSupplier;
2521
}
2622

2723
@Override
@@ -31,7 +27,7 @@ public BytesRefBuilder builder(BlockFactory factory, int expectedCount) {
3127

3228
@Override
3329
public AllReader reader(LeafReaderContext context) throws IOException {
34-
var docValues = PatternedTextDocValues.from(context.reader(), templateFieldName, argsFieldName, argsInfoFieldName);
30+
var docValues = docValuesSupplier.get(context.reader());
3531
if (docValues == null) {
3632
return new ConstantNullsReader();
3733
}
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.logsdb.patternedtext;
9+
10+
import org.apache.lucene.index.BinaryDocValues;
11+
import org.apache.lucene.index.DocValues;
12+
import org.apache.lucene.index.LeafReader;
13+
import org.apache.lucene.index.SortedSetDocValues;
14+
import org.apache.lucene.util.BytesRef;
15+
import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader;
16+
import org.elasticsearch.index.fieldvisitor.StoredFieldLoader;
17+
18+
import java.io.IOException;
19+
import java.util.List;
20+
import java.util.Set;
21+
22+
/**
23+
* If there are values which exceed 32kb, they cannot be stored as doc values
24+
* and must be in a stored field. This class combines the doc values with the
25+
* larges values which are in stored fields. Despite being backed by stored
26+
* fields, this class implements a doc value interface.
27+
*/
28+
public final class PatternedTextCompositeValues extends BinaryDocValues {
29+
private final LeafStoredFieldLoader storedTemplateLoader;
30+
private final String storedMessageFieldName;
31+
private final BinaryDocValues patternedTextDocValues;
32+
private final SortedSetDocValues templateIdDocValues;
33+
private boolean hasDocValue = false;
34+
35+
PatternedTextCompositeValues(
36+
LeafStoredFieldLoader storedTemplateLoader,
37+
String storedMessageFieldName,
38+
BinaryDocValues patternedTextDocValues,
39+
SortedSetDocValues templateIdDocValues
40+
) {
41+
this.storedTemplateLoader = storedTemplateLoader;
42+
this.storedMessageFieldName = storedMessageFieldName;
43+
this.patternedTextDocValues = patternedTextDocValues;
44+
this.templateIdDocValues = templateIdDocValues;
45+
}
46+
47+
static PatternedTextCompositeValues from(LeafReader leafReader, PatternedTextFieldType fieldType) throws IOException {
48+
SortedSetDocValues templateIdDocValues = DocValues.getSortedSet(leafReader, fieldType.templateIdFieldName());
49+
if (templateIdDocValues.getValueCount() == 0) {
50+
return null;
51+
}
52+
53+
var docValues = PatternedTextDocValues.from(
54+
leafReader,
55+
fieldType.templateFieldName(),
56+
fieldType.argsFieldName(),
57+
fieldType.argsInfoFieldName()
58+
);
59+
StoredFieldLoader storedFieldLoader = StoredFieldLoader.create(false, Set.of(fieldType.storedNamed()));
60+
LeafStoredFieldLoader storedTemplateLoader = storedFieldLoader.getLoader(leafReader.getContext(), null);
61+
return new PatternedTextCompositeValues(storedTemplateLoader, fieldType.storedNamed(), docValues, templateIdDocValues);
62+
}
63+
64+
public BytesRef binaryValue() throws IOException {
65+
if (hasDocValue) {
66+
return patternedTextDocValues.binaryValue();
67+
}
68+
69+
// If there is no doc value, the value was too large and was put in a stored field
70+
var storedFields = storedTemplateLoader.storedFields();
71+
List<Object> storedValues = storedFields.get(storedMessageFieldName);
72+
assert storedValues != null && storedValues.size() == 1 && storedValues.getFirst() instanceof BytesRef;
73+
return (BytesRef) storedValues.getFirst();
74+
}
75+
76+
public int docID() {
77+
return templateIdDocValues.docID();
78+
}
79+
80+
public boolean advanceExact(int i) throws IOException {
81+
boolean hasValue = templateIdDocValues.advanceExact(i);
82+
hasDocValue = patternedTextDocValues.advanceExact(i);
83+
if (hasValue && hasDocValue == false) {
84+
storedTemplateLoader.advanceTo(i);
85+
}
86+
return hasValue;
87+
}
88+
89+
@Override
90+
public int nextDoc() {
91+
throw new UnsupportedOperationException();
92+
}
93+
94+
@Override
95+
public int advance(int i) {
96+
throw new UnsupportedOperationException();
97+
98+
}
99+
100+
@Override
101+
public long cost() {
102+
return templateIdDocValues.cost() + patternedTextDocValues.cost();
103+
}
104+
}

x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextDocValues.java

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import java.io.IOException;
1717
import java.util.List;
1818

19-
public class PatternedTextDocValues extends BinaryDocValues {
19+
public final class PatternedTextDocValues extends BinaryDocValues {
2020
private final SortedSetDocValues templateDocValues;
2121
private final SortedSetDocValues argsDocValues;
2222
private final SortedSetDocValues argsInfoDocValues;
@@ -30,25 +30,22 @@ public class PatternedTextDocValues extends BinaryDocValues {
3030
static PatternedTextDocValues from(LeafReader leafReader, String templateFieldName, String argsFieldName, String argsInfoFieldName)
3131
throws IOException {
3232
SortedSetDocValues templateDocValues = DocValues.getSortedSet(leafReader, templateFieldName);
33-
if (templateDocValues.getValueCount() == 0) {
34-
return null;
35-
}
36-
3733
SortedSetDocValues argsDocValues = DocValues.getSortedSet(leafReader, argsFieldName);
3834
SortedSetDocValues argsInfoDocValues = DocValues.getSortedSet(leafReader, argsInfoFieldName);
3935
return new PatternedTextDocValues(templateDocValues, argsDocValues, argsInfoDocValues);
4036
}
4137

4238
private String getNextStringValue() throws IOException {
4339
assert templateDocValues.docValueCount() == 1;
40+
assert argsInfoDocValues.docValueCount() == 1;
41+
4442
String template = templateDocValues.lookupOrd(templateDocValues.nextOrd()).utf8ToString();
4543
List<Arg.Info> argsInfo = Arg.decodeInfo(argsInfoDocValues.lookupOrd(argsInfoDocValues.nextOrd()).utf8ToString());
46-
4744
if (argsInfo.isEmpty() == false) {
4845
assert argsDocValues.docValueCount() == 1;
49-
assert argsInfoDocValues.docValueCount() == 1;
5046
var mergedArgs = argsDocValues.lookupOrd(argsDocValues.nextOrd());
5147
var args = Arg.decodeRemainingArgs(mergedArgs.utf8ToString());
48+
assert args.length == argsInfo.size();
5249
return PatternedTextValueProcessor.merge(template, args, argsInfo);
5350
} else {
5451
return template;
@@ -75,22 +72,12 @@ public int docID() {
7572

7673
@Override
7774
public int nextDoc() throws IOException {
78-
int templateNext = templateDocValues.nextDoc();
79-
var argsAdvance = argsDocValues.advance(templateNext);
80-
var argsInfoAdvance = argsInfoDocValues.advance(templateNext);
81-
assert argsAdvance >= templateNext;
82-
assert argsInfoAdvance == templateNext;
83-
return templateNext;
75+
throw new UnsupportedOperationException();
8476
}
8577

8678
@Override
8779
public int advance(int i) throws IOException {
88-
int templateAdvance = templateDocValues.advance(i);
89-
var argsAdvance = argsDocValues.advance(templateAdvance);
90-
var argsInfoAdvance = argsInfoDocValues.advance(templateAdvance);
91-
assert argsAdvance >= templateAdvance;
92-
assert argsInfoAdvance == templateAdvance;
93-
return templateAdvance;
80+
throw new UnsupportedOperationException();
9481
}
9582

9683
@Override

0 commit comments

Comments
 (0)