Skip to content

Commit 2a4c2a6

Browse files
committed
handle large templates
1 parent 227d054 commit 2a4c2a6

File tree

1 file changed

+30
-1
lines changed

1 file changed

+30
-1
lines changed

x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapper.java

Lines changed: 30 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,11 +7,14 @@
77

88
package org.elasticsearch.xpack.logsdb.patternedtext;
99

10+
import org.apache.logging.log4j.LogManager;
11+
import org.apache.logging.log4j.Logger;
1012
import org.apache.lucene.analysis.CharArraySet;
1113
import org.apache.lucene.document.Field;
1214
import org.apache.lucene.document.FieldType;
1315
import org.apache.lucene.document.SortedSetDocValuesField;
1416
import org.apache.lucene.index.IndexOptions;
17+
import org.apache.lucene.index.IndexWriter;
1518
import org.apache.lucene.util.BytesRef;
1619
import org.elasticsearch.analysis.common.PatternAnalyzer;
1720
import org.elasticsearch.common.regex.Regex;
@@ -33,18 +36,23 @@
3336

3437
import java.io.IOException;
3538
import java.util.ArrayList;
39+
import java.util.Arrays;
3640
import java.util.Iterator;
3741
import java.util.List;
3842
import java.util.Map;
3943
import java.util.function.Function;
4044
import java.util.function.Supplier;
4145

46+
import static org.apache.lucene.index.IndexWriter.MAX_TERM_LENGTH;
47+
4248
/**
4349
* A {@link FieldMapper} for full-text log fields that internally splits text into a low cardinality template component
4450
* and high cardinality argument component. Separating these pieces allows the template component to be highly compressed.
4551
*/
4652
public class PatternedTextFieldMapper extends FieldMapper {
4753

54+
private static final Logger logger = LogManager.getLogger(PatternedTextFieldMapper.class);
55+
4856
public static final FeatureFlag PATTERNED_TEXT_MAPPER = new FeatureFlag("patterned_text");
4957
private static final NamedAnalyzer ANALYZER;
5058

@@ -220,12 +228,33 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
220228

221229
// Parse template and args
222230
PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(value);
231+
BytesRef templateBytes = new BytesRef(parts.template());
232+
if (templateBytes.length >= IndexWriter.MAX_TERM_LENGTH) {
233+
logger.error(
234+
"pattern text template is longer than allowed maximum term length.\n Template={}\n Original value:{}",
235+
templateBytes,
236+
value
237+
);
238+
byte[] prefix = new byte[30];
239+
System.arraycopy(templateBytes.bytes, templateBytes.offset, prefix, 0, 30);
240+
String msg = "pattern text template is longer than allowed maximum term length=\""
241+
+ fieldType().name()
242+
+ "\" (whose "
243+
+ "UTF8 encoding is longer than the max length "
244+
+ MAX_TERM_LENGTH
245+
+ "), all of which were "
246+
+ "skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense "
247+
+ "term is: '"
248+
+ Arrays.toString(prefix)
249+
+ "...'";
250+
throw new IllegalArgumentException(msg);
251+
}
223252

224253
// Add index on original value
225254
context.doc().add(new Field(fieldType().name(), value, fieldType));
226255

227256
// Add template doc_values
228-
context.doc().add(new SortedSetDocValuesField(fieldType().templateFieldName(), new BytesRef(parts.template())));
257+
context.doc().add(new SortedSetDocValuesField(fieldType().templateFieldName(), templateBytes));
229258

230259
// Add template_id doc_values
231260
context.doc().add(templateIdMapper.buildKeywordField(new BytesRef(parts.templateId())));

0 commit comments

Comments
 (0)