Skip to content

Commit 1c94bdb

Browse files
committed
store too large log files in stored fields
1 parent cad5776 commit 1c94bdb

File tree

1 file changed

+25
-18
lines changed

1 file changed

+25
-18
lines changed

x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapper.java

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import org.apache.lucene.document.Field;
1414
import org.apache.lucene.document.FieldType;
1515
import org.apache.lucene.document.SortedSetDocValuesField;
16+
import org.apache.lucene.document.StoredField;
1617
import org.apache.lucene.index.IndexOptions;
1718
import org.apache.lucene.index.IndexWriter;
1819
import org.apache.lucene.util.BytesRef;
@@ -36,15 +37,12 @@
3637

3738
import java.io.IOException;
3839
import java.util.ArrayList;
39-
import java.util.Arrays;
4040
import java.util.Iterator;
4141
import java.util.List;
4242
import java.util.Map;
4343
import java.util.function.Function;
4444
import java.util.function.Supplier;
4545

46-
import static org.apache.lucene.index.IndexWriter.MAX_TERM_LENGTH;
47-
4846
/**
4947
* A {@link FieldMapper} for full-text log fields that internally splits text into a low cardinality template component
5048
* and high cardinality argument component. Separating these pieces allows the template component to be highly compressed.
@@ -159,7 +157,15 @@ public PatternedTextFieldMapper build(MapperBuilderContext context) {
159157
indexCreatedVersion,
160158
true
161159
).indexed(false).build(context);
162-
return new PatternedTextFieldMapper(leafName(), fieldType, patternedTextFieldType, builderParams, this, templateIdMapper);
160+
return new PatternedTextFieldMapper(
161+
leafName(),
162+
fieldType,
163+
patternedTextFieldType,
164+
builderParams,
165+
this,
166+
templateIdMapper,
167+
context.isSourceSynthetic()
168+
);
163169
}
164170
}
165171

@@ -172,14 +178,16 @@ public PatternedTextFieldMapper build(MapperBuilderContext context) {
172178
private final int positionIncrementGap;
173179
private final FieldType fieldType;
174180
private final KeywordFieldMapper templateIdMapper;
181+
private final boolean isSourceSynthetic;
175182

176183
private PatternedTextFieldMapper(
177184
String simpleName,
178185
FieldType fieldType,
179186
PatternedTextFieldType mappedFieldType,
180187
BuilderParams builderParams,
181188
Builder builder,
182-
KeywordFieldMapper templateIdMapper
189+
KeywordFieldMapper templateIdMapper,
190+
boolean isSourceSynthetic
183191
) {
184192
super(simpleName, mappedFieldType, builderParams);
185193
assert mappedFieldType.getTextSearchInfo().isTokenized();
@@ -191,6 +199,7 @@ private PatternedTextFieldMapper(
191199
this.indexOptions = builder.indexOptions.getValue();
192200
this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
193201
this.templateIdMapper = templateIdMapper;
202+
this.isSourceSynthetic = isSourceSynthetic;
194203
}
195204

196205
@Override
@@ -232,21 +241,19 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
232241
if (templateBytes.length >= IndexWriter.MAX_TERM_LENGTH) {
233242
logger.error(
234243
"pattern text template is longer than allowed maximum term length.\n Template={}\n Original value={}",
235-
templateBytes,
244+
templateBytes.utf8ToString(),
236245
value
237246
);
238-
byte[] prefix = new byte[30];
239-
System.arraycopy(templateBytes.bytes, templateBytes.offset, prefix, 0, 30);
240-
String msg = "pattern text template is longer than allowed maximum term length=\""
241-
+ fieldType().name()
242-
+ "\" (whose "
243-
+ "UTF8 encoding is longer than the max length "
244-
+ MAX_TERM_LENGTH
245-
+ "), all of which were "
246-
+ "skipped. The prefix of the first immense term is: '"
247-
+ Arrays.toString(prefix)
248-
+ "...'";
249-
throw new IllegalArgumentException(msg);
247+
// Adding the template id may help with compressing the original stored field:
248+
context.doc().add(templateIdMapper.buildKeywordField(new BytesRef(parts.templateId())));
249+
// Even when the template is too large, we can still create an inverted index on the original value:
250+
context.doc().add(new Field(fieldType().name(), value, fieldType));
251+
// The template exceeded the maximum term length, so record this field as ignored:
252+
context.addIgnoredField(fullPath());
253+
if (isSourceSynthetic) {
254+
context.doc().add(new StoredField(fieldType().name() + ".original", value));
255+
}
256+
return;
250257
}
251258

252259
// Add index on original value

0 commit comments

Comments
 (0)