-
Notifications
You must be signed in to change notification settings - Fork 25.6k
Simple version of patterned_text with a single doc value for arguments #129292
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
d0c9a2e
78ef582
6bb95ac
ad728f3
a97bbc4
3a0e349
cda6b0a
30ae746
387757d
03b9fde
f9344bf
e4d4830
fca0f83
f9b030b
4429474
10147ad
4a3ba41
b7450c2
8efac46
cd2f9aa
906ca11
0815760
6856d2d
4b63ed0
fa022e6
8cd70fd
526fef1
39e9d88
eb4212f
e026a5c
a2bc5fa
f0da074
4e0c337
109afd4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,88 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the Elastic License | ||
| * 2.0; you may not use this file except in compliance with the Elastic License | ||
| * 2.0. | ||
| */ | ||
|
|
||
| package org.elasticsearch.xpack.logsdb.patternedtext; | ||
|
|
||
| import org.apache.lucene.index.BinaryDocValues; | ||
| import org.apache.lucene.index.DocValues; | ||
| import org.apache.lucene.index.LeafReader; | ||
| import org.apache.lucene.index.SortedSetDocValues; | ||
| import org.apache.lucene.util.BytesRef; | ||
|
|
||
| import java.io.IOException; | ||
|
|
||
| public class PatternedTextDocValues extends BinaryDocValues { | ||
| private final SortedSetDocValues templateDocValues; | ||
| private final SortedSetDocValues argsDocValues; | ||
|
|
||
| PatternedTextDocValues(SortedSetDocValues templateDocValues, SortedSetDocValues argsDocValues) { | ||
| this.templateDocValues = templateDocValues; | ||
| this.argsDocValues = argsDocValues; | ||
| } | ||
|
|
||
| static PatternedTextDocValues from(LeafReader leafReader, String templateFieldName, String argsFieldName) throws IOException { | ||
| SortedSetDocValues templateDocValues = DocValues.getSortedSet(leafReader, templateFieldName); | ||
| if (templateDocValues.getValueCount() == 0) { | ||
| return null; | ||
| } | ||
|
|
||
| SortedSetDocValues argsDocValues = DocValues.getSortedSet(leafReader, argsFieldName); | ||
| return new PatternedTextDocValues(templateDocValues, argsDocValues); | ||
| } | ||
|
|
||
| private String getNextStringValue() throws IOException { | ||
| assert templateDocValues.docValueCount() == 1; | ||
| String template = templateDocValues.lookupOrd(templateDocValues.nextOrd()).utf8ToString(); | ||
| int argsCount = PatternedTextValueProcessor.countArgs(template); | ||
| if (argsCount > 0) { | ||
| assert argsDocValues.docValueCount() == 1; | ||
| var mergedArgs = argsDocValues.lookupOrd(argsDocValues.nextOrd()); | ||
| var args = PatternedTextValueProcessor.decodeRemainingArgs(mergedArgs.utf8ToString()); | ||
| return PatternedTextValueProcessor.merge(new PatternedTextValueProcessor.Parts(template, args)); | ||
| } else { | ||
| return template; | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
| public BytesRef binaryValue() throws IOException { | ||
| return new BytesRef(getNextStringValue()); | ||
| } | ||
|
|
||
| @Override | ||
| public boolean advanceExact(int i) throws IOException { | ||
| argsDocValues.advanceExact(i); | ||
| // If template has a value, then message has a value. We don't have to check args here, since there may not be args for the doc | ||
| return templateDocValues.advanceExact(i); | ||
| } | ||
|
|
||
| @Override | ||
| public int docID() { | ||
| return templateDocValues.docID(); | ||
| } | ||
|
|
||
| @Override | ||
| public int nextDoc() throws IOException { | ||
| int templateNext = templateDocValues.nextDoc(); | ||
| var argsAdvance = argsDocValues.advance(templateNext); | ||
| assert argsAdvance >= templateNext; | ||
| return templateNext; | ||
| } | ||
|
|
||
| @Override | ||
| public int advance(int i) throws IOException { | ||
| int templateAdvance = templateDocValues.advance(i); | ||
| var argsAdvance = argsDocValues.advance(templateAdvance); | ||
| assert argsAdvance >= templateAdvance; | ||
| return templateAdvance; | ||
| } | ||
|
|
||
| @Override | ||
| public long cost() { | ||
| return templateDocValues.cost() + argsDocValues.cost(); | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,176 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the Elastic License | ||
| * 2.0; you may not use this file except in compliance with the Elastic License | ||
| * 2.0. | ||
| */ | ||
|
|
||
| package org.elasticsearch.xpack.logsdb.patternedtext; | ||
|
|
||
| import org.apache.lucene.document.Field; | ||
| import org.apache.lucene.document.FieldType; | ||
| import org.apache.lucene.document.SortedSetDocValuesField; | ||
| import org.apache.lucene.index.IndexOptions; | ||
| import org.apache.lucene.util.BytesRef; | ||
| import org.elasticsearch.common.util.FeatureFlag; | ||
| import org.elasticsearch.index.IndexVersion; | ||
| import org.elasticsearch.index.analysis.IndexAnalyzers; | ||
| import org.elasticsearch.index.analysis.NamedAnalyzer; | ||
| import org.elasticsearch.index.mapper.CompositeSyntheticFieldLoader; | ||
| import org.elasticsearch.index.mapper.DocumentParserContext; | ||
| import org.elasticsearch.index.mapper.FieldMapper; | ||
| import org.elasticsearch.index.mapper.MapperBuilderContext; | ||
| import org.elasticsearch.index.mapper.TextParams; | ||
| import org.elasticsearch.index.mapper.TextSearchInfo; | ||
|
|
||
| import java.io.IOException; | ||
| import java.util.Map; | ||
|
|
||
| /** | ||
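| * A {@link FieldMapper} for patterned text: the original value is indexed for search, and it is also split into a template and its arguments, which are stored in separate doc values fields. | ||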
| */ | ||
| public class PatternedTextFieldMapper extends FieldMapper { | ||
|
|
||
| public static final FeatureFlag PATTERNED_TEXT_MAPPER = new FeatureFlag("patterned_text"); | ||
|
|
||
| public static class Defaults { | ||
| public static final FieldType FIELD_TYPE; | ||
|
|
||
| static { | ||
| final FieldType ft = new FieldType(); | ||
| ft.setTokenized(true); | ||
| ft.setStored(false); | ||
| ft.setStoreTermVectors(false); | ||
| ft.setOmitNorms(true); | ||
| ft.setIndexOptions(IndexOptions.DOCS); | ||
| FIELD_TYPE = freezeAndDeduplicateFieldType(ft); | ||
| } | ||
| } | ||
|
|
||
| public static class Builder extends FieldMapper.Builder { | ||
|
|
||
| private final IndexVersion indexCreatedVersion; | ||
|
|
||
| private final Parameter<Map<String, String>> meta = Parameter.metaParam(); | ||
|
|
||
| private final TextParams.Analyzers analyzers; | ||
|
|
||
| public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers) { | ||
| super(name); | ||
| this.indexCreatedVersion = indexCreatedVersion; | ||
| this.analyzers = new TextParams.Analyzers( | ||
| indexAnalyzers, | ||
| m -> ((PatternedTextFieldMapper) m).indexAnalyzer, | ||
| m -> ((PatternedTextFieldMapper) m).positionIncrementGap, | ||
| indexCreatedVersion | ||
| ); | ||
| } | ||
|
|
||
| @Override | ||
| protected Parameter<?>[] getParameters() { | ||
| return new Parameter<?>[] { meta }; | ||
| } | ||
|
|
||
| private PatternedTextFieldType buildFieldType(MapperBuilderContext context) { | ||
| NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer(); | ||
| NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer(); | ||
| NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer(); | ||
| TextSearchInfo tsi = new TextSearchInfo(Defaults.FIELD_TYPE, null, searchAnalyzer, searchQuoteAnalyzer); | ||
| return new PatternedTextFieldType( | ||
| context.buildFullName(leafName()), | ||
| tsi, | ||
| indexAnalyzer, | ||
| context.isSourceSynthetic(), | ||
| meta.getValue() | ||
| ); | ||
| } | ||
|
|
||
| @Override | ||
| public PatternedTextFieldMapper build(MapperBuilderContext context) { | ||
| return new PatternedTextFieldMapper(leafName(), buildFieldType(context), builderParams(this, context), this); | ||
| } | ||
| } | ||
|
|
||
| public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers())); | ||
|
|
||
| private final IndexVersion indexCreatedVersion; | ||
| private final IndexAnalyzers indexAnalyzers; | ||
| private final NamedAnalyzer indexAnalyzer; | ||
| private final int positionIncrementGap; | ||
| private final FieldType fieldType; | ||
|
|
||
| private PatternedTextFieldMapper( | ||
| String simpleName, | ||
| PatternedTextFieldType mappedFieldPatternedTextFieldType, | ||
| BuilderParams builderParams, | ||
| Builder builder | ||
| ) { | ||
| super(simpleName, mappedFieldPatternedTextFieldType, builderParams); | ||
| assert mappedFieldPatternedTextFieldType.getTextSearchInfo().isTokenized(); | ||
| assert mappedFieldPatternedTextFieldType.hasDocValues() == false; | ||
| this.fieldType = Defaults.FIELD_TYPE; | ||
| this.indexCreatedVersion = builder.indexCreatedVersion; | ||
| this.indexAnalyzers = builder.analyzers.indexAnalyzers; | ||
| this.indexAnalyzer = builder.analyzers.getIndexAnalyzer(); | ||
| this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue(); | ||
| } | ||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In a follow up we should overwrite the |
||
| @Override | ||
| public Map<String, NamedAnalyzer> indexAnalyzers() { | ||
| return Map.of(mappedFieldType.name(), indexAnalyzer); | ||
| } | ||
|
|
||
| @Override | ||
| public FieldMapper.Builder getMergeBuilder() { | ||
| return new Builder(leafName(), indexCreatedVersion, indexAnalyzers).init(this); | ||
| } | ||
|
|
||
| @Override | ||
| protected void parseCreateField(DocumentParserContext context) throws IOException { | ||
| final String value = context.parser().textOrNull(); | ||
| if (value == null) { | ||
| return; | ||
| } | ||
|
|
||
| var existingValue = context.doc().getField(fieldType().name()); | ||
| if (existingValue != null) { | ||
| throw new IllegalArgumentException("Multiple values are not allowed for field [" + fieldType().name() + "]."); | ||
| } | ||
|
|
||
| // Parse template and args. | ||
| PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(value); | ||
|
|
||
| // Add index on original value | ||
| context.doc().add(new Field(fieldType().name(), value, fieldType)); | ||
|
|
||
| // Add template doc_values | ||
| context.doc().add(new SortedSetDocValuesField(fieldType().templateFieldName(), new BytesRef(parts.template()))); | ||
|
|
||
| // Add args doc_values | ||
| if (parts.args().isEmpty() == false) { | ||
| String remainingArgs = PatternedTextValueProcessor.encodeRemainingArgs(parts); | ||
| context.doc().add(new SortedSetDocValuesField(fieldType().argsFieldName(), new BytesRef(remainingArgs))); | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
| protected String contentType() { | ||
| return PatternedTextFieldType.CONTENT_TYPE; | ||
| } | ||
|
|
||
| @Override | ||
| public PatternedTextFieldType fieldType() { | ||
| return (PatternedTextFieldType) super.fieldType(); | ||
| } | ||
|
|
||
| @Override | ||
| protected SyntheticSourceSupport syntheticSourceSupport() { | ||
| return new SyntheticSourceSupport.Native( | ||
| () -> new CompositeSyntheticFieldLoader( | ||
| leafName(), | ||
| fullPath(), | ||
| new PatternedTextSyntheticFieldLoaderLayer(fieldType().name(), fieldType().templateFieldName(), fieldType().argsFieldName()) | ||
| ) | ||
| ); | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.