diff --git a/benchmarks/build.gradle b/benchmarks/build.gradle index c28e25a817f11..06fcfff932795 100644 --- a/benchmarks/build.gradle +++ b/benchmarks/build.gradle @@ -45,6 +45,7 @@ dependencies { api(project(':x-pack:plugin:esql')) api(project(':x-pack:plugin:esql:compute')) implementation project(path: ':libs:simdvec') + implementation project(':x-pack:plugin:mapper-patterned-text') expression(project(path: ':modules:lang-expression', configuration: 'zip')) painless(project(path: ':modules:lang-painless', configuration: 'zip')) nativeLib(project(':libs:native')) diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/mapper/PatternedTextMapperOperationsBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/mapper/PatternedTextMapperOperationsBenchmark.java new file mode 100644 index 0000000000000..c9f1dd9951046 --- /dev/null +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/mapper/PatternedTextMapperOperationsBenchmark.java @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.benchmark.index.mapper; + +//import org.elasticsearch.xpack.patternedtext.PatternedTextValueProcessor; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +import java.util.concurrent.TimeUnit; + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Fork(value = 1) +@Warmup(iterations = 2) +@Measurement(iterations = 3) +@State(Scope.Benchmark) +public class PatternedTextMapperOperationsBenchmark { + @Param({}) + public String input; + + @Benchmark + public void testIpv4MatchManual(Blackhole blackhole) {} +} diff --git a/modules/mapper-extras/src/main/java/module-info.java b/modules/mapper-extras/src/main/java/module-info.java index f892248133791..8bdda994e3e59 100644 --- a/modules/mapper-extras/src/main/java/module-info.java +++ b/modules/mapper-extras/src/main/java/module-info.java @@ -14,4 +14,6 @@ requires org.apache.lucene.core; requires org.apache.lucene.memory; requires org.apache.lucene.queries; + + exports org.elasticsearch.index.mapper.extras; } diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index 055f6091ac484..ead70c5bfa713 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -169,7 +169,7 @@ public 
MatchOnlyTextFieldType( super(name, true, false, false, tsi, meta); this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer); this.textFieldType = new TextFieldType(name, isSyntheticSource); - this.originalName = isSyntheticSource ? name() + "._original" : null; + this.originalName = isSyntheticSource ? name + "._original" : null; } public MatchOnlyTextFieldType(String name) { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java index 736592083a229..42eb130055797 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java @@ -549,10 +549,10 @@ public String toString() { } } - private static class SingletonOrdinals extends BlockDocValuesReader { + public static class SingletonOrdinals extends BlockDocValuesReader { private final SortedDocValues ordinals; - SingletonOrdinals(SortedDocValues ordinals) { + public SingletonOrdinals(SortedDocValues ordinals) { this.ordinals = ordinals; } @@ -607,10 +607,10 @@ public String toString() { } } - private static class Ordinals extends BlockDocValuesReader { + public static class Ordinals extends BlockDocValuesReader { private final SortedSetDocValues ordinals; - Ordinals(SortedSetDocValues ordinals) { + public Ordinals(SortedSetDocValues ordinals) { this.ordinals = ordinals; } diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java index 54c265deb948d..cf9e8fbf7ded0 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java @@ -124,7 +124,8 @@ private FieldContext contextBuilders( if (fieldNameContainsWildcards) { if 
(fieldType.typeName().equals(TextFieldMapper.CONTENT_TYPE) == false && fieldType.typeName().equals(KeywordFieldMapper.CONTENT_TYPE) == false - && fieldType.typeName().equals("match_only_text") == false) { + && fieldType.typeName().equals("match_only_text") == false + && fieldType.typeName().equals("patterned_text") == false) { continue; } if (highlighter.canHighlight(fieldType) == false) { diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java index 1c4cfa4ec7ff9..aef0cf10174a9 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java @@ -8,14 +8,21 @@ */ package org.elasticsearch.index.mapper; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DocValuesSkipIndexType; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.search.lookup.FieldLookup; @@ -32,6 +39,7 @@ import java.util.HashMap; import java.util.List; import java.util.Set; +import java.util.function.Supplier; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -71,6 +79,24 @@ public static List 
fetchSourceValue(MappedFieldType fieldType, Object sourceV return fetcher.fetchValues(source, -1, new ArrayList<>()); } + public static List fetchDocValues(MappedFieldType fieldType, Supplier documentSupplier) throws IOException { + IndexWriterConfig iwc = new IndexWriterConfig(null); + try (Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc)) { + iw.addDocument(documentSupplier.get()); + try (DirectoryReader reader = iw.getReader()) { + IndexSearcher searcher = newSearcher(reader); + LeafReaderContext context = searcher.getIndexReader().leaves().get(0); + SearchExecutionContext searchExecutionContext = mock(SearchExecutionContext.class); + when(searchExecutionContext.getForField(fieldType, MappedFieldType.FielddataOperation.SEARCH)).thenReturn( + fieldType.fielddataBuilder(null).build(null, null) + ); + ValueFetcher valueFetcher = fieldType.valueFetcher(searchExecutionContext, null); + valueFetcher.setNextReader(context); + return valueFetcher.fetchValues(null, 0, new ArrayList<>()); + } + } + } + public static List fetchSourceValues(MappedFieldType fieldType, Object... values) throws IOException { String field = fieldType.name(); SearchExecutionContext searchExecutionContext = mock(SearchExecutionContext.class); diff --git a/x-pack/plugin/mapper-patterned-text/build.gradle b/x-pack/plugin/mapper-patterned-text/build.gradle new file mode 100644 index 0000000000000..8ed1dac3c1389 --- /dev/null +++ b/x-pack/plugin/mapper-patterned-text/build.gradle @@ -0,0 +1,42 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + + + +apply plugin: 'elasticsearch.internal-es-plugin' +apply plugin: 'elasticsearch.internal-yaml-rest-test' + +esplugin { + name = 'patterned-text' + description = 'Module for the patterned_text field type.' + classname ='org.elasticsearch.xpack.patternedtext.PatternedTextMapperPlugin' + extendedPlugins = ['x-pack-core', 'lang-painless'] +} +base { + archivesName = 'x-pack-patterned-text' +} + +dependencies { + compileOnly project(':modules:lang-painless:spi') + compileOnly project(path: xpackModule('core')) + implementation project(':modules:mapper-extras') +} + +if (buildParams.getSnapshotBuild() == false) { + tasks.named("test").configure { + systemProperty 'es.index_mode_feature_flag_registered', 'true' + } +} diff --git a/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextBlockLoader.java b/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextBlockLoader.java new file mode 100644 index 0000000000000..f673d6c6e9944 --- /dev/null +++ b/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextBlockLoader.java @@ -0,0 +1,64 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.patternedtext; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.elasticsearch.index.mapper.BlockDocValuesReader; + +import java.io.IOException; + +public class PatternedTextBlockLoader extends BlockDocValuesReader.DocValuesBlockLoader { + + private final String name; + private final String templateFieldName; + private final String timestampFieldName; + private final String argsFieldName; + + PatternedTextBlockLoader(String name, String templateFieldName, String timestampFieldName, String argsFieldName) { + this.name = name; + this.templateFieldName = templateFieldName; + this.timestampFieldName = timestampFieldName; + this.argsFieldName = argsFieldName; + } + + @Override + public BytesRefBuilder builder(BlockFactory factory, int expectedCount) { + return factory.bytesRefs(expectedCount); + } + + @Override + public AllReader reader(LeafReaderContext context) throws IOException { + SortedSetDocValues combinedDocValues = ordinals(context); + if (combinedDocValues != null) { + SortedDocValues singleton = DocValues.unwrapSingleton(combinedDocValues); + if (singleton != null) { + return new BlockDocValuesReader.SingletonOrdinals(singleton); + } + return new BlockDocValuesReader.Ordinals(combinedDocValues); + } + return new ConstantNullsReader(); + } + + @Override + public boolean supportsOrdinals() { + return true; + } + + @Override + public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException { + return PatternedTextDocValues.from(context.reader(), templateFieldName, timestampFieldName, argsFieldName); + } + + @Override + public String toString() { + return "BytesRefsFromOrds[" + name + "]"; + } +} diff --git a/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextDocValues.java 
b/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextDocValues.java new file mode 100644 index 0000000000000..1dfa1d8afef8b --- /dev/null +++ b/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextDocValues.java @@ -0,0 +1,118 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.patternedtext; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.util.BytesRef; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class PatternedTextDocValues extends SortedSetDocValues { + private final SortedSetDocValues templateDocValues; + private final SortedSetDocValues argsDocValues; + private final SortedSetDocValues[] optimizedArgsDocValues; + private final SortedNumericDocValues timestampDocValues; + + PatternedTextDocValues( + SortedSetDocValues templateDocValues, + SortedSetDocValues argsDocValues, + SortedSetDocValues[] optimizedArgsDocValues, + SortedNumericDocValues timestampDocValues + ) { + this.templateDocValues = templateDocValues; + this.argsDocValues = argsDocValues; + this.optimizedArgsDocValues = optimizedArgsDocValues; + this.timestampDocValues = timestampDocValues; + } + + static PatternedTextDocValues from(LeafReader leafReader, String templateFieldName, String timestampFieldName, String argsFieldName) + throws IOException { + SortedSetDocValues templateDocValues = DocValues.getSortedSet(leafReader, templateFieldName); + if (templateDocValues.getValueCount() == 0) { + return null; + } + + SortedSetDocValues 
argsDocValues = DocValues.getSortedSet(leafReader, argsFieldName); + SortedSetDocValues[] optimizedArgsDocValues = new SortedSetDocValues[PatternedTextFieldMapper.OPTIMIZED_ARG_COUNT]; + for (int i = 0; i < optimizedArgsDocValues.length; i++) { + optimizedArgsDocValues[i] = DocValues.getSortedSet(leafReader, argsFieldName + "." + i); + } + SortedNumericDocValues timestampDocValues = DocValues.getSortedNumeric(leafReader, timestampFieldName); + return new PatternedTextDocValues(templateDocValues, argsDocValues, optimizedArgsDocValues, timestampDocValues); + } + + @Override + public long nextOrd() throws IOException { + return templateDocValues.nextOrd(); + } + + @Override + public int docValueCount() { + return templateDocValues.docValueCount(); + } + + @Override + public BytesRef lookupOrd(long l) throws IOException { + return new BytesRef(lookupOrdAsString(l)); + } + + String lookupOrdAsString(long l) throws IOException { + String template = templateDocValues.lookupOrd(l).utf8ToString(); + Long timestamp = PatternedTextValueProcessor.hasTimestamp(template) ? 
timestampDocValues.nextValue() : null; + + int argsCount = PatternedTextValueProcessor.countArgs(template); + List args = new ArrayList<>(argsCount); + for (int j = 0; j < Integer.min(argsCount, PatternedTextFieldMapper.OPTIMIZED_ARG_COUNT); j++) { + args.add(optimizedArgsDocValues[j].lookupOrd(argsDocValues.nextOrd()).utf8ToString()); + } + if (argsCount > PatternedTextFieldMapper.OPTIMIZED_ARG_COUNT) { + PatternedTextValueProcessor.addRemainingArgs(args, argsDocValues.lookupOrd(argsDocValues.nextOrd()).utf8ToString()); + } + return PatternedTextValueProcessor.merge(new PatternedTextValueProcessor.Parts(template, timestamp, args, null)); + } + + @Override + public long getValueCount() { + return templateDocValues.getValueCount(); + } + + @Override + public boolean advanceExact(int i) throws IOException { + timestampDocValues.advanceExact(i); + argsDocValues.advanceExact(i); + for (var optimizedArg : optimizedArgsDocValues) { + optimizedArg.advanceExact(i); + } + return templateDocValues.advanceExact(i); + } + + @Override + public int docID() { + return templateDocValues.docID(); + } + + @Override + public int nextDoc() throws IOException { + return templateDocValues.nextDoc(); + } + + @Override + public int advance(int i) throws IOException { + return templateDocValues.advance(i); + } + + @Override + public long cost() { + return templateDocValues.cost() + argsDocValues.cost() + timestampDocValues.cost(); + } +} diff --git a/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextDocValuesField.java b/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextDocValuesField.java new file mode 100644 index 0000000000000..321377769093f --- /dev/null +++ b/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextDocValuesField.java @@ -0,0 +1,17 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.patternedtext; + +import org.elasticsearch.index.fielddata.SortedBinaryDocValues; +import org.elasticsearch.script.field.BaseKeywordDocValuesField; + +public class PatternedTextDocValuesField extends BaseKeywordDocValuesField { + public PatternedTextDocValuesField(SortedBinaryDocValues input, String name) { + super(input, name); + } +} diff --git a/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextFieldMapper.java b/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextFieldMapper.java new file mode 100644 index 0000000000000..a007913e6f0fd --- /dev/null +++ b/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextFieldMapper.java @@ -0,0 +1,200 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.patternedtext; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.analysis.IndexAnalyzers; +import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.mapper.CompositeSyntheticFieldLoader; +import org.elasticsearch.index.mapper.DocumentParserContext; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.MapperBuilderContext; +import org.elasticsearch.index.mapper.TextParams; +import org.elasticsearch.index.mapper.TextSearchInfo; + +import java.io.IOException; +import java.util.Map; + +/** + * A {@link FieldMapper} for the {@code patterned_text} field type, which splits each value into a template plus timestamp and argument sub-fields. + */ +public class PatternedTextFieldMapper extends FieldMapper { + + static final int OPTIMIZED_ARG_COUNT = 0; + + public static class Defaults { + public static final FieldType FIELD_TYPE; + public static final FieldType TEMPLATE_TYPE; + + static { + final FieldType ft = new FieldType(); + ft.setTokenized(true); + ft.setStored(false); + ft.setStoreTermVectors(false); + ft.setOmitNorms(true); + ft.setIndexOptions(IndexOptions.DOCS); + FIELD_TYPE = freezeAndDeduplicateFieldType(ft); + } + + static { + final FieldType ft = new FieldType(); + ft.setTokenized(true); + ft.setStored(false); + ft.setStoreTermVectors(false); + ft.setOmitNorms(true); + ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); + TEMPLATE_TYPE = freezeAndDeduplicateFieldType(ft); + } + + } + + public static class Builder extends FieldMapper.Builder { + + private final IndexVersion indexCreatedVersion; + + private final Parameter> meta = Parameter.metaParam(); + + private final TextParams.Analyzers analyzers;
+ + public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers) { + super(name); + this.indexCreatedVersion = indexCreatedVersion; + this.analyzers = new TextParams.Analyzers( + indexAnalyzers, + m -> ((PatternedTextFieldMapper) m).indexAnalyzer, + m -> ((PatternedTextFieldMapper) m).positionIncrementGap, + indexCreatedVersion + ); + } + + @Override + protected Parameter[] getParameters() { + return new Parameter[] { meta }; + } + + private PatternedTextFieldType buildFieldType(MapperBuilderContext context) { + NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer(); + NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer(); + NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer(); + TextSearchInfo tsi = new TextSearchInfo(Defaults.FIELD_TYPE, null, searchAnalyzer, searchQuoteAnalyzer); + return new PatternedTextFieldType( + context.buildFullName(leafName()), + tsi, + indexAnalyzer, + context.isSourceSynthetic(), + meta.getValue() + ); + } + + @Override + public PatternedTextFieldMapper build(MapperBuilderContext context) { + return new PatternedTextFieldMapper(leafName(), buildFieldType(context), builderParams(this, context), this); + } + } + + public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers())); + + private final IndexVersion indexCreatedVersion; + private final IndexAnalyzers indexAnalyzers; + private final NamedAnalyzer indexAnalyzer; + private final int positionIncrementGap; + private final FieldType fieldType; + private final FieldType templateFieldType; + + private PatternedTextFieldMapper( + String simpleName, + PatternedTextFieldType mappedFieldPatternedTextFieldType, + BuilderParams builderParams, + Builder builder + ) { + super(simpleName, mappedFieldPatternedTextFieldType, builderParams); + assert mappedFieldPatternedTextFieldType.getTextSearchInfo().isTokenized(); + assert 
mappedFieldPatternedTextFieldType.hasDocValues(); + this.fieldType = Defaults.FIELD_TYPE; + this.templateFieldType = Defaults.TEMPLATE_TYPE; + this.indexCreatedVersion = builder.indexCreatedVersion; + this.indexAnalyzers = builder.analyzers.indexAnalyzers; + this.indexAnalyzer = builder.analyzers.getIndexAnalyzer(); + this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue(); + } + + @Override + public Map indexAnalyzers() { + return Map.of(mappedFieldType.name(), indexAnalyzer, fieldType().templateFieldName(), indexAnalyzer); + } + + @Override + public FieldMapper.Builder getMergeBuilder() { + return new Builder(leafName(), indexCreatedVersion, indexAnalyzers).init(this); + } + + @Override + protected void parseCreateField(DocumentParserContext context) throws IOException { + final String value = context.parser().textOrNull(); + if (value == null) { + return; + } + + // Parse template and args. + PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(value); + + // Add template and args index. + context.doc().add(new Field(fieldType().name(), parts.indexed(), fieldType)); + + // Add template docvalues and index. + context.doc().add(new SortedSetDocValuesField(fieldType().templateFieldName(), new BytesRef(parts.template()))); + // todo: calling templateStripped() right after split() seems like a waste, would be better to do it in the split() method + context.doc().add(new Field(fieldType().templateFieldName(), parts.templateStripped(), templateFieldType)); + + // Add timestamp docvalues. + if (parts.timestamp() != null) { + context.doc().add(new SortedNumericDocValuesField(fieldType().timestampFieldName(), parts.timestamp())); + } + + // Add args docvalues. + for (int i = 0; i < Integer.min(parts.args().size(), OPTIMIZED_ARG_COUNT); i++) { + String argFieldname = fieldType().argsFieldName() + "." 
+ i; + context.doc().add(new SortedSetDocValuesField(argFieldname, new BytesRef(parts.args().get(i)))); + } + if (parts.args().size() > OPTIMIZED_ARG_COUNT) { + String remainingArgs = PatternedTextValueProcessor.mergeRemainingArgs(parts, OPTIMIZED_ARG_COUNT); + context.doc().add(new SortedSetDocValuesField(fieldType().argsFieldName(), new BytesRef(remainingArgs))); + } + } + + @Override + protected String contentType() { + return PatternedTextFieldType.CONTENT_TYPE; + } + + @Override + public PatternedTextFieldType fieldType() { + return (PatternedTextFieldType) super.fieldType(); + } + + @Override + protected SyntheticSourceSupport syntheticSourceSupport() { + return new SyntheticSourceSupport.Native( + () -> new CompositeSyntheticFieldLoader( + leafName(), + fullPath(), + new PatternedTextSyntheticFieldLoaderLayer( + fieldType().templateFieldName(), + fieldType().timestampFieldName(), + fieldType().argsFieldName() + ) + ) + ); + } +} diff --git a/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextFieldType.java b/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextFieldType.java new file mode 100644 index 0000000000000..477befb2f6f38 --- /dev/null +++ b/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextFieldType.java @@ -0,0 +1,312 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.patternedtext; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.queries.intervals.Intervals; +import org.apache.lucene.queries.intervals.IntervalsSource; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.FieldExistsQuery; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOFunction; +import org.elasticsearch.common.CheckedIntFunction; +import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.index.fielddata.FieldData; +import org.elasticsearch.index.fielddata.FieldDataContext; +import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.index.fielddata.IndexNumericFieldData; +import org.elasticsearch.index.fielddata.plain.SortedNumericIndexFieldData; +import org.elasticsearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData; +import org.elasticsearch.index.mapper.BlockLoader; +import org.elasticsearch.index.mapper.DocValueFetcher; +import org.elasticsearch.index.mapper.DynamicFieldType; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.StringFieldType; +import org.elasticsearch.index.mapper.TextFieldMapper; +import org.elasticsearch.index.mapper.TextSearchInfo; +import org.elasticsearch.index.mapper.ValueFetcher; +import 
org.elasticsearch.index.mapper.extras.SourceConfirmedTextQuery; +import org.elasticsearch.index.mapper.extras.SourceIntervalsSource; +import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.script.field.DateMillisDocValuesField; +import org.elasticsearch.script.field.KeywordDocValuesField; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import static org.elasticsearch.search.aggregations.support.CoreValuesSourceType.KEYWORD; + +class PatternedTextFieldType extends StringFieldType implements DynamicFieldType { + + private static final String TEMPLATE_SUFFIX = ".template"; + private static final String ARGS_SUFFIX = ".args"; + private static final String TIMESTAMP_SUFFIX = ".ts"; + + static final String CONTENT_TYPE = "patterned_text"; + + private final Analyzer indexAnalyzer; + private final TextFieldMapper.TextFieldType textFieldType; + private final TextFieldMapper.TextFieldType templateFieldType; + + PatternedTextFieldType(String name, TextSearchInfo tsi, Analyzer indexAnalyzer, boolean isSyntheticSource, Map meta) { + super(name, true, false, true, tsi, meta); + this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer); + this.textFieldType = new TextFieldMapper.TextFieldType(name, isSyntheticSource); + this.templateFieldType = new TextFieldMapper.TextFieldType(name + TEMPLATE_SUFFIX, isSyntheticSource); + this.templateFieldType.setFielddata(true); // for aggregations + } + + PatternedTextFieldType(String name) { + this( + name, + new TextSearchInfo(PatternedTextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER), + Lucene.STANDARD_ANALYZER, + false, + Collections.emptyMap() + ); + } + + @Override + public MappedFieldType getChildFieldType(String path) { + return templateFieldType; + } + + @Override + public String typeName() { + return 
CONTENT_TYPE; + } + + @Override + public String familyTypeName() { + return TextFieldMapper.CONTENT_TYPE; + } + + @Override + public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { + return new DocValueFetcher(docValueFormat(format, null), context.getForField(this, FielddataOperation.SEARCH)); + } + + private IOFunction, IOException>> getValueFetcherProvider( + SearchExecutionContext searchExecutionContext + ) { + return context -> { + ValueFetcher valueFetcher = valueFetcher(searchExecutionContext, null); + valueFetcher.setNextReader(context); + return docID -> { + try { + return valueFetcher.fetchValues(null, docID, new ArrayList<>()); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }; + }; + } + + private Query combinedQuery(Query query, Query templateQuery, SearchExecutionContext context) { + // Disable scoring + return new ConstantScoreQuery( + // TODO: skip SourceConfirmedTextQuery when the templateQuery has matches. + new BooleanQuery.Builder().add( + new SourceConfirmedTextQuery(query, getValueFetcherProvider(context), indexAnalyzer), + BooleanClause.Occur.SHOULD + ).add(templateQuery, BooleanClause.Occur.SHOULD).build() + ); + } + + private IntervalsSource toIntervalsSource(IntervalsSource source, Query approximation, SearchExecutionContext searchExecutionContext) { + return new SourceIntervalsSource(source, approximation, getValueFetcherProvider(searchExecutionContext), indexAnalyzer); + } + + @Override + public Query termQuery(Object query, SearchExecutionContext context) { + // Disable scoring + return new ConstantScoreQuery(super.termQuery(query, context)); + } + + @Override + public Query fuzzyQuery( + Object value, + Fuzziness fuzziness, + int prefixLength, + int maxExpansions, + boolean transpositions, + SearchExecutionContext context, + MultiTermQuery.RewriteMethod rewriteMethod + ) { + // Disable scoring + return new ConstantScoreQuery( + super.fuzzyQuery(value, fuzziness, prefixLength, 
maxExpansions, transpositions, context, rewriteMethod) + ); + } + + @Override + public Query existsQuery(SearchExecutionContext context) { + return new FieldExistsQuery(templateFieldName()); + } + + @Override + public IntervalsSource termIntervals(BytesRef term, SearchExecutionContext context) { + return toIntervalsSource(Intervals.term(term), new TermQuery(new Term(name(), term)), context); + } + + @Override + public IntervalsSource prefixIntervals(BytesRef term, SearchExecutionContext context) { + return toIntervalsSource( + Intervals.prefix(term, IndexSearcher.getMaxClauseCount()), + new PrefixQuery(new Term(name(), term)), + context + ); + } + + @Override + public IntervalsSource fuzzyIntervals( + String term, + int maxDistance, + int prefixLength, + boolean transpositions, + SearchExecutionContext context + ) { + FuzzyQuery fuzzyQuery = new FuzzyQuery( + new Term(name(), term), + maxDistance, + prefixLength, + IndexSearcher.getMaxClauseCount(), + transpositions, + MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE + ); + IntervalsSource fuzzyIntervals = Intervals.multiterm(fuzzyQuery.getAutomata(), IndexSearcher.getMaxClauseCount(), term); + return toIntervalsSource(fuzzyIntervals, fuzzyQuery, context); + } + + @Override + public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContext context) { + return toIntervalsSource( + Intervals.wildcard(pattern, IndexSearcher.getMaxClauseCount()), + new MatchAllDocsQuery(), // wildcard queries can be expensive, what should the approximation be? + context + ); + } + + @Override + public IntervalsSource regexpIntervals(BytesRef pattern, SearchExecutionContext context) { + return toIntervalsSource( + Intervals.regexp(pattern, IndexSearcher.getMaxClauseCount()), + new MatchAllDocsQuery(), // regexp queries can be expensive, what should the approximation be? 
+ context + ); + } + + @Override + public IntervalsSource rangeIntervals( + BytesRef lowerTerm, + BytesRef upperTerm, + boolean includeLower, + boolean includeUpper, + SearchExecutionContext context + ) { + return toIntervalsSource( + Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper, IndexSearcher.getMaxClauseCount()), + new MatchAllDocsQuery(), // range queries can be expensive, what should the approximation be? + context + ); + } + + @Override + public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, SearchExecutionContext queryShardContext) + throws IOException { + final Query textQuery = textFieldType.phraseQuery(stream, slop, enablePosIncrements, queryShardContext); + final Query templateQuery = templateFieldType.phraseQuery(stream, slop, enablePosIncrements, queryShardContext); + return combinedQuery(textQuery, templateQuery, queryShardContext); + } + + @Override + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, SearchExecutionContext queryShardContext) + throws IOException { + final Query textQuery = textFieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, queryShardContext); + final Query templateQuery = templateFieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, queryShardContext); + return combinedQuery(textQuery, templateQuery, queryShardContext); + } + + @Override + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, SearchExecutionContext queryShardContext) + throws IOException { + final Query textQuery = textFieldType.phrasePrefixQuery(stream, slop, maxExpansions, queryShardContext); + final Query templateQuery = templateFieldType.phrasePrefixQuery(stream, slop, maxExpansions, queryShardContext); + return combinedQuery(textQuery, templateQuery, queryShardContext); + } + + @Override + public BlockLoader blockLoader(BlockLoaderContext blContext) { + return new PatternedTextBlockLoader(name(), 
templateFieldName(), timestampFieldName(), argsFieldName()); + } + + @Override + public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) { + var templateDataBuilder = new SortedSetOrdinalsIndexFieldData.Builder( + templateFieldName(), + KEYWORD, + (dv, n) -> new KeywordDocValuesField(FieldData.toString(dv), n) + ); + var argsDataBuilder = new SortedSetOrdinalsIndexFieldData.Builder( + argsFieldName(), + KEYWORD, + (dv, n) -> new KeywordDocValuesField(FieldData.toString(dv), n) + ); + var optimizedArgsDataBuilder = new SortedSetOrdinalsIndexFieldData.Builder[PatternedTextFieldMapper.OPTIMIZED_ARG_COUNT]; + for (int i = 0; i < optimizedArgsDataBuilder.length; i++) { + optimizedArgsDataBuilder[i] = new SortedSetOrdinalsIndexFieldData.Builder( + argsFieldName() + "." + i, + KEYWORD, + (dv, n) -> new KeywordDocValuesField(FieldData.toString(dv), n) + ); + } + var timestampDataBuilder = new SortedNumericIndexFieldData.Builder( + timestampFieldName(), + IndexNumericFieldData.NumericType.LONG, + DateMillisDocValuesField::new, + false + ); + return new PatternedTextIndexFieldData.Builder( + name(), + templateDataBuilder, + argsDataBuilder, + optimizedArgsDataBuilder, + timestampDataBuilder + ); + } + + String templateFieldName() { + return name() + TEMPLATE_SUFFIX; + } + + String argsFieldName() { + return name() + ARGS_SUFFIX; + } + + String timestampFieldName() { + return name() + TIMESTAMP_SUFFIX; + } +} diff --git a/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextIndexFieldData.java b/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextIndexFieldData.java new file mode 100644 index 0000000000000..f7b276402ad77 --- /dev/null +++ b/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextIndexFieldData.java @@ -0,0 +1,156 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.patternedtext; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.search.SortField; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.index.fielddata.FieldData; +import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.index.fielddata.IndexFieldDataCache; +import org.elasticsearch.index.fielddata.LeafNumericFieldData; +import org.elasticsearch.index.fielddata.LeafOrdinalsFieldData; +import org.elasticsearch.index.fielddata.plain.AbstractIndexOrdinalsFieldData; +import org.elasticsearch.index.fielddata.plain.AbstractLeafOrdinalsFieldData; +import org.elasticsearch.index.fielddata.plain.SortedNumericIndexFieldData; +import org.elasticsearch.index.fielddata.plain.SortedSetOrdinalsIndexFieldData; +import org.elasticsearch.indices.breaker.CircuitBreakerService; +import org.elasticsearch.script.field.KeywordDocValuesField; +import org.elasticsearch.script.field.ToScriptFieldFactory; +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.MultiValueMode; +import org.elasticsearch.search.sort.BucketedSort; +import org.elasticsearch.search.sort.SortOrder; + +import static org.elasticsearch.search.aggregations.support.CoreValuesSourceType.KEYWORD; + +public class PatternedTextIndexFieldData extends AbstractIndexOrdinalsFieldData { + + final SortedSetOrdinalsIndexFieldData templateFieldData; + final SortedSetOrdinalsIndexFieldData argsFieldData; + final SortedSetOrdinalsIndexFieldData[] optimizedArgsFieldData; + final SortedNumericIndexFieldData timestampFieldData; + + static class Builder implements IndexFieldData.Builder { + + final String name; + 
final SortedSetOrdinalsIndexFieldData.Builder templateFieldDataBuilder; + final SortedSetOrdinalsIndexFieldData.Builder argsFieldDataBuilder; + final SortedSetOrdinalsIndexFieldData.Builder[] optimizedArgsFieldDataBuilder; + final SortedNumericIndexFieldData.Builder timestampFieldDataBuilder; + + Builder( + String name, + SortedSetOrdinalsIndexFieldData.Builder templateFieldData, + SortedSetOrdinalsIndexFieldData.Builder argsFieldData, + SortedSetOrdinalsIndexFieldData.Builder[] optimizedArgsFieldData, + SortedNumericIndexFieldData.Builder timestampFieldData + ) { + this.name = name; + this.templateFieldDataBuilder = templateFieldData; + this.argsFieldDataBuilder = argsFieldData; + this.optimizedArgsFieldDataBuilder = optimizedArgsFieldData; + this.timestampFieldDataBuilder = timestampFieldData; + } + + public PatternedTextIndexFieldData build(IndexFieldDataCache cache, CircuitBreakerService breakerService) { + SortedSetOrdinalsIndexFieldData templateFieldData = templateFieldDataBuilder.build(cache, breakerService); + SortedSetOrdinalsIndexFieldData argsFieldData = argsFieldDataBuilder.build(cache, breakerService); + SortedSetOrdinalsIndexFieldData[] optimizedArgsFieldData = + new SortedSetOrdinalsIndexFieldData[optimizedArgsFieldDataBuilder.length]; + for (int i = 0; i < optimizedArgsFieldData.length; i++) { + optimizedArgsFieldData[i] = optimizedArgsFieldDataBuilder[i].build(cache, breakerService); + } + SortedNumericIndexFieldData timestampFieldData = timestampFieldDataBuilder.build(cache, breakerService); + ToScriptFieldFactory factory = (dv, n) -> new KeywordDocValuesField(FieldData.toString(dv), n); + return new PatternedTextIndexFieldData( + name, + cache, + breakerService, + factory, + templateFieldData, + argsFieldData, + optimizedArgsFieldData, + timestampFieldData + ); + } + } + + PatternedTextIndexFieldData( + String name, + IndexFieldDataCache cache, + CircuitBreakerService breakerService, + ToScriptFieldFactory toScriptFieldFactory, + 
SortedSetOrdinalsIndexFieldData templateFieldData, + SortedSetOrdinalsIndexFieldData argsFieldData, + SortedSetOrdinalsIndexFieldData[] optimizedArgsFieldData, + SortedNumericIndexFieldData timestampFieldData + ) { + super(name, KEYWORD, cache, breakerService, toScriptFieldFactory); + this.templateFieldData = templateFieldData; + this.argsFieldData = argsFieldData; + this.optimizedArgsFieldData = optimizedArgsFieldData; + this.timestampFieldData = timestampFieldData; + } + + @Override + public LeafOrdinalsFieldData load(LeafReaderContext context) { + return loadDirect(context); + } + + @Override + public LeafOrdinalsFieldData loadDirect(LeafReaderContext context) { + LeafOrdinalsFieldData leafTemplateFieldData = templateFieldData.loadDirect(context); + LeafOrdinalsFieldData leafArgsFieldData = argsFieldData.loadDirect(context); + LeafOrdinalsFieldData[] leafOptimizedArgsFieldData = new LeafOrdinalsFieldData[optimizedArgsFieldData.length]; + LeafNumericFieldData leafTimestampFieldData = timestampFieldData.loadDirect(context); + for (int i = 0; i < leafOptimizedArgsFieldData.length; i++) { + leafOptimizedArgsFieldData[i] = optimizedArgsFieldData[i].loadDirect(context); + } + + return new AbstractLeafOrdinalsFieldData(toScriptFieldFactory) { + @Override + public SortedSetDocValues getOrdinalsValues() { + SortedSetDocValues templateDocValues = leafTemplateFieldData.getOrdinalsValues(); + SortedSetDocValues argsDocValues = leafArgsFieldData.getOrdinalsValues(); + SortedSetDocValues[] optimizedArgsDocValues = new SortedSetDocValues[PatternedTextFieldMapper.OPTIMIZED_ARG_COUNT]; + SortedNumericDocValues timestampDocValues = leafTimestampFieldData.getLongValues(); + for (int i = 0; i < optimizedArgsDocValues.length; i++) { + optimizedArgsDocValues[i] = leafOptimizedArgsFieldData[i].getOrdinalsValues(); + } + return new PatternedTextDocValues(templateDocValues, argsDocValues, optimizedArgsDocValues, timestampDocValues); + } + + @Override + public long ramBytesUsed() { + 
return 0; // unknown + } + }; + } + + @Override + public SortField sortField(Object missingValue, MultiValueMode sortMode, XFieldComparatorSource.Nested nested, boolean reverse) { + return templateFieldData.sortField(missingValue, sortMode, nested, reverse); + } + + @Override + public BucketedSort newBucketedSort( + BigArrays bigArrays, + Object missingValue, + MultiValueMode sortMode, + XFieldComparatorSource.Nested nested, + SortOrder sortOrder, + DocValueFormat format, + int bucketSize, + BucketedSort.ExtraData extra + ) { + throw new IllegalArgumentException("only supported on numeric fields"); + } +} diff --git a/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextMapperPlugin.java b/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextMapperPlugin.java new file mode 100644 index 0000000000000..f4b5acb85289a --- /dev/null +++ b/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextMapperPlugin.java @@ -0,0 +1,24 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.patternedtext; + +import org.elasticsearch.index.mapper.Mapper; +import org.elasticsearch.plugins.MapperPlugin; +import org.elasticsearch.plugins.Plugin; + +import java.util.Map; + +import static java.util.Collections.singletonMap; + +public class PatternedTextMapperPlugin extends Plugin implements MapperPlugin { + @Override + public Map getMappers() { + return singletonMap(PatternedTextFieldType.CONTENT_TYPE, PatternedTextFieldMapper.PARSER); + } + +} diff --git a/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextPainlessExtension.java b/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextPainlessExtension.java new file mode 100644 index 0000000000000..3fe1d9cc80008 --- /dev/null +++ b/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextPainlessExtension.java @@ -0,0 +1,37 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.patternedtext; + +import org.elasticsearch.common.util.Maps; +import org.elasticsearch.painless.spi.PainlessExtension; +import org.elasticsearch.painless.spi.Whitelist; +import org.elasticsearch.painless.spi.WhitelistLoader; +import org.elasticsearch.script.ScriptContext; + +import java.util.List; +import java.util.Map; + +import static java.util.Collections.singletonList; +import static org.elasticsearch.script.ScriptModule.CORE_CONTEXTS; + +public class PatternedTextPainlessExtension implements PainlessExtension { + private static final Whitelist WHITELIST = WhitelistLoader.loadFromResourceFiles( + PatternedTextPainlessExtension.class, + "org.elasticsearch.xpack.patternedtext.txt" + ); + + @Override + public Map, List> getContextWhitelists() { + List whitelist = singletonList(WHITELIST); + Map, List> contextWhitelists = Maps.newMapWithExpectedSize(CORE_CONTEXTS.size()); + for (ScriptContext scriptContext : CORE_CONTEXTS.values()) { + contextWhitelists.put(scriptContext, whitelist); + } + return contextWhitelists; + } +} diff --git a/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextSyntheticFieldLoaderLayer.java b/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextSyntheticFieldLoaderLayer.java new file mode 100644 index 0000000000000..a4d66da8613a2 --- /dev/null +++ b/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextSyntheticFieldLoaderLayer.java @@ -0,0 +1,82 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.patternedtext; + +import org.apache.lucene.index.LeafReader; +import org.elasticsearch.index.mapper.CompositeSyntheticFieldLoader; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; + +class PatternedTextSyntheticFieldLoaderLayer implements CompositeSyntheticFieldLoader.DocValuesLayer { + + private final String templateFieldName; + private final String timestampFieldName; + private final String argsFieldName; + + PatternedTextSyntheticFieldLoaderLayer(String templateFieldName, String timestampFieldName, String argsFieldName) { + this.templateFieldName = templateFieldName; + this.timestampFieldName = timestampFieldName; + this.argsFieldName = argsFieldName; + } + + private PatternedTextSyntheticFieldLoader loader; + + @Override + public long valueCount() { + return loader != null ? loader.count() : 0; + } + + @Override + public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException { + var docValues = PatternedTextDocValues.from(leafReader, templateFieldName, timestampFieldName, argsFieldName); + if (docValues == null || docValues.getValueCount() == 0) { + return null; + } + loader = new PatternedTextSyntheticFieldLoader(docValues); + return loader; + } + + @Override + public boolean hasValue() { + return loader != null && loader.count() > 0; + } + + @Override + public void write(XContentBuilder b) throws IOException { + if (loader != null) { + loader.write(b); + } + } + + @Override + public String fieldName() { + return ""; + } + + private record PatternedTextSyntheticFieldLoader(PatternedTextDocValues docValues) implements DocValuesLoader { + + @Override + public boolean advanceToDoc(int docId) throws IOException { + return docValues.advanceExact(docId); + } + + public int count() { + return docValues.docValueCount(); + } + + public void write(XContentBuilder b) throws IOException { + if (docValues.getValueCount() == 0) { + return; + } + for (int i = 0; i 
< count(); i++) { + b.value(docValues.lookupOrdAsString(docValues.nextOrd())); + } + } + } +} diff --git a/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextValueProcessor.java b/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextValueProcessor.java new file mode 100644 index 0000000000000..80aea408be789 --- /dev/null +++ b/x-pack/plugin/mapper-patterned-text/src/main/java/org/elasticsearch/xpack/patternedtext/PatternedTextValueProcessor.java @@ -0,0 +1,289 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.patternedtext; + +import org.elasticsearch.common.util.ByteUtils; +import org.elasticsearch.index.mapper.DateFieldMapper; + +import java.util.ArrayList; +import java.util.Base64; +import java.util.Collections; +import java.util.List; +import java.util.UUID; +import java.util.regex.Pattern; + +public class PatternedTextValueProcessor { + private static final String TEXT_ARG_PLACEHOLDER = "%W"; + private static final String DATE_ARG_PLACEHOLDER = "%D"; + private static final String IP_ARG_PLACEHOLDER = "%I"; + private static final String UUID_ARG_PLACEHOLDER = "%U"; + private static final String TIMESTAMP_PLACEHOLDER = "%T"; + private static final String DELIMITER = "[\\s\\[\\]]"; + private static final String SPACE = " "; + + // 2021-04-13T13:51:38.000Z + private static final Pattern timestampPattern = Pattern.compile( + "^(\\d{4})[-/](\\d{2})[-/](\\d{2})[T ](\\d{2}):(\\d{2}):(\\d{2})(\\.(\\d{3})Z?)?[ ]?([\\+\\-]\\d{2}([:]?\\d{2})?)?$" + ); + + record Parts(String template, Long timestamp, List args, String indexed) { + String templateStripped() { + List stripped = new ArrayList<>(); + String[] parts = 
template.split(SPACE); + for (String part : parts) { + if (part.startsWith("%") == false) { + stripped.add(part); + } + } + return String.join(SPACE, stripped); + } + + } + + static Parts split(String text) { + StringBuilder template = new StringBuilder(); + StringBuilder indexed = new StringBuilder(); + Long timestamp = null; + List args = new ArrayList<>(); + byte[] ipv4Bytes = new byte[4]; + byte[] uuidBytes = new byte[16]; + String[] tokens = text.split(DELIMITER); + int textIndex = 0; + for (int i = 0; i < tokens.length; i++) { + String token = tokens[i]; + if (token.isEmpty()) { + if (textIndex < text.length() - 1) { + template.append(text.charAt(textIndex++)); + } + continue; + } + if (isTimestamp(tokens[i])) { + long millis = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis(token); + if (timestamp == null) { + timestamp = millis; + template.append(TIMESTAMP_PLACEHOLDER); + } else { + byte[] millisBytes = new byte[8]; + ByteUtils.writeLongLE(millis, millisBytes, 0); + String encoded = Base64.getEncoder().withoutPadding().encodeToString(millisBytes); + args.add(encoded); + template.append(DATE_ARG_PLACEHOLDER); + indexed.append(encoded).append(SPACE); + } + } else if (i < tokens.length - 1 + && token.length() == 10 + && tokens[i + 1].length() >= 8 + && tokens[i + 1].length() < 16 + && isTimestamp(tokens[i] + SPACE + tokens[i + 1])) { + String combined = tokens[i].replace("/", "-") + 'T' + tokens[i + 1]; + long millis = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis(combined); + if (timestamp == null) { + timestamp = millis; + template.append(TIMESTAMP_PLACEHOLDER); + textIndex += tokens[i + 1].length() + 1; + i++; + } else { + byte[] millisBytes = new byte[8]; + ByteUtils.writeLongLE(millis, millisBytes, 0); + String encoded = Base64.getEncoder().withoutPadding().encodeToString(millisBytes); + args.add(encoded); + template.append(DATE_ARG_PLACEHOLDER); + indexed.append(encoded).append(SPACE); + textIndex += tokens[i + 1].length() + 1; + i++; + 
if (i < tokens.length - 1 && tokens[i + 1].equals("+0000")) { + textIndex += tokens[i + 1].length() + 1; + i++; + } + } + } else if (isIpv4(token, ipv4Bytes)) { + String encoded = Base64.getEncoder().withoutPadding().encodeToString(ipv4Bytes); + args.add(encoded); + template.append(IP_ARG_PLACEHOLDER); + indexed.append(encoded).append(SPACE); + } else if (isUUID(token, uuidBytes)) { + String encoded = Base64.getEncoder().withoutPadding().encodeToString(uuidBytes); + args.add(encoded); + template.append(UUID_ARG_PLACEHOLDER); + indexed.append(encoded).append(SPACE); + } else if (isArg(token)) { + args.add(token); + template.append(TEXT_ARG_PLACEHOLDER); + indexed.append(token).append(SPACE); + } else { + template.append(token); + indexed.append(token).append(SPACE); + } + textIndex += token.length(); + if (textIndex < text.length()) { + template.append(text.charAt(textIndex++)); + } + } + while (textIndex < text.length()) { + template.append(text.charAt(textIndex++)); + } + return new Parts(template.toString(), timestamp, args, indexed.toString().trim()); + } + + private static boolean isTimestamp(String text) { + return timestampPattern.matcher(text).matches(); + } + + /** + * Checks if the given text is a valid IPv4 address and fills the provided byte array with the corresponding bytes. + * If the text is not a valid IPv4 address, it returns false and the byte array's content is undefined and should not be used. 
+ * @param text the text to check + * @param bytes the byte array to fill with the parsed IPv4 bytes + * @return true if the text is a valid IPv4 address, false otherwise + */ + static boolean isIpv4(String text, byte[] bytes) { + if (text.length() < 7 || text.length() > 15) { + return false; + } + int octetIndex = 0; + int octetValue = 0; + for (int i = 0; i < text.length(); i++) { + char c = text.charAt(i); + if (c == '.') { + if (octetIndex == 3) { + return false; + } + bytes[octetIndex] = (byte) octetValue; + octetValue = 0; + octetIndex++; + } else if ('0' <= c && c <= '9') { + // Character.isDigit(c) is invalid for IPs and inconsistent with the calculation of the numeric value of the character + octetValue = octetValue * 10 + c - '0'; + if (octetValue > 255) { + return false; + } + } else { + return false; + } + } + if (octetIndex != 3) { + return false; + } + bytes[octetIndex] = (byte) octetValue; + return true; + } + + private static String toIPv4(byte[] bytes) { + assert bytes.length == 4 : bytes.length; + return Byte.toUnsignedInt(bytes[0]) + + "." + + Byte.toUnsignedInt(bytes[1]) + + "." + + Byte.toUnsignedInt(bytes[2]) + + "." + + Byte.toUnsignedInt(bytes[3]); + } + + static boolean isUUID(String text, byte[] bytes) { + assert bytes.length == 16 : bytes.length; + if (text.length() == 36 && text.charAt(8) == '-' && text.charAt(13) == '-' && text.charAt(18) == '-' && text.charAt(23) == '-') { + UUID uuid; + try { + uuid = UUID.fromString(text); + } catch (IllegalArgumentException e) { + // false positive in the enclosing if statement - should be very rare. Just ignore it. 
+ return false; + } + ByteUtils.writeLongLE(uuid.getMostSignificantBits(), bytes, 0); + ByteUtils.writeLongLE(uuid.getLeastSignificantBits(), bytes, 8); + return true; + } + return false; + } + + private static String toUUID(byte[] bytes) { + assert bytes.length == 16 : bytes.length; + UUID uuid = new UUID(ByteUtils.readLongLE(bytes, 0), ByteUtils.readLongLE(bytes, 8)); + return uuid.toString(); + } + + private static boolean isArg(String text) { + for (int i = 0; i < text.length(); i++) { + if (Character.isDigit(text.charAt(i))) { + return true; + } + } + return false; + } + + static String merge(Parts parts) { + StringBuilder builder = new StringBuilder(); + String[] templateParts = parts.template.split(DELIMITER); + int i = 0; + int templateIndex = 0; + for (String part : templateParts) { + if (part.equals(TEXT_ARG_PLACEHOLDER)) { + builder.append(parts.args.get(i++)); + templateIndex += TEXT_ARG_PLACEHOLDER.length(); + } else if (part.equals(TIMESTAMP_PLACEHOLDER)) { + assert parts.timestamp != null; + builder.append(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.formatMillis(parts.timestamp)); + templateIndex += TIMESTAMP_PLACEHOLDER.length(); + } else if (part.equals(DATE_ARG_PLACEHOLDER)) { + var bytes = Base64.getDecoder().decode(parts.args.get(i++)); + builder.append(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.formatMillis(ByteUtils.readLongLE(bytes, 0))); + templateIndex += DATE_ARG_PLACEHOLDER.length(); + } else if (part.equals(IP_ARG_PLACEHOLDER)) { + var bytes = Base64.getDecoder().decode(parts.args.get(i++)); + builder.append(toIPv4(bytes)); + templateIndex += IP_ARG_PLACEHOLDER.length(); + } else if (part.equals(UUID_ARG_PLACEHOLDER)) { + var bytes = Base64.getDecoder().decode(parts.args.get(i++)); + builder.append(toUUID(bytes)); + templateIndex += UUID_ARG_PLACEHOLDER.length(); + } else if (part.isEmpty() == false) { + builder.append(part); + templateIndex += part.length(); + } + if (templateIndex < parts.template.length()) { + 
builder.append(parts.template.charAt(templateIndex++)); + } + } + assert i == parts.args.size() : "expected " + i + " but got " + parts.args.size(); + assert builder.toString().contains(TEXT_ARG_PLACEHOLDER) == false : builder.toString(); + while (templateIndex < parts.template.length()) { + builder.append(parts.template.charAt(templateIndex++)); + } + return builder.toString(); + } + + static String mergeRemainingArgs(Parts parts, int startOffset) { + StringBuilder builder = new StringBuilder(); + for (int i = startOffset; i < parts.args.size(); i++) { + builder.append((i > startOffset) ? SPACE : "").append(parts.args.get(i)); + } + return builder.toString(); + } + + static void addRemainingArgs(List args, String mergedArgs) { + Collections.addAll(args, mergedArgs.split(SPACE)); + } + + static int countArgs(String template) { + int count = 0; + for (int i = 0; i < template.length() - 1; i++) { + if (template.charAt(i) == '%') { + char next = template.charAt(i + 1); + if (next == 'W' || next == 'D' || next == 'U' || next == 'I') { + count++; + i++; + } + } + } + return count; + } + + static boolean hasTimestamp(String template) { + return template.contains(TIMESTAMP_PLACEHOLDER); + } +} diff --git a/x-pack/plugin/mapper-patterned-text/src/main/resources/META-INF/services/org.elasticsearch.painless.spi.PainlessExtension b/x-pack/plugin/mapper-patterned-text/src/main/resources/META-INF/services/org.elasticsearch.painless.spi.PainlessExtension new file mode 100644 index 0000000000000..06e71a093a4f1 --- /dev/null +++ b/x-pack/plugin/mapper-patterned-text/src/main/resources/META-INF/services/org.elasticsearch.painless.spi.PainlessExtension @@ -0,0 +1 @@ +org.elasticsearch.xpack.patternedtext.PatternedTextPainlessExtension diff --git a/x-pack/plugin/mapper-patterned-text/src/main/resources/org/elasticsearch/xpack/patternedtext/org.elasticsearch.xpack.patternedtext.txt 
b/x-pack/plugin/mapper-patterned-text/src/main/resources/org/elasticsearch/xpack/patternedtext/org.elasticsearch.xpack.patternedtext.txt new file mode 100644 index 0000000000000..28ab984edebdc --- /dev/null +++ b/x-pack/plugin/mapper-patterned-text/src/main/resources/org/elasticsearch/xpack/patternedtext/org.elasticsearch.xpack.patternedtext.txt @@ -0,0 +1,10 @@ +# + # Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + # or more contributor license agreements. Licensed under the Elastic License + # 2.0; you may not use this file except in compliance with the Elastic License + # 2.0. +# + +# subclass of BaseKeywordDocValuesField +class org.elasticsearch.xpack.patternedtext.PatternedTextDocValuesField @dynamic_type { +} diff --git a/x-pack/plugin/mapper-patterned-text/src/test/java/org/elasticsearch/xpack/patternedtext/PatternedTextFieldMapperTests.java b/x-pack/plugin/mapper-patterned-text/src/test/java/org/elasticsearch/xpack/patternedtext/PatternedTextFieldMapperTests.java new file mode 100644 index 0000000000000..6ba716ecaa81f --- /dev/null +++ b/x-pack/plugin/mapper-patterned-text/src/test/java/org/elasticsearch/xpack/patternedtext/PatternedTextFieldMapperTests.java @@ -0,0 +1,284 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.patternedtext; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.IndexableFieldType; +import org.apache.lucene.search.FieldExistsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TotalHits; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.analysis.CannedTokenStream; +import org.apache.lucene.tests.analysis.Token; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.Strings; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.index.mapper.DateFieldMapper; +import org.elasticsearch.index.mapper.DocumentMapper; +import org.elasticsearch.index.mapper.KeywordFieldMapper; +import org.elasticsearch.index.mapper.LuceneDocument; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.MapperParsingException; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.MapperTestCase; +import org.elasticsearch.index.mapper.ParsedDocument; +import org.elasticsearch.index.query.MatchPhraseQueryBuilder; +import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.junit.AssumptionViolatedException; + +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.UUID; +import java.util.function.Function; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static 
org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.startsWith; + +public class PatternedTextFieldMapperTests extends MapperTestCase { + + @Override + protected Collection<? extends Plugin> getPlugins() { + return List.of(new PatternedTextMapperPlugin()); + } + + @Override + protected Object getSampleValueForDocument() { + return "value"; + } + + @Override + protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneDocument fields) { + assertThat(query, instanceOf(FieldExistsQuery.class)); + FieldExistsQuery fieldExistsQuery = (FieldExistsQuery) query; + assertThat(fieldExistsQuery.getField(), startsWith("field")); + assertNoFieldNamesField(fields); + } + + public void testExistsStandardSource() throws IOException { + assertExistsQuery(createMapperService(fieldMapping(b -> b.field("type", "patterned_text")))); + } + + public void testExistsSyntheticSource() throws IOException { + assertExistsQuery(createSytheticSourceMapperService(fieldMapping(b -> b.field("type", "patterned_text")))); + } + + public void testPhraseQueryStandardSource() throws IOException { + assertPhraseQuery(createMapperService(fieldMapping(b -> b.field("type", "patterned_text")))); + } + + public void testPhraseQuerySyntheticSource() throws IOException { + assertPhraseQuery(createSytheticSourceMapperService(fieldMapping(b -> b.field("type", "patterned_text")))); + } + + private void assertPhraseQuery(MapperService mapperService) throws IOException { + try (Directory directory = newDirectory()) { + RandomIndexWriter iw = new RandomIndexWriter(random(), directory); + LuceneDocument doc = mapperService.documentMapper().parse(source(b -> b.field("field", "the quick brown fox 1"))).rootDoc(); + iw.addDocument(doc); + iw.close(); + try (DirectoryReader reader = DirectoryReader.open(directory)) { + SearchExecutionContext context = createSearchExecutionContext(mapperService, newSearcher(reader)); + MatchPhraseQueryBuilder queryBuilder = new MatchPhraseQueryBuilder("field", "brown 
fox 1"); + TopDocs docs = context.searcher().search(queryBuilder.toQuery(context), 1); + assertThat(docs.totalHits.value(), equalTo(1L)); + assertThat(docs.totalHits.relation(), equalTo(TotalHits.Relation.EQUAL_TO)); + assertThat(docs.scoreDocs[0].doc, equalTo(0)); + } + } + } + + @Override + protected void registerParameters(ParameterChecker checker) throws IOException { + checker.registerUpdateCheck( + b -> { b.field("meta", Collections.singletonMap("format", "mysql.access")); }, + m -> assertEquals(Collections.singletonMap("format", "mysql.access"), m.fieldType().meta()) + ); + } + + @Override + protected void minimalMapping(XContentBuilder b) throws IOException { + b.field("type", "patterned_text"); + } + + @Override + protected void minimalStoreMapping(XContentBuilder b) throws IOException { + // 'store' is always true + minimalMapping(b); + } + + public void testDefaults() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), mapper.mappingSource().toString()); + + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); + List<IndexableField> fields = doc.rootDoc().getFields("field"); + assertEquals(1, fields.size()); + assertEquals("1234", fields.get(0).stringValue()); + IndexableFieldType fieldType = fields.get(0).fieldType(); + assertThat(fieldType.omitNorms(), equalTo(true)); + assertTrue(fieldType.tokenized()); + assertFalse(fieldType.stored()); + assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS)); + assertThat(fieldType.storeTermVectors(), equalTo(false)); + assertThat(fieldType.storeTermVectorOffsets(), equalTo(false)); + assertThat(fieldType.storeTermVectorPositions(), equalTo(false)); + assertThat(fieldType.storeTermVectorPayloads(), equalTo(false)); + assertEquals(DocValuesType.NONE, fieldType.docValuesType()); + } + + public void testNullConfigValuesFail() throws MapperParsingException { + Exception e = 
expectThrows( + MapperParsingException.class, + () -> createDocumentMapper(fieldMapping(b -> b.field("type", "patterned_text").field("meta", (String) null))) + ); + assertThat(e.getMessage(), containsString("[meta] on mapper [field] of type [patterned_text] must not have a [null] value")); + } + + public void testSimpleMerge() throws IOException { + XContentBuilder startingMapping = fieldMapping(b -> b.field("type", "patterned_text")); + MapperService mapperService = createMapperService(startingMapping); + assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(PatternedTextFieldMapper.class)); + + merge(mapperService, startingMapping); + assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(PatternedTextFieldMapper.class)); + + XContentBuilder newField = mapping(b -> { + b.startObject("field").field("type", "patterned_text").startObject("meta").field("key", "value").endObject().endObject(); + b.startObject("other_field").field("type", "keyword").endObject(); + }); + merge(mapperService, newField); + assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(PatternedTextFieldMapper.class)); + assertThat(mapperService.documentMapper().mappers().getMapper("other_field"), instanceOf(KeywordFieldMapper.class)); + } + + public void testDisabledSource() throws IOException { + XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("_doc"); + { + mapping.startObject("properties"); + { + mapping.startObject("foo"); + { + mapping.field("type", "patterned_text"); + } + mapping.endObject(); + } + mapping.endObject(); + + mapping.startObject("_source"); + { + mapping.field("enabled", false); + } + mapping.endObject(); + } + mapping.endObject().endObject(); + + MapperService mapperService = createMapperService(mapping); + MappedFieldType ft = mapperService.fieldType("foo"); + SearchExecutionContext context = createSearchExecutionContext(mapperService); + TokenStream ts = 
new CannedTokenStream(new Token("a", 0, 3), new Token("b", 4, 7)); + + // Allowed even if source is disabled. + ft.phraseQuery(ts, 0, true, context); + ft.termQuery("a", context); + } + + @Override + protected Object generateRandomInputValue(MappedFieldType ft) { + assumeFalse("We don't have a way to assert things here", true); + return null; + } + + @Override + protected void randomFetchTestFieldConfig(XContentBuilder b) throws IOException { + assumeFalse("We don't have a way to assert things here", true); + } + + @Override + protected boolean supportsIgnoreMalformed() { + return false; + } + + @Override + protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) { + assertFalse("patterned_text doesn't support ignoreMalformed", ignoreMalformed); + return new PatternedTextSyntheticSourceSupport(); + } + + static class PatternedTextSyntheticSourceSupport implements SyntheticSourceSupport { + @Override + public SyntheticSourceExample example(int maxValues) { + Tuple v = generateValue(); + return new SyntheticSourceExample(v.v1(), v.v2(), this::mapping); + } + + private Tuple generateValue() { + StringBuilder builder = new StringBuilder(); + if (randomBoolean()) { + builder.append(randomAlphaOfLength(5)); + } else { + String timestamp = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.formatMillis(System.currentTimeMillis()); + builder.append(timestamp); + } + for (int i = 0; i < randomIntBetween(0, 9); i++) { + builder.append(" "); + int rand = randomIntBetween(0, 4); + switch (rand) { + case 0 -> builder.append(randomAlphaOfLength(5)); + case 1 -> builder.append(randomAlphanumericOfLength(5)); + case 2 -> builder.append(UUID.randomUUID()); + case 3 -> builder.append(randomIp(true)); + case 4 -> builder.append(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.formatMillis(randomMillisUpToYear9999())); + } + } + String value = builder.toString(); + return Tuple.tuple(value, value); + } + + private void mapping(XContentBuilder b) throws IOException { + 
b.field("type", "patterned_text"); + } + + @Override + public List invalidExample() throws IOException { + return List.of(); + } + } + + public void testDocValues() throws IOException { + MapperService mapper = createMapperService(fieldMapping(b -> b.field("type", "patterned_text"))); + assertScriptDocValues(mapper, "foo", equalTo(List.of("foo"))); + } + + public void testDocValuesSynthetic() throws IOException { + MapperService mapper = createSytheticSourceMapperService(fieldMapping(b -> b.field("type", "patterned_text"))); + assertScriptDocValues(mapper, "foo", equalTo(List.of("foo"))); + } + + @Override + protected IngestScriptSupport ingestScriptSupport() { + throw new AssumptionViolatedException("not supported"); + } + + @Override + protected Function loadBlockExpected() { + return v -> ((BytesRef) v).utf8ToString(); + } +} diff --git a/x-pack/plugin/mapper-patterned-text/src/test/java/org/elasticsearch/xpack/patternedtext/PatternedTextFieldTypeTests.java b/x-pack/plugin/mapper-patterned-text/src/test/java/org/elasticsearch/xpack/patternedtext/PatternedTextFieldTypeTests.java new file mode 100644 index 0000000000000..296822bfd2717 --- /dev/null +++ b/x-pack/plugin/mapper-patterned-text/src/test/java/org/elasticsearch/xpack/patternedtext/PatternedTextFieldTypeTests.java @@ -0,0 +1,235 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.patternedtext; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.index.Term; +import org.apache.lucene.queries.intervals.Intervals; +import org.apache.lucene.queries.intervals.IntervalsSource; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.RegexpQuery; +import org.apache.lucene.search.TermInSetQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.tests.analysis.CannedTokenStream; +import org.apache.lucene.tests.analysis.Token; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.common.lucene.BytesRefs; +import org.elasticsearch.common.lucene.search.AutomatonQueries; +import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.index.mapper.FieldTypeTestCase; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.extras.SourceIntervalsSource; +import org.hamcrest.Matchers; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.function.Supplier; + +public class PatternedTextFieldTypeTests extends FieldTypeTestCase { + + public void testTermQuery() { + MappedFieldType ft = new PatternedTextFieldType("field"); + assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field", "foo"))), ft.termQuery("foo", null)); 
+ assertEquals(AutomatonQueries.caseInsensitiveTermQuery(new Term("field", "fOo")), ft.termQueryCaseInsensitive("fOo", null)); + } + + public void testTermsQuery() { + MappedFieldType ft = new PatternedTextFieldType("field"); + List<BytesRef> terms = new ArrayList<>(); + terms.add(new BytesRef("foo")); + terms.add(new BytesRef("bar")); + assertEquals(new TermInSetQuery("field", terms), ft.termsQuery(Arrays.asList("foo", "bar"), null)); + } + + public void testRangeQuery() { + MappedFieldType ft = new PatternedTextFieldType("field"); + assertEquals( + new TermRangeQuery("field", BytesRefs.toBytesRef("foo"), BytesRefs.toBytesRef("bar"), true, false), + ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_CONTEXT) + ); + + ElasticsearchException ee = expectThrows( + ElasticsearchException.class, + () -> ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_CONTEXT_DISALLOW_EXPENSIVE) + ); + assertEquals( + "[range] queries on [text] or [keyword] fields cannot be executed when " + "'search.allow_expensive_queries' is set to false.", + ee.getMessage() + ); + } + + public void testRegexpQuery() { + MappedFieldType ft = new PatternedTextFieldType("field"); + assertEquals(new RegexpQuery(new Term("field", "foo.*")), ft.regexpQuery("foo.*", 0, 0, 10, null, MOCK_CONTEXT)); + + ElasticsearchException ee = expectThrows( + ElasticsearchException.class, + () -> ft.regexpQuery("foo.*", randomInt(10), 0, randomInt(10) + 1, null, MOCK_CONTEXT_DISALLOW_EXPENSIVE) + ); + assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", ee.getMessage()); + } + + public void testFuzzyQuery() { + MappedFieldType ft = new PatternedTextFieldType("field"); + assertEquals( + new ConstantScoreQuery(new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true)), + ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_CONTEXT) + ); + + ElasticsearchException ee = expectThrows( + ElasticsearchException.class, + () -> ft.fuzzyQuery( + 
"foo", + Fuzziness.AUTO, + randomInt(10) + 1, + randomInt(10) + 1, + randomBoolean(), + MOCK_CONTEXT_DISALLOW_EXPENSIVE + ) + ); + assertEquals("[fuzzy] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", ee.getMessage()); + } + + public void testFetchDocValue() throws IOException { + Supplier<Document> documentSupplier = () -> { + Document doc = new Document(); + doc.add(new SortedSetDocValuesField("field.template", new BytesRef("value"))); + return doc; + }; + + MappedFieldType fieldType = new PatternedTextFieldType("field"); + assertEquals(List.of("value"), fetchDocValues(fieldType, documentSupplier)); + } + + private Query unwrapPositionalQuery(Query query) { + query = ((ConstantScoreQuery) query).getQuery(); + return query; + } + + public void testPhraseQuery() throws IOException { + MappedFieldType ft = new PatternedTextFieldType("field"); + TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 4, 7)); + Query query = ft.phraseQuery(ts, 0, true, MOCK_CONTEXT); + Query delegate = unwrapPositionalQuery(query); + assertEquals( + new BooleanQuery.Builder().add(new PhraseQuery("field", "a", "b"), BooleanClause.Occur.SHOULD) + .add(new PhraseQuery("field.template", "a", "b"), BooleanClause.Occur.SHOULD) + .build() + .toString(), + delegate.toString() + ); + } + + public void testMultiPhraseQuery() throws IOException { + MappedFieldType ft = new PatternedTextFieldType("field"); + TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 0, 0, 3), new Token("c", 4, 7)); + Query query = ft.multiPhraseQuery(ts, 0, true, MOCK_CONTEXT); + Query delegate = unwrapPositionalQuery(query); + Query expected = new BooleanQuery.Builder().add( + new MultiPhraseQuery.Builder().add(new Term[] { new Term("field", "a"), new Term("field", "b") }) + .add(new Term("field", "c")) + .build(), + BooleanClause.Occur.SHOULD + ) + .add( + new MultiPhraseQuery.Builder().add(new Term[] { new Term("field.template", "a"), new 
Term("field.template", "b") }) + .add(new Term("field.template", "c")) + .build(), + BooleanClause.Occur.SHOULD + ) + .build(); + assertEquals(expected.toString(), delegate.toString()); + } + + public void testPhrasePrefixQuery() throws IOException { + MappedFieldType ft = new PatternedTextFieldType("field"); + TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 0, 0, 3), new Token("c", 4, 7)); + Query query = ft.phrasePrefixQuery(ts, 0, 10, MOCK_CONTEXT); + Query delegate = unwrapPositionalQuery(query); + MultiPhrasePrefixQuery expected = new MultiPhrasePrefixQuery("field"); + expected.add(new Term[] { new Term("field", "a"), new Term("field", "b") }); + expected.add(new Term("field", "c")); + MultiPhrasePrefixQuery expectedTemplate = new MultiPhrasePrefixQuery("field.template"); + expectedTemplate.add(new Term[] { new Term("field.template", "a"), new Term("field.template", "b") }); + expectedTemplate.add(new Term("field.template", "c")); + assertEquals( + new BooleanQuery.Builder().add(expected, BooleanClause.Occur.SHOULD) + .add(expectedTemplate, BooleanClause.Occur.SHOULD) + .build() + .toString(), + delegate.toString() + ); + } + + public void testTermIntervals() { + MappedFieldType ft = new PatternedTextFieldType("field"); + IntervalsSource termIntervals = ft.termIntervals(new BytesRef("foo"), MOCK_CONTEXT); + assertThat(termIntervals, Matchers.instanceOf(SourceIntervalsSource.class)); + assertEquals(Intervals.term(new BytesRef("foo")), ((SourceIntervalsSource) termIntervals).getIntervalsSource()); + } + + public void testPrefixIntervals() { + MappedFieldType ft = new PatternedTextFieldType("field"); + IntervalsSource prefixIntervals = ft.prefixIntervals(new BytesRef("foo"), MOCK_CONTEXT); + assertThat(prefixIntervals, Matchers.instanceOf(SourceIntervalsSource.class)); + assertEquals( + Intervals.prefix(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()), + ((SourceIntervalsSource) prefixIntervals).getIntervalsSource() + ); + } + + 
public void testWildcardIntervals() { + MappedFieldType ft = new PatternedTextFieldType("field"); + IntervalsSource wildcardIntervals = ft.wildcardIntervals(new BytesRef("foo"), MOCK_CONTEXT); + assertThat(wildcardIntervals, Matchers.instanceOf(SourceIntervalsSource.class)); + assertEquals( + Intervals.wildcard(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()), + ((SourceIntervalsSource) wildcardIntervals).getIntervalsSource() + ); + } + + public void testRegexpIntervals() { + MappedFieldType ft = new PatternedTextFieldType("field"); + IntervalsSource regexpIntervals = ft.regexpIntervals(new BytesRef("foo"), MOCK_CONTEXT); + assertThat(regexpIntervals, Matchers.instanceOf(SourceIntervalsSource.class)); + assertEquals( + Intervals.regexp(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()), + ((SourceIntervalsSource) regexpIntervals).getIntervalsSource() + ); + } + + public void testFuzzyIntervals() { + MappedFieldType ft = new PatternedTextFieldType("field"); + IntervalsSource fuzzyIntervals = ft.fuzzyIntervals("foo", 1, 2, true, MOCK_CONTEXT); + assertThat(fuzzyIntervals, Matchers.instanceOf(SourceIntervalsSource.class)); + } + + public void testRangeIntervals() { + MappedFieldType ft = new PatternedTextFieldType("field"); + IntervalsSource rangeIntervals = ft.rangeIntervals(new BytesRef("foo"), new BytesRef("foo1"), true, true, MOCK_CONTEXT); + assertThat(rangeIntervals, Matchers.instanceOf(SourceIntervalsSource.class)); + assertEquals( + Intervals.range(new BytesRef("foo"), new BytesRef("foo1"), true, true, IndexSearcher.getMaxClauseCount()), + ((SourceIntervalsSource) rangeIntervals).getIntervalsSource() + ); + } +} diff --git a/x-pack/plugin/mapper-patterned-text/src/test/java/org/elasticsearch/xpack/patternedtext/PatternedTextValueProcessorTests.java b/x-pack/plugin/mapper-patterned-text/src/test/java/org/elasticsearch/xpack/patternedtext/PatternedTextValueProcessorTests.java new file mode 100644 index 0000000000000..c6d67a14a3f20 --- /dev/null +++ 
b/x-pack/plugin/mapper-patterned-text/src/test/java/org/elasticsearch/xpack/patternedtext/PatternedTextValueProcessorTests.java @@ -0,0 +1,141 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.patternedtext; + +import org.elasticsearch.index.mapper.DateFieldMapper; +import org.elasticsearch.test.ESTestCase; +import org.hamcrest.Matchers; + +public class PatternedTextValueProcessorTests extends ESTestCase { + + public void testEmpty() { + String text = ""; + PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text); + assertEquals(text, parts.template()); + assertNull(parts.timestamp()); + assertTrue(parts.args().isEmpty()); + assertEquals(text, PatternedTextValueProcessor.merge(parts)); + } + + public void testWhitespace() { + String text = " "; + PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text); + assertEquals(text, parts.template()); + assertNull(parts.timestamp()); + assertTrue(parts.args().isEmpty()); + assertEquals(text, PatternedTextValueProcessor.merge(parts)); + } + + public void testWithoutTimestamp() { + String text = " some text with arg1 and 2arg2 and 333 "; + PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text); + assertEquals(" some text with %W and %W and %W ", parts.template()); + assertNull(parts.timestamp()); + assertThat(parts.args(), Matchers.contains("arg1", "2arg2", "333")); + assertEquals(text, PatternedTextValueProcessor.merge(parts)); + } + + public void testWithTimestamp() { + String text = " 2021-04-13T13:51:38.000Z some text with arg1 and arg2 and arg3"; + PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text); + assertEquals(" %T some text with %W and %W and %W", parts.template()); + 
assertEquals(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2021-04-13T13:51:38.000Z"), (long) parts.timestamp()); + assertThat(parts.args(), Matchers.contains("arg1", "arg2", "arg3")); + assertEquals(text, PatternedTextValueProcessor.merge(parts)); + } + + public void testWithDateSpaceTime() { + String text = " 2021-04-13 13:51:38 some text with arg1 and arg2 and arg3"; + PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text); + assertEquals(" %T some text with %W and %W and %W", parts.template()); + assertEquals(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2021-04-13T13:51:38.000Z"), (long) parts.timestamp()); + assertThat(parts.args(), Matchers.contains("arg1", "arg2", "arg3")); + assertEquals(text.replace("2021-04-13 13:51:38", "2021-04-13T13:51:38.000Z"), PatternedTextValueProcessor.merge(parts)); + } + + public void testMalformedDate() { + String text = "2020/09/06 10:11:38 Using namespace: kubernetes-dashboard' | HTTP status: 400, message: [1:395]"; + PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text); + assertEquals("%T Using namespace: kubernetes-dashboard' | HTTP status: %W message: [%W]", parts.template()); + assertEquals(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-09-06T10:11:38"), (long) parts.timestamp()); + assertThat(parts.args(), Matchers.contains("400,", "1:395")); + assertEquals(text.replace("2020/09/06 10:11:38", "2020-09-06T10:11:38.000Z"), PatternedTextValueProcessor.merge(parts)); + } + + public void testUUID() { + String text = "[2020-08-18T00:58:56.751+00:00][15][2354][action_controller][INFO]: [18be2355-6306-4a00-9db9-f0696aa1a225] " + + "some text with arg1 and arg2"; + PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text); + assertEquals("[%T][%W][%W][action_controller][INFO]: [%U] some text with %W and %W", parts.template()); + 
assertEquals(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-08-18T00:58:56.751+00:00"), (long) parts.timestamp()); + assertThat(parts.args(), Matchers.contains("15", "2354", "AEoGY1UjvhgloqFqafC5nQ", "arg1", "arg2")); + assertEquals(text.replace("+00:00", "Z"), PatternedTextValueProcessor.merge(parts)); + } + + public void testIP() { + String text = "[2020-08-18T00:58:56.751+00:00][15][2354][action_controller][INFO]: from 94.168.152.150 and arg1"; + PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text); + assertEquals("[%T][%W][%W][action_controller][INFO]: from %I and %W", parts.template()); + assertEquals(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-08-18T00:58:56.751+00:00"), (long) parts.timestamp()); + assertThat(parts.args(), Matchers.contains("15", "2354", "XqiYlg", "arg1")); + assertEquals(text.replace("+00:00", "Z"), PatternedTextValueProcessor.merge(parts)); + } + + public void testSecondDate() { + String text = "[2020-08-18T00:58:56.751+00:00][15][2354][action_controller][INFO]: at 2020-08-18 00:58:56 +0000 and arg1"; + PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text); + assertEquals("[%T][%W][%W][action_controller][INFO]: at %D and %W", parts.template()); + assertEquals(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2020-08-18T00:58:56.751+00:00"), (long) parts.timestamp()); + assertThat(parts.args(), Matchers.contains("15", "2354", "gIQT/3MBAAA", "arg1")); + assertEquals( + text.replace("2020-08-18 00:58:56 +0000", "2020-08-18T00:58:56.000Z").replace("+00:00", "Z"), + PatternedTextValueProcessor.merge(parts) + ); + } + + public void testIsUUID() { + String[] validUUIDs = { "123e4567-e89b-12d3-a456-426614174000", "550e8400-e29b-41d4-a716-446655440000" }; + + String[] invalidUUIDs = { + "not-a-uuid", // very invalid + "550e8400-e29b-41d4-a716-4466554400000", // Invalid last extra character + "550e8400-e29b-41d4-a716-44665544000g" // Invalid character + 
}; + + byte[] bytes = new byte[16]; + for (String uuid : validUUIDs) { + assertTrue("Expected valid UUID: " + uuid, PatternedTextValueProcessor.isUUID(uuid, bytes)); + } + for (String uuid : invalidUUIDs) { + assertFalse("Expected invalid UUID: " + uuid, PatternedTextValueProcessor.isUUID(uuid, bytes)); + } + } + + public void testIsIPv4() { + String[] validIPv4s = { "192.168.1.1", "10.0.0.1", "172.16.0.1", "255.255.255.255", "0.0.0.0" }; + + String[] invalidIPv4s = { + "256.256.256.256", // Out of range + "192.168.1", // Missing one octet + "192.168.1.1.1", // Extra octet + "192.168.1.a" // Invalid character + }; + + byte[] bytes = new byte[4]; + for (String ip : validIPv4s) { + assertTrue("Expected valid IPv4: " + ip, PatternedTextValueProcessor.isIpv4(ip, bytes)); + String[] octets = ip.split("\\."); + for (int i = 0; i < 4; i++) { + assertEquals("Expected valid IPv4 octet: " + octets[i], Integer.parseInt(octets[i]), bytes[i] & 0xFF); + } + } + for (String ip : invalidIPv4s) { + assertFalse("Expected invalid IPv4: " + ip, PatternedTextValueProcessor.isIpv4(ip, bytes)); + } + } +} diff --git a/x-pack/plugin/mapper-patterned-text/src/yamlRestTest/java/org/elasticsearch/xpack/patternedtext/PatternedTextClientYamlTestSuiteIT.java b/x-pack/plugin/mapper-patterned-text/src/yamlRestTest/java/org/elasticsearch/xpack/patternedtext/PatternedTextClientYamlTestSuiteIT.java new file mode 100644 index 0000000000000..2bd0dc31646dd --- /dev/null +++ b/x-pack/plugin/mapper-patterned-text/src/yamlRestTest/java/org/elasticsearch/xpack/patternedtext/PatternedTextClientYamlTestSuiteIT.java @@ -0,0 +1,37 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.patternedtext; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate; +import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase; +import org.junit.ClassRule; + +/** Runs yaml rest tests */ +public class PatternedTextClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase { + + public PatternedTextClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) { + super(testCandidate); + } + + @ParametersFactory + public static Iterable parameters() throws Exception { + return ESClientYamlSuiteTestCase.createParameters(); + } + + @ClassRule + public static ElasticsearchCluster cluster = ElasticsearchCluster.local().module("patterned-text").build(); + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } +} diff --git a/x-pack/plugin/mapper-patterned-text/src/yamlRestTest/resources/rest-api-spec/test/10_basic.yml b/x-pack/plugin/mapper-patterned-text/src/yamlRestTest/resources/rest-api-spec/test/10_basic.yml new file mode 100644 index 0000000000000..e85994579bb45 --- /dev/null +++ b/x-pack/plugin/mapper-patterned-text/src/yamlRestTest/resources/rest-api-spec/test/10_basic.yml @@ -0,0 +1,414 @@ +setup: + + - do: + indices.create: + index: test + body: + mappings: + properties: + foo: + type: patterned_text + + - do: + index: + index: test + id: "1" + body: {} + + - do: + index: + index: test + id: "2" + body: { "foo": "Apache Lucene powers Elasticsearch" } + + - do: + index: + index: test + id: "3" + body: { "foo": "Elasticsearch is based on Apache Lucene" } + + - do: + index: + index: test + id: "4" + body: { "foo": "The Apache Software Foundation manages many projects including Lucene" } + + - do: + indices.refresh: {} + +--- +Field caps: + + - do: + 
field_caps: + index: test + fields: [ foo ] + + - match: { fields.foo.text.searchable: true } + - match: { fields.foo.text.aggregatable: true } + +--- +Exist query: + + - do: + search: + index: test + body: + query: + exists: + field: foo + + - match: { "hits.total.value": 3 } + - match: { "hits.hits.0._score": 1.0 } + +--- +Match query: + + - do: + search: + index: test + body: + query: + match: + foo: powers + + - match: { "hits.total.value": 1 } + - match: { "hits.hits.0._score": 1.0 } + +--- +Match Phrase query: + + - do: + search: + index: test + body: + query: + match_phrase: + foo: "lucene powers" + + - match: { "hits.total.value": 1 } + - match: { "hits.hits.0._score": 1.0 } + +--- +Match Phrase Prefix query: + + - do: + search: + index: test + body: + query: + match_phrase_prefix: + foo: "lucene pow" + + - match: { "hits.total.value": 1 } + - match: { "hits.hits.0._score": 1.0 } + +--- +Match template query: + + - do: + search: + index: test + body: + query: + match: + foo.template: powers + + - match: { "hits.total.value": 1 } + +--- +Query String query with phrase: + + - do: + search: + index: test + body: + query: + query_string: + query: '"lucene powers"' + default_field: "foo" + + - match: { "hits.total.value": 1 } + - match: { "hits.hits.0._score": 1.0 } + + + +--- +Regexp query: + + - do: + search: + index: test + body: + query: + regexp: + foo: "lu.*ne" + + - match: { "hits.total.value": 3 } + - match: { "hits.hits.0._score": 1.0 } + +--- +Wildcard query: + + - do: + search: + index: test + body: + query: + wildcard: + foo: "lu*ne" + + - match: { "hits.total.value": 3 } + - match: { "hits.hits.0._score": 1.0 } + +--- +Prefix query: + + - do: + search: + index: test + body: + query: + prefix: + foo: "luc" + + - match: { "hits.total.value": 3 } + - match: { "hits.hits.0._score": 1.0 } + +--- +Fuzzy query: + + - do: + search: + index: test + body: + query: + fuzzy: + foo: "lucane" + + - match: { "hits.total.value": 3 } + - match: { 
"hits.hits.0._score": 1.0 } + +--- +Span query: + + - do: + catch: bad_request + search: + index: test + body: + query: + span_term: + foo: lucene + +--- +Term intervals query: + + - do: + search: + index: test + body: + query: + intervals: + foo: + match: + query: "apache lucene" + max_gaps: 1 + + - match: { "hits.total.value": 2 } + +--- +Prefix intervals query: + + - do: + search: + index: test + body: + query: + intervals: + foo: + prefix: + prefix: "luc" + + - match: { "hits.total.value": 3 } + +--- +Wildcard intervals query: + + - do: + search: + index: test + body: + query: + intervals: + foo: + wildcard: + pattern: "*ase*" + + - match: { "hits.total.value": 1 } + +--- +Fuzzy intervals query: + + - do: + search: + index: test + body: + query: + intervals: + foo: + fuzzy: + term: "lucane" + + - match: { "hits.total.value": 3 } + +--- +Wildcard highlighting: + + - do: + search: + index: test + body: + query: + match: + foo: "many" + highlight: + fields: + "*": {} + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._source.foo: "The Apache Software Foundation manages many projects including Lucene" } + - match: { hits.hits.0.highlight.foo.0: "The Apache Software Foundation manages many projects including Lucene" } + +--- +Terms aggregation: + + - do: + search: + index: test + body: + aggs: + term_agg: + terms: + field: foo.template + + - match: { hits.total.value: 4 } + - length: { aggregations.term_agg.buckets: 10 } + - match: { aggregations.term_agg.buckets.0.key: apache } + - match: { aggregations.term_agg.buckets.0.doc_count: 3 } + - match: { aggregations.term_agg.buckets.1.key: lucene } + - match: { aggregations.term_agg.buckets.1.doc_count: 3 } + - match: { aggregations.term_agg.buckets.2.key: elasticsearch } + - match: { aggregations.term_agg.buckets.2.doc_count: 2 } + +--- +synthetic_source: + + - do: + indices.create: + index: synthetic_source_test + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + foo: 
+ type: patterned_text + + - do: + index: + index: synthetic_source_test + id: "1" + refresh: true + body: + foo: "Apache Lucene powers Elasticsearch" + + - do: + search: + index: synthetic_source_test + - match: { "hits.total.value": 1 } + - match: + hits.hits.0._source: + foo: "Apache Lucene powers Elasticsearch" + +--- +tsdb: + + - do: + indices.create: + index: tsdb_test + body: + settings: + index: + mode: time_series + routing_path: [ dimension ] + time_series: + start_time: 2000-01-01T00:00:00Z + end_time: 2099-12-31T23:59:59Z + mappings: + properties: + dimension: + type: keyword + time_series_dimension: true + foo: + type: patterned_text + + - do: + index: + index: tsdb_test + refresh: true + body: + "@timestamp": "2000-01-01T00:00:00Z" + dimension: "a" + foo: "Apache Lucene powers Elasticsearch" + + - do: + search: + index: tsdb_test + - match: { "hits.total.value": 1 } + - match: + hits.hits.0._source: + "@timestamp" : "2000-01-01T00:00:00.000Z" + "dimension" : "a" + foo: "Apache Lucene powers Elasticsearch" + +--- +synthetic_source with copy_to: + + - do: + indices.create: + index: synthetic_source_test + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + foo: + type: patterned_text + copy_to: copy + copy: + type: keyword + + - do: + index: + index: synthetic_source_test + id: "1" + refresh: true + body: + foo: "Apache Lucene powers Elasticsearch" + + - do: + search: + index: synthetic_source_test + body: + fields: ["copy"] + + - match: { "hits.total.value": 1 } + - match: + hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch" + - match: + hits.hits.0.fields.copy.0: "Apache Lucene powers Elasticsearch" + + diff --git a/x-pack/plugin/mapper-patterned-text/src/yamlRestTest/resources/rest-api-spec/test/20_synthetic_source.yml b/x-pack/plugin/mapper-patterned-text/src/yamlRestTest/resources/rest-api-spec/test/20_synthetic_source.yml new file mode 100644 index 0000000000000..04c29b764b8f4 --- /dev/null +++ 
b/x-pack/plugin/mapper-patterned-text/src/yamlRestTest/resources/rest-api-spec/test/20_synthetic_source.yml @@ -0,0 +1,38 @@ +simple: + - do: + indices.create: + index: test + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + id: + type: integer + message: + type: patterned_text + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "id": 1, "message": "some log message with no arg" }' + - '{ "create": { } }' + - '{ "id": 2, "message": "another log message with arg 1234 and arg 5678 and a mixed one ABCD9" }' + - '{ "create": { } }' + - '{ "id": 3, "message": "some log message with no arg" }' + - '{ "create": { } }' + - '{ "id": 4, "message": "another log message with arg 1234 and arg 8765 and a mixed one ABCD1" }' + + - do: + search: + index: test + sort: id + + - match: { hits.hits.0._source.message: "some log message with no arg" } + - match: { hits.hits.1._source.message: "another log message with arg 1234 and arg 5678 and a mixed one ABCD9" } + - match: { hits.hits.2._source.message: "some log message with no arg" } + - match: { hits.hits.3._source.message: "another log message with arg 1234 and arg 8765 and a mixed one ABCD1" } diff --git a/x-pack/plugin/mapper-patterned-text/src/yamlRestTest/resources/rest-api-spec/test/30_sort.yml b/x-pack/plugin/mapper-patterned-text/src/yamlRestTest/resources/rest-api-spec/test/30_sort.yml new file mode 100644 index 0000000000000..8a9c09821bc5e --- /dev/null +++ b/x-pack/plugin/mapper-patterned-text/src/yamlRestTest/resources/rest-api-spec/test/30_sort.yml @@ -0,0 +1,35 @@ +simple: + - do: + indices.create: + index: test + body: + mappings: + properties: + id: + type: integer + message: + type: patterned_text + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "id": 1, "message": "some log message with no arg" }' + - '{ "create": { } }' + - '{ "id": 2, "message": "another log message with arg 1234 and arg 5678 and a 
mixed one ABCD9" }' + - '{ "create": { } }' + - '{ "id": 3, "message": "some log message with no arg" }' + - '{ "create": { } }' + - '{ "id": 4, "message": "another log message with arg 1234 and arg 8765 and a mixed one ABCD1" }' + + - do: + search: + index: test + sort: message + + - match: { hits.hits.0._source.message: "another log message with arg 1234 and arg 5678 and a mixed one ABCD9" } + - match: { hits.hits.1._source.message: "another log message with arg 1234 and arg 8765 and a mixed one ABCD1" } + - match: { hits.hits.2._source.message: "some log message with no arg" } + - match: { hits.hits.3._source.message: "some log message with no arg" } diff --git a/x-pack/plugin/mapper-patterned-text/src/yamlRestTest/resources/rest-api-spec/test/40_script_doc_values.yml.back b/x-pack/plugin/mapper-patterned-text/src/yamlRestTest/resources/rest-api-spec/test/40_script_doc_values.yml.back new file mode 100644 index 0000000000000..254d582eeddcd --- /dev/null +++ b/x-pack/plugin/mapper-patterned-text/src/yamlRestTest/resources/rest-api-spec/test/40_script_doc_values.yml.back @@ -0,0 +1,207 @@ +setup: + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + mappings: + properties: + patterned_text: + type: patterned_text + rank: + type: integer + + - do: + index: + index: test + id: "1" + body: + rank: 1 + patterned_text: "Lots of text." + + - do: + index: + index: test + id: "2" + body: + rank: 2 + + - do: + index: + index: test + id: "3" + body: + rank: 3 + patterned_text: [ "Lots of text.", "even more text", "SOOOOO much text" ] + + - do: + indices.create: + index: test_synthetic + body: + settings: + number_of_shards: 1 + index: + mapping.source.mode: synthetic + mappings: + properties: + patterned_text: + type: patterned_text + rank: + type: integer + + - do: + index: + index: test_synthetic + id: "1" + body: + rank: 1 + patterned_text: "Lots of text." 
+ + - do: + index: + index: test_synthetic + id: "2" + body: + rank: 2 + + - do: + index: + index: test_synthetic + id: "3" + body: + rank: 3 + patterned_text: [ "Lots of text.", "even more text", "SOOOOO much text" ] + + +--- +patterned_text: + - do: + catch: bad_request + search: + index: test + body: + query: { term: { _id: "1" } } + script_fields: + field: + script: + source: "doc['patterned_text'].get(0)" + + - match: { hits.hits.0.fields.field.0: "Lots of text." } + + - do: + catch: bad_request + search: + index: test + body: + query: { term: { _id: "1" } } + script_fields: + field: + script: + source: "doc['patterned_text'].value" + + - match: { hits.hits.0.fields.field.0: "Lots of text." } + + - do: + search: + index: test + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "field('patterned_text').get('')" + - match: { hits.hits.0.fields.field.0: "Lots of text." } + - match: { hits.hits.1.fields.field.0: "" } + - match: { hits.hits.2.fields.field.0: "Lots of text." } + + - do: + search: + index: test + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "/* avoid yaml stash */ $('patterned_text', '')" + - match: { hits.hits.0.fields.field.0: "Lots of text." } + - match: { hits.hits.1.fields.field.0: "" } + - match: { hits.hits.2.fields.field.0: "Lots of text." } + + - do: + search: + index: test + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "String defaultText = 'default text'; field('patterned_text').get(defaultText)" + - match: { hits.hits.0.fields.field.0: "Lots of text." } + - match: { hits.hits.1.fields.field.0: "default text" } + - match: { hits.hits.2.fields.field.0: "Lots of text." } + + - do: + search: + index: test + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "String defaultText = 'default text'; $('patterned_text', defaultText)" + - match: { hits.hits.0.fields.field.0: "Lots of text." 
} + - match: { hits.hits.1.fields.field.0: "default text" } + - match: { hits.hits.2.fields.field.0: "Lots of text." } + + - do: + search: + index: test + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "field('patterned_text').get(1, '')" + - match: { hits.hits.0.fields.field.0: "" } + - match: { hits.hits.1.fields.field.0: "" } + - match: { hits.hits.2.fields.field.0: "SOOOOO much text" } + + - do: + search: + index: test + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "String defaultText = 'default text'; field('patterned_text').get(1, defaultText)" + - match: { hits.hits.0.fields.field.0: "default text" } + - match: { hits.hits.1.fields.field.0: "default text" } + - match: { hits.hits.2.fields.field.0: "SOOOOO much text" } + + - do: + search: + index: test + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "field('patterned_text').get(1, '')" + - match: { hits.hits.0.fields.field.0: "" } + - match: { hits.hits.1.fields.field.0: "" } + - match: { hits.hits.2.fields.field.0: "SOOOOO much text" } + + - do: + search: + index: test + body: + sort: [ { rank: asc } ] + script_fields: + field: + script: + source: "String cat = ''; for (String s : field('patterned_text')) { cat += s; } cat + field('patterned_text').size();" + - match: { hits.hits.0.fields.field.0: "Lots of text.1" } + - match: { hits.hits.1.fields.field.0: "0" } + - match: { hits.hits.2.fields.field.0: "Lots of text.SOOOOO much texteven more text3" }