Skip to content

Commit 7f5c272

Browse files
committed
Merged with main
1 parent 1bbc0db commit 7f5c272

File tree

48 files changed

+1639
-586
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+1639
-586
lines changed

docs/changelog/134582.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 134582
2+
summary: Fixed match only text block loader not working when a keyword multi field
3+
is present
4+
area: Mapping
5+
type: bug
6+
issues: []

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 208 additions & 133 deletions
Large diffs are not rendered by default.

modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
package org.elasticsearch.index.mapper.extras;
1010

1111
import org.apache.lucene.analysis.TokenStream;
12+
import org.apache.lucene.document.FieldType;
1213
import org.apache.lucene.index.Term;
1314
import org.apache.lucene.queries.intervals.Intervals;
1415
import org.apache.lucene.queries.intervals.IntervalsSource;
@@ -27,20 +28,38 @@
2728
import org.apache.lucene.tests.analysis.Token;
2829
import org.apache.lucene.util.BytesRef;
2930
import org.elasticsearch.ElasticsearchException;
31+
import org.elasticsearch.cluster.metadata.IndexMetadata;
3032
import org.elasticsearch.common.lucene.BytesRefs;
33+
import org.elasticsearch.common.lucene.Lucene;
3134
import org.elasticsearch.common.lucene.search.AutomatonQueries;
3235
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
36+
import org.elasticsearch.common.settings.Settings;
3337
import org.elasticsearch.common.unit.Fuzziness;
38+
import org.elasticsearch.index.IndexMode;
39+
import org.elasticsearch.index.IndexSettings;
40+
import org.elasticsearch.index.IndexVersion;
41+
import org.elasticsearch.index.analysis.NamedAnalyzer;
42+
import org.elasticsearch.index.mapper.BlockLoader;
43+
import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
3444
import org.elasticsearch.index.mapper.FieldTypeTestCase;
45+
import org.elasticsearch.index.mapper.KeywordFieldMapper;
3546
import org.elasticsearch.index.mapper.MappedFieldType;
47+
import org.elasticsearch.index.mapper.MappingParserContext;
48+
import org.elasticsearch.index.mapper.TextFieldMapper;
49+
import org.elasticsearch.index.mapper.TextSearchInfo;
3650
import org.elasticsearch.index.mapper.extras.MatchOnlyTextFieldMapper.MatchOnlyTextFieldType;
51+
import org.elasticsearch.script.ScriptCompiler;
3752
import org.hamcrest.Matchers;
3853

3954
import java.io.IOException;
4055
import java.util.ArrayList;
4156
import java.util.Arrays;
57+
import java.util.Collections;
4258
import java.util.List;
4359

60+
import static org.mockito.Mockito.doReturn;
61+
import static org.mockito.Mockito.mock;
62+
4463
public class MatchOnlyTextFieldTypeTests extends FieldTypeTestCase {
4564

4665
public void testTermQuery() {
@@ -205,4 +224,149 @@ public void testRangeIntervals() {
205224
((SourceIntervalsSource) rangeIntervals).getIntervalsSource()
206225
);
207226
}
227+
228+
public void test_block_loader_uses_stored_fields_for_loading_when_synthetic_source_delegate_is_absent() {
229+
// given
230+
MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType(
231+
"parent",
232+
new TextSearchInfo(TextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
233+
mock(NamedAnalyzer.class),
234+
true,
235+
Collections.emptyMap(),
236+
false,
237+
false,
238+
null
239+
);
240+
241+
// when
242+
BlockLoader blockLoader = ft.blockLoader(mock(MappedFieldType.BlockLoaderContext.class));
243+
244+
// then
245+
// verify that, with no synthetic source delegate available, we fall back to the stored-fields based block loader
246+
assertTrue(blockLoader instanceof MatchOnlyTextFieldType.BytesFromMixedStringsBytesRefBlockLoader);
247+
}
248+
249+
public void test_block_loader_uses_synthetic_source_delegate_when_ignore_above_is_not_set() {
250+
// given
251+
KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate = new KeywordFieldMapper.KeywordFieldType(
252+
"child",
253+
true,
254+
true,
255+
Collections.emptyMap()
256+
);
257+
258+
MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType(
259+
"parent",
260+
new TextSearchInfo(TextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
261+
mock(NamedAnalyzer.class),
262+
true,
263+
Collections.emptyMap(),
264+
false,
265+
false,
266+
syntheticSourceDelegate
267+
);
268+
269+
// when
270+
BlockLoader blockLoader = ft.blockLoader(mock(MappedFieldType.BlockLoaderContext.class));
271+
272+
// then
273+
// verify that we delegate block loading to the synthetic source delegate
274+
assertTrue(blockLoader instanceof BlockLoader.Delegating);
275+
}
276+
277+
public void test_block_loader_does_not_use_synthetic_source_delegate_when_ignore_above_is_set() {
278+
// given
279+
Settings settings = Settings.builder()
280+
.put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())
281+
.put(IndexSettings.MODE.getKey(), IndexMode.STANDARD)
282+
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
283+
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
284+
.build();
285+
IndexSettings indexSettings = new IndexSettings(IndexMetadata.builder("index").settings(settings).build(), settings);
286+
MappingParserContext mappingParserContext = mock(MappingParserContext.class);
287+
doReturn(settings).when(mappingParserContext).getSettings();
288+
doReturn(indexSettings).when(mappingParserContext).getIndexSettings();
289+
doReturn(mock(ScriptCompiler.class)).when(mappingParserContext).scriptCompiler();
290+
291+
KeywordFieldMapper.Builder builder = new KeywordFieldMapper.Builder("child", mappingParserContext);
292+
builder.ignoreAbove(123);
293+
294+
KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate = new KeywordFieldMapper.KeywordFieldType(
295+
"child",
296+
mock(FieldType.class),
297+
mock(NamedAnalyzer.class),
298+
mock(NamedAnalyzer.class),
299+
mock(NamedAnalyzer.class),
300+
builder,
301+
true
302+
);
303+
304+
MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType(
305+
"parent",
306+
new TextSearchInfo(TextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
307+
mock(NamedAnalyzer.class),
308+
true,
309+
Collections.emptyMap(),
310+
false,
311+
false,
312+
syntheticSourceDelegate
313+
);
314+
315+
// when
316+
MappedFieldType.BlockLoaderContext blContext = mock(MappedFieldType.BlockLoaderContext.class);
317+
doReturn(FieldNamesFieldMapper.FieldNamesFieldType.get(false)).when(blContext).fieldNames();
318+
BlockLoader blockLoader = ft.blockLoader(blContext);
319+
320+
// then
321+
// verify that we don't delegate anything
322+
assertFalse(blockLoader instanceof BlockLoader.Delegating);
323+
}
324+
325+
public void test_block_loader_does_not_use_synthetic_source_delegate_when_ignore_above_is_set_at_index_level() {
326+
// given
327+
Settings settings = Settings.builder()
328+
.put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())
329+
.put(IndexSettings.MODE.getKey(), IndexMode.STANDARD)
330+
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
331+
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
332+
.put(IndexSettings.IGNORE_ABOVE_SETTING.getKey(), 123)
333+
.build();
334+
IndexSettings indexSettings = new IndexSettings(IndexMetadata.builder("index").settings(settings).build(), settings);
335+
MappingParserContext mappingParserContext = mock(MappingParserContext.class);
336+
doReturn(settings).when(mappingParserContext).getSettings();
337+
doReturn(indexSettings).when(mappingParserContext).getIndexSettings();
338+
doReturn(mock(ScriptCompiler.class)).when(mappingParserContext).scriptCompiler();
339+
340+
KeywordFieldMapper.Builder builder = new KeywordFieldMapper.Builder("child", mappingParserContext);
341+
342+
KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate = new KeywordFieldMapper.KeywordFieldType(
343+
"child",
344+
mock(FieldType.class),
345+
mock(NamedAnalyzer.class),
346+
mock(NamedAnalyzer.class),
347+
mock(NamedAnalyzer.class),
348+
builder,
349+
true
350+
);
351+
352+
MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType(
353+
"parent",
354+
new TextSearchInfo(TextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
355+
mock(NamedAnalyzer.class),
356+
true,
357+
Collections.emptyMap(),
358+
false,
359+
false,
360+
syntheticSourceDelegate
361+
);
362+
363+
// when
364+
MappedFieldType.BlockLoaderContext blContext = mock(MappedFieldType.BlockLoaderContext.class);
365+
doReturn(FieldNamesFieldMapper.FieldNamesFieldType.get(false)).when(blContext).fieldNames();
366+
BlockLoader blockLoader = ft.blockLoader(blContext);
367+
368+
// then
369+
// verify that we don't delegate anything
370+
assertFalse(blockLoader instanceof BlockLoader.Delegating);
371+
}
208372
}

modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/match_only_text/10_basic.yml

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,114 @@ synthetic_source match_only_text with ignored multi-field:
650650
- match:
651651
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
652652

653+
---
654+
synthetic_source match_only_text with ignored multi-field and multiple values in the same doc:
655+
- requires:
656+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
657+
reason: "Source mode configured through index setting"
658+
659+
- do:
660+
indices.create:
661+
index: synthetic_source_test
662+
body:
663+
settings:
664+
index:
665+
mapping.source.mode: synthetic
666+
mappings:
667+
properties:
668+
foo:
669+
type: match_only_text
670+
fields:
671+
raw:
672+
type: keyword
673+
ignore_above: 20
674+
675+
- do:
676+
index:
677+
index: synthetic_source_test
678+
id: "1"
679+
refresh: true
680+
body:
681+
foo: [ "This value is too long and will be ignored", "This value is short" ]
682+
683+
- do:
684+
search:
685+
index: synthetic_source_test
686+
body:
687+
query:
688+
match_phrase:
689+
foo: this value is
690+
691+
- match: { "hits.total.value": 1 }
692+
- match: { hits.hits.0._source.foo.0: "This value is too long and will be ignored" }
693+
- match: { hits.hits.0._source.foo.1: "This value is short" }
694+
695+
# now, flip the values around
696+
- do:
697+
index:
698+
index: synthetic_source_test
699+
id: "1"
700+
refresh: true
701+
body:
702+
foo: [ "This value is short", "This value is too long and will be ignored" ]
703+
704+
- do:
705+
search:
706+
index: synthetic_source_test
707+
body:
708+
query:
709+
match_phrase:
710+
foo: this value is
711+
712+
- match: { "hits.total.value": 1 }
713+
# the returned order is the same as in the first request (even though the input was flipped), since text fields currently don't take offsets into account
714+
- match: { hits.hits.0._source.foo.0: "This value is too long and will be ignored" }
715+
- match: { hits.hits.0._source.foo.1: "This value is short" }
716+
717+
---
718+
synthetic_source match_only_text with ignored multi-field and multiple values in the same doc and preserved order:
719+
- requires:
720+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
721+
reason: "Source mode configured through index setting"
722+
723+
- do:
724+
indices.create:
725+
index: synthetic_source_test
726+
body:
727+
settings:
728+
index:
729+
mapping.source.mode: synthetic
730+
mappings:
731+
properties:
732+
foo:
733+
type: match_only_text
734+
# this will force the order to be preserved
735+
synthetic_source_keep: arrays
736+
fields:
737+
raw:
738+
type: keyword
739+
ignore_above: 20
740+
741+
- do:
742+
index:
743+
index: synthetic_source_test
744+
id: "1"
745+
refresh: true
746+
body:
747+
foo: [ "This value is short", "This value is too long and will be ignored" ]
748+
749+
- do:
750+
search:
751+
index: synthetic_source_test
752+
body:
753+
query:
754+
match_phrase:
755+
foo: this value is
756+
757+
- match: { "hits.total.value": 1 }
758+
- match: { hits.hits.0._source.foo.0: "This value is short" }
759+
- match: { hits.hits.0._source.foo.1: "This value is too long and will be ignored" }
760+
653761
---
654762
synthetic_source match_only_text with stored multi-field:
655763
- requires:

muted-tests.yml

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -555,18 +555,6 @@ tests:
555555
- class: org.elasticsearch.discovery.DiscoveryDisruptionIT
556556
method: testElectMasterWithLatestVersion
557557
issue: https://github.com/elastic/elasticsearch/issues/134748
558-
- class: org.elasticsearch.xpack.logsdb.patternedtext.PatternedTextIntegrationTests
559-
method: testLargeValueIsStored
560-
issue: https://github.com/elastic/elasticsearch/issues/134760
561-
- class: org.elasticsearch.xpack.logsdb.patternedtext.PatternedTextIntegrationTests
562-
method: testSourceMatchAllManyValues
563-
issue: https://github.com/elastic/elasticsearch/issues/134761
564-
- class: org.elasticsearch.xpack.logsdb.patternedtext.PatternedTextIntegrationTests
565-
method: testQueryResultsSameAsMatchOnlyText
566-
issue: https://github.com/elastic/elasticsearch/issues/134762
567-
- class: org.elasticsearch.xpack.logsdb.patternedtext.PatternedTextIntegrationTests
568-
method: testSmallValueNotStored
569-
issue: https://github.com/elastic/elasticsearch/issues/134763
570558

571559
# Examples:
572560
#

0 commit comments

Comments
 (0)