Skip to content

Commit 7b1899e

Browse files
martijnvg authored and not-napoleon committed
Change match_only_text's value fetcher to use SortedBinaryDocValues instead of interacting with the doc values API directly.
This way, via the field data abstraction, the right doc values type is used and the right conversions happen: values of all field types get converted to strings.
1 parent bcc0e9a commit 7b1899e

File tree

2 files changed

+162
-10
lines changed

2 files changed

+162
-10
lines changed

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 13 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@
1414
import org.apache.lucene.document.Field;
1515
import org.apache.lucene.document.FieldType;
1616
import org.apache.lucene.document.StoredField;
17-
import org.apache.lucene.index.DocValues;
1817
import org.apache.lucene.index.IndexOptions;
1918
import org.apache.lucene.index.LeafReaderContext;
2019
import org.apache.lucene.index.Term;
@@ -48,6 +47,7 @@
4847
import org.elasticsearch.index.mapper.BlockStoredFieldsReader;
4948
import org.elasticsearch.index.mapper.DocumentParserContext;
5049
import org.elasticsearch.index.mapper.FieldMapper;
50+
import org.elasticsearch.index.mapper.MappedFieldType;
5151
import org.elasticsearch.index.mapper.MapperBuilderContext;
5252
import org.elasticsearch.index.mapper.SourceValueFetcher;
5353
import org.elasticsearch.index.mapper.StringFieldType;
@@ -254,7 +254,8 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
254254
if (parent.isStored()) {
255255
return storedFieldFetcher(parentField);
256256
} else if (parent.hasDocValues()) {
257-
return docValuesFieldFetcher(parentField);
257+
var ifd = searchExecutionContext.getForField(parent, MappedFieldType.FielddataOperation.SEARCH);
258+
return docValuesFieldFetcher(ifd);
258259
} else {
259260
assert false : "parent field should either be stored or have doc values";
260261
}
@@ -266,7 +267,8 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
266267
if (fieldType.isStored()) {
267268
return storedFieldFetcher(fieldType.name());
268269
} else if (fieldType.hasDocValues()) {
269-
return docValuesFieldFetcher(fieldType.name());
270+
var ifd = searchExecutionContext.getForField(fieldType, MappedFieldType.FielddataOperation.SEARCH);
271+
return docValuesFieldFetcher(ifd);
270272
} else {
271273
assert false : "multi field should either be stored or have doc values";
272274
}
@@ -291,15 +293,16 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
291293
};
292294
}
293295

294-
private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> docValuesFieldFetcher(String name) {
296+
private static IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> docValuesFieldFetcher(
297+
IndexFieldData<?> ifd
298+
) {
295299
return context -> {
296-
var sortedDocValues = DocValues.getSortedSet(context.reader(), name);
300+
var sortedBinaryDocValues = ifd.load(context).getBytesValues();
297301
return docId -> {
298-
if (sortedDocValues.advanceExact(docId)) {
299-
var values = new ArrayList<>(sortedDocValues.docValueCount());
300-
for (int i = 0; i < sortedDocValues.docValueCount(); i++) {
301-
long ord = sortedDocValues.nextOrd();
302-
values.add(sortedDocValues.lookupOrd(ord).utf8ToString());
302+
if (sortedBinaryDocValues.advanceExact(docId)) {
303+
var values = new ArrayList<>(sortedBinaryDocValues.docValueCount());
304+
for (int i = 0; i < sortedBinaryDocValues.docValueCount(); i++) {
305+
values.add(sortedBinaryDocValues.nextValue().utf8ToString());
303306
}
304307
return values;
305308
} else {
Lines changed: 149 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,149 @@
1+
---
2+
synthetic_source match_only_text with wildcard as parent field:
3+
- requires:
4+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
5+
reason: "Source mode configured through index setting"
6+
7+
- do:
8+
indices.create:
9+
index: synthetic_source_test
10+
body:
11+
settings:
12+
index:
13+
mapping.source.mode: synthetic
14+
mappings:
15+
properties:
16+
foo:
17+
type: wildcard
18+
fields:
19+
text:
20+
type: match_only_text
21+
22+
- do:
23+
index:
24+
index: synthetic_source_test
25+
id: "1"
26+
refresh: true
27+
body:
28+
foo: "Apache Lucene powers Elasticsearch"
29+
30+
- do:
31+
search:
32+
index: synthetic_source_test
33+
body:
34+
query:
35+
match_phrase:
36+
foo.text: apache lucene
37+
38+
- match: { "hits.total.value": 1 }
39+
- match:
40+
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"
41+
42+
---
43+
synthetic_source match_only_text with number as parent field:
44+
- requires:
45+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
46+
reason: "Source mode configured through index setting"
47+
48+
- do:
49+
indices.create:
50+
index: synthetic_source_test
51+
body:
52+
settings:
53+
index:
54+
mapping.source.mode: synthetic
55+
mappings:
56+
properties:
57+
foo:
58+
type: long
59+
fields:
60+
text:
61+
type: match_only_text
62+
63+
- do:
64+
index:
65+
index: synthetic_source_test
66+
id: "1"
67+
refresh: true
68+
body:
69+
foo: [1, 5]
70+
71+
- do:
72+
search:
73+
index: synthetic_source_test
74+
body:
75+
query:
76+
match_phrase:
77+
foo.text: 1 5
78+
79+
- match: { "hits.total.value": 0 }
80+
81+
- do:
82+
indices.create:
83+
index: stored_source_test
84+
body:
85+
mappings:
86+
properties:
87+
foo:
88+
type: long
89+
fields:
90+
text:
91+
type: match_only_text
92+
93+
- do:
94+
index:
95+
index: stored_source_test
96+
id: "1"
97+
refresh: true
98+
body:
99+
foo: [1, 5]
100+
101+
- do:
102+
search:
103+
index: stored_source_test
104+
body:
105+
query:
106+
match_phrase:
107+
foo.text: 1 5
108+
109+
- match: { "hits.total.value": 0 }
110+
111+
---
112+
synthetic_source match_only_text with scaled_float as parent field:
113+
- requires:
114+
cluster_features: [ "mapper.source.mode_from_index_setting" ]
115+
reason: "Source mode configured through index setting"
116+
117+
- do:
118+
indices.create:
119+
index: synthetic_source_test
120+
body:
121+
settings:
122+
index:
123+
mapping.source.mode: synthetic
124+
mappings:
125+
properties:
126+
foo:
127+
type: scaled_float
128+
scaling_factor: 10
129+
fields:
130+
text:
131+
type: match_only_text
132+
133+
- do:
134+
index:
135+
index: synthetic_source_test
136+
id: "1"
137+
refresh: true
138+
body:
139+
foo: [1.1, 5.5]
140+
141+
- do:
142+
search:
143+
index: synthetic_source_test
144+
body:
145+
query:
146+
match_phrase:
147+
foo.text: 1.1 5.5
148+
149+
- match: { "hits.total.value": 0 }

0 commit comments

Comments
 (0)