Skip to content

Commit 68eff34

Browse files
authored
Handle nested fields with the termvectors REST API in artificial docs (#92568)
1 parent 47395ff commit 68eff34

File tree

3 files changed: +84 −19 lines changed

docs/changelog/92568.yaml

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
pr: 92568
2+
summary: Support nested fields for term vectors API when using artificial documents
3+
area: Search
4+
type: enhancement
5+
issues:
6+
- 91902

server/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java

Lines changed: 32 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,7 @@
3535
import org.elasticsearch.index.mapper.MappedFieldType;
3636
import org.elasticsearch.index.mapper.MapperService;
3737
import org.elasticsearch.index.mapper.MappingLookup;
38+
import org.elasticsearch.index.mapper.NestedPathFieldMapper;
3839
import org.elasticsearch.index.mapper.ParsedDocument;
3940
import org.elasticsearch.index.mapper.SourceFieldMapper;
4041
import org.elasticsearch.index.mapper.SourceToParse;
@@ -185,6 +186,11 @@ private static boolean isValidField(MappedFieldType fieldType) {
185186
if (fieldType.isIndexed() == false) {
186187
return false;
187188
}
189+
// and must not be the nested path field
190+
if (fieldType.name().equals(NestedPathFieldMapper.NAME)) {
191+
return false;
192+
}
193+
188194
return true;
189195
}
190196

@@ -291,7 +297,13 @@ private static Fields generateTermVectors(
291297
MemoryIndex index = new MemoryIndex(withOffsets);
292298
for (Map.Entry<String, Collection<Object>> entry : values.entrySet()) {
293299
String field = entry.getKey();
294-
Analyzer analyzer = getAnalyzerAtField(indexShard, field, perFieldAnalyzer);
300+
final Analyzer analyzer;
301+
try {
302+
analyzer = getAnalyzerAtField(indexShard, field, perFieldAnalyzer);
303+
} catch (IllegalArgumentException e) {
304+
// failed to get the analyzer for the given field, it could be a metadata field
305+
continue;
306+
}
295307
if (entry.getValue() instanceof List) {
296308
for (Object text : entry.getValue()) {
297309
index.addField(field, text.toString(), analyzer);
@@ -310,25 +322,26 @@ private static Fields generateTermVectorsFromDoc(IndexShard indexShard, TermVect
310322
MappingLookup mappingLookup = indexShard.mapperService().mappingLookup();
311323
ParsedDocument parsedDocument = documentParser.parseDocument(source, mappingLookup);
312324
// select the right fields and generate term vectors
313-
LuceneDocument doc = parsedDocument.rootDoc();
314-
Set<String> seenFields = new HashSet<>();
315-
Collection<DocumentField> documentFields = new HashSet<>();
316-
for (IndexableField field : doc.getFields()) {
317-
MappedFieldType fieldType = indexShard.mapperService().fieldType(field.name());
318-
if (isValidField(fieldType) == false) {
319-
continue;
320-
}
321-
if (request.selectedFields() != null && request.selectedFields().contains(field.name()) == false) {
322-
continue;
323-
}
324-
if (seenFields.contains(field.name())) {
325-
continue;
326-
} else {
327-
seenFields.add(field.name());
325+
final Set<String> seenFields = new HashSet<>();
326+
final Collection<DocumentField> documentFields = new HashSet<>();
327+
for (LuceneDocument doc : parsedDocument.docs()) {
328+
for (IndexableField field : doc.getFields()) {
329+
MappedFieldType fieldType = indexShard.mapperService().fieldType(field.name());
330+
if (isValidField(fieldType) == false) {
331+
continue;
332+
}
333+
if (request.selectedFields() != null && request.selectedFields().contains(field.name()) == false) {
334+
continue;
335+
}
336+
if (seenFields.contains(field.name())) {
337+
continue;
338+
} else {
339+
seenFields.add(field.name());
340+
}
341+
@SuppressWarnings("unchecked")
342+
List<Object> values = (List) getValues(doc.getFields(field.name()));
343+
documentFields.add(new DocumentField(field.name(), values));
328344
}
329-
@SuppressWarnings("unchecked")
330-
List<Object> values = (List) getValues(doc.getFields(field.name()));
331-
documentFields.add(new DocumentField(field.name(), values));
332345
}
333346
return generateTermVectors(
334347
indexShard,

server/src/test/java/org/elasticsearch/index/termvectors/TermVectorsServiceTests.java

Lines changed: 46 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
import static java.util.stream.Collectors.toList;
3131
import static org.elasticsearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE;
3232
import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
33+
import static org.hamcrest.Matchers.containsInAnyOrder;
3334
import static org.hamcrest.Matchers.equalTo;
3435
import static org.hamcrest.Matchers.notNullValue;
3536

@@ -147,4 +148,49 @@ public void testWithIndexedPhrases() throws IOException {
147148
assertEquals(max, phraseIterator.docFreq());
148149
}
149150
}
151+
152+
public void testArtificialDocWithNestedFields() throws IOException {
153+
final XContentBuilder mapping = jsonBuilder().startObject()
154+
.startObject("_doc")
155+
.startObject("properties")
156+
.startObject("group")
157+
.field("type", "text")
158+
.endObject()
159+
.startObject("user")
160+
.field("type", "nested")
161+
.startObject("properties")
162+
.startObject("first")
163+
.field("type", "text")
164+
.endObject()
165+
.startObject("last")
166+
.field("type", "text")
167+
.endObject()
168+
.endObject()
169+
.endObject()
170+
.endObject()
171+
.endObject()
172+
.endObject();
173+
final Settings settings = Settings.builder().put("number_of_shards", 1).build();
174+
createIndex("test", settings, mapping);
175+
ensureGreen();
176+
177+
final TermVectorsRequest request = new TermVectorsRequest().doc(
178+
jsonBuilder().startObject()
179+
.field("group", "test")
180+
.startArray("user")
181+
.startObject()
182+
.field("first", "John")
183+
.field("last", "Smith")
184+
.endObject()
185+
.endArray()
186+
.endObject()
187+
).termStatistics(true);
188+
189+
final IndicesService indicesService = getInstanceFromNode(IndicesService.class);
190+
final IndexService test = indicesService.indexService(resolveIndex("test"));
191+
final IndexShard shard = test.getShardOrNull(0);
192+
assertThat(shard, notNullValue());
193+
final TermVectorsResponse response = TermVectorsService.getTermVectors(shard, request);
194+
assertThat(response.getFields(), containsInAnyOrder("group", "user.first", "user.last"));
195+
}
150196
}

0 commit comments

Comments
 (0)