# Patch overview (unified diff touching three files).
#
# 1. docs/changelog/92568.yaml (new file)
#    Changelog entry for PR 92568: type "enhancement", area "Search",
#    linked to issue 91902.
#
# 2. server/.../index/termvectors/TermVectorsService.java
#    - Adds an import of NestedPathFieldMapper.
#    - isValidField: now also returns false when the field type's name equals
#      NestedPathFieldMapper.NAME.
#    - generateTermVectors: the getAnalyzerAtField call is wrapped in a
#      try/catch; when it throws IllegalArgumentException the field is skipped
#      (continue) and no terms are added to the MemoryIndex for it.
#    - generateTermVectorsFromDoc: the field-selection loop now runs over every
#      LuceneDocument in parsedDocument.docs() instead of only
#      parsedDocument.rootDoc(); the per-field filtering logic is otherwise
#      the same, just nested one level deeper.
#
# 3. server/.../index/termvectors/TermVectorsServiceTests.java
#    - Adds a static import of Hamcrest's containsInAnyOrder.
#    - Adds testArtificialDocWithNestedFields: creates index "test" with a text
#      field "group" and a nested field "user" holding text sub-fields "first"
#      and "last", builds a TermVectorsRequest from an artificial document with
#      one "user" entry, and asserts with containsInAnyOrder that the response
#      fields are "group", "user.first" and "user.last".
#
# NOTE(review): seenFields is shared across all documents in the new outer
#   loop, while values are read from the current doc only
#   (doc.getFields(field.name())). When the same field name occurs in more
#   than one document -- e.g. an artificial document with several nested
#   "user" objects -- it looks like only the first document's values reach
#   generateTermVectors. Confirm whether that is intended; the new test only
#   covers a single nested object.
# NOTE(review): the new catch block skips ANY field whose analyzer lookup
#   throws IllegalArgumentException, not only metadata fields as the inline
#   comment suggests. Verify that this cannot hide a genuine error for a
#   user-selected field.
# NOTE(review): this copy of the patch appears to have its line breaks
#   collapsed and its generic type arguments stripped (e.g. "Map.Entry> entry",
#   "Set seenFields", "List values = (List) ..."). Presumably an extraction
#   artifact -- compare against the original PR before applying.
#
diff --git a/docs/changelog/92568.yaml b/docs/changelog/92568.yaml new file mode 100644 index 0000000000000..a2388e2a9fcc0 --- /dev/null +++ b/docs/changelog/92568.yaml @@ -0,0 +1,6 @@ +pr: 92568 +summary: Support nested fields for term vectors API when using artificial documents +area: Search +type: enhancement +issues: + - 91902 diff --git a/server/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java b/server/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java index db84be817bbd7..1f7383a947a71 100644 --- a/server/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java +++ b/server/src/main/java/org/elasticsearch/index/termvectors/TermVectorsService.java @@ -35,6 +35,7 @@ import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MappingLookup; +import org.elasticsearch.index.mapper.NestedPathFieldMapper; import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.SourceFieldMapper; import org.elasticsearch.index.mapper.SourceToParse; @@ -185,6 +186,11 @@ private static boolean isValidField(MappedFieldType fieldType) { if (fieldType.isIndexed() == false) { return false; } + // and must not be the nested path field + if (fieldType.name().equals(NestedPathFieldMapper.NAME)) { + return false; + } + return true; } @@ -291,7 +297,13 @@ private static Fields generateTermVectors( MemoryIndex index = new MemoryIndex(withOffsets); for (Map.Entry> entry : values.entrySet()) { String field = entry.getKey(); - Analyzer analyzer = getAnalyzerAtField(indexShard, field, perFieldAnalyzer); + final Analyzer analyzer; + try { + analyzer = getAnalyzerAtField(indexShard, field, perFieldAnalyzer); + } catch (IllegalArgumentException e) { + // failed to get the analyzer for the given field, it could be a metadata field + continue; + } if (entry.getValue() instanceof List) { for (Object text : 
entry.getValue()) { index.addField(field, text.toString(), analyzer); @@ -310,25 +322,26 @@ private static Fields generateTermVectorsFromDoc(IndexShard indexShard, TermVect MappingLookup mappingLookup = indexShard.mapperService().mappingLookup(); ParsedDocument parsedDocument = documentParser.parseDocument(source, mappingLookup); // select the right fields and generate term vectors - LuceneDocument doc = parsedDocument.rootDoc(); - Set seenFields = new HashSet<>(); - Collection documentFields = new HashSet<>(); - for (IndexableField field : doc.getFields()) { - MappedFieldType fieldType = indexShard.mapperService().fieldType(field.name()); - if (isValidField(fieldType) == false) { - continue; - } - if (request.selectedFields() != null && request.selectedFields().contains(field.name()) == false) { - continue; - } - if (seenFields.contains(field.name())) { - continue; - } else { - seenFields.add(field.name()); + final Set seenFields = new HashSet<>(); + final Collection documentFields = new HashSet<>(); + for (LuceneDocument doc : parsedDocument.docs()) { + for (IndexableField field : doc.getFields()) { + MappedFieldType fieldType = indexShard.mapperService().fieldType(field.name()); + if (isValidField(fieldType) == false) { + continue; + } + if (request.selectedFields() != null && request.selectedFields().contains(field.name()) == false) { + continue; + } + if (seenFields.contains(field.name())) { + continue; + } else { + seenFields.add(field.name()); + } + @SuppressWarnings("unchecked") + List values = (List) getValues(doc.getFields(field.name())); + documentFields.add(new DocumentField(field.name(), values)); } - @SuppressWarnings("unchecked") - List values = (List) getValues(doc.getFields(field.name())); - documentFields.add(new DocumentField(field.name(), values)); } return generateTermVectors( indexShard, diff --git a/server/src/test/java/org/elasticsearch/index/termvectors/TermVectorsServiceTests.java 
b/server/src/test/java/org/elasticsearch/index/termvectors/TermVectorsServiceTests.java index 3e3e7bde94891..ffd689c6769a4 100644 --- a/server/src/test/java/org/elasticsearch/index/termvectors/TermVectorsServiceTests.java +++ b/server/src/test/java/org/elasticsearch/index/termvectors/TermVectorsServiceTests.java @@ -30,6 +30,7 @@ import static java.util.stream.Collectors.toList; import static org.elasticsearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE; import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; +import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.notNullValue; @@ -147,4 +148,49 @@ public void testWithIndexedPhrases() throws IOException { assertEquals(max, phraseIterator.docFreq()); } } + + public void testArtificialDocWithNestedFields() throws IOException { + final XContentBuilder mapping = jsonBuilder().startObject() + .startObject("_doc") + .startObject("properties") + .startObject("group") + .field("type", "text") + .endObject() + .startObject("user") + .field("type", "nested") + .startObject("properties") + .startObject("first") + .field("type", "text") + .endObject() + .startObject("last") + .field("type", "text") + .endObject() + .endObject() + .endObject() + .endObject() + .endObject() + .endObject(); + final Settings settings = Settings.builder().put("number_of_shards", 1).build(); + createIndex("test", settings, mapping); + ensureGreen(); + + final TermVectorsRequest request = new TermVectorsRequest().doc( + jsonBuilder().startObject() + .field("group", "test") + .startArray("user") + .startObject() + .field("first", "John") + .field("last", "Smith") + .endObject() + .endArray() + .endObject() + ).termStatistics(true); + + final IndicesService indicesService = getInstanceFromNode(IndicesService.class); + final IndexService test = indicesService.indexService(resolveIndex("test")); + final IndexShard shard = test.getShardOrNull(0); + 
assertThat(shard, notNullValue()); + final TermVectorsResponse response = TermVectorsService.getTermVectors(shard, request); + assertThat(response.getFields(), containsInAnyOrder("group", "user.first", "user.last")); + } }