Skip to content
Merged
6 changes: 6 additions & 0 deletions docs/changelog/92568.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 92568
summary: Support nested fields for term vectors API when using artificial documents
area: Search
type: enhancement
issues:
- 91902
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.MappingLookup;
import org.elasticsearch.index.mapper.NestedPathFieldMapper;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.mapper.SourceFieldMapper;
import org.elasticsearch.index.mapper.SourceToParse;
Expand Down Expand Up @@ -185,6 +186,11 @@ private static boolean isValidField(MappedFieldType fieldType) {
if (fieldType.isIndexed() == false) {
return false;
}
// and must not be the nested path field
if (fieldType.name().equals(NestedPathFieldMapper.NAME)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added this to the original PR

return false;
}

return true;
}

Expand Down Expand Up @@ -291,7 +297,13 @@ private static Fields generateTermVectors(
MemoryIndex index = new MemoryIndex(withOffsets);
for (Map.Entry<String, Collection<Object>> entry : values.entrySet()) {
String field = entry.getKey();
Analyzer analyzer = getAnalyzerAtField(indexShard, field, perFieldAnalyzer);
final Analyzer analyzer;
try {
analyzer = getAnalyzerAtField(indexShard, field, perFieldAnalyzer);
} catch (IllegalArgumentException e) {
// failed to get the analyzer for the given field, it could be a metadata field
continue;
}
if (entry.getValue() instanceof List) {
for (Object text : entry.getValue()) {
index.addField(field, text.toString(), analyzer);
Expand All @@ -310,25 +322,26 @@ private static Fields generateTermVectorsFromDoc(IndexShard indexShard, TermVect
MappingLookup mappingLookup = indexShard.mapperService().mappingLookup();
ParsedDocument parsedDocument = documentParser.parseDocument(source, mappingLookup);
// select the right fields and generate term vectors
LuceneDocument doc = parsedDocument.rootDoc();
Set<String> seenFields = new HashSet<>();
Collection<DocumentField> documentFields = new HashSet<>();
for (IndexableField field : doc.getFields()) {
MappedFieldType fieldType = indexShard.mapperService().fieldType(field.name());
if (isValidField(fieldType) == false) {
continue;
}
if (request.selectedFields() != null && request.selectedFields().contains(field.name()) == false) {
continue;
}
if (seenFields.contains(field.name())) {
continue;
} else {
seenFields.add(field.name());
final Set<String> seenFields = new HashSet<>();
final Collection<DocumentField> documentFields = new HashSet<>();
for (LuceneDocument doc : parsedDocument.docs()) {
for (IndexableField field : doc.getFields()) {
MappedFieldType fieldType = indexShard.mapperService().fieldType(field.name());
if (isValidField(fieldType) == false) {
continue;
}
if (request.selectedFields() != null && request.selectedFields().contains(field.name()) == false) {
continue;
}
if (seenFields.contains(field.name())) {
continue;
} else {
seenFields.add(field.name());
}
@SuppressWarnings("unchecked")
List<Object> values = (List) getValues(doc.getFields(field.name()));
documentFields.add(new DocumentField(field.name(), values));
}
@SuppressWarnings("unchecked")
List<Object> values = (List) getValues(doc.getFields(field.name()));
documentFields.add(new DocumentField(field.name(), values));
}
return generateTermVectors(
indexShard,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import static java.util.stream.Collectors.toList;
import static org.elasticsearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE;
import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.notNullValue;

Expand Down Expand Up @@ -147,4 +148,49 @@ public void testWithIndexedPhrases() throws IOException {
assertEquals(max, phraseIterator.docFreq());
}
}

/**
 * Checks that term vectors computed for an artificial (request-supplied) document
 * report the sub-fields of a {@code nested} object alongside the top-level fields.
 *
 * The test creates a single-shard index named {@code test} whose mapping has a text
 * field {@code group} and a nested object {@code user} with text sub-fields
 * {@code first} and {@code last}, then asks {@link TermVectorsService} for the term
 * vectors of a document supplied in the request and asserts on the returned field names.
 *
 * @throws IOException if building the JSON content or computing the term vectors fails
 */
public void testArtificialDocWithNestedFields() throws IOException {
    // Mapping: one top-level text field plus a nested object holding two text fields.
    final XContentBuilder nestedMapping = jsonBuilder().startObject()
        .startObject("_doc")
            .startObject("properties")
                .startObject("group")
                    .field("type", "text")
                .endObject()
                .startObject("user")
                    .field("type", "nested")
                    .startObject("properties")
                        .startObject("first")
                            .field("type", "text")
                        .endObject()
                        .startObject("last")
                            .field("type", "text")
                        .endObject()
                    .endObject()
                .endObject()
            .endObject()
        .endObject()
    .endObject();
    final Settings indexSettings = Settings.builder().put("number_of_shards", 1).build();
    createIndex("test", indexSettings, nestedMapping);
    ensureGreen();

    // Resolve shard 0 of the freshly created index; it must be present.
    final IndexService indexService = getInstanceFromNode(IndicesService.class).indexService(resolveIndex("test"));
    final IndexShard indexShard = indexService.getShardOrNull(0);
    assertThat(indexShard, notNullValue());

    // Artificial document: carried by the request itself, with a single nested "user" entry.
    final XContentBuilder artificialDoc = jsonBuilder().startObject()
        .field("group", "test")
        .startArray("user")
            .startObject()
                .field("first", "John")
                .field("last", "Smith")
            .endObject()
        .endArray()
    .endObject();
    final TermVectorsRequest termVectorsRequest = new TermVectorsRequest().doc(artificialDoc).termStatistics(true);

    // The returned field names must be exactly the top-level field and both nested sub-fields.
    final TermVectorsResponse termVectors = TermVectorsService.getTermVectors(indexShard, termVectorsRequest);
    assertThat(termVectors.getFields(), containsInAnyOrder("group", "user.first", "user.last"));
}
}