Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions muted-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -522,9 +522,6 @@ tests:
- class: org.elasticsearch.xpack.ml.integration.TextEmbeddingQueryIT
method: testModelWithPrefixStrings
issue: https://github.com/elastic/elasticsearch/issues/133138
- class: org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT
method: test {p0=search.vectors/90_sparse_vector/Indexing and searching multi-value sparse vectors in >=8.15}
issue: https://github.com/elastic/elasticsearch/issues/133184
- class: org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT
method: test {p0=search.vectors/45_knn_search_byte/Vector rescoring has no effect for non-quantized vectors and provides same results as non-rescored knn}
issue: https://github.com/elastic/elasticsearch/issues/133187
Expand Down Expand Up @@ -558,9 +555,6 @@ tests:
- class: org.elasticsearch.test.rest.yaml.RcsCcsCommonYamlTestSuiteIT
method: test {p0=search/160_exists_query/Test exists query on date field in empty index}
issue: https://github.com/elastic/elasticsearch/issues/133439
- class: org.elasticsearch.multiproject.test.CoreWithMultipleProjectsClientYamlTestSuiteIT
method: test {yaml=search.vectors/90_sparse_vector/Indexing and searching multi-value sparse vectors in >=8.15}
issue: https://github.com/elastic/elasticsearch/issues/133442
- class: org.elasticsearch.xpack.test.rest.XPackRestIT
method: test {p0=esql/60_usage/Basic ESQL usage output (telemetry) non-snapshot version}
issue: https://github.com/elastic/elasticsearch/issues/133449
Expand All @@ -582,9 +576,6 @@ tests:
- class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT
method: test {csv-spec:spatial.ConvertFromStringParseError}
issue: https://github.com/elastic/elasticsearch/issues/133507
- class: org.elasticsearch.test.rest.yaml.RcsCcsCommonYamlTestSuiteIT
method: test {p0=search.vectors/90_sparse_vector/Indexing and searching multi-value sparse vectors in >=8.15}
issue: https://github.com/elastic/elasticsearch/issues/133508
- class: org.elasticsearch.test.rest.yaml.RcsCcsCommonYamlTestSuiteIT
method: test {p0=search/10_source_filtering/no filtering}
issue: https://github.com/elastic/elasticsearch/issues/133561
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.vectors.KnnSearchBuilder;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.xcontent.XContentBuilder;
Expand All @@ -22,6 +23,7 @@

import java.io.IOException;
import java.util.List;
import java.util.Map;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse;

Expand Down Expand Up @@ -178,4 +180,47 @@ public void testHnswEarlyTerminationQuery() {
});
}

/**
 * Exercises {@code exists} queries against a {@code sparse_vector} field.
 *
 * <p>Each loop iteration indexes four documents: one with a multi-valued sparse vector,
 * one with a single-valued sparse vector, one with an empty list (which produces no value
 * for the field) and one with an empty map (which does produce a value). The exists query
 * is therefore expected to match 3 documents per iteration. A terms aggregation on
 * {@code id} is attached so the search also exercises the aggregation path; only the
 * total hit count is asserted.
 */
public void testSparseVectorExists() throws IOException {
    String indexName = "sparse_vector_index";
    XContentBuilder mapping = XContentFactory.jsonBuilder()
        .startObject()
        .startObject("properties")
        .startObject("id")
        .field("type", "long")
        .endObject()
        .startObject(VECTOR_FIELD)
        .field("type", "sparse_vector")
        .endObject()
        .startObject("embeddings")
        .field("type", "sparse_vector")
        .endObject()
        .endObject()
        .endObject();
    // NOTE(review): 10 replicas with a single shard — presumably intentional to leave
    // replicas unassigned on a small test cluster; confirm if a smaller value suffices.
    Settings settings = Settings.builder()
        .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 10)
        .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
        .build();
    // Fix: use the indexName variable instead of repeating the string literal, so the
    // create call cannot drift out of sync with the index/search/refresh calls below.
    prepareCreate(indexName).setMapping(mapping).setSettings(settings).get();
    int loops = 10;
    for (int i = 0; i < loops; i++) {
        prepareIndex(indexName).setSource(VECTOR_FIELD, List.of(Map.of("dim", 1.0f), Map.of("dim", 12.0f)), "id", 1).get();
        prepareIndex(indexName).setSource(VECTOR_FIELD, Map.of("dim", 2.0f), "id", 2).get();
        // Empty list: no value is indexed for the field, so this doc must NOT match exists.
        prepareIndex(indexName).setSource(VECTOR_FIELD, List.of(), "id", 3).get();
        // Empty map: a (valueless) field entry is still created, so this doc DOES match exists.
        prepareIndex(indexName).setSource(VECTOR_FIELD, Map.of(), "id", 4).get();
        refresh(indexName);
    }
    TermsAggregationBuilder builder = new TermsAggregationBuilder("agg").field("id").size(1000);
    for (int i = 0; i < 10; i++) {
        assertResponse(
            client().prepareSearch(indexName)
                .setQuery(QueryBuilders.existsQuery(VECTOR_FIELD))
                .setTrackTotalHits(true)
                .setSize(30)
                .addAggregation(builder),
            resp -> {
                // 3 of the 4 documents per loop iteration carry a value for VECTOR_FIELD.
                assertEquals(3 * loops, resp.getHits().getTotalHits().value());
            }
        );
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,13 @@
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.logging.DeprecationCategory;
import org.elasticsearch.common.lucene.Lucene;
Expand All @@ -29,6 +33,7 @@
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.mapper.DocumentParserContext;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperBuilderContext;
import org.elasticsearch.index.mapper.MappingParserContext;
Expand Down Expand Up @@ -430,6 +435,7 @@ private static class SparseVectorSyntheticFieldLoader implements SourceLoader.Sy
private final String leafName;

private TermsEnum termsDocEnum;
private boolean hasValue;

private SparseVectorSyntheticFieldLoader(String fullPath, String leafName) {
this.fullPath = fullPath;
Expand All @@ -443,39 +449,60 @@ public Stream<Map.Entry<String, StoredFieldLoader>> storedFieldLoaders() {

@Override
public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException {
var fieldInfos = leafReader.getFieldInfos().fieldInfo(fullPath);
if (fieldInfos == null || fieldInfos.hasTermVectors() == false) {
return null;
// Use an exists query on _field_names to distinguish documents with no value
// from those containing an empty map.
var existsQuery = new TermQuery(new Term(FieldNamesFieldMapper.NAME, fullPath));
var searcher = new IndexSearcher(leafReader);
searcher.setQueryCache(null);
var scorer = searcher.createWeight(existsQuery, ScoreMode.COMPLETE_NO_SCORES, 0).scorer(searcher.getLeafContexts().getFirst());
if (scorer == null) {
return docId -> false;
}

var fieldInfos = leafReader.getFieldInfos().fieldInfo(fullPath);
boolean hasTermVectors = fieldInfos != null && fieldInfos.hasTermVectors();
return docId -> {
var terms = leafReader.termVectors().get(docId, fullPath);
if (terms == null) {
return false;
termsDocEnum = null;

if (scorer.iterator().docID() < docId) {
scorer.iterator().advance(docId);
}
termsDocEnum = terms.iterator();
if (termsDocEnum.next() == null) {
termsDocEnum = null;
return false;
if (scorer.iterator().docID() != docId) {
return hasValue = false;
}
return true;

if (hasTermVectors == false) {
return hasValue = true;
}

var terms = leafReader.termVectors().get(docId, fullPath);
if (terms != null) {
termsDocEnum = terms.iterator();
if (termsDocEnum.next() == null) {
termsDocEnum = null;
}
}
return hasValue = true;
};
}

@Override
public boolean hasValue() {
return termsDocEnum != null;
return hasValue;
}

@Override
public void write(XContentBuilder b) throws IOException {
assert termsDocEnum != null;
PostingsEnum reuse = null;
assert hasValue;
b.startObject(leafName);
do {
reuse = termsDocEnum.postings(reuse);
reuse.nextDoc();
b.field(termsDocEnum.term().utf8ToString(), XFeatureField.decodeFeatureValue(reuse.freq()));
} while (termsDocEnum.next() != null);
if (termsDocEnum != null) {
PostingsEnum reuse = null;
do {
reuse = termsDocEnum.postings(reuse);
reuse.nextDoc();
b.field(termsDocEnum.term().utf8ToString(), XFeatureField.decodeFeatureValue(reuse.freq()));
} while (termsDocEnum.next() != null);
}
b.endObject();
}

Expand All @@ -485,7 +512,10 @@ public void write(XContentBuilder b) throws IOException {
* @throws IOException if reading fails
*/
private Map<String, Float> copyAsMap() throws IOException {
assert termsDocEnum != null;
assert hasValue;
if (termsDocEnum == null) {
return Map.of();
}
Map<String, Float> tokenMap = new LinkedHashMap<>();
PostingsEnum reuse = null;
do {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,16 @@ protected void minimalMapping(XContentBuilder b) throws IOException {
b.field("type", "sparse_vector");
}

// An empty input array (e.g. "field": []) yields no indexed value for sparse_vector,
// so the generic mapper test for empty arrays does not apply here — presumably this
// interacts with the synthetic-source handling of empty values; TODO confirm against
// the base mapper test contract.
@Override
protected boolean supportsEmptyInputArray() {
return false;
}

// Signals to the base test that this mapper can surface a value (an empty object in
// synthetic source) even when the document supplied none — NOTE(review): assumed from
// the override returning true; verify against the base class's documented semantics.
@Override
protected boolean addsValueWhenNotSupplied() {
return true;
}

protected void minimalFieldMappingPreviousIndexDefaultsIncluded(XContentBuilder b) throws IOException {
b.field("type", "sparse_vector");
b.field("store", false);
Expand Down