Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions muted-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -525,9 +525,6 @@ tests:
- class: org.elasticsearch.xpack.ml.integration.TextEmbeddingQueryIT
method: testModelWithPrefixStrings
issue: https://github.com/elastic/elasticsearch/issues/133138
- class: org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT
method: test {p0=search.vectors/90_sparse_vector/Indexing and searching multi-value sparse vectors in >=8.15}
issue: https://github.com/elastic/elasticsearch/issues/133184
- class: org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT
method: test {p0=search.vectors/45_knn_search_byte/Vector rescoring has no effect for non-quantized vectors and provides same results as non-rescored knn}
issue: https://github.com/elastic/elasticsearch/issues/133187
Expand Down Expand Up @@ -585,9 +582,6 @@ tests:
- class: org.elasticsearch.test.rest.yaml.RcsCcsCommonYamlTestSuiteIT
method: test {p0=search/160_exists_query/Test exists query on date field in empty index}
issue: https://github.com/elastic/elasticsearch/issues/133439
- class: org.elasticsearch.multiproject.test.CoreWithMultipleProjectsClientYamlTestSuiteIT
method: test {yaml=search.vectors/90_sparse_vector/Indexing and searching multi-value sparse vectors in >=8.15}
issue: https://github.com/elastic/elasticsearch/issues/133442
- class: org.elasticsearch.xpack.test.rest.XPackRestIT
method: test {p0=esql/60_usage/Basic ESQL usage output (telemetry) non-snapshot version}
issue: https://github.com/elastic/elasticsearch/issues/133449
Expand All @@ -609,9 +603,6 @@ tests:
- class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT
method: test {csv-spec:spatial.ConvertFromStringParseError}
issue: https://github.com/elastic/elasticsearch/issues/133507
- class: org.elasticsearch.test.rest.yaml.RcsCcsCommonYamlTestSuiteIT
method: test {p0=search.vectors/90_sparse_vector/Indexing and searching multi-value sparse vectors in >=8.15}
issue: https://github.com/elastic/elasticsearch/issues/133508
- class: org.elasticsearch.test.rest.yaml.RcsCcsCommonYamlTestSuiteIT
method: test {p0=search/10_source_filtering/no filtering}
issue: https://github.com/elastic/elasticsearch/issues/133561
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.vectors.KnnSearchBuilder;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.xcontent.XContentBuilder;
Expand All @@ -22,6 +23,7 @@

import java.io.IOException;
import java.util.List;
import java.util.Map;

import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse;

Expand Down Expand Up @@ -178,4 +180,47 @@ public void testHnswEarlyTerminationQuery() {
});
}

/**
 * Indexes the same four documents over several rounds (a multi-value sparse vector, a single
 * sparse vector, an empty list and an empty map) and checks that an exists query on the sparse
 * vector field reports three hits per indexing round. The search is repeated several times and
 * carries a terms aggregation on {@code id}.
 */
public void testSparseVectorExists() throws IOException {
    // Mapping: a long "id" plus two sparse_vector fields.
    XContentBuilder mapping = XContentFactory.jsonBuilder()
        .startObject()
        .startObject("properties")
        .startObject("id")
        .field("type", "long")
        .endObject()
        .startObject(VECTOR_FIELD)
        .field("type", "sparse_vector")
        .endObject()
        .startObject("embeddings")
        .field("type", "sparse_vector")
        .endObject()
        .endObject()
        .endObject();
    Settings indexSettings = Settings.builder()
        .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
        .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 10)
        .build();
    prepareCreate(INDEX_NAME).setMapping(mapping).setSettings(indexSettings).get();

    final int rounds = 10;
    // Position in this list + 1 is the document's "id" value.
    final List<Object> vectorValues = List.of(
        List.of(Map.of("dim", 1.0f), Map.of("dim", 12.0f)),
        Map.of("dim", 2.0f),
        List.of(),
        Map.of()
    );
    for (int round = 0; round < rounds; round++) {
        for (int pos = 0; pos < vectorValues.size(); pos++) {
            prepareIndex(INDEX_NAME).setSource(VECTOR_FIELD, vectorValues.get(pos), "id", pos + 1).get();
        }
        // Refresh once per round, after all four documents of the round are indexed.
        refresh(INDEX_NAME);
    }

    TermsAggregationBuilder idTerms = new TermsAggregationBuilder("agg").field("id").size(1000);
    for (int attempt = 0; attempt < 10; attempt++) {
        var request = client().prepareSearch(INDEX_NAME)
            .setQuery(QueryBuilders.existsQuery(VECTOR_FIELD))
            .setTrackTotalHits(true)
            .setSize(30)
            .addAggregation(idTerms);
        assertResponse(request, resp -> assertEquals(3 * rounds, resp.getHits().getTotalHits().value()));
    }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,13 @@
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.logging.DeprecationCategory;
import org.elasticsearch.common.lucene.Lucene;
Expand All @@ -29,6 +33,7 @@
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.mapper.DocumentParserContext;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperBuilderContext;
import org.elasticsearch.index.mapper.MappingParserContext;
Expand Down Expand Up @@ -443,21 +448,35 @@ public Stream<Map.Entry<String, StoredFieldLoader>> storedFieldLoaders() {

// NOTE(review): this span looks like a rendered diff in which removed and added lines are
// interleaved without +/- markers: `terms` is declared twice inside the lambda and a
// `return false;` is directly followed by `return true;`. Read it as two overlapping versions
// of the method rather than as compilable code.
@Override
public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException {
// Use an exists query on _field_names to distinguish documents with no value
// from those containing an empty map.
var existsQuery = new TermQuery(new Term(FieldNamesFieldMapper.NAME, fullPath));
var searcher = new IndexSearcher(leafReader);
// Turn off query caching on this searcher.
searcher.setQueryCache(null);
var scorer = searcher.createWeight(existsQuery, ScoreMode.COMPLETE_NO_SCORES, 0).scorer(searcher.getLeafContexts().getFirst());
if (scorer == null) {
// No scorer for the exists query on this leaf: every doc is reported as having no value.
return docId -> false;
}

var fieldInfos = leafReader.getFieldInfos().fieldInfo(fullPath);
if (fieldInfos == null || fieldInfos.hasTermVectors() == false) {
return null;
}
return docId -> {
var terms = leafReader.termVectors().get(docId, fullPath);
if (terms == null) {
// Move the exists-query iterator forward when it is still positioned before docId.
if (scorer.iterator().docID() < docId) {
scorer.iterator().advance(docId);
}
// The exists query does not match docId, so the document has no value for this field.
if (scorer.iterator().docID() != docId) {
return false;
}
termsDocEnum = terms.iterator();
if (termsDocEnum.next() == null) {
var terms = leafReader.termVectors().get(docId, fullPath);
if (terms != null && (termsDocEnum = terms.iterator()) != null) {
return true;
} else {
// this is an empty map
termsDocEnum = null;
return false;
return true;
}
return true;
};
}

Expand All @@ -468,14 +487,15 @@ public boolean hasValue() {

// NOTE(review): this span looks like a rendered diff with removed and added lines interleaved
// without +/- markers: `PostingsEnum reuse` is declared twice, and the do/while loop appears
// both after `assert termsDocEnum != null` and inside `if (termsDocEnum != null)`.
// Writes an object named leafName with one field per term: the term text is the key and the
// value is decoded from the posting's frequency.
@Override
public void write(XContentBuilder b) throws IOException {
assert termsDocEnum != null;
PostingsEnum reuse = null;
b.startObject(leafName);
do {
reuse = termsDocEnum.postings(reuse);
// Step onto the first posting before reading freq() below.
reuse.nextDoc();
b.field(termsDocEnum.term().utf8ToString(), XFeatureField.decodeFeatureValue(reuse.freq()));
} while (termsDocEnum.next() != null);
// With no terms enum, only the surrounding startObject/endObject calls run.
if (termsDocEnum != null) {
PostingsEnum reuse = null;
do {
reuse = termsDocEnum.postings(reuse);
reuse.nextDoc();
b.field(termsDocEnum.term().utf8ToString(), XFeatureField.decodeFeatureValue(reuse.freq()));
} while (termsDocEnum.next() != null);
}
b.endObject();
}

Expand Down