Skip to content

Remove soar duplicate checking #132617

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ public int visit(KnnCollector knnCollector) throws IOException {
int limit = vectors - BULK_SIZE + 1;
int i = 0;
for (; i < limit; i += BULK_SIZE) {
final int docsToBulkScore = docToBulkScore(docIdsScratch, i, needsScoring);
final int docsToBulkScore = needsScoring == null ? BULK_SIZE : docToBulkScore(docIdsScratch, i, needsScoring);
if (docsToBulkScore == 0) {
continue;
}
Expand Down Expand Up @@ -476,7 +476,7 @@ public int visit(KnnCollector knnCollector) throws IOException {
// process tail
for (; i < vectors; i++) {
int doc = docIdsScratch[i];
if (needsScoring.test(doc)) {
if (needsScoring != null && needsScoring.test(doc)) {
quantizeQueryIfNecessary();
indexInput.seek(slicePos + i * quantizedByteLength);
float qcDist = osqVectorsScorer.quantizeScore(quantizedQueryScratch);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,8 @@ public final void search(String field, float[] target, KnnCollector knnCollector
}
int numVectors = rawVectorsReader.getFloatVectorValues(field).size();
BitSet visitedDocs = new FixedBitSet(state.segmentInfo.maxDoc() + 1);
IntPredicate needsScoring = docId -> {
if (acceptDocs != null && acceptDocs.get(docId) == false) {
IntPredicate needsScoring = acceptDocs == null ? null : docId -> {
if (acceptDocs.get(docId) == false) {
return false;
}
return visitedDocs.getAndSet(docId) == false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

package org.elasticsearch.search.vectors;

import com.carrotsearch.hppc.IntHashSet;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
Expand Down Expand Up @@ -115,7 +117,8 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException {
filterWeight = null;
}
// we request numCands as we are using it as an approximation measure
KnnCollectorManager knnCollectorManager = getKnnCollectorManager(numCands, indexSearcher);
// we need to ensure we are getting at least 2*k results to ensure we cover overspill duplicates
KnnCollectorManager knnCollectorManager = getKnnCollectorManager(Math.max(Math.round(2f * k), numCands), indexSearcher);
TaskExecutor taskExecutor = indexSearcher.getTaskExecutor();
List<LeafReaderContext> leafReaderContexts = reader.leaves();
List<Callable<TopDocs>> tasks = new ArrayList<>(leafReaderContexts.size());
Expand All @@ -135,12 +138,23 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException {

private TopDocs searchLeaf(LeafReaderContext ctx, Weight filterWeight, KnnCollectorManager knnCollectorManager) throws IOException {
TopDocs results = getLeafResults(ctx, filterWeight, knnCollectorManager);
if (ctx.docBase > 0) {
for (ScoreDoc scoreDoc : results.scoreDocs) {
IntHashSet dedup = new IntHashSet(results.scoreDocs.length * 4 / 3);
int deduplicateCount = 0;
for (ScoreDoc scoreDoc : results.scoreDocs) {
if (dedup.add(scoreDoc.doc)) {
deduplicateCount++;
}
}
ScoreDoc[] deduplicatedScoreDocs = new ScoreDoc[deduplicateCount];
dedup.clear();
int index = 0;
for (ScoreDoc scoreDoc : results.scoreDocs) {
if (dedup.add(scoreDoc.doc)) {
scoreDoc.doc += ctx.docBase;
deduplicatedScoreDocs[index++] = scoreDoc;
}
}
return results;
return new TopDocs(results.totalHits, deduplicatedScoreDocs);
}

TopDocs getLeafResults(LeafReaderContext ctx, Weight filterWeight, KnnCollectorManager knnCollectorManager) throws IOException {
Expand Down