Skip to content

Commit bb79d38

Browse files
committed
adding test case for empty nested vector search
1 parent 8b9c350 commit bb79d38

File tree

2 files changed

+130
-0
lines changed

2 files changed

+130
-0
lines changed

server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.apache.lucene.codecs.KnnVectorsReader;
2626
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
2727
import org.apache.lucene.document.Document;
28+
import org.apache.lucene.document.Field;
2829
import org.apache.lucene.document.KnnFloatVectorField;
2930
import org.apache.lucene.index.CodecReader;
3031
import org.apache.lucene.index.DirectoryReader;
@@ -34,12 +35,21 @@
3435
import org.apache.lucene.index.IndexWriterConfig;
3536
import org.apache.lucene.index.KnnVectorValues;
3637
import org.apache.lucene.index.LeafReader;
38+
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
39+
import org.apache.lucene.index.Term;
3740
import org.apache.lucene.index.VectorSimilarityFunction;
41+
import org.apache.lucene.search.FieldExistsQuery;
3842
import org.apache.lucene.search.IndexSearcher;
3943
import org.apache.lucene.search.KnnFloatVectorQuery;
44+
import org.apache.lucene.search.MatchAllDocsQuery;
4045
import org.apache.lucene.search.Query;
46+
import org.apache.lucene.search.TermQuery;
4147
import org.apache.lucene.search.TopDocs;
4248
import org.apache.lucene.search.TotalHits;
49+
import org.apache.lucene.search.join.BitSetProducer;
50+
import org.apache.lucene.search.join.CheckJoinIndex;
51+
import org.apache.lucene.search.join.DiversifyingChildrenFloatKnnVectorQuery;
52+
import org.apache.lucene.search.join.QueryBitSetProducer;
4353
import org.apache.lucene.store.Directory;
4454
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
4555
import org.apache.lucene.tests.util.TestUtil;
@@ -48,6 +58,9 @@
4858
import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils;
4959

5060
import java.io.IOException;
61+
import java.util.ArrayList;
62+
import java.util.Arrays;
63+
import java.util.List;
5164
import java.util.Locale;
5265

5366
import static java.lang.String.format;
@@ -70,6 +83,58 @@ protected Codec getCodec() {
7083
return codec;
7184
}
7285

86+
static String encodeInts(int[] i) {
87+
return Arrays.toString(i);
88+
}
89+
90+
static BitSetProducer parentFilter(IndexReader r) throws IOException {
91+
// Create a filter that defines "parent" documents in the index
92+
BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "_parent")));
93+
CheckJoinIndex.check(r, parentsFilter);
94+
return parentsFilter;
95+
}
96+
97+
Document makeParent(int[] children) {
98+
Document parent = new Document();
99+
parent.add(newStringField("docType", "_parent", Field.Store.NO));
100+
parent.add(newStringField("id", encodeInts(children), Field.Store.YES));
101+
return parent;
102+
}
103+
104+
public void testEmptyDiversifiedChildSearch() throws Exception {
105+
String fieldName = "field";
106+
int dims = random().nextInt(4, 65);
107+
float[] vector = randomVector(dims);
108+
VectorSimilarityFunction similarityFunction = VectorSimilarityFunction.EUCLIDEAN;
109+
try (Directory d = newDirectory()) {
110+
IndexWriterConfig iwc = newIndexWriterConfig().setCodec(codec);
111+
iwc.setMergePolicy(new SoftDeletesRetentionMergePolicy("soft_delete", MatchAllDocsQuery::new, iwc.getMergePolicy()));
112+
try (IndexWriter w = new IndexWriter(d, iwc)) {
113+
List<Document> toAdd = new ArrayList<>();
114+
for (int j = 1; j <= 5; j++) {
115+
Document doc = new Document();
116+
doc.add(new KnnFloatVectorField(fieldName, vector, similarityFunction));
117+
doc.add(newStringField("id", Integer.toString(j), Field.Store.YES));
118+
toAdd.add(doc);
119+
}
120+
toAdd.add(makeParent(new int[] { 1, 2, 3, 4, 5 }));
121+
w.addDocuments(toAdd);
122+
w.addDocuments(List.of(makeParent(new int[] { 6, 7, 8, 9, 10 })));
123+
w.deleteDocuments(new FieldExistsQuery(fieldName), new TermQuery(new Term("id", encodeInts(new int[] { 1, 2, 3, 4, 5 }))));
124+
w.flush();
125+
w.commit();
126+
w.forceMerge(1);
127+
try (IndexReader reader = DirectoryReader.open(w)) {
128+
IndexSearcher searcher = new IndexSearcher(reader);
129+
BitSetProducer parentFilter = parentFilter(searcher.getIndexReader());
130+
Query query = new DiversifyingChildrenFloatKnnVectorQuery(fieldName, vector, null, 1, parentFilter);
131+
assertTrue(searcher.search(query, 1).scoreDocs.length == 0);
132+
}
133+
}
134+
135+
}
136+
}
137+
73138
public void testSearch() throws Exception {
74139
String fieldName = "field";
75140
int numVectors = random().nextInt(99, 500);

server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.apache.lucene.codecs.KnnVectorsReader;
2626
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
2727
import org.apache.lucene.document.Document;
28+
import org.apache.lucene.document.Field;
2829
import org.apache.lucene.document.KnnFloatVectorField;
2930
import org.apache.lucene.index.CodecReader;
3031
import org.apache.lucene.index.DirectoryReader;
@@ -34,13 +35,22 @@
3435
import org.apache.lucene.index.IndexWriterConfig;
3536
import org.apache.lucene.index.KnnVectorValues;
3637
import org.apache.lucene.index.LeafReader;
38+
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
39+
import org.apache.lucene.index.Term;
3740
import org.apache.lucene.index.VectorSimilarityFunction;
3841
import org.apache.lucene.misc.store.DirectIODirectory;
42+
import org.apache.lucene.search.FieldExistsQuery;
3943
import org.apache.lucene.search.IndexSearcher;
4044
import org.apache.lucene.search.KnnFloatVectorQuery;
45+
import org.apache.lucene.search.MatchAllDocsQuery;
4146
import org.apache.lucene.search.Query;
47+
import org.apache.lucene.search.TermQuery;
4248
import org.apache.lucene.search.TopDocs;
4349
import org.apache.lucene.search.TotalHits;
50+
import org.apache.lucene.search.join.BitSetProducer;
51+
import org.apache.lucene.search.join.CheckJoinIndex;
52+
import org.apache.lucene.search.join.DiversifyingChildrenFloatKnnVectorQuery;
53+
import org.apache.lucene.search.join.QueryBitSetProducer;
4454
import org.apache.lucene.store.Directory;
4555
import org.apache.lucene.store.FSDirectory;
4656
import org.apache.lucene.store.IOContext;
@@ -64,6 +74,9 @@
6474
import java.io.IOException;
6575
import java.nio.file.Files;
6676
import java.nio.file.Path;
77+
import java.util.ArrayList;
78+
import java.util.Arrays;
79+
import java.util.List;
6780
import java.util.Locale;
6881
import java.util.OptionalLong;
6982

@@ -87,6 +100,58 @@ protected Codec getCodec() {
87100
return codec;
88101
}
89102

103+
static String encodeInts(int[] i) {
104+
return Arrays.toString(i);
105+
}
106+
107+
static BitSetProducer parentFilter(IndexReader r) throws IOException {
108+
// Create a filter that defines "parent" documents in the index
109+
BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "_parent")));
110+
CheckJoinIndex.check(r, parentsFilter);
111+
return parentsFilter;
112+
}
113+
114+
Document makeParent(int[] children) {
115+
Document parent = new Document();
116+
parent.add(newStringField("docType", "_parent", Field.Store.NO));
117+
parent.add(newStringField("id", encodeInts(children), Field.Store.YES));
118+
return parent;
119+
}
120+
121+
public void testEmptyDiversifiedChildSearch() throws Exception {
122+
String fieldName = "field";
123+
int dims = random().nextInt(4, 65);
124+
float[] vector = randomVector(dims);
125+
VectorSimilarityFunction similarityFunction = VectorSimilarityFunction.EUCLIDEAN;
126+
try (Directory d = newDirectory()) {
127+
IndexWriterConfig iwc = newIndexWriterConfig().setCodec(codec);
128+
iwc.setMergePolicy(new SoftDeletesRetentionMergePolicy("soft_delete", MatchAllDocsQuery::new, iwc.getMergePolicy()));
129+
try (IndexWriter w = new IndexWriter(d, iwc)) {
130+
List<Document> toAdd = new ArrayList<>();
131+
for (int j = 1; j <= 5; j++) {
132+
Document doc = new Document();
133+
doc.add(new KnnFloatVectorField(fieldName, vector, similarityFunction));
134+
doc.add(newStringField("id", Integer.toString(j), Field.Store.YES));
135+
toAdd.add(doc);
136+
}
137+
toAdd.add(makeParent(new int[] { 1, 2, 3, 4, 5 }));
138+
w.addDocuments(toAdd);
139+
w.addDocuments(List.of(makeParent(new int[] { 6, 7, 8, 9, 10 })));
140+
w.deleteDocuments(new FieldExistsQuery(fieldName), new TermQuery(new Term("id", encodeInts(new int[] { 1, 2, 3, 4, 5 }))));
141+
w.flush();
142+
w.commit();
143+
w.forceMerge(1);
144+
try (IndexReader reader = DirectoryReader.open(w)) {
145+
IndexSearcher searcher = new IndexSearcher(reader);
146+
BitSetProducer parentFilter = parentFilter(searcher.getIndexReader());
147+
Query query = new DiversifyingChildrenFloatKnnVectorQuery(fieldName, vector, null, 1, parentFilter);
148+
assertTrue(searcher.search(query, 1).scoreDocs.length == 0);
149+
}
150+
}
151+
152+
}
153+
}
154+
90155
public void testSearch() throws Exception {
91156
String fieldName = "field";
92157
int numVectors = random().nextInt(99, 500);

0 commit comments

Comments
 (0)