2525import org .apache .lucene .codecs .KnnVectorsReader ;
2626import org .apache .lucene .codecs .perfield .PerFieldKnnVectorsFormat ;
2727import org .apache .lucene .document .Document ;
28+ import org .apache .lucene .document .Field ;
2829import org .apache .lucene .document .KnnFloatVectorField ;
2930import org .apache .lucene .index .CodecReader ;
3031import org .apache .lucene .index .DirectoryReader ;
3435import org .apache .lucene .index .IndexWriterConfig ;
3536import org .apache .lucene .index .KnnVectorValues ;
3637import org .apache .lucene .index .LeafReader ;
38+ import org .apache .lucene .index .SoftDeletesRetentionMergePolicy ;
39+ import org .apache .lucene .index .Term ;
3740import org .apache .lucene .index .VectorSimilarityFunction ;
3841import org .apache .lucene .misc .store .DirectIODirectory ;
42+ import org .apache .lucene .search .FieldExistsQuery ;
3943import org .apache .lucene .search .IndexSearcher ;
4044import org .apache .lucene .search .KnnFloatVectorQuery ;
45+ import org .apache .lucene .search .MatchAllDocsQuery ;
4146import org .apache .lucene .search .Query ;
47+ import org .apache .lucene .search .TermQuery ;
4248import org .apache .lucene .search .TopDocs ;
4349import org .apache .lucene .search .TotalHits ;
50+ import org .apache .lucene .search .join .BitSetProducer ;
51+ import org .apache .lucene .search .join .CheckJoinIndex ;
52+ import org .apache .lucene .search .join .DiversifyingChildrenFloatKnnVectorQuery ;
53+ import org .apache .lucene .search .join .QueryBitSetProducer ;
4454import org .apache .lucene .store .Directory ;
4555import org .apache .lucene .store .FSDirectory ;
4656import org .apache .lucene .store .IOContext ;
6474import java .io .IOException ;
6575import java .nio .file .Files ;
6676import java .nio .file .Path ;
77+ import java .util .ArrayList ;
78+ import java .util .Arrays ;
79+ import java .util .List ;
6780import java .util .Locale ;
6881import java .util .OptionalLong ;
6982
@@ -87,6 +100,58 @@ protected Codec getCodec() {
87100 return codec ;
88101 }
89102
103+ static String encodeInts (int [] i ) {
104+ return Arrays .toString (i );
105+ }
106+
107+ static BitSetProducer parentFilter (IndexReader r ) throws IOException {
108+ // Create a filter that defines "parent" documents in the index
109+ BitSetProducer parentsFilter = new QueryBitSetProducer (new TermQuery (new Term ("docType" , "_parent" )));
110+ CheckJoinIndex .check (r , parentsFilter );
111+ return parentsFilter ;
112+ }
113+
114+ Document makeParent (int [] children ) {
115+ Document parent = new Document ();
116+ parent .add (newStringField ("docType" , "_parent" , Field .Store .NO ));
117+ parent .add (newStringField ("id" , encodeInts (children ), Field .Store .YES ));
118+ return parent ;
119+ }
120+
121+ public void testEmptyDiversifiedChildSearch () throws Exception {
122+ String fieldName = "field" ;
123+ int dims = random ().nextInt (4 , 65 );
124+ float [] vector = randomVector (dims );
125+ VectorSimilarityFunction similarityFunction = VectorSimilarityFunction .EUCLIDEAN ;
126+ try (Directory d = newDirectory ()) {
127+ IndexWriterConfig iwc = newIndexWriterConfig ().setCodec (codec );
128+ iwc .setMergePolicy (new SoftDeletesRetentionMergePolicy ("soft_delete" , MatchAllDocsQuery ::new , iwc .getMergePolicy ()));
129+ try (IndexWriter w = new IndexWriter (d , iwc )) {
130+ List <Document > toAdd = new ArrayList <>();
131+ for (int j = 1 ; j <= 5 ; j ++) {
132+ Document doc = new Document ();
133+ doc .add (new KnnFloatVectorField (fieldName , vector , similarityFunction ));
134+ doc .add (newStringField ("id" , Integer .toString (j ), Field .Store .YES ));
135+ toAdd .add (doc );
136+ }
137+ toAdd .add (makeParent (new int [] { 1 , 2 , 3 , 4 , 5 }));
138+ w .addDocuments (toAdd );
139+ w .addDocuments (List .of (makeParent (new int [] { 6 , 7 , 8 , 9 , 10 })));
140+ w .deleteDocuments (new FieldExistsQuery (fieldName ), new TermQuery (new Term ("id" , encodeInts (new int [] { 1 , 2 , 3 , 4 , 5 }))));
141+ w .flush ();
142+ w .commit ();
143+ w .forceMerge (1 );
144+ try (IndexReader reader = DirectoryReader .open (w )) {
145+ IndexSearcher searcher = new IndexSearcher (reader );
146+ BitSetProducer parentFilter = parentFilter (searcher .getIndexReader ());
147+ Query query = new DiversifyingChildrenFloatKnnVectorQuery (fieldName , vector , null , 1 , parentFilter );
148+ assertTrue (searcher .search (query , 1 ).scoreDocs .length == 0 );
149+ }
150+ }
151+
152+ }
153+ }
154+
90155 public void testSearch () throws Exception {
91156 String fieldName = "field" ;
92157 int numVectors = random ().nextInt (99 , 500 );
0 commit comments