3131import org .apache .lucene .util .hnsw .RandomVectorScorer ;
3232import org .apache .lucene .util .hnsw .RandomVectorScorerSupplier ;
3333import org .apache .lucene .util .hnsw .UpdateableRandomVectorScorer ;
34- import org .elasticsearch .index .codec .vectors .es910 .hnsw .HnswUtil .Component ;
3534
3635import java .io .IOException ;
37- import java .util .Comparator ;
38- import java .util .List ;
3936import java .util .Locale ;
4037import java .util .Objects ;
4138import java .util .SplittableRandom ;
4239import java .util .concurrent .TimeUnit ;
4340import java .util .concurrent .locks .Lock ;
4441
4542import static java .lang .Math .log ;
46- import static org .apache .lucene .search .DocIdSetIterator .NO_MORE_DOCS ;
4743
4844/**
4945 * Builder for HNSW graph. See {@link HnswGraph} for a gloss on the algorithm and the meaning of the
@@ -54,11 +50,6 @@ public class HnswGraphBuilder {
5450 /** Default number of maximum connections per node */
5551 public static final int DEFAULT_MAX_CONN = 16 ;
5652
57- /**
58- * Default size of the queue maintained while searching during graph construction.
59- */
60- public static final int DEFAULT_BEAM_WIDTH = 100 ;
61-
6253 /** Default random seed for level generation */
6354 private static final long DEFAULT_RAND_SEED = 42 ;
6455
@@ -444,111 +435,6 @@ void finish() throws IOException {
444435 hnsw .finishBuild ();
445436 }
446437
447- @ SuppressWarnings ("unused" )
448- private void connectComponents () throws IOException {
449- long start = System .nanoTime ();
450- for (int level = 0 ; level < hnsw .numLevels (); level ++) {
451- if (connectComponents (level ) == false ) {
452- if (infoStream .isEnabled (HNSW_COMPONENT )) {
453- infoStream .message (HNSW_COMPONENT , "connectComponents failed on level " + level );
454- }
455- }
456- }
457- if (infoStream .isEnabled (HNSW_COMPONENT )) {
458- infoStream .message (HNSW_COMPONENT , "connectComponents " + (System .nanoTime () - start ) / 1_000_000 + " ms" );
459- }
460- }
461-
462- private boolean connectComponents (int level ) throws IOException {
463- FixedBitSet notFullyConnected = new FixedBitSet (hnsw .size ());
464- int maxConn = M ;
465- if (level == 0 ) {
466- maxConn *= 2 ;
467- }
468- List <Component > components = HnswUtil .components (hnsw , level , notFullyConnected , maxConn );
469- if (infoStream .isEnabled (HNSW_COMPONENT )) {
470- infoStream .message (HNSW_COMPONENT , "connect " + components .size () + " components on level=" + level );
471- }
472- // System.out.println("HnswGraphBuilder. level=" + level + ": " + components);
473- boolean result = true ;
474- if (components .size () > 1 ) {
475- // connect other components to the largest one
476- Component c0 = components .stream ().max (Comparator .comparingInt (Component ::size )).get ();
477- if (c0 .start () == NO_MORE_DOCS ) {
478- // the component is already fully connected - no room for new connections
479- return false ;
480- }
481- // try for more connections? We only do one since otherwise they may become full
482- // while linking
483- GraphBuilderKnnCollector beam = new GraphBuilderKnnCollector (2 );
484- int [] eps = new int [1 ];
485- UpdateableRandomVectorScorer scorer = scorerSupplier .scorer ();
486- for (Component c : components ) {
487- if (c != c0 ) {
488- if (c .start () == NO_MORE_DOCS ) {
489- continue ;
490- }
491- if (infoStream .isEnabled (HNSW_COMPONENT )) {
492- infoStream .message (HNSW_COMPONENT , "connect component " + c + " to " + c0 );
493- }
494-
495- beam .clear ();
496- eps [0 ] = c0 .start ();
497- scorer .setScoringOrdinal (c .start ());
498- // find the closest node in the largest component to the lowest-numbered node in this
499- // component that has room to make a connection
500- graphSearcher .searchLevel (beam , scorer , level , eps , hnsw , notFullyConnected );
501- boolean linked = false ;
502- while (beam .size () > 0 ) {
503- int c0node = beam .popNode ();
504- if (c0node == c .start () || notFullyConnected .get (c0node ) == false ) {
505- continue ;
506- }
507- float score = beam .minimumScore ();
508- assert notFullyConnected .get (c0node );
509- // link the nodes
510- // System.out.println("link " + c0 + "." + c0node + " to " + c + "." + c.start());
511- link (level , c0node , c .start (), score , notFullyConnected );
512- linked = true ;
513- if (infoStream .isEnabled (HNSW_COMPONENT )) {
514- infoStream .message (HNSW_COMPONENT , "connected ok " + c0node + " -> " + c .start ());
515- }
516- }
517- if (linked == false ) {
518- if (infoStream .isEnabled (HNSW_COMPONENT )) {
519- infoStream .message (HNSW_COMPONENT , "not connected; no free nodes found" );
520- }
521- result = false ;
522- }
523- }
524- }
525- }
526- return result ;
527- }
528-
529- // Try to link two nodes bidirectionally; the forward connection will always be made.
530- // Update notFullyConnected.
531- private void link (int level , int n0 , int n1 , float score , FixedBitSet notFullyConnected ) {
532- NeighborArray nbr0 = hnsw .getNeighbors (level , n0 );
533- NeighborArray nbr1 = hnsw .getNeighbors (level , n1 );
534- // must subtract 1 here since the nodes array is one larger than the configured
535- // max neighbors (M / 2M).
536- // We should have taken care of this check by searching for not-full nodes
537- int maxConn = nbr0 .maxSize () - 1 ;
538- assert notFullyConnected .get (n0 );
539- assert nbr0 .size () < maxConn : "node " + n0 + " is full, has " + nbr0 .size () + " friends" ;
540- nbr0 .addOutOfOrder (n1 , score );
541- if (nbr0 .size () == maxConn ) {
542- notFullyConnected .clear (n0 );
543- }
544- if (nbr1 .size () < maxConn ) {
545- nbr1 .addOutOfOrder (n0 , score );
546- if (nbr1 .size () == maxConn ) {
547- notFullyConnected .clear (n1 );
548- }
549- }
550- }
551-
552438 /**
553439 * A restricted, specialized knnCollector that can be used when building a graph.
554440 *
0 commit comments