3232import java .util .Objects ;
3333import java .util .concurrent .ForkJoinPool ;
3434import java .util .concurrent .atomic .AtomicReference ;
35+ import java .util .function .IntUnaryOperator ;
3536import java .util .stream .IntStream ;
3637
3738public abstract class PQVectors implements CompressedVectors {
@@ -76,7 +77,7 @@ public static PQVectors load(RandomAccessReader in, long offset) throws IOExcept
7677 /**
7778 * Builds a PQVectors instance from the given RandomAccessVectorValues. The vectors are encoded in parallel
7879 * and stored in chunks so that no single backing array exceeds the maximum array size.
79- *
80+ * <p>
8081 * Convenience overload for the case where graph ordinals and RAVV/PQVectors ordinals are the same: it delegates to the mapping-based overload with an identity ordinals mapping.
8182 *
8283 * @param pq the ProductQuantization used to encode the vectors
@@ -86,7 +87,7 @@ public static PQVectors load(RandomAccessReader in, long offset) throws IOExcept
8687 * @return the encoded vectors as an ImmutablePQVectors instance
8788 */
8889 public static ImmutablePQVectors encodeAndBuild (ProductQuantization pq , int vectorCount , RandomAccessVectorValues ravv , ForkJoinPool simdExecutor ) {
89- return encodeAndBuild (pq , vectorCount , IntStream . range ( 0 , vectorCount ). toArray (), ravv , simdExecutor );
90+ return encodeAndBuild (pq , vectorCount , IntUnaryOperator . identity (), ravv , simdExecutor );
9091 }
9192
9293 /**
@@ -95,14 +96,14 @@ public static ImmutablePQVectors encodeAndBuild(ProductQuantization pq, int vect
9596 *
9697 * @param pq the ProductQuantization to use
9798 * @param vectorCount the number of vectors to encode
99+ * @param ordinalsMapping the mapping from graph ordinals to RAVV ordinals; it must be defined for every input in [0, vectorCount)
98100 * @param ravv the RandomAccessVectorValues to encode
99101 * @param simdExecutor the ForkJoinPool to use for SIMD operations
100- * @param ordinalsMapping the graph ordinals to RAVV mapping
101102 * @return the PQVectors instance
102103 */
103- public static ImmutablePQVectors encodeAndBuild (ProductQuantization pq , int vectorCount , int [] ordinalsMapping , RandomAccessVectorValues ravv , ForkJoinPool simdExecutor ) {
104+ public static ImmutablePQVectors encodeAndBuild (ProductQuantization pq , int vectorCount , IntUnaryOperator ordinalsMapping , RandomAccessVectorValues ravv , ForkJoinPool simdExecutor ) {
104105 int compressedDimension = pq .compressedVectorSize ();
105- PQLayout layout = new PQLayout (vectorCount ,compressedDimension );
106+ PQLayout layout = new PQLayout (vectorCount , compressedDimension );
106107 final ByteSequence <?>[] chunks = new ByteSequence <?>[layout .totalChunks ];
107108 for (int i = 0 ; i < layout .fullSizeChunks ; i ++) {
108109 chunks [i ] = vectorTypeSupport .createByteSequence (layout .fullChunkBytes );
@@ -115,13 +116,13 @@ public static ImmutablePQVectors encodeAndBuild(ProductQuantization pq, int vect
115116 // The changes are concurrent, but because they are coordinated and do not overlap, we can use parallel streams
116117 // and then we are guaranteed safe publication because we join the thread after completion.
117118 var ravvCopy = ravv .threadLocalSupplier ();
118- simdExecutor .submit (() -> IntStream .range (0 , ordinalsMapping . length )
119+ simdExecutor .submit (() -> IntStream .range (0 , vectorCount )
119120 .parallel ()
120121 .forEach (ordinal -> {
121122 // Retrieve the slice and mutate it.
122123 var localRavv = ravvCopy .get ();
123124 var slice = PQVectors .get (chunks , ordinal , layout .fullChunkVectors , pq .getSubspaceCount ());
124- var vector = localRavv .getVector (ordinalsMapping [ ordinal ] );
125+ var vector = localRavv .getVector (ordinalsMapping . applyAsInt ( ordinal ) );
125126 if (vector != null )
126127 pq .encodeTo (vector , slice );
127128 else
0 commit comments