3232import java .util .Objects ;
3333import java .util .concurrent .ForkJoinPool ;
3434import java .util .concurrent .atomic .AtomicReference ;
35+ import java .util .function .IntFunction ;
3536import java .util .stream .IntStream ;
3637
3738public abstract class PQVectors implements CompressedVectors {
@@ -76,7 +77,7 @@ public static PQVectors load(RandomAccessReader in, long offset) throws IOExcept
7677 /**
7778 * Build a PQVectors instance from the given RandomAccessVectorValues. The vectors are encoded in parallel
7879 * and split into chunks to avoid exceeding the maximum array size.
79- *
80+ * <p>
8081 * This is a helper method for the special case where the ordinals mapping in the graph and the RAVV/PQVectors are the same.
8182 *
8283 * @param pq the ProductQuantization to use
@@ -86,23 +87,39 @@ public static PQVectors load(RandomAccessReader in, long offset) throws IOExcept
8687 * @return the PQVectors instance
8788 */
8889 public static ImmutablePQVectors encodeAndBuild (ProductQuantization pq , int vectorCount , RandomAccessVectorValues ravv , ForkJoinPool simdExecutor ) {
89- return encodeAndBuild (pq , vectorCount , IntStream .range (0 , vectorCount ).toArray (), ravv , simdExecutor );
90+ IntFunction <Integer > mapper = (ordinal ) -> ordinal ;
91+ return encodeAndBuild (pq , vectorCount , mapper , ravv , simdExecutor );
9092 }
9193
9294 /**
9395 * Build a PQVectors instance from the given RandomAccessVectorValues. The vectors are encoded in parallel
9496 * and split into chunks to avoid exceeding the maximum array size.
9597 *
9698 * @param pq the ProductQuantization to use
97- * @param vectorCount the number of vectors to encode
99+ * @param ordinalsMapping the graph ordinals to RAVV mapping
98100 * @param ravv the RandomAccessVectorValues to encode
99101 * @param simdExecutor the ForkJoinPool to use for SIMD operations
102+ * @return the PQVectors instance
103+ */
104+ public static ImmutablePQVectors encodeAndBuild (ProductQuantization pq , int [] ordinalsMapping , RandomAccessVectorValues ravv , ForkJoinPool simdExecutor ) {
105+ IntFunction <Integer > mapper = (ordinal ) -> ordinalsMapping [ordinal ];
106+ return encodeAndBuild (pq , ordinalsMapping .length , mapper , ravv , simdExecutor );
107+ }
108+
109+ /**
110+ * Build a PQVectors instance from the given RandomAccessVectorValues. The vectors are encoded in parallel
111+ * and split into chunks to avoid exceeding the maximum array size.
112+ *
113+ * @param pq the ProductQuantization to use
114+ * @param vectorCount the number of vectors to encode
100115 * @param ordinalsMapping the graph ordinals to RAVV mapping
116+ * @param ravv the RandomAccessVectorValues to encode
117+ * @param simdExecutor the ForkJoinPool to use for SIMD operations
101118 * @return the PQVectors instance
102119 */
103- public static ImmutablePQVectors encodeAndBuild (ProductQuantization pq , int vectorCount , int [] ordinalsMapping , RandomAccessVectorValues ravv , ForkJoinPool simdExecutor ) {
120+ public static ImmutablePQVectors encodeAndBuild (ProductQuantization pq , int vectorCount , IntFunction < Integer > ordinalsMapping , RandomAccessVectorValues ravv , ForkJoinPool simdExecutor ) {
104121 int compressedDimension = pq .compressedVectorSize ();
105- PQLayout layout = new PQLayout (vectorCount ,compressedDimension );
122+ PQLayout layout = new PQLayout (vectorCount , compressedDimension );
106123 final ByteSequence <?>[] chunks = new ByteSequence <?>[layout .totalChunks ];
107124 for (int i = 0 ; i < layout .fullSizeChunks ; i ++) {
108125 chunks [i ] = vectorTypeSupport .createByteSequence (layout .fullChunkBytes );
@@ -115,13 +132,13 @@ public static ImmutablePQVectors encodeAndBuild(ProductQuantization pq, int vect
115132 // The changes are concurrent, but because they are coordinated and do not overlap, we can use parallel streams
116133 // and then we are guaranteed safe publication because we join the thread after completion.
117134 var ravvCopy = ravv .threadLocalSupplier ();
118- simdExecutor .submit (() -> IntStream .range (0 , ordinalsMapping . length )
135+ simdExecutor .submit (() -> IntStream .range (0 , vectorCount )
119136 .parallel ()
120137 .forEach (ordinal -> {
121138 // Retrieve the slice and mutate it.
122139 var localRavv = ravvCopy .get ();
123140 var slice = PQVectors .get (chunks , ordinal , layout .fullChunkVectors , pq .getSubspaceCount ());
124- var vector = localRavv .getVector (ordinalsMapping [ ordinal ] );
141+ var vector = localRavv .getVector (ordinalsMapping . apply ( ordinal ) );
125142 if (vector != null )
126143 pq .encodeTo (vector , slice );
127144 else
0 commit comments