Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ public GraphIndexBuilder(BuildScoreProvider scoreProvider,
this.simdExecutor = simdExecutor;
this.parallelExecutor = parallelExecutor;

this.graph = new OnHeapGraphIndex(maxDegrees, dimension, neighborOverflow, new VamanaDiversityProvider(scoreProvider, alpha));
this.graph = new OnHeapGraphIndex(maxDegrees, dimension, neighborOverflow, new VamanaDiversityProvider(scoreProvider, alpha), addHierarchy);

this.searchers = ExplicitThreadLocal.withInitial(() -> {
var gs = new GraphSearcher(graph);
Expand All @@ -349,14 +349,12 @@ public GraphIndexBuilder(BuildScoreProvider scoreProvider,
* @param beamWidth the width of the beam used during the graph building process.
* @param neighborOverflow the factor determining how many additional neighbors are allowed beyond the configured limit.
* @param alpha the weight factor for balancing score computations.
* @param addHierarchy whether to add hierarchical structures while building the graph.
* @param refineFinalGraph whether to perform a refinement step on the final graph structure.
* @param simdExecutor the ForkJoinPool executor used for SIMD tasks during graph building.
* @param parallelExecutor the ForkJoinPool executor used for general parallelization during graph building.
*
* @throws IOException if an I/O error occurs during the graph loading or conversion process.
*/
private GraphIndexBuilder(BuildScoreProvider buildScoreProvider, int dimension, MutableGraphIndex mutableGraphIndex, int beamWidth, float neighborOverflow, float alpha, boolean addHierarchy, boolean refineFinalGraph, ForkJoinPool simdExecutor, ForkJoinPool parallelExecutor) {
@Experimental
public GraphIndexBuilder(BuildScoreProvider buildScoreProvider, int dimension, MutableGraphIndex mutableGraphIndex, int beamWidth, float neighborOverflow, float alpha, boolean refineFinalGraph, ForkJoinPool simdExecutor, ForkJoinPool parallelExecutor) {
if (beamWidth <= 0) {
throw new IllegalArgumentException("beamWidth must be positive");
}
Expand All @@ -371,7 +369,7 @@ private GraphIndexBuilder(BuildScoreProvider buildScoreProvider, int dimension,
this.neighborOverflow = neighborOverflow;
this.dimension = dimension;
this.alpha = alpha;
this.addHierarchy = addHierarchy;
this.addHierarchy = mutableGraphIndex.isHierarchical();
this.refineFinalGraph = refineFinalGraph;
this.beamWidth = beamWidth;
this.simdExecutor = simdExecutor;
Expand Down Expand Up @@ -981,8 +979,6 @@ private void loadV3(RandomAccessReader in, int size) throws IOException {
* @param beamWidth the width of the beam used during the graph building process.
* @param overflowRatio the ratio of extra neighbors to allow temporarily when inserting a node.
* @param alpha the weight factor for balancing score computations.
* @param addHierarchy whether to add hierarchical structures while building the graph.
*
* @return the in-memory representation of the graph index.
* @throws IOException if an I/O error occurs during the graph loading or conversion process.
*/
Expand All @@ -993,10 +989,9 @@ public static ImmutableGraphIndex buildAndMergeNewNodes(RandomAccessReader in,
int startingNodeOffset,
int beamWidth,
float overflowRatio,
float alpha,
boolean addHierarchy) throws IOException {
float alpha) throws IOException {

return buildAndMergeNewNodes(in, newVectors, buildScoreProvider, startingNodeOffset, beamWidth, overflowRatio, alpha, addHierarchy, PhysicalCoreExecutor.pool(), ForkJoinPool.commonPool());
return buildAndMergeNewNodes(in, newVectors, buildScoreProvider, startingNodeOffset, beamWidth, overflowRatio, alpha, PhysicalCoreExecutor.pool(), ForkJoinPool.commonPool());
}

/**
Expand All @@ -1010,7 +1005,6 @@ public static ImmutableGraphIndex buildAndMergeNewNodes(RandomAccessReader in,
* @param beamWidth the width of the beam used during the graph building process.
* @param overflowRatio the ratio of extra neighbors to allow temporarily when inserting a node.
* @param alpha the weight factor for balancing score computations.
* @param addHierarchy whether to add hierarchical structures while building the graph.
* @param simdExecutor the ForkJoinPool executor used for SIMD tasks during graph building.
* @param parallelExecutor the ForkJoinPool executor used for general parallelization during graph building.
*
Expand All @@ -1025,7 +1019,6 @@ public static ImmutableGraphIndex buildAndMergeNewNodes(RandomAccessReader in,
int beamWidth,
float overflowRatio,
float alpha,
boolean addHierarchy,
ForkJoinPool simdExecutor,
ForkJoinPool parallelExecutor) throws IOException {

Expand All @@ -1040,7 +1033,6 @@ public static ImmutableGraphIndex buildAndMergeNewNodes(RandomAccessReader in,
beamWidth,
overflowRatio,
alpha,
addHierarchy,
true,
simdExecutor,
parallelExecutor
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,15 @@ default boolean containsNode(int nodeId) {
@Override
void close() throws IOException;


/**
* Returns true if this graph is hierarchical, false otherwise.
* Note that a graph can be hierarchical even if it has a single layer, i.e., {@code getMaxLevel() == 0}.
* For example, while building a new hierarchical graph, we may temporarily only have nodes at level 0
* because of the random assignment of nodes to levels.
*
* @return {@code true} if this graph is hierarchical, {@code false} otherwise.
*/
boolean isHierarchical();

/**
* @return The maximum (coarsest) level that contains a vector in the graph.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ public class OnHeapGraphIndex implements MutableGraphIndex {

private volatile boolean allMutationsCompleted = false;

OnHeapGraphIndex(List<Integer> maxDegrees, int dimension, double overflowRatio, DiversityProvider diversityProvider) {
private final boolean isHierarchical;

OnHeapGraphIndex(List<Integer> maxDegrees, int dimension, double overflowRatio, DiversityProvider diversityProvider, boolean isHierarchical) {
this.overflowRatio = overflowRatio;
this.maxDegrees = new IntArrayList();
this.dimension = dimension;
Expand All @@ -94,6 +96,7 @@ public class OnHeapGraphIndex implements MutableGraphIndex {
getDegree(0),
(int) (getDegree(0) * overflowRatio))
);
this.isHierarchical = isHierarchical;
}

/**
Expand Down Expand Up @@ -128,6 +131,11 @@ public NodesIterator getNeighborsIterator(int level, int node) {
}
}

/**
 * Reports the hierarchy flag this graph was constructed with.
 *
 * @return the value of the final {@code isHierarchical} field of this instance
 */
@Override
public boolean isHierarchical() {
    return this.isHierarchical;
}

@Override
public int getMaxLevelForNode(int node) {
int maxLayer = -1;
Expand Down Expand Up @@ -568,7 +576,8 @@ public static OnHeapGraphIndex load(RandomAccessReader in, int dimension, double

int entryNode = in.readInt();

var graph = new OnHeapGraphIndex(layerDegrees, dimension, overflowRatio, diversityProvider);
boolean isHierarchical = layerCount > 1;
var graph = new OnHeapGraphIndex(layerDegrees, dimension, overflowRatio, diversityProvider, isHierarchical);

Map<Integer, Integer> nodeLevelMap = new HashMap<>();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,11 @@ public Set<FeatureId> getFeatureSet() {
return features.keySet();
}

/**
 * Reports whether this graph is hierarchical, judged by the number of entries in {@code layerInfo}.
 *
 * @return {@code true} when {@code layerInfo} holds at least two entries, {@code false} otherwise
 */
@Override
public boolean isHierarchical() {
    // Two or more layers is the same condition as "more than one layer".
    return layerInfo.size() >= 2;
}

@Override
public int getDimension() {
return dimension;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,11 @@ public FullyConnectedGraphIndex(int entryNode, List<Integer> layerSizes) {
this.layerSizes = layerSizes;
}

/**
 * Reports whether this graph is hierarchical, judged by the number of entries in {@code layerSizes}.
 *
 * @return {@code true} when {@code layerSizes} holds more than one entry, {@code false} otherwise
 */
@Override
public boolean isHierarchical() {
    final int layerCount = layerSizes.size();
    return layerCount > 1;
}

@Override
public int size(int level) {
return layerSizes.get(level);
Expand Down Expand Up @@ -388,6 +393,11 @@ public RandomlyConnectedGraphIndex(int size, int M, Random random) {
this(List.of(new CommonHeader.LayerInfo(size, M)), random);
}

/**
 * Reports whether this graph is hierarchical, judged by the number of entries in {@code layerInfo}.
 *
 * @return {@code true} when {@code layerInfo} holds more than one entry, {@code false} otherwise
 */
@Override
public boolean isHierarchical() {
    final int layers = layerInfo.size();
    return layers > 1;
}

@Override
public int getMaxLevel() {
return layerInfo.size() - 1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ public void testIncrementalInsertionFromOnDiskIndex_withIdentityOrdinalMapping()
// We will create a trivial 1:1 mapping between the new graph and the ravv
final int[] graphToRavvOrdMap = IntStream.range(0, allVectorsRavv.size()).toArray();
final RemappedRandomAccessVectorValues remappedAllVectorsRavv = new RemappedRandomAccessVectorValues(allVectorsRavv, graphToRavvOrdMap);
ImmutableGraphIndex reconstructedAllNodeOnHeapGraphIndex = GraphIndexBuilder.buildAndMergeNewNodes(readerSupplier.get(), remappedAllVectorsRavv, allBuildScoreProvider, NUM_BASE_VECTORS, BEAM_WIDTH, NEIGHBOR_OVERFLOW, ALPHA, ADD_HIERARCHY);
ImmutableGraphIndex reconstructedAllNodeOnHeapGraphIndex = GraphIndexBuilder.buildAndMergeNewNodes(readerSupplier.get(), remappedAllVectorsRavv, allBuildScoreProvider, NUM_BASE_VECTORS, BEAM_WIDTH, NEIGHBOR_OVERFLOW, ALPHA);

// Verify that the recall is similar across multiple queries
// Note: Incremental insertion can have slightly different recall than bulk indexing due to the order of insertions
Expand Down Expand Up @@ -313,8 +313,7 @@ public void testIncrementalInsertionFromOnDiskIndex_withNonIdentityOrdinalMappin
final int[] allGraphToRavvOrdMap = IntStream.range(0, allVectorsRavv.size()).map(i -> allVectorsRavv.size() - 1 - i).toArray();
final RemappedRandomAccessVectorValues remappedAllVectorsRavv = new RemappedRandomAccessVectorValues(allVectorsRavv, allGraphToRavvOrdMap);
var allBsp = BuildScoreProvider.randomAccessScoreProvider(remappedAllVectorsRavv, SIMILARITY_FUNCTION);

ImmutableGraphIndex reconstructedAllNodeOnHeapGraphIndex = GraphIndexBuilder.buildAndMergeNewNodes(readerSupplier.get(), remappedAllVectorsRavv, allBsp, NUM_BASE_VECTORS, BEAM_WIDTH, NEIGHBOR_OVERFLOW, ALPHA, ADD_HIERARCHY);
ImmutableGraphIndex reconstructedAllNodeOnHeapGraphIndex = GraphIndexBuilder.buildAndMergeNewNodes(readerSupplier.get(), remappedAllVectorsRavv, allBsp, NUM_BASE_VECTORS, BEAM_WIDTH, NEIGHBOR_OVERFLOW, ALPHA);

// Verify that the recall is similar across multiple queries
// Note: Non-identity mapping can have slightly lower recall due to the complexity of merging with remapped ordinals
Expand Down