Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ public GraphIndexBuilder(BuildScoreProvider scoreProvider,
this.simdExecutor = simdExecutor;
this.parallelExecutor = parallelExecutor;

this.graph = new OnHeapGraphIndex(maxDegrees, dimension, neighborOverflow, new VamanaDiversityProvider(scoreProvider, alpha));
this.graph = new OnHeapGraphIndex(maxDegrees, dimension, neighborOverflow, new VamanaDiversityProvider(scoreProvider, alpha), addHierarchy);

this.searchers = ExplicitThreadLocal.withInitial(() -> {
var gs = new GraphSearcher(graph);
Expand All @@ -349,14 +349,11 @@ public GraphIndexBuilder(BuildScoreProvider scoreProvider,
* @param beamWidth the width of the beam used during the graph building process.
* @param neighborOverflow the factor determining how many additional neighbors are allowed beyond the configured limit.
* @param alpha the weight factor for balancing score computations.
* @param addHierarchy whether to add hierarchical structures while building the graph.
* @param refineFinalGraph whether to perform a refinement step on the final graph structure.
* @param simdExecutor the ForkJoinPool executor used for SIMD tasks during graph building.
* @param parallelExecutor the ForkJoinPool executor used for general parallelization during graph building.
*
* @throws IOException if an I/O error occurs during the graph loading or conversion process.
*/
private GraphIndexBuilder(BuildScoreProvider buildScoreProvider, int dimension, MutableGraphIndex mutableGraphIndex, int beamWidth, float neighborOverflow, float alpha, boolean addHierarchy, boolean refineFinalGraph, ForkJoinPool simdExecutor, ForkJoinPool parallelExecutor) {
public GraphIndexBuilder(BuildScoreProvider buildScoreProvider, int dimension, MutableGraphIndex mutableGraphIndex, int beamWidth, float neighborOverflow, float alpha, boolean refineFinalGraph, ForkJoinPool simdExecutor, ForkJoinPool parallelExecutor) {
if (beamWidth <= 0) {
throw new IllegalArgumentException("beamWidth must be positive");
}
Expand All @@ -371,7 +368,7 @@ private GraphIndexBuilder(BuildScoreProvider buildScoreProvider, int dimension,
this.neighborOverflow = neighborOverflow;
this.dimension = dimension;
this.alpha = alpha;
this.addHierarchy = addHierarchy;
this.addHierarchy = mutableGraphIndex.isHierarchical();
this.refineFinalGraph = refineFinalGraph;
this.beamWidth = beamWidth;
this.simdExecutor = simdExecutor;
Expand Down Expand Up @@ -983,8 +980,6 @@ private void loadV3(RandomAccessReader in, int size) throws IOException {
* @param beamWidth the width of the beam used during the graph building process.
* @param overflowRatio the ratio of extra neighbors to allow temporarily when inserting a node.
* @param alpha the weight factor for balancing score computations.
* @param addHierarchy whether to add hierarchical structures while building the graph.
*
* @return the in-memory representation of the graph index.
* @throws IOException if an I/O error occurs during the graph loading or conversion process.
*/
Expand All @@ -996,8 +991,7 @@ public static ImmutableGraphIndex buildAndMergeNewNodes(RandomAccessReader in,
int[] graphToRavvOrdMap,
int beamWidth,
float overflowRatio,
float alpha,
boolean addHierarchy) throws IOException {
float alpha) throws IOException {

var diversityProvider = new VamanaDiversityProvider(buildScoreProvider, alpha);

Expand All @@ -1010,7 +1004,6 @@ public static ImmutableGraphIndex buildAndMergeNewNodes(RandomAccessReader in,
beamWidth,
overflowRatio,
alpha,
addHierarchy,
true,
PhysicalCoreExecutor.pool(),
ForkJoinPool.commonPool()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,15 @@ default boolean containsNode(int nodeId) {
@Override
void close() throws IOException;


/**
* Reports whether this graph is hierarchical.
* <p>
* Note that a graph can be hierarchical even if it has a single layer, i.e., {@code getMaxLevel() == 0}.
* For example, while building a new hierarchical graph, we may temporarily only have nodes at level 0
* because of the random assignment of nodes to levels.
*
* @return true if this graph is hierarchical, false otherwise.
*/
boolean isHierarchical();

/**
* @return The maximum (coarsest) level that contains a vector in the graph.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,9 @@ public class OnHeapGraphIndex implements MutableGraphIndex {

private volatile boolean allMutationsCompleted = false;

OnHeapGraphIndex(List<Integer> maxDegrees, int dimension, double overflowRatio, DiversityProvider diversityProvider) {
private final boolean isHierarchical;

OnHeapGraphIndex(List<Integer> maxDegrees, int dimension, double overflowRatio, DiversityProvider diversityProvider, boolean isHierarchical) {
this.overflowRatio = overflowRatio;
this.maxDegrees = new IntArrayList();
this.dimension = dimension;
Expand All @@ -94,6 +96,7 @@ public class OnHeapGraphIndex implements MutableGraphIndex {
getDegree(0),
(int) (getDegree(0) * overflowRatio))
);
this.isHierarchical = isHierarchical;
}

/**
Expand Down Expand Up @@ -128,6 +131,11 @@ public NodesIterator getNeighborsIterator(int level, int node) {
}
}

/**
 * Returns the hierarchy flag that was fixed when this index was constructed.
 *
 * @return the value of the final {@code isHierarchical} field.
 */
@Override
public boolean isHierarchical() {
    return this.isHierarchical;
}

@Override
public int getMaxLevelForNode(int node) {
int maxLayer = -1;
Expand Down Expand Up @@ -568,7 +576,8 @@ public static OnHeapGraphIndex load(RandomAccessReader in, int dimension, double

int entryNode = in.readInt();

var graph = new OnHeapGraphIndex(layerDegrees, dimension, overflowRatio, diversityProvider);
var isHierarchical = layerCount > 1;
var graph = new OnHeapGraphIndex(layerDegrees, dimension, overflowRatio, diversityProvider, isHierarchical);

Map<Integer, Integer> nodeLevelMap = new HashMap<>();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,11 @@ public Set<FeatureId> getFeatureSet() {
return features.keySet();
}

/**
 * Reports this graph as hierarchical exactly when {@code layerInfo} holds more than one entry.
 *
 * @return true if {@code layerInfo} has two or more entries, false otherwise.
 */
@Override
public boolean isHierarchical() {
    final int layerCount = layerInfo.size();
    return layerCount > 1;
}

@Override
public int getDimension() {
return dimension;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,11 @@ public FullyConnectedGraphIndex(int entryNode, List<Integer> layerSizes) {
this.layerSizes = layerSizes;
}

/**
 * Reports this graph as hierarchical exactly when {@code layerSizes} holds more than one entry.
 *
 * @return true if {@code layerSizes} has two or more entries, false otherwise.
 */
@Override
public boolean isHierarchical() {
    final int levelCount = layerSizes.size();
    return levelCount > 1;
}

@Override
public int size(int level) {
return layerSizes.get(level);
Expand Down Expand Up @@ -388,6 +393,11 @@ public RandomlyConnectedGraphIndex(int size, int M, Random random) {
this(List.of(new CommonHeader.LayerInfo(size, M)), random);
}

/**
 * Reports this graph as hierarchical exactly when {@code layerInfo} holds more than one entry.
 *
 * @return true if {@code layerInfo} has two or more entries, false otherwise.
 */
@Override
public boolean isHierarchical() {
    final int layerCount = layerInfo.size();
    return layerCount > 1;
}

@Override
public int getMaxLevel() {
return layerInfo.size() - 1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ public void testIncrementalInsertionFromOnDiskIndex() throws IOException {
try (var readerSupplier = new SimpleMappedReader.Supplier(heapGraphOutputPath.toAbsolutePath())) {
// We will create a trivial 1:1 mapping between the new graph and the ravv
final int[] graphToRavvOrdMap = IntStream.range(0, allVectorsRavv.size()).toArray();
ImmutableGraphIndex reconstructedAllNodeOnHeapGraphIndex = GraphIndexBuilder.buildAndMergeNewNodes(readerSupplier.get(), allVectorsRavv, allBuildScoreProvider, NUM_BASE_VECTORS, graphToRavvOrdMap, BEAM_WIDTH, NEIGHBOR_OVERFLOW, ALPHA, ADD_HIERARCHY);
ImmutableGraphIndex reconstructedAllNodeOnHeapGraphIndex = GraphIndexBuilder.buildAndMergeNewNodes(readerSupplier.get(), allVectorsRavv, allBuildScoreProvider, NUM_BASE_VECTORS, graphToRavvOrdMap, BEAM_WIDTH, NEIGHBOR_OVERFLOW, ALPHA);

// Verify that the recall is similar
float recallFromReconstructedAllNodeOnHeapGraphIndex = calculateRecall(reconstructedAllNodeOnHeapGraphIndex, allBuildScoreProvider, queryVector, groundTruthAllVectors, TOP_K);
Expand Down