Commit 44f9087

michaeljmarshall authored and driftx committed
CNDB-14797: Upgrade to jvector 4.0.0-rc1 (#1866)
### What is the issue

Fixes riptano/cndb#14797

CNDB test PR: riptano/cndb#14799

### What does this PR fix and why was it fixed

Here is the list of commits included in the upgrade: datastax/jvector@4.0.0-beta.5...4.0.0-rc.1

Here is a copy of the relevant `CHANGELOG.md` lines:

> #### [4.0.0-rc.1](datastax/jvector@4.0.0-beta.6...4.0.0-rc.1)
>
> - Fix issue when calling cleanup while concurrently executing searches [`#483`](datastax/jvector#483)
> - Improve the efficiency and memory usage of GraphIndexBuilder.cleanup [`#477`](datastax/jvector#477)
> - Add PQ training benchmark [`#482`](datastax/jvector#482)
> - Remove extraneous character from datasets.yml [`#484`](datastax/jvector#484)
> - Upgrade YAML files to v5 after the format was introduced in the last update [`#478`](datastax/jvector#478)
> - New chunked memory-mapped reader that supports >2GB files [`61bffbe`](datastax/jvector@61bffbe)
> - Release 4.0.0-rc.1 [`6737596`](datastax/jvector@6737596)
> - Fix comparison in TestADCGraphIndex [`b637f65`](datastax/jvector@b637f65)
>
> #### [4.0.0-beta.6](datastax/jvector@4.0.0-beta.5...4.0.0-beta.6)
>
> 13 June 2025
>
> - Add a new graph node using a search score [`#473`](datastax/jvector#473)
> - chore(release): Bump tag version and update changelog [`#471`](datastax/jvector#471)
> - Sequential disk writer (#475); upgrades file format from 4 to 5 [`d0ccb32`](datastax/jvector@d0ccb32)
> - Allow empty sections in datasets.yml & add colbert-1M.yml [`2bf5f9a`](datastax/jvector@2bf5f9a)
> - chore(release): Start release version 4.0.0-beta.6 [`9a453a3`](datastax/jvector@9a453a3)
1 parent 7b5fce3 · commit 44f9087

4 files changed (+17, -12 lines)

.build/parent-pom-template.xml

Lines changed: 1 addition & 1 deletion
```diff
@@ -1248,7 +1248,7 @@
       <dependency>
         <groupId>io.github.jbellis</groupId>
         <artifactId>jvector</artifactId>
-        <version>4.0.0-beta.5</version>
+        <version>4.0.0-rc.1</version>
       </dependency>
       <dependency>
         <groupId>com.carrotsearch.randomizedtesting</groupId>
```

src/java/org/apache/cassandra/index/sai/disk/vector/CassandraDiskAnn.java

Lines changed: 4 additions & 3 deletions
```diff
@@ -31,6 +31,7 @@
 import io.github.jbellis.jvector.graph.GraphSearcher;
 import io.github.jbellis.jvector.graph.disk.feature.FeatureId;
 import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex;
+import io.github.jbellis.jvector.graph.similarity.DefaultSearchScoreProvider;
 import io.github.jbellis.jvector.graph.similarity.SearchScoreProvider;
 import io.github.jbellis.jvector.quantization.BQVectors;
 import io.github.jbellis.jvector.quantization.CompressedVectors;
@@ -240,12 +241,12 @@ public CloseableIterator<RowIdWithScore> search(VectorFloat<?> queryVector,
         {
             var asf = view.approximateScoreFunctionFor(queryVector, similarityFunction);
             var rr = isRerankless ? null : view.rerankerFor(queryVector, similarityFunction);
-            ssp = new SearchScoreProvider(asf, rr);
+            ssp = new DefaultSearchScoreProvider(asf, rr);
         }
         else if (compressedVectors == null)
         {
             // no compression, so we ignore isRerankless (except for setting rerankK to limit)
-            ssp = new SearchScoreProvider(view.rerankerFor(queryVector, similarityFunction));
+            ssp = new DefaultSearchScoreProvider(view.rerankerFor(queryVector, similarityFunction));
         }
         else
         {
@@ -256,7 +257,7 @@ else if (compressedVectors == null)
                       : similarityFunction;
             var asf = compressedVectors.precomputedScoreFunctionFor(queryVector, sf);
             var rr = isRerankless ? null : view.rerankerFor(queryVector, similarityFunction);
-            ssp = new SearchScoreProvider(asf, rr);
+            ssp = new DefaultSearchScoreProvider(asf, rr);
         }
         long start = nanoTime();
         var result = searcher.search(ssp, limit, rerankK, threshold, context.getAnnRerankFloor(), ordinalsMap.ignoringDeleted(acceptBits));
```
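The rename here is mechanical: `SearchScoreProvider` survives as the declared type, while the concrete class constructed is now `DefaultSearchScoreProvider`. Below is a minimal sketch of the two call shapes this diff exercises; the helper class and method names are hypothetical, and only the constructors actually visible in the hunks above are assumed to exist.

```java
import io.github.jbellis.jvector.graph.similarity.DefaultSearchScoreProvider;
import io.github.jbellis.jvector.graph.similarity.ScoreFunction;
import io.github.jbellis.jvector.graph.similarity.SearchScoreProvider;

// Hypothetical helper illustrating the post-upgrade call shapes.
public final class ScoreProviderSketch
{
    // Approximate scores for graph traversal plus an exact reranker;
    // passing a null reranker reproduces the "rerankless" branch above.
    static SearchScoreProvider approximate(ScoreFunction.ApproximateScoreFunction asf,
                                           ScoreFunction.ExactScoreFunction reranker)
    {
        return new DefaultSearchScoreProvider(asf, reranker);
    }

    // Uncompressed vectors: a single exact score function, no separate reranker.
    static SearchScoreProvider exactOnly(ScoreFunction.ExactScoreFunction esf)
    {
        return new DefaultSearchScoreProvider(esf);
    }
}
```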

src/java/org/apache/cassandra/index/sai/disk/vector/CassandraOnHeapGraph.java

Lines changed: 3 additions & 3 deletions
```diff
@@ -49,7 +49,7 @@
 import io.github.jbellis.jvector.graph.disk.feature.Feature;
 import io.github.jbellis.jvector.graph.disk.feature.FeatureId;
 import io.github.jbellis.jvector.graph.disk.feature.InlineVectors;
-import io.github.jbellis.jvector.graph.similarity.SearchScoreProvider;
+import io.github.jbellis.jvector.graph.similarity.DefaultSearchScoreProvider;
 import io.github.jbellis.jvector.quantization.BinaryQuantization;
 import io.github.jbellis.jvector.quantization.CompressedVectors;
 import io.github.jbellis.jvector.quantization.ProductQuantization;
@@ -342,7 +342,7 @@ public CloseableIterator<SearchResult.NodeScore> search(QueryContext context, Ve
         searcher.setView(builder.getGraph().getView());
         try
         {
-            var ssf = SearchScoreProvider.exact(queryVector, similarityFunction, vectorValues);
+            var ssf = DefaultSearchScoreProvider.exact(queryVector, similarityFunction, vectorValues);
             long start = nanoTime();
             var result = searcher.search(ssf, limit, rerankK, threshold, 0.0f, bits);
             long elapsed = nanoTime() - start;
@@ -440,10 +440,10 @@ public SegmentMetadata.ComponentMetadataMap flush(IndexComponents.ForWrite perIn
         try (var pqOutput = perIndexComponents.addOrGet(IndexComponentType.PQ).openOutput(true);
              var postingsOutput = perIndexComponents.addOrGet(IndexComponentType.POSTING_LISTS).openOutput(true);
              var indexWriter = new OnDiskGraphIndexWriter.Builder(builder.getGraph(), indexFile.toPath())
+                               .withStartOffset(termsOffset)
                                .withVersion(Version.current().onDiskFormat().jvectorFileFormatVersion())
                                .withMapper(ordinalMapper)
                                .with(new InlineVectors(vectorValues.dimension()))
-                               .withStartOffset(termsOffset)
                                .build())
         {
             SAICodecUtils.writeHeader(pqOutput);
```
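The static factory moves with the rename too: `exact(...)` is now invoked on `DefaultSearchScoreProvider`. Here is a small sketch of the exact-scoring search path used above, with every input passed as a parameter since the surrounding class state isn't shown in this diff; the parameter types and import paths are assumptions based on the jvector 4.x API.

```java
import io.github.jbellis.jvector.graph.GraphSearcher;
import io.github.jbellis.jvector.graph.RandomAccessVectorValues;
import io.github.jbellis.jvector.graph.SearchResult;
import io.github.jbellis.jvector.graph.similarity.DefaultSearchScoreProvider;
import io.github.jbellis.jvector.util.Bits;
import io.github.jbellis.jvector.vector.VectorSimilarityFunction;
import io.github.jbellis.jvector.vector.types.VectorFloat;

public final class ExactSearchSketch
{
    // Exact scores are computed directly against the in-memory vectors,
    // so the rerank floor is pinned to 0.0f, matching the call above.
    static SearchResult search(GraphSearcher searcher,
                               VectorFloat<?> queryVector,
                               VectorSimilarityFunction similarityFunction,
                               RandomAccessVectorValues vectorValues,
                               int limit, int rerankK, float threshold, Bits bits)
    {
        var ssp = DefaultSearchScoreProvider.exact(queryVector, similarityFunction, vectorValues);
        return searcher.search(ssp, limit, rerankK, threshold, 0.0f, bits);
    }
}
```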

src/java/org/apache/cassandra/index/sai/disk/vector/CompactionGraph.java

Lines changed: 9 additions & 5 deletions
```diff
@@ -211,23 +211,27 @@ else if (compressor instanceof BinaryQuantization)
                                   indexConfig.getNeighborhoodOverflow(1.2f),
                                   indexConfig.getAlpha(dimension > 3 ? 1.2f : 1.4f),
                                   indexConfig.isHierarchyEnabled() && jvectorVersion >= 4,
-                                  compactionSimdPool, compactionFjp);
+                                  true, // We always refine during compaction
+                                  compactionSimdPool,
+                                  compactionFjp);
 
         termsFile = perIndexComponents.addOrGet(IndexComponentType.TERMS_DATA).file();
         termsOffset = (termsFile.exists() ? termsFile.length() : 0)
                       + SAICodecUtils.headerSize();
         // placeholder writer, will be replaced at flush time when we finalize the index contents
-        writer = createTermsWriterBuilder().withMapper(new OrdinalMapper.IdentityMapper(maxRowsInGraph)).build();
+        writer = createTermsWriter(new OrdinalMapper.IdentityMapper(maxRowsInGraph));
         writer.getOutput().seek(termsFile.length()); // position at the end of the previous segment before writing our own header
         SAICodecUtils.writeHeader(SAICodecUtils.toLuceneOutput(writer.getOutput()));
     }
 
-    private OnDiskGraphIndexWriter.Builder createTermsWriterBuilder() throws IOException
+    private OnDiskGraphIndexWriter createTermsWriter(OrdinalMapper ordinalMapper) throws IOException
     {
         return new OnDiskGraphIndexWriter.Builder(builder.getGraph(), termsFile.toPath())
                .withStartOffset(termsOffset)
                .with(new InlineVectors(dimension))
-               .withVersion(Version.current().onDiskFormat().jvectorFileFormatVersion());
+               .withVersion(Version.current().onDiskFormat().jvectorFileFormatVersion())
+               .withMapper(ordinalMapper)
+               .build();
     }
 
     @Override
@@ -446,7 +450,7 @@ public SegmentMetadata.ComponentMetadataMap flush() throws IOException
         }
 
         // Recreate the writer with the final ordinalMapper
-        writer = createTermsWriterBuilder().withMapper(ordinalMapper.get()).build();
+        writer = createTermsWriter(ordinalMapper.get());
 
         // write the graph edge lists and optionally fused adc features
         var start = nanoTime();
```
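Beyond the writer refactor, the one functional change in this file is the new boolean threaded into the graph builder construction. The hunk doesn't show the constructor's leading arguments, so the sketch below is a guess at the overall shape; only the tail visible in the diff (overflow, alpha, hierarchy flag, the new refine flag, and the two pools) is grounded, and everything else is an assumption.

```java
import java.util.concurrent.ForkJoinPool;
import io.github.jbellis.jvector.graph.GraphIndexBuilder;
import io.github.jbellis.jvector.graph.similarity.BuildScoreProvider;

public final class CompactionBuilderSketch
{
    // Hypothetical wrapper; bsp, M, and beamWidth stand in for leading
    // constructor arguments that are not visible in the hunk above.
    static GraphIndexBuilder newCompactionBuilder(BuildScoreProvider bsp, int dimension,
                                                  int M, int beamWidth,
                                                  float neighborOverflow, float alpha,
                                                  boolean hierarchy,
                                                  ForkJoinPool simdPool, ForkJoinPool fjp)
    {
        return new GraphIndexBuilder(bsp, dimension, M, beamWidth,
                                     neighborOverflow, alpha,
                                     hierarchy,
                                     true, // new flag in this jvector version: always refine during compaction
                                     simdPool, fjp);
    }
}
```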
