Skip to content

CNDB-13483: Fix loading PQ file when disabled_reads is true #1713

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,14 @@ public enum CassandraRelevantProperties
*/
SAI_INDEX_READS_DISABLED("cassandra.sai.disabled_reads", "false"),

/**
* Only takes effect when SAI_INDEX_READS_DISABLED is true. If true, creates an index searcher that loads
* segment metadata and has the ability to load extra files, like the PQ file for vector indexes, but does not do
* so until it is actually needed. Currently only affects vector indexes. Other indexes fall back to the empty index
* searcher when SAI_INDEX_READS_DISABLED is true.
*/
SAI_INDEX_LOAD_SEGMENT_METADATA("cassandra.sai.load_segment_metadata", "true"),

/**
* Allows custom implementation of {@link SensorsFactory} to optionally create
* and configure {@link org.apache.cassandra.sensors.RequestSensors} instances.
Expand Down
45 changes: 43 additions & 2 deletions src/java/org/apache/cassandra/index/sai/SSTableIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Stream;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.MoreObjects;
import com.google.common.base.Objects;

Expand All @@ -37,12 +39,18 @@
import org.apache.cassandra.db.virtual.SimpleDataSet;
import org.apache.cassandra.dht.AbstractBounds;
import org.apache.cassandra.index.sai.disk.EmptyIndex;
import org.apache.cassandra.index.sai.disk.V1MetadataOnlySearchableIndex;
import org.apache.cassandra.index.sai.disk.PrimaryKeyMapIterator;
import org.apache.cassandra.index.sai.disk.SearchableIndex;
import org.apache.cassandra.index.sai.disk.format.IndexComponents;
import org.apache.cassandra.index.sai.disk.format.IndexFeatureSet;
import org.apache.cassandra.index.sai.disk.format.Version;
import org.apache.cassandra.index.sai.disk.v1.PerIndexFiles;
import org.apache.cassandra.index.sai.disk.v1.Segment;
import org.apache.cassandra.index.sai.disk.v1.SegmentMetadata;
import org.apache.cassandra.index.sai.disk.v1.V1SearchableIndex;
import org.apache.cassandra.index.sai.disk.v5.V5VectorPostingsWriter;
import org.apache.cassandra.index.sai.disk.vector.ProductQuantizationFetcher;
import org.apache.cassandra.index.sai.iterators.KeyRangeAntiJoinIterator;
import org.apache.cassandra.index.sai.iterators.KeyRangeIterator;
import org.apache.cassandra.index.sai.plan.Expression;
Expand Down Expand Up @@ -92,8 +100,20 @@ private static SearchableIndex createSearchableIndex(SSTableContext sstableConte
{
if (CassandraRelevantProperties.SAI_INDEX_READS_DISABLED.getBoolean())
{
logger.info("Creating dummy (empty) index searcher for sstable {} as SAI index reads are disabled", sstableContext.sstable.descriptor);
return new EmptyIndex();
var context = perIndexComponents.context();
if (!perIndexComponents.isEmpty()
&& context != null
&& context.isVector()
&& CassandraRelevantProperties.SAI_INDEX_LOAD_SEGMENT_METADATA.getBoolean())
{
logger.info("Creating a V1MetadataOnlySearchableIndex for sstable {} as SAI index reads are disabled, but this is a vector index", sstableContext.sstable.descriptor.id);
return new V1MetadataOnlySearchableIndex(sstableContext, perIndexComponents);
}
else
{
logger.info("Creating dummy (empty) index searcher for sstable {} as SAI index reads are disabled", sstableContext.sstable.descriptor);
return new EmptyIndex();
}
}

return perIndexComponents.onDiskFormat().newSearchableIndex(sstableContext, perIndexComponents);
Expand Down Expand Up @@ -122,6 +142,22 @@ public List<Segment> getSegments()
return searchableIndex.getSegments();
}

/**
 * Returns the {@link SegmentMetadata} for each segment of this index, delegating to the
 * underlying {@link SearchableIndex} implementation.
 *
 * @return the per-segment metadata; empty when the backing index has no segments
 *         (e.g. {@link EmptyIndex} returns an empty list)
 */
public List<SegmentMetadata> getSegmentMetadatas()
{
return searchableIndex.getSegmentMetadatas();
}

@VisibleForTesting
public boolean areSegmentsLoaded()
{
return searchableIndex instanceof V1SearchableIndex;
// NOTE(review): `instanceof` smells here — can't we push this method into SearchableIndex?
}

/**
 * Fetches product-quantization (PQ) info for one segment, delegating to the underlying
 * {@link SearchableIndex}.
 *
 * @param segmentPosition position of the segment within this index — presumably its index in
 *                        {@link #getSegments()}; TODO confirm against callers
 * @return the PQ info, or null when none is available ({@link EmptyIndex} always returns null)
 */
public ProductQuantizationFetcher.PqInfo getPqInfo(int segmentPosition)
{
return searchableIndex.getPqInfo(segmentPosition);
}

public long indexFileCacheSize()
{
return searchableIndex.indexFileCacheSize();
Expand Down Expand Up @@ -257,6 +293,11 @@ public Version getVersion()
return perIndexComponents.version();
}

/**
 * Returns a stream of the vector postings {@link V5VectorPostingsWriter.Structure} values for
 * this index, delegating to the underlying {@link SearchableIndex}. The stream is empty for
 * {@link EmptyIndex}.
 */
public Stream<V5VectorPostingsWriter.Structure> getPostingsStructures()
{
return searchableIndex.getPostingsStructures();
}

public IndexFeatureSet indexFeatureSet()
{
return getVersion().onDiskFormat().indexFeatureSet();
Expand Down
22 changes: 22 additions & 0 deletions src/java/org/apache/cassandra/index/sai/disk/EmptyIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,17 @@
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.stream.Stream;

import org.apache.cassandra.db.DecoratedKey;
import org.apache.cassandra.db.PartitionPosition;
import org.apache.cassandra.db.virtual.SimpleDataSet;
import org.apache.cassandra.dht.AbstractBounds;
import org.apache.cassandra.index.sai.QueryContext;
import org.apache.cassandra.index.sai.disk.v1.Segment;
import org.apache.cassandra.index.sai.disk.v1.SegmentMetadata;
import org.apache.cassandra.index.sai.disk.v5.V5VectorPostingsWriter;
import org.apache.cassandra.index.sai.disk.vector.ProductQuantizationFetcher;
import org.apache.cassandra.index.sai.iterators.KeyRangeIterator;
import org.apache.cassandra.index.sai.plan.Expression;
import org.apache.cassandra.index.sai.plan.Orderer;
Expand Down Expand Up @@ -113,6 +117,24 @@ public List<Segment> getSegments()
return List.of();
}

/**
 * An empty index has no segments, so there is no segment metadata.
 *
 * @return an immutable empty list
 */
@Override
public List<SegmentMetadata> getSegmentMetadatas()
{
return List.of();
}

/**
 * An empty index has no segments, so there are no postings structures.
 *
 * @return an empty stream
 */
@Override
public Stream<V5VectorPostingsWriter.Structure> getPostingsStructures()
{
return Stream.empty();
}

/**
 * An empty index has no segments, hence no product-quantization data.
 *
 * @return always null — callers of {@code getPqInfo} must tolerate a null result
 */
@Override
public ProductQuantizationFetcher.PqInfo getPqInfo(int segmentPosition)
{
return null;
}

@Override
public void populateSystemView(SimpleDataSet dataSet, SSTableReader sstable)
{
Expand Down
10 changes: 10 additions & 0 deletions src/java/org/apache/cassandra/index/sai/disk/SearchableIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,17 @@
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.stream.Stream;

import org.apache.cassandra.db.DecoratedKey;
import org.apache.cassandra.db.PartitionPosition;
import org.apache.cassandra.db.virtual.SimpleDataSet;
import org.apache.cassandra.dht.AbstractBounds;
import org.apache.cassandra.index.sai.QueryContext;
import org.apache.cassandra.index.sai.disk.v1.Segment;
import org.apache.cassandra.index.sai.disk.v1.SegmentMetadata;
import org.apache.cassandra.index.sai.disk.v5.V5VectorPostingsWriter;
import org.apache.cassandra.index.sai.disk.vector.ProductQuantizationFetcher;
import org.apache.cassandra.index.sai.iterators.KeyRangeIterator;
import org.apache.cassandra.index.sai.plan.Expression;
import org.apache.cassandra.index.sai.plan.Orderer;
Expand Down Expand Up @@ -84,6 +88,12 @@ public List<CloseableIterator<PrimaryKeyWithSortKey>> orderResultsBy(QueryContex

List<Segment> getSegments();

/**
 * Returns the {@link SegmentMetadata} for each segment of this index; empty when the index
 * has no segments.
 */
List<SegmentMetadata> getSegmentMetadatas();

/**
 * Returns the vector postings {@link V5VectorPostingsWriter.Structure} of each segment as a
 * stream; empty when the index has no segments.
 */
Stream<V5VectorPostingsWriter.Structure> getPostingsStructures();

/**
 * Returns product-quantization info for the segment at {@code segmentPosition}, or null when
 * none is available (e.g. {@link EmptyIndex} returns null — callers must handle null).
 */
ProductQuantizationFetcher.PqInfo getPqInfo(int segmentPosition);

public void populateSystemView(SimpleDataSet dataSet, SSTableReader sstable);

long estimateMatchingRowsCount(Expression predicate, AbstractBounds<PartitionPosition> keyRange);
Expand Down
Loading