Skip to content
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,13 @@ public enum CassandraRelevantProperties
*/
SAI_INDEX_READS_DISABLED("cassandra.sai.disabled_reads", "false"),

/**
* Only takes effect when SAI_INDEX_READS_DISABLED is true. If true, creates a lazy index searcher that only loads
* segment metadata and has the ability to load extra files, like the PQ file for vector indexes. Currently only
* affects vector indexes. Other indexes fall back to the empty index searcher when SAI_INDEX_READS_DISABLED is true.
*/
SAI_INDEX_LOAD_SEGMENT_METADATA_ONLY("cassandra.sai.load_segment_metadata_only", "true"),

/**
* Allows custom implementation of {@link SensorsFactory} to optionally create
* and configure {@link org.apache.cassandra.sensors.RequestSensors} instances.
Expand Down
36 changes: 34 additions & 2 deletions src/java/org/apache/cassandra/index/sai/SSTableIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,16 @@
import org.apache.cassandra.db.virtual.SimpleDataSet;
import org.apache.cassandra.dht.AbstractBounds;
import org.apache.cassandra.index.sai.disk.EmptyIndex;
import org.apache.cassandra.index.sai.disk.V1MetadataOnlySearchableIndex;
import org.apache.cassandra.index.sai.disk.PrimaryKeyMapIterator;
import org.apache.cassandra.index.sai.disk.SearchableIndex;
import org.apache.cassandra.index.sai.disk.format.IndexComponents;
import org.apache.cassandra.index.sai.disk.format.IndexFeatureSet;
import org.apache.cassandra.index.sai.disk.format.Version;
import org.apache.cassandra.index.sai.disk.v1.PerIndexFiles;
import org.apache.cassandra.index.sai.disk.v1.Segment;
import org.apache.cassandra.index.sai.disk.v1.SegmentMetadata;
import org.apache.cassandra.index.sai.disk.v1.V1SearchableIndex;
import org.apache.cassandra.index.sai.iterators.KeyRangeAntiJoinIterator;
import org.apache.cassandra.index.sai.iterators.KeyRangeIterator;
import org.apache.cassandra.index.sai.plan.Expression;
Expand Down Expand Up @@ -92,8 +96,20 @@ private static SearchableIndex createSearchableIndex(SSTableContext sstableConte
{
if (CassandraRelevantProperties.SAI_INDEX_READS_DISABLED.getBoolean())
{
logger.info("Creating dummy (empty) index searcher for sstable {} as SAI index reads are disabled", sstableContext.sstable.descriptor);
return new EmptyIndex();
var context = perIndexComponents.context();
if (!perIndexComponents.isEmpty()
&& context != null
&& context.isVector()
&& CassandraRelevantProperties.SAI_INDEX_LOAD_SEGMENT_METADATA_ONLY.getBoolean())
{
logger.info("Creating a lazy index searcher for sstable {} as SAI index reads are disabled, but this is a vector index", sstableContext.sstable.descriptor.id);
return new V1MetadataOnlySearchableIndex(sstableContext, perIndexComponents);
}
else
{
logger.info("Creating dummy (empty) index searcher for sstable {} as SAI index reads are disabled", sstableContext.sstable.descriptor);
return new EmptyIndex();
}
}

return perIndexComponents.onDiskFormat().newSearchableIndex(sstableContext, perIndexComponents);
Expand Down Expand Up @@ -122,6 +138,22 @@ public List<Segment> getSegments()
return searchableIndex.getSegments();
}

/**
 * Exposes the per-segment metadata of the searchable index backing this sstable index.
 *
 * @return whatever the underlying {@link SearchableIndex#getSegmentMetadatas()} reports
 */
public List<SegmentMetadata> getSegmentMetadatas()
{
    final List<SegmentMetadata> segmentMetadatas = searchableIndex.getSegmentMetadatas();
    return segmentMetadatas;
}

public boolean areSegmentsLoaded()
{
return searchableIndex instanceof V1SearchableIndex;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`instanceof` smells here; can't we push this method into `SearchableIndex`?

}

/**
 * Returns the {@link PerIndexFiles} held by the underlying index. Only a
 * {@link V1MetadataOnlySearchableIndex} exposes its files, so this must only be called when the searchable
 * index is of that type.
 *
 * @return the per-index files of the underlying metadata-only index
 * @throws IllegalStateException if the underlying index is not a {@link V1MetadataOnlySearchableIndex}
 */
public PerIndexFiles indexFiles()
{
    // Explicit check rather than a bare assert: the failure is reported the same way whether or not
    // assertions are enabled, and the message names the offending index type.
    if (!(searchableIndex instanceof V1MetadataOnlySearchableIndex))
    {
        String actualType = searchableIndex == null ? "null" : searchableIndex.getClass().getName();
        throw new IllegalStateException("indexFiles() requires a V1MetadataOnlySearchableIndex but the searchable index is " + actualType);
    }
    return ((V1MetadataOnlySearchableIndex) searchableIndex).indexFiles();
}

public long indexFileCacheSize()
{
return searchableIndex.indexFileCacheSize();
Expand Down
7 changes: 7 additions & 0 deletions src/java/org/apache/cassandra/index/sai/disk/EmptyIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.cassandra.dht.AbstractBounds;
import org.apache.cassandra.index.sai.QueryContext;
import org.apache.cassandra.index.sai.disk.v1.Segment;
import org.apache.cassandra.index.sai.disk.v1.SegmentMetadata;
import org.apache.cassandra.index.sai.iterators.KeyRangeIterator;
import org.apache.cassandra.index.sai.plan.Expression;
import org.apache.cassandra.index.sai.plan.Orderer;
Expand Down Expand Up @@ -113,6 +114,12 @@ public List<Segment> getSegments()
return List.of();
}

/**
 * Always reports no segment metadata.
 *
 * @return an empty, immutable list
 */
@Override
public List<SegmentMetadata> getSegmentMetadatas()
{
    return List.<SegmentMetadata>of();
}

@Override
public void populateSystemView(SimpleDataSet dataSet, SSTableReader sstable)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.cassandra.dht.AbstractBounds;
import org.apache.cassandra.index.sai.QueryContext;
import org.apache.cassandra.index.sai.disk.v1.Segment;
import org.apache.cassandra.index.sai.disk.v1.SegmentMetadata;
import org.apache.cassandra.index.sai.iterators.KeyRangeIterator;
import org.apache.cassandra.index.sai.plan.Expression;
import org.apache.cassandra.index.sai.plan.Orderer;
Expand Down Expand Up @@ -84,6 +85,8 @@ public List<CloseableIterator<PrimaryKeyWithSortKey>> orderResultsBy(QueryContex

List<Segment> getSegments();

/**
 * Returns the metadata describing the segments of this index. Unlike {@link #getSegments()}, implementations
 * may be able to answer this without the segments themselves being loaded.
 *
 * @return the segment metadata; an empty list when the index has none
 */
List<SegmentMetadata> getSegmentMetadatas();

public void populateSystemView(SimpleDataSet dataSet, SSTableReader sstable);

long estimateMatchingRowsCount(Expression predicate, AbstractBounds<PartitionPosition> keyRange);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,235 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.cassandra.index.sai.disk;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.List;

import org.apache.cassandra.db.DecoratedKey;
import org.apache.cassandra.db.PartitionPosition;
import org.apache.cassandra.db.virtual.SimpleDataSet;
import org.apache.cassandra.dht.AbstractBounds;
import org.apache.cassandra.dht.Token;
import org.apache.cassandra.index.sai.IndexContext;
import org.apache.cassandra.index.sai.QueryContext;
import org.apache.cassandra.index.sai.SSTableContext;
import org.apache.cassandra.index.sai.disk.format.IndexComponents;
import org.apache.cassandra.index.sai.disk.v1.MetadataSource;
import org.apache.cassandra.index.sai.disk.v1.PerIndexFiles;
import org.apache.cassandra.index.sai.disk.v1.Segment;
import org.apache.cassandra.index.sai.disk.v1.SegmentMetadata;
import org.apache.cassandra.index.sai.iterators.KeyRangeIterator;
import org.apache.cassandra.index.sai.plan.Expression;
import org.apache.cassandra.index.sai.plan.Orderer;
import org.apache.cassandra.index.sai.utils.PrimaryKey;
import org.apache.cassandra.index.sai.utils.PrimaryKeyWithSortKey;
import org.apache.cassandra.index.sai.utils.TypeUtil;
import org.apache.cassandra.io.sstable.format.SSTableReader;
import org.apache.cassandra.io.util.FileUtils;
import org.apache.cassandra.utils.CloseableIterator;
import org.apache.cassandra.utils.Throwables;

import static org.apache.cassandra.index.sai.virtual.SegmentsSystemView.CELL_COUNT;
import static org.apache.cassandra.index.sai.virtual.SegmentsSystemView.COLUMN_NAME;
import static org.apache.cassandra.index.sai.virtual.SegmentsSystemView.COMPONENT_METADATA;
import static org.apache.cassandra.index.sai.virtual.SegmentsSystemView.END_TOKEN;
import static org.apache.cassandra.index.sai.virtual.SegmentsSystemView.MAX_SSTABLE_ROW_ID;
import static org.apache.cassandra.index.sai.virtual.SegmentsSystemView.MAX_TERM;
import static org.apache.cassandra.index.sai.virtual.SegmentsSystemView.MIN_SSTABLE_ROW_ID;
import static org.apache.cassandra.index.sai.virtual.SegmentsSystemView.MIN_TERM;
import static org.apache.cassandra.index.sai.virtual.SegmentsSystemView.START_TOKEN;
import static org.apache.cassandra.index.sai.virtual.SegmentsSystemView.TABLE_NAME;

/**
 * A {@link SearchableIndex} that eagerly loads the segment metadata of an on-disk index and nothing else: no
 * {@link Segment} is created by this class. It is currently only used for vector indexes to read PQ files during
 * compaction.
 * <p>
 * Only the metadata accessors, {@link #indexFiles()}, {@link #populateSystemView} and {@link #close()} are
 * supported. Every search-related operation, as well as {@link #getSegments()}, throws
 * {@link UnsupportedOperationException}.
 */
public class V1MetadataOnlySearchableIndex implements SearchableIndex
{
    private final List<SegmentMetadata> metadatas;
    private final DecoratedKey minKey;
    private final DecoratedKey maxKey; // in token order
    private final ByteBuffer minTerm;
    private final ByteBuffer maxTerm;
    private final long minSSTableRowId, maxSSTableRowId;
    private final long numRows;
    private final IndexContext indexContext;
    // Not final: the catch block in the constructor must be able to read it even if it was never assigned.
    private PerIndexFiles indexFiles;

    /**
     * Opens the per-index files and loads the metadata of every segment, then derives the index-wide bounds
     * (keys, terms, sstable row ids) and row count from that metadata.
     *
     * @param sstableContext the context of the sstable this index belongs to; it is closed quietly if
     *                       construction fails
     * @param perIndexComponents the per-index components to read the files and the segment metadata from
     * @throws RuntimeException if anything fails while loading; both the index files and the sstable context
     *                          are closed quietly before the failure is rethrown unchecked
     */
    public V1MetadataOnlySearchableIndex(SSTableContext sstableContext, IndexComponents.ForRead perIndexComponents)
    {
        try
        {
            this.indexContext = perIndexComponents.context();
            this.indexFiles = new PerIndexFiles(perIndexComponents);

            final MetadataSource source = MetadataSource.loadMetadata(perIndexComponents);

            // We skip loading the terms distribution because this class doesn't use it for now.
            metadatas = SegmentMetadata.load(source, indexContext, false);

            // Key bounds come from the first and last segment.
            this.minKey = metadatas.get(0).minKey.partitionKey();
            this.maxKey = metadatas.get(metadatas.size() - 1).maxKey.partitionKey();

            // Term bounds are computed across all segments; build the comparator once and reuse it.
            var version = perIndexComponents.version();
            var termComparator = TypeUtil.comparator(indexContext.getValidator(), version);
            this.minTerm = metadatas.stream().map(m -> m.minTerm).min(termComparator).orElse(null);
            this.maxTerm = metadatas.stream().map(m -> m.maxTerm).max(termComparator).orElse(null);

            this.numRows = metadatas.stream().mapToLong(m -> m.numRows).sum();

            this.minSSTableRowId = metadatas.get(0).minSSTableRowId;
            this.maxSSTableRowId = metadatas.get(metadatas.size() - 1).maxSSTableRowId;
        }
        catch (Throwable t)
        {
            // NOTE(review): this also closes the caller-supplied sstableContext; confirm that ownership is
            // really transferred to this index on failure.
            FileUtils.closeQuietly(indexFiles);
            FileUtils.closeQuietly(sstableContext);
            throw Throwables.unchecked(t);
        }
    }

    /**
     * @return always 0: the index file cache size is accumulated from the segments by the fully loaded V1
     * index, and this class does not load any segment
     */
    @Override
    public long indexFileCacheSize()
    {
        return 0;
    }

    /**
     * @return the sum of the row counts recorded in the segment metadata
     */
    @Override
    public long getRowCount()
    {
        return numRows;
    }

    /**
     * @return the minimum sstable row id recorded in the first segment's metadata
     */
    @Override
    public long minSSTableRowId()
    {
        return minSSTableRowId;
    }

    /**
     * @return the maximum sstable row id recorded in the last segment's metadata
     */
    @Override
    public long maxSSTableRowId()
    {
        return maxSSTableRowId;
    }

    /**
     * @return the smallest of the segments' minimum terms, or {@code null} if there is none
     */
    @Override
    public ByteBuffer minTerm()
    {
        return minTerm;
    }

    /**
     * @return the largest of the segments' maximum terms, or {@code null} if there is none
     */
    @Override
    public ByteBuffer maxTerm()
    {
        return maxTerm;
    }

    /**
     * @return the partition key of the first segment's minimum key
     */
    @Override
    public DecoratedKey minKey()
    {
        return minKey;
    }

    /**
     * @return the partition key of the last segment's maximum key
     */
    @Override
    public DecoratedKey maxKey()
    {
        return maxKey;
    }

    /**
     * Not supported: this index is not meant for searching, only for accessing metadata and index files.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public KeyRangeIterator search(Expression expression,
                                   AbstractBounds<PartitionPosition> keyRange,
                                   QueryContext context,
                                   boolean defer,
                                   int limit) throws IOException
    {
        throw new UnsupportedOperationException("search is not supported by a metadata-only index");
    }

    /**
     * Not supported: this index is not meant for searching, only for accessing metadata and index files.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public List<CloseableIterator<PrimaryKeyWithSortKey>> orderBy(Orderer orderer,
                                                                  Expression slice,
                                                                  AbstractBounds<PartitionPosition> keyRange,
                                                                  QueryContext context,
                                                                  int limit,
                                                                  long totalRows) throws IOException
    {
        throw new UnsupportedOperationException("orderBy is not supported by a metadata-only index");
    }

    /**
     * Not supported: no segment is loaded by this index. Use {@link #getSegmentMetadatas()} instead.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public List<Segment> getSegments()
    {
        throw new UnsupportedOperationException("getSegments is not supported by a metadata-only index; use getSegmentMetadatas instead");
    }

    /**
     * @return the segment metadata loaded at construction time
     */
    @Override
    public List<SegmentMetadata> getSegmentMetadatas()
    {
        return metadatas;
    }

    /**
     * @return the per-index files opened at construction time; they are closed by {@link #close()}
     */
    public PerIndexFiles indexFiles()
    {
        return indexFiles;
    }

    /**
     * Adds one row per segment to the given data set, built from the segment metadata only. The min and max
     * term columns are reported as "N/A".
     *
     * @param dataSet the data set to add the rows to
     * @param sstable the sstable this index belongs to, used for the keyspace, table and file names and for
     *                rendering tokens
     */
    @Override
    public void populateSystemView(SimpleDataSet dataSet, SSTableReader sstable)
    {
        Token.TokenFactory tokenFactory = sstable.metadata().partitioner.getTokenFactory();

        for (SegmentMetadata metadata : metadatas)
        {
            dataSet.row(sstable.metadata().keyspace, indexContext.getIndexName(), sstable.getFilename(), metadata.segmentRowIdOffset)
                   .column(TABLE_NAME, sstable.descriptor.cfname)
                   .column(COLUMN_NAME, indexContext.getColumnName())
                   .column(CELL_COUNT, metadata.numRows)
                   .column(MIN_SSTABLE_ROW_ID, metadata.minSSTableRowId)
                   .column(MAX_SSTABLE_ROW_ID, metadata.maxSSTableRowId)
                   .column(START_TOKEN, tokenFactory.toString(metadata.minKey.partitionKey().getToken()))
                   .column(END_TOKEN, tokenFactory.toString(metadata.maxKey.partitionKey().getToken()))
                   .column(MIN_TERM, "N/A")
                   .column(MAX_TERM, "N/A")
                   .column(COMPONENT_METADATA, metadata.componentMetadatas.asMap());
        }
    }

    /**
     * Not supported: this index is not meant for searching, only for accessing metadata and index files.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public long estimateMatchingRowsCount(Expression predicate, AbstractBounds<PartitionPosition> keyRange)
    {
        throw new UnsupportedOperationException("estimateMatchingRowsCount is not supported by a metadata-only index");
    }

    /**
     * Quietly closes the per-index files opened at construction time.
     */
    @Override
    public void close() throws IOException
    {
        FileUtils.closeQuietly(indexFiles);
    }

    /**
     * Not supported: this index is not meant for searching, only for accessing metadata and index files.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public List<CloseableIterator<PrimaryKeyWithSortKey>> orderResultsBy(QueryContext context, List<PrimaryKey> keys, Orderer orderer, int limit, long totalRows) throws IOException
    {
        throw new UnsupportedOperationException("orderResultsBy is not supported by a metadata-only index");
    }
}

Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
import org.apache.cassandra.index.sai.disk.v1.IndexSearcher;
import org.apache.cassandra.index.sai.disk.v1.PerIndexFiles;
import org.apache.cassandra.index.sai.disk.v1.SegmentMetadata;
import org.apache.cassandra.index.sai.disk.v5.V5OnDiskOrdinalsMap;
import org.apache.cassandra.index.sai.disk.vector.OnDiskOrdinalsMap;
import org.apache.cassandra.index.sai.memory.RowMapping;
import org.apache.cassandra.index.sai.memory.TrieMemtableIndex;
import org.apache.cassandra.index.sai.utils.PrimaryKey;
Expand Down Expand Up @@ -94,6 +96,16 @@ public interface OnDiskFormat
*/
public PrimaryKeyMap.Factory newPrimaryKeyMapFactory(IndexComponents.ForRead perSSTableComponents, PrimaryKey.Factory primaryKeyFactory, SSTableReader sstable) throws IOException;

/**
 * Create a new {@link OnDiskOrdinalsMap} for the provided {@link PerIndexFiles} and {@link SegmentMetadata}.
 * Only used by vector indexes currently.
 *
 * @param indexFiles the per-index files the ordinals map is created for
 * @param segmentMetadata the metadata of the segment the ordinals map is created for (presumably used to
 *                        locate that segment's data within the files; confirm against the implementations)
 * @return a new {@link OnDiskOrdinalsMap} for the given files and segment
 */
public OnDiskOrdinalsMap newOnDiskOrdinalsMap(PerIndexFiles indexFiles, SegmentMetadata segmentMetadata);

/**
* Create a new {@link SearchableIndex} for an on-disk index. This is held by the {@link org.apache.cassandra.index.sai.SSTableIndex}
* and shared between queries.
Expand Down
Loading