diff --git a/README.md b/README.md index 1d04e80..2a7f1c3 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ Print out sstable metadata for a column family. Useful in helping to tune compac ## pstats ## -Tool for finding largest partitions. Reads the Index.db files so is relatively quick. +Tool for finding largest partitions. ### Usage ### @@ -321,6 +321,32 @@ Largest reclaimable partitions: Partitions with the largest amount of reclaimabl | Reclaim | Reclaimable uncompressed size | | Generations | SSTable generations the partition belongs to | +### Testing with CCM ### + +You can test this tool with CCM, which saves the time of installing and configuring Cassandra. Simply do the following: + +Locate the CCM installation directory on your machine; usually this is ~/.ccm + +Identify the version of Cassandra you wish to test on. Take the binary (target/ic-sstable-tools.jar) and copy it into the lib directory located in +` +~/.ccm/repository/<version>/lib/ +` + +For example +`~/.ccm/repository/5.0.0/lib/` + +Now run + +`export CASSANDRA_INCLUDE=~/.ccm/<cluster-name>/<node-name>/bin/cassandra.in.sh` + +For example + +`export CASSANDRA_INCLUDE=~/.ccm/test/node1/bin/cassandra.in.sh` + + +and you should be able to run commands using the script located in the bin directory! 
+ +`ic-sstable-tools pstats keyspace table` Please see https://www.instaclustr.com/support/documentation/announcements/instaclustr-open-source-project-status/ for Instaclustr support status of this project diff --git a/pom.xml b/pom.xml index 35f45e7..2fc9bee 100644 --- a/pom.xml +++ b/pom.xml @@ -4,8 +4,8 @@ 4.0.0 com.instaclustr - ic-sstable-tools-5.0.0 - 1.0.0 + ic-sstable-tools-5.0.4 + 1.1.0 Instaclustr SSTable Tools Handy SSTable tools for Apache Cassandra @@ -88,7 +88,7 @@ org.apache.cassandra cassandra-all - 5.0.0 + 5.0.4 provided diff --git a/src/main/java/com/instaclustr/sstabletools/cassandra/ColumnFamilyBackend.java b/src/main/java/com/instaclustr/sstabletools/cassandra/ColumnFamilyBackend.java index 9d6c169..398c577 100644 --- a/src/main/java/com/instaclustr/sstabletools/cassandra/ColumnFamilyBackend.java +++ b/src/main/java/com/instaclustr/sstabletools/cassandra/ColumnFamilyBackend.java @@ -3,25 +3,18 @@ import com.instaclustr.sstabletools.*; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.DecoratedKey; -import org.apache.cassandra.db.SerializationHeader; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.io.sstable.Component; import org.apache.cassandra.io.sstable.format.SSTableFormat; import org.apache.cassandra.io.sstable.format.big.BigFormat; -import org.apache.cassandra.io.sstable.format.big.BigTableReader; -import org.apache.cassandra.io.util.FileHandle; -import org.apache.cassandra.utils.FilterFactory; +import org.apache.cassandra.io.sstable.format.bti.BtiFormat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; import java.time.Instant; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Optional; -import java.util.Set; +import java.util.*; /** * ColumnFamilyProxy using Cassandra 3.5 backend. 
@@ -80,8 +73,8 @@ public ColumnFamilyBackend(AbstractType keyValidator, if (filter != null) { List filteredSSTables = new ArrayList<>(sstables.size()); for (org.apache.cassandra.io.sstable.format.SSTableReader sstable : sstables) { - File dataFile = sstable.descriptor.fileFor(SSTableFormat.Components.DATA).toJavaIOFile();; - if (filter.contains(dataFile.getName())) { + String filename = sstable.descriptor.fileFor(SSTableFormat.Components.DATA).name(); + if (filter.contains(filename)) { filteredSSTables.add(sstable); } } @@ -94,34 +87,33 @@ public Collection getIndexReaders() { Collection readers = new ArrayList<>(sstables.size()); for (org.apache.cassandra.io.sstable.format.SSTableReader sstable : sstables) { try { - Set components = sstable.descriptor.discoverComponents(); + Set discoveredComponents = + sstable.descriptor.getComponents(Set.of(), Set.of(BtiFormat.Components.PARTITION_INDEX, BigFormat.Components.PRIMARY_INDEX)); - Optional maybeIndexComponent = components.stream().filter(c -> c.name.contains("Index")).findFirst(); - if (!maybeIndexComponent.isPresent()) { + if (discoveredComponents.isEmpty()) { + //Nothing to read. continue; } - org.apache.cassandra.io.util.File indexFile = sstable.descriptor.fileFor(maybeIndexComponent.get()); - FileHandle indexHandle = new FileHandle.Builder(indexFile).complete(); + if(discoveredComponents.size() > 1){ + logger.error("Multiple Components found, this should never happen. 
Filename might be incorrect."); + } - BigTableReader reader = new BigTableReader.Builder(sstable.descriptor) - .setComponents(components) - .setFilter(FilterFactory.AlwaysPresent) - .setSerializationHeader(SerializationHeader.makeWithoutStats(cfStore.metadata())) - .setIndexFile(indexHandle) - .build(this.cfStore, false, false); + Optional maybeComponent = discoveredComponents.stream().findFirst(); + + org.apache.cassandra.io.util.File sstableIndexFile = + sstable.descriptor.fileFor(maybeComponent.orElseThrow(() -> + new IllegalStateException(String.format("No Component found on sstable %s, this should never happen.", sstable.getFilename())))); - File dataFile = sstable.descriptor.fileFor(SSTableFormat.Components.DATA).toJavaIOFile(); readers.add(new IndexReader( new SSTableStatistics( sstable.descriptor.id, - dataFile.getName(), + sstableIndexFile.name(), sstable.uncompressedLength(), sstable.getMinTimestamp(), sstable.getMaxTimestamp(), sstable.getSSTableLevel()), - reader.getIndexFile().createReader(), - sstable.descriptor.version, + sstable.keyReader(), sstable.getPartitioner() )); } catch (Throwable t) { diff --git a/src/main/java/com/instaclustr/sstabletools/cassandra/IndexReader.java b/src/main/java/com/instaclustr/sstabletools/cassandra/IndexReader.java index 3ebfd0d..95b93f8 100644 --- a/src/main/java/com/instaclustr/sstabletools/cassandra/IndexReader.java +++ b/src/main/java/com/instaclustr/sstabletools/cassandra/IndexReader.java @@ -7,23 +7,17 @@ import com.instaclustr.sstabletools.PartitionStatistics; import com.instaclustr.sstabletools.SSTableStatistics; import org.apache.cassandra.dht.IPartitioner; -import org.apache.cassandra.io.sstable.format.Version; -import org.apache.cassandra.io.util.RandomAccessReader; -import org.apache.cassandra.utils.ByteBufferUtil; +import org.apache.cassandra.io.sstable.KeyReader; /** * SSTable Index.db reader. */ public class IndexReader extends AbstractSSTableReader { - /** - * Index.db reader. 
- */ - private RandomAccessReader reader; /** - * SSTable version. + * The SSTable KeyReader. */ - private Version version; + private KeyReader keyReader; /** * The sstable partitioner. @@ -45,34 +39,21 @@ public class IndexReader extends AbstractSSTableReader { */ private boolean completed = false; + /** * Construct a reader for Index.db sstable file. * * @param tableStats SSTable statistics. - * @param reader Reader to Index.db file. - * @param version Version of SSTable + * @param keyReader KeyReader for sstable. * @param partitioner The sstable partitioner. */ - public IndexReader(SSTableStatistics tableStats, RandomAccessReader reader, Version version, IPartitioner partitioner) { + public IndexReader(SSTableStatistics tableStats, KeyReader keyReader, IPartitioner partitioner) { this.tableStats = tableStats; - this.reader = reader; - this.version = version; + this.keyReader = keyReader; this.nextKey = null; this.partitioner = partitioner; } - /** - * Skip data field on index entry. - * - * @throws IOException - */ - private void skipData() throws IOException { - int size = version.version.compareTo("ma") >= 0 ? (int) reader.readUnsignedVInt() : reader.readInt(); - if (size > 0) { - reader.skipBytesFully(size); - } - } - @Override public boolean next() { if (completed) { @@ -80,20 +61,18 @@ public boolean next() { } try { if (nextKey == null) { - nextKey = ByteBufferUtil.readWithShortLength(reader); - nextPosition = version.version.compareTo("ma") > 0 ? reader.readUnsignedVInt() : reader.readLong(); - skipData(); + nextKey = keyReader.key(); + nextPosition = keyReader.dataPosition(); } partitionStats = new PartitionStatistics(partitioner.decorateKey(nextKey)); long position = nextPosition; - if (!reader.isEOF()) { - nextKey = ByteBufferUtil.readWithShortLength(reader); - nextPosition = version.version.compareTo("ma") > 0 ? 
reader.readUnsignedVInt() : reader.readLong(); - skipData(); + if (!keyReader.isExhausted() && keyReader.advance()) { + nextKey = keyReader.key(); + nextPosition = keyReader.dataPosition(); partitionStats.size = nextPosition - position; } else { partitionStats.size = this.tableStats.size - position; - reader.close(); + keyReader.close(); completed = true; } this.tableStats.partitionCount++; @@ -103,7 +82,7 @@ public boolean next() { e.printStackTrace(); if (!completed) { try { - reader.close(); + keyReader.close(); } catch (Throwable t) { } }