Patch notes (text before the first "diff --git" line; not part of any hunk).

Files touched: conf/cassandra-env.sh, conf/cassandra.yaml,
conf/jvm-server.options, conf/vector_hotspot_compiler (new file),
SegmentBuilder.java, CassandraOnHeapGraph.java.

NOTE(review): this patch was reconstructed from a copy whose line breaks had
been collapsed. Blank context lines were placed so that every hunk matches the
old/new line counts in its "@@" header. Leading indentation inside context
lines is conventional and unverified; if context fails to match, try
`git apply --ignore-whitespace`.

NOTE(review): SegmentBuilder now sizes the executor from NUM_THREADS, but the
ArrayBlockingQueue capacity in the same call still uses
Runtime.getRuntime().availableProcessors() -- confirm that is intended.

NOTE(review): the heap size (-Xms128G/-Xmx128G), concurrent_compactors: 64 and
-Dcassandra.sai_segment_builder_cores=96 are hard-coded in the shared conf
files; presumably tuned for one specific host -- confirm before merging.

diff --git a/conf/cassandra-env.sh b/conf/cassandra-env.sh
index 168e81bc20cd..0f394f403a0f 100644
--- a/conf/cassandra-env.sh
+++ b/conf/cassandra-env.sh
@@ -188,7 +188,7 @@ if [ "$JVM_ARCH" = "64-Bit" ] && [ $USING_CMS -eq 0 ]; then
 fi
 
 # provides hints to the JIT compiler
-JVM_OPTS="$JVM_OPTS -XX:CompileCommandFile=$CASSANDRA_CONF/hotspot_compiler"
+JVM_OPTS="$JVM_OPTS -XX:CompileCommandFile=$CASSANDRA_CONF/hotspot_compiler -XX:+UnlockDiagnosticVMOptions -XX:CompilerDirectivesFile=$CASSANDRA_CONF/vector_hotspot_compiler"
 
 # add the jamm javaagent
 JVM_OPTS="$JVM_OPTS -javaagent:$CASSANDRA_HOME/lib/jamm-0.3.2.jar"
diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml
index 1d12152fd1e4..868fad066c09 100644
--- a/conf/cassandra.yaml
+++ b/conf/cassandra.yaml
@@ -526,8 +526,8 @@ seed_provider:
 # On the other hand, since writes are almost never IO bound, the ideal
 # number of "concurrent_writes" is dependent on the number of cores in
 # your system; (8 * number_of_cores) is a good rule of thumb.
-concurrent_reads: 32
-concurrent_writes: 32
+concurrent_reads: 128
+concurrent_writes: 128
 concurrent_counter_writes: 32
 
 # For materialized view writes, as there is a read involved, so this should
@@ -573,8 +573,8 @@ concurrent_materialized_view_writes: 32
 # accepting writes when the limit is exceeded until a flush completes,
 # and will trigger a flush based on memtable_cleanup_threshold
 # If omitted, Cassandra will set both to 1/4 the size of the heap.
-# memtable_heap_space_in_mb: 2048
-# memtable_offheap_space_in_mb: 2048
+memtable_heap_space_in_mb: 8192
+memtable_offheap_space_in_mb: 32000
 
 # memtable_cleanup_threshold is deprecated. The default calculation
 # is the only reasonable choice. See the comments on memtable_flush_writers
@@ -646,7 +646,7 @@ memtable_allocation_type: offheap_objects
 # The default value is the smaller of 8192, and 1/4 of the total space
 # of the commitlog volume.
 #
-# commitlog_total_space_in_mb: 8192
+commitlog_total_space_in_mb: 32000
 
 # This sets the number of memtable flush writer threads per disk
 # as well as the total number of memtables that can be flushed concurrently.
@@ -675,7 +675,7 @@ memtable_allocation_type: offheap_objects
 # and flush size and frequency. More is not better you just need enough flush writers
 # to never stall waiting for flushing to free memory.
 #
-#memtable_flush_writers: 2
+memtable_flush_writers: 8
 
 # Total space to use for change-data-capture logs on disk.
 #
@@ -918,7 +918,7 @@ column_index_cache_size_in_kb: 2
 #
 # If your data directories are backed by SSD, you should increase this
 # to the number of cores.
-#concurrent_compactors: 1
+concurrent_compactors: 64
 
 # Number of simultaneous repair validations to allow. If not set or set to
 # a value less than 1, it defaults to the value of concurrent_compactors.
@@ -939,7 +939,7 @@ concurrent_materialized_view_builders: 1
 # Setting this to 0 disables throttling. Note that this accounts for all types
 # of compaction, including validation compaction (building Merkle trees
 # for repairs).
-compaction_throughput_mb_per_sec: 64
+compaction_throughput_mb_per_sec: 1000
 
 # When compacting, the replacement sstable(s) can be opened before they
 # are completely written, and used in place of the prior sstables for
@@ -1499,6 +1499,9 @@ enable_drop_compact_storage: false
 # config value.
 # emulate_dbaas_defaults: false
 
+sai_options:
+  segment_write_buffer_space_mb: 1000
+
 # Guardrails settings.
 # guardrails:
 # When executing a scan, within or across a partition, we need to keep the
diff --git a/conf/jvm-server.options b/conf/jvm-server.options
index bc92e3ebd9bc..d0beddb482bd 100644
--- a/conf/jvm-server.options
+++ b/conf/jvm-server.options
@@ -115,7 +115,7 @@
 -XX:+UseNUMA
 
 # http://www.evanjones.ca/jvm-mmap-pause.html
--XX:+PerfDisableSharedMem
+#-XX:+PerfDisableSharedMem
 
 # Prefer binding to IPv4 network intefaces (when net.ipv6.bindv6only=1). See
 # http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6342561 (short version:
@@ -125,6 +125,12 @@
 
 # Disable chronicle analytics. See CASSANDRA-19656
 -Dchronicle.analytics.disable=true
+-Dunified_compaction.vector_sstable_growth=1
+-Dunified_compaction.override_ucs_config_for_vector_tables=true
+
+-Dcassandra.sai.latest.version=ec
+-Dcassandra.sai.jvector_version=4
+-Dcassandra.sai_segment_builder_cores=96
 
 ### Debug options
 # uncomment to enable flight recorder
@@ -157,8 +163,8 @@
 # the same value to avoid stop-the-world GC pauses during resize, and
 # so that we can lock the heap in memory on startup to prevent any
 # of it from being swapped out.
-#-Xms4G
-#-Xmx4G
+-Xms128G
+-Xmx128G
 
 # Young generation size is automatically calculated by cassandra-env
 # based on this formula: min(100 * num_cores, 1/4 * heap size)
diff --git a/conf/vector_hotspot_compiler b/conf/vector_hotspot_compiler
new file mode 100644
index 000000000000..5110aeae2eeb
--- /dev/null
+++ b/conf/vector_hotspot_compiler
@@ -0,0 +1,10 @@
+[
+  {
+    match: ["*.*"],
+    "inline": [
+// Third party library used for Vector Search https://github.com/jbellis/jvector
+      "+io.github.jbellis.jvector.vector.VectorUtil::*",
+      "+io.github.jbellis.jvector.vector.SimdOps::*"
+    ]
+  }
+]
diff --git a/src/java/org/apache/cassandra/index/sai/disk/v1/SegmentBuilder.java b/src/java/org/apache/cassandra/index/sai/disk/v1/SegmentBuilder.java
index 8b678b5ab672..98da073b66ae 100644
--- a/src/java/org/apache/cassandra/index/sai/disk/v1/SegmentBuilder.java
+++ b/src/java/org/apache/cassandra/index/sai/disk/v1/SegmentBuilder.java
@@ -78,8 +78,10 @@ public abstract class SegmentBuilder
 {
     private static final Logger logger = LoggerFactory.getLogger(SegmentBuilder.class);
 
+    private static final int NUM_THREADS = Integer.getInteger("cassandra.sai_segment_builder_cores", Runtime.getRuntime().availableProcessors());
+
     /** for parallelism within a single compaction */
-    public static final ExecutorService compactionExecutor = new DebuggableThreadPoolExecutor(Runtime.getRuntime().availableProcessors(),
+    public static final ExecutorService compactionExecutor = new DebuggableThreadPoolExecutor(NUM_THREADS,
                                                                                               1,
                                                                                               TimeUnit.MINUTES,
                                                                                               new ArrayBlockingQueue<>(10 * Runtime.getRuntime().availableProcessors()),
diff --git a/src/java/org/apache/cassandra/index/sai/disk/vector/CassandraOnHeapGraph.java b/src/java/org/apache/cassandra/index/sai/disk/vector/CassandraOnHeapGraph.java
index 8de03a51861b..bc2c55781412 100644
--- a/src/java/org/apache/cassandra/index/sai/disk/vector/CassandraOnHeapGraph.java
+++ b/src/java/org/apache/cassandra/index/sai/disk/vector/CassandraOnHeapGraph.java
@@ -618,7 +618,7 @@ ProductQuantization computeOrRefineFrom(PqInfo existingInfo, VectorCompression p
             if (vectorValues.size() < MIN_PQ_ROWS)
                 return null;
             else
-                return ProductQuantization.compute(vectorValues, preferredCompression.getCompressedSize(), 256, false);
+                return ProductQuantization.compute(vectorValues, preferredCompression.getCompressedSize(), 128, false);
         }
 
         // use the existing one unmodified if we either don't have enough rows to fine-tune, or