Skip to content

Changes for 1B vector benchmark #1850

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: hcd-1.2.2-beta
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion conf/cassandra-env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ if [ "$JVM_ARCH" = "64-Bit" ] && [ $USING_CMS -eq 0 ]; then
fi

# provides hints to the JIT compiler
JVM_OPTS="$JVM_OPTS -XX:CompileCommandFile=$CASSANDRA_CONF/hotspot_compiler"
JVM_OPTS="$JVM_OPTS -XX:CompileCommandFile=$CASSANDRA_CONF/hotspot_compiler -XX:+UnlockDiagnosticVMOptions -XX:CompilerDirectivesFile=$CASSANDRA_CONF/vector_hotspot_compiler"

# add the jamm javaagent
JVM_OPTS="$JVM_OPTS -javaagent:$CASSANDRA_HOME/lib/jamm-0.3.2.jar"
Expand Down
19 changes: 11 additions & 8 deletions conf/cassandra.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -526,8 +526,8 @@ seed_provider:
# On the other hand, since writes are almost never IO bound, the ideal
# number of "concurrent_writes" is dependent on the number of cores in
# your system; (8 * number_of_cores) is a good rule of thumb.
concurrent_reads: 32
concurrent_writes: 32
concurrent_reads: 128
concurrent_writes: 128
concurrent_counter_writes: 32

# For materialized view writes, as there is a read involved, this should
Expand Down Expand Up @@ -573,8 +573,8 @@ concurrent_materialized_view_writes: 32
# accepting writes when the limit is exceeded until a flush completes,
# and will trigger a flush based on memtable_cleanup_threshold
# If omitted, Cassandra will set both to 1/4 the size of the heap.
# memtable_heap_space_in_mb: 2048
# memtable_offheap_space_in_mb: 2048
memtable_heap_space_in_mb: 8192
memtable_offheap_space_in_mb: 32000

# memtable_cleanup_threshold is deprecated. The default calculation
# is the only reasonable choice. See the comments on memtable_flush_writers
Expand Down Expand Up @@ -646,7 +646,7 @@ memtable_allocation_type: offheap_objects
# The default value is the smaller of 8192, and 1/4 of the total space
# of the commitlog volume.
#
# commitlog_total_space_in_mb: 8192
commitlog_total_space_in_mb: 32000

# This sets the number of memtable flush writer threads per disk
# as well as the total number of memtables that can be flushed concurrently.
Expand Down Expand Up @@ -675,7 +675,7 @@ memtable_allocation_type: offheap_objects
# and flush size and frequency. More is not better you just need enough flush writers
# to never stall waiting for flushing to free memory.
#
#memtable_flush_writers: 2
memtable_flush_writers: 8

# Total space to use for change-data-capture logs on disk.
#
Expand Down Expand Up @@ -918,7 +918,7 @@ column_index_cache_size_in_kb: 2
#
# If your data directories are backed by SSD, you should increase this
# to the number of cores.
#concurrent_compactors: 1
concurrent_compactors: 64

# Number of simultaneous repair validations to allow. If not set or set to
# a value less than 1, it defaults to the value of concurrent_compactors.
Expand All @@ -939,7 +939,7 @@ concurrent_materialized_view_builders: 1
# Setting this to 0 disables throttling. Note that this accounts for all types
# of compaction, including validation compaction (building Merkle trees
# for repairs).
compaction_throughput_mb_per_sec: 64
compaction_throughput_mb_per_sec: 1000

# When compacting, the replacement sstable(s) can be opened before they
# are completely written, and used in place of the prior sstables for
Expand Down Expand Up @@ -1499,6 +1499,9 @@ enable_drop_compact_storage: false
# config value.
# emulate_dbaas_defaults: false

sai_options:
segment_write_buffer_space_mb: 1000

# Guardrails settings.
# guardrails:
# When executing a scan, within or across a partition, we need to keep the
Expand Down
12 changes: 9 additions & 3 deletions conf/jvm-server.options
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@
-XX:+UseNUMA

# http://www.evanjones.ca/jvm-mmap-pause.html
-XX:+PerfDisableSharedMem
#-XX:+PerfDisableSharedMem

# Prefer binding to IPv4 network interfaces (when net.ipv6.bindv6only=1). See
# http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6342561 (short version:
Expand All @@ -125,6 +125,12 @@
# Disable chronicle analytics. See CASSANDRA-19656
-Dchronicle.analytics.disable=true

-Dunified_compaction.vector_sstable_growth=1
-Dunified_compaction.override_ucs_config_for_vector_tables=true

-Dcassandra.sai.latest.version=ec
-Dcassandra.sai.jvector_version=4
-Dcassandra.sai_segment_builder_cores=96
### Debug options

# uncomment to enable flight recorder
Expand Down Expand Up @@ -157,8 +163,8 @@
# the same value to avoid stop-the-world GC pauses during resize, and
# so that we can lock the heap in memory on startup to prevent any
# of it from being swapped out.
#-Xms4G
#-Xmx4G
-Xms128G
-Xmx128G

# Young generation size is automatically calculated by cassandra-env
# based on this formula: min(100 * num_cores, 1/4 * heap size)
Expand Down
10 changes: 10 additions & 0 deletions conf/vector_hotspot_compiler
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[
{
"match": ["*.*"],
"inline": [
// Third party library used for Vector Search https://github.com/jbellis/jvector
"+io.github.jbellis.jvector.vector.VectorUtil::*",
"+io.github.jbellis.jvector.vector.SimdOps::*"
]
}
]
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,10 @@ public abstract class SegmentBuilder
{
private static final Logger logger = LoggerFactory.getLogger(SegmentBuilder.class);

private static final int NUM_THREADS = Integer.getInteger("cassandra.sai_segment_builder_cores", Runtime.getRuntime().availableProcessors());

/** for parallelism within a single compaction */
public static final ExecutorService compactionExecutor = new DebuggableThreadPoolExecutor(Runtime.getRuntime().availableProcessors(),
public static final ExecutorService compactionExecutor = new DebuggableThreadPoolExecutor(NUM_THREADS,
1,
TimeUnit.MINUTES,
new ArrayBlockingQueue<>(10 * Runtime.getRuntime().availableProcessors()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -618,7 +618,7 @@ ProductQuantization computeOrRefineFrom(PqInfo existingInfo, VectorCompression p
if (vectorValues.size() < MIN_PQ_ROWS)
return null;
else
return ProductQuantization.compute(vectorValues, preferredCompression.getCompressedSize(), 256, false);
return ProductQuantization.compute(vectorValues, preferredCompression.getCompressedSize(), 128, false);
}

// use the existing one unmodified if we either don't have enough rows to fine-tune, or
Expand Down
Loading