diff --git a/.build/build-rat.xml b/.build/build-rat.xml
index 27e8f63ae43d..ea87fa5a2902 100644
--- a/.build/build-rat.xml
+++ b/.build/build-rat.xml
@@ -49,12 +49,14 @@
+
+
@@ -69,6 +71,7 @@
+
@@ -77,6 +80,8 @@
+
+
@@ -91,6 +96,9 @@
+
+
+
diff --git a/.build/build-resolver.xml b/.build/build-resolver.xml
index 55718377e244..a962024a3f69 100644
--- a/.build/build-resolver.xml
+++ b/.build/build-resolver.xml
@@ -53,11 +53,16 @@
+
+
+
+
+
-
-
+
+
-
+
+
@@ -308,6 +317,9 @@
+
+
+
diff --git a/.build/cassandra-build-deps-template.xml b/.build/cassandra-build-deps-template.xml
index cc1a25a8c1fc..f12be6b7c050 100644
--- a/.build/cassandra-build-deps-template.xml
+++ b/.build/cassandra-build-deps-template.xml
@@ -17,8 +17,8 @@
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
   <modelVersion>4.0.0</modelVersion>
-    <artifactId>cassandra-parent</artifactId>
-    <groupId>org.apache.cassandra</groupId>
+    <artifactId>dse-db-parent</artifactId>
+    <groupId>com.datastax.dse</groupId>
     <version>@version@</version>
     <relativePath>@final.name@-parent.pom</relativePath>
@@ -155,5 +155,9 @@
       <groupId>org.bouncycastle</groupId>
       <artifactId>bcutil-jdk18on</artifactId>
+    <dependency>
+      <groupId>com.bpodgursky</groupId>
+      <artifactId>jbool_expressions</artifactId>
+    </dependency>
diff --git a/.build/cassandra-deps-template.xml b/.build/cassandra-deps-template.xml
index ab98e36ab85f..8e5194e83d6b 100644
--- a/.build/cassandra-deps-template.xml
+++ b/.build/cassandra-deps-template.xml
@@ -17,12 +17,12 @@
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
   <modelVersion>4.0.0</modelVersion>
-    <groupId>org.apache.cassandra</groupId>
-    <artifactId>cassandra-parent</artifactId>
+    <groupId>com.datastax.dse</groupId>
+    <artifactId>dse-db-parent</artifactId>
     <version>@version@</version>
     <relativePath>@final.name@-parent.pom</relativePath>
-  <artifactId>cassandra-all</artifactId>
+  <artifactId>dse-db-all</artifactId>
   <version>@version@</version>
   <name>Apache Cassandra</name>
   <description>The Apache Cassandra Project develops a highly scalable second-generation distributed database, bringing together Dynamo's fully distributed design and Bigtable's ColumnFamily-based data model.</description>
@@ -35,9 +35,9 @@
-    <connection>scm:https://gitbox.apache.org/repos/asf/cassandra.git</connection>
-    <developerConnection>scm:https://gitbox.apache.org/repos/asf/cassandra.git</developerConnection>
-    <url>https://gitbox.apache.org/repos/asf?p=cassandra.git</url>
+    <connection>scm:git:ssh://git@github.com:datastax/cassandra.git</connection>
+    <developerConnection>scm:git:ssh://git@github.com:datastax/cassandra.git</developerConnection>
+    <url>scm:git:ssh://git@github.com:datastax/cassandra.git</url>
@@ -104,6 +104,10 @@
       <groupId>com.fasterxml.jackson.datatype</groupId>
       <artifactId>jackson-datatype-jsr310</artifactId>
+    <dependency>
+      <groupId>com.googlecode.json-simple</groupId>
+      <artifactId>json-simple</artifactId>
+    </dependency>
       <groupId>com.boundary</groupId>
       <artifactId>high-scale-lib</artifactId>
@@ -136,6 +140,10 @@
       <groupId>com.clearspring.analytics</groupId>
       <artifactId>stream</artifactId>
+    <dependency>
+      <groupId>com.esri.geometry</groupId>
+      <artifactId>esri-geometry-api</artifactId>
+    </dependency>
       <groupId>ch.qos.logback</groupId>
       <artifactId>logback-core</artifactId>
@@ -201,6 +209,10 @@
       <groupId>net.openhft</groupId>
       <artifactId>chronicle-threads</artifactId>
+    <dependency>
+      <groupId>net.openhft</groupId>
+      <artifactId>chronicle-map</artifactId>
+    </dependency>
       <groupId>net.openhft</groupId>
@@ -368,6 +380,10 @@
       <groupId>org.apache.lucene</groupId>
       <artifactId>lucene-analysis-common</artifactId>
+    <dependency>
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-backward-codecs</artifactId>
+    </dependency>
       <groupId>io.github.jbellis</groupId>
       <artifactId>jvector</artifactId>
@@ -376,5 +392,17 @@
       <groupId>com.vdurmont</groupId>
       <artifactId>semver4j</artifactId>
+    <dependency>
+      <groupId>io.micrometer</groupId>
+      <artifactId>micrometer-core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.latencyutils</groupId>
+      <artifactId>LatencyUtils</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>de.huxhorn.sulky</groupId>
+      <artifactId>de.huxhorn.sulky.ulid</artifactId>
+    </dependency>
diff --git a/.build/parent-pom-template.xml b/.build/parent-pom-template.xml
index a3f507706c02..3b4d3d67781f 100644
--- a/.build/parent-pom-template.xml
+++ b/.build/parent-pom-template.xml
@@ -21,13 +21,13 @@
     <groupId>org.apache</groupId>
     <version>22</version>
-  <groupId>org.apache.cassandra</groupId>
-  <artifactId>cassandra-parent</artifactId>
+  <groupId>com.datastax.dse</groupId>
+  <artifactId>dse-db-parent</artifactId>
   <version>@version@</version>
   <packaging>pom</packaging>
-  <name>Apache Cassandra</name>
-  <description>The Apache Cassandra Project develops a highly scalable second-generation distributed database, bringing together Dynamo's fully distributed design and Bigtable's ColumnFamily-based data model.</description>
-  <url>https://cassandra.apache.org</url>
+  <name>Datastax DB</name>
+  <description>The Apache Cassandra Project develops a highly scalable second-generation distributed database. DataStax, Inc. provides additional improvements on top of Apache Cassandra</description>
+  <url>https://datastax.com</url>
   <inceptionYear>2009</inceptionYear>
@@ -36,8 +36,8 @@
- 1.12.13
- 4.0.20
+ 1.14.17
+ 4.0.23
0.5.1
@@ -239,9 +239,9 @@
-    <connection>scm:https://gitbox.apache.org/repos/asf/cassandra.git</connection>
-    <developerConnection>scm:https://gitbox.apache.org/repos/asf/cassandra.git</developerConnection>
-    <url>https://gitbox.apache.org/repos/asf?p=cassandra.git</url>
+    <connection>scm:git:ssh://git@github.com:datastax/cassandra.git</connection>
+    <developerConnection>scm:git:ssh://git@github.com:datastax/cassandra.git</developerConnection>
+    <url>scm:git:ssh://git@github.com:datastax/cassandra.git</url>
@@ -291,7 +291,7 @@
org.xerial.snappy
snappy-java
- 1.1.10.4
+ 1.1.10.7
org.lz4
@@ -306,7 +306,7 @@
com.google.guava
guava
- 32.0.1-jre
+ 33.4.0-jre
jsr305
@@ -397,52 +397,57 @@
org.slf4j
slf4j-api
- 1.7.36
+ 2.0.9
org.slf4j
log4j-over-slf4j
- 1.7.36
+ 2.0.9
org.slf4j
jcl-over-slf4j
- 1.7.36
+ 2.0.9
ch.qos.logback
logback-core
- 1.2.12
+ 1.4.14
ch.qos.logback
logback-classic
- 1.2.12
+ 1.4.14
com.fasterxml.jackson.core
jackson-core
- 2.13.2
+ 2.18.3
com.fasterxml.jackson.core
jackson-databind
- 2.13.2.2
+ 2.18.3
com.fasterxml.jackson.core
jackson-annotations
- 2.13.2
+ 2.18.3
+      </dependency>
+      <dependency>
+        <groupId>com.googlecode.json-simple</groupId>
+        <artifactId>json-simple</artifactId>
+        <version>1.1</version>
com.fasterxml.jackson.datatype
jackson-datatype-jsr310
- 2.13.2
+ 2.18.3
com.fasterxml.jackson.dataformat
jackson-dataformat-yaml
- 2.13.2
+ 2.18.3
test
@@ -464,12 +469,12 @@
org.yaml
snakeyaml
- 1.26
+ 2.4
junit
junit
- 4.12
+ 4.13
test
@@ -729,7 +734,7 @@
io.netty
netty-all
- 4.1.96.Final
+ 4.1.118.Final
io.netty
@@ -800,7 +805,7 @@
io.netty
netty-tcnative-boringssl-static
- 2.0.61.Final
+ 2.0.69.Final
org.bouncycastle
@@ -823,18 +828,18 @@
io.netty
netty-transport-native-epoll
- 4.1.96.Final
+ 4.1.118.Final
io.netty
netty-transport-native-epoll
- 4.1.96.Final
+ 4.1.118.Final
linux-x86_64
io.netty
netty-transport-native-epoll
- 4.1.96.Final
+ 4.1.118.Final
linux-aarch_64
@@ -842,7 +847,7 @@
net.openhft
chronicle-queue
- 5.23.37
+ 5.24ea27
tools
@@ -858,7 +863,7 @@
net.openhft
chronicle-core
- 2.23.36
+ 2.24ea28
chronicle-analytics
@@ -873,7 +878,7 @@
net.openhft
chronicle-bytes
- 2.23.33
+ 2.24ea20
annotations
@@ -884,7 +889,7 @@
net.openhft
chronicle-wire
- 2.23.39
+ 2.24ea27
compiler
@@ -900,7 +905,19 @@
net.openhft
chronicle-threads
- 2.23.25
+ 2.24ea14
+
+
+
+ net.openhft
+ affinity
+
+
+
+
+ net.openhft
+ chronicle-map
+ 3.24ea4
@@ -926,7 +943,7 @@
com.google.code.findbugs
jsr305
- 2.0.2
+ 3.0.0
com.clearspring.analytics
@@ -939,6 +956,11 @@
+      <dependency>
+        <groupId>com.esri.geometry</groupId>
+        <artifactId>esri-geometry-api</artifactId>
+        <version>2.2.4</version>
+      </dependency>
com.datastax.cassandra
cassandra-driver-core
@@ -1206,22 +1228,27 @@
org.agrona
agrona
- 1.17.1
+ 1.20.0
org.apache.lucene
lucene-core
- 9.7.0
+ 9.8.0
org.apache.lucene
lucene-analysis-common
- 9.7.0
+ 9.8.0
+      </dependency>
+      <dependency>
+        <groupId>org.apache.lucene</groupId>
+        <artifactId>lucene-backward-codecs</artifactId>
+        <version>9.8.0</version>
io.github.jbellis
jvector
- 1.0.2
+ 4.0.0-beta.4
com.carrotsearch.randomizedtesting
@@ -1245,6 +1272,27 @@
semver4j
3.1.0
+      <dependency>
+        <groupId>com.bpodgursky</groupId>
+        <artifactId>jbool_expressions</artifactId>
+        <version>1.24</version>
+        <scope>test</scope>
+      </dependency>
+      <dependency>
+        <groupId>io.micrometer</groupId>
+        <artifactId>micrometer-core</artifactId>
+        <version>1.5.5</version>
+      </dependency>
+      <dependency>
+        <groupId>org.latencyutils</groupId>
+        <artifactId>LatencyUtils</artifactId>
+        <version>2.0.3</version>
+      </dependency>
+      <dependency>
+        <groupId>de.huxhorn.sulky</groupId>
+        <artifactId>de.huxhorn.sulky.ulid</artifactId>
+        <version>8.2.0</version>
+      </dependency>
diff --git a/.build/run-python-dtests.sh b/.build/run-python-dtests.sh
index 5b1307e1546d..13f95c689654 100755
--- a/.build/run-python-dtests.sh
+++ b/.build/run-python-dtests.sh
@@ -68,6 +68,7 @@ ALLOWED_DTEST_VARIANTS="novnode|large|latest|upgrade"
[[ "${DTEST_TARGET}" =~ ^dtest(-(${ALLOWED_DTEST_VARIANTS}))*$ ]] || { echo >&2 "Unknown dtest target: ${DTEST_TARGET}. Allowed variants are ${ALLOWED_DTEST_VARIANTS}"; exit 1; }
java_version=$(java -version 2>&1 | awk -F '"' '/version/ {print $2}' | awk -F. '{print $1}')
+project_name=$(grep '&1 | awk -F '"' '/version/ {print $2}' | awk -F. '{print $1}')
+ local -r project_name=$(grep '
-
-
-
-patch by ; reviewed by for CASSANDRA-#####
-
-Co-authored-by: Name1
-Co-authored-by: Name2
-
-```
-
-The [Cassandra Jira](https://issues.apache.org/jira/projects/CASSANDRA/issues/)
+### What is the issue
+...
+### What does this PR fix and why was it fixed
+...
diff --git a/.github/workflows/checklist_comment_on_new_pr.yml b/.github/workflows/checklist_comment_on_new_pr.yml
new file mode 100644
index 000000000000..fda81de21fd2
--- /dev/null
+++ b/.github/workflows/checklist_comment_on_new_pr.yml
@@ -0,0 +1,18 @@
+name: Comment on new Pull Request with checklist
+on:
+ pull_request:
+ types: opened
+
+jobs:
+ checklist-comment:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout Repository
+ uses: actions/checkout@v4
+ - name: Add comment
+ run:
+ gh pr comment $PRNUM --body-file .github/workflows/pr_checklist.md
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ GH_REPO: ${{ github.repository }}
+ PRNUM: ${{ github.event.pull_request.number }}
diff --git a/.github/workflows/pr_checklist.md b/.github/workflows/pr_checklist.md
new file mode 100644
index 000000000000..4f942016c202
--- /dev/null
+++ b/.github/workflows/pr_checklist.md
@@ -0,0 +1,11 @@
+### Checklist before you submit for review
+- [ ] Make sure there is a PR in the CNDB project updating the Converged Cassandra version
+- [ ] Use `NoSpamLogger` for log lines that may appear frequently in the logs
+- [ ] Verify test results on Butler
+- [ ] Test coverage for new/modified code is > 80%
+- [ ] Proper code formatting
+- [ ] Proper title for each commit starting with the project-issue number, like CNDB-1234
+- [ ] Each commit has a meaningful description
+- [ ] Each commit is not very long and contains related changes
+- [ ] Renames, moves and reformatting are in distinct commits
+- [ ] All new files should contain the DataStax copyright header instead of the Apache License one
diff --git a/.gitignore b/.gitignore
index aa9e76c9323d..c5696389ec72 100644
--- a/.gitignore
+++ b/.gitignore
@@ -56,6 +56,12 @@ ide/nbproject/private
nb-configuration.xml
nbactions.xml
+# VScode
+.vscode/
+
+# Aider (aider.chat)
+.aider*
+
# Maven, etc.
out/
target/
@@ -65,6 +71,7 @@ target/
*.pyc
*~
*.bak
+*.log
*.sw[o,p]
*.tmp
.DS_Store
diff --git a/CHANGES.txt b/CHANGES.txt
index a1129aafd527..d8cc44811491 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,3 +1,12 @@
+Future version (tbd)
+ * Require only MODIFY permission on base when updating table with MV (STAR-564)
+
+
+Merged from 5.1:
+ * Expose current compaction throughput in nodetool (CASSANDRA-13890)
+Merged from 5.0:
+ * Improve error messages when initializing auth classes (CASSANDRA-20368 and CASSANDRA-20450)
+ * Use ParameterizedClass for all auth-related implementations (CASSANDRA-19946 and partially CASSANDRA-18554)
5.0.2
* Use SinglePartitionReadCommand for index queries that use strict filtering (CASSANDRA-19968)
* Always write local expiration time as an int to LivenessInfo digest (CASSANDRA-19989)
@@ -173,13 +182,11 @@ Merged from 3.0:
5.0-alpha2
- * Add support for vector search in SAI (CASSANDRA-18715)
* Remove crc_check_chance from CompressionParams (CASSANDRA-18872)
* Fix schema loading of UDTs inside vectors inside UDTs (CASSANDRA-18964)
* Add cqlsh autocompletion for the vector data type (CASSANDRA-18946)
* Fix nodetool tablehistograms output to avoid printing repeated information and ensure at most two arguments (CASSANDRA-18955)
* Change the checksum algorithm SAI-related files use from CRC32 to CRC32C (CASSANDRA-18836)
- * Correctly remove Index.Group from IndexRegistry (CASSANDRA-18905)
* Fix vector type to support DDM's mask_default function (CASSANDRA-18889)
* Remove unnecessary reporter-config3 dependency (CASSANDRA-18907)
* Remove support for empty values on the vector data type (CASSANDRA-18876)
diff --git a/NEWS.txt b/NEWS.txt
index 717b5cf37de6..921ce03d4f2d 100644
--- a/NEWS.txt
+++ b/NEWS.txt
@@ -109,7 +109,6 @@ New features
src/java/org/apache/cassandra/db/compaction/UnifiedCompactionStrategy.md
- New `VectorType` (cql `vector`) which adds new fixed-length element arrays. See CASSANDRA-18504
- Added new vector similarity functions `similarity_cosine`, `similarity_euclidean` and `similarity_dot_product`.
- - Added ANN vector similarity search via ORDER BY ANN OF syntax on SAI indexes (using jvector library).
- Removed UDT type migration logic for 3.6+ clusters upgrading to 4.0. If migration has been disabled, it must be
enabled before upgrading to 5.0 if the cluster used UDTs. See CASSANDRA-18504
 - Extended max expiration time from 2038-01-19T03:14:06+00:00 to 2106-02-07T06:28:13+00:00
@@ -294,6 +293,11 @@ Deprecation
Cluster hosts running with dual native ports were not correctly identified in the system.peers tables and server-sent EVENTs,
causing clients that encrypt traffic to fail to maintain correct connection pools. For more information, see CASSANDRA-19392.
- Deprecated `use_deterministic_table_id` in cassandra.yaml. Table IDs may still be supplied explicitly on CREATE.
+ - Chronicle Queue has changed the enums used for log rolling (cassandra.yaml -> full_query_logging_options:roll_cycle).
+ Older legacy options will still work for the foreseeable future but you will see warnings in logs and future dependency
+ upgrades may break your log rolling param. The default log rolling param will be changed with the next major release
+ from HOURLY to FAST_HOURLY, primarily differing in how frequently indexes are built (256 in FAST_HOURLY vs. 16 in HOURLY).
+ For more info refer to: net.openhft.chronicle.queue.RollCycles
4.1
===
diff --git a/NOTICE.txt b/NOTICE.txt
index fd185210450f..5a2c26ae740d 100644
--- a/NOTICE.txt
+++ b/NOTICE.txt
@@ -8,3 +8,9 @@ Android Code
Copyright 2005-2008 The Android Open Source Project
This product includes software developed as part of
The Android Open Source Project (http://source.android.com).
+
+This project includes software from the Apache Lucene project. Relevant
+portions of its NOTICE are excerpted below:
+=======================================================================
+Apache Lucene
+Copyright 2001-2018 The Apache Software Foundation
diff --git a/README.asc b/README.asc
index be26f9d97b79..0ca628bf817d 100644
--- a/README.asc
+++ b/README.asc
@@ -25,8 +25,8 @@ and running, and demonstrate some simple reads and writes. For a more-complete g
First, we'll unpack our archive:
- $ tar -zxvf apache-cassandra-$VERSION.tar.gz
- $ cd apache-cassandra-$VERSION
+ $ tar -zxvf dse-db-$VERSION.tar.gz
+ $ cd dse-db-$VERSION
After that we start the server. Running the startup script with the -f argument will cause
Cassandra to remain in the foreground and log to standard out; it can be stopped with ctrl-C.
diff --git a/bin/cassandra.in.sh b/bin/cassandra.in.sh
index b838c2d4cf9c..5d83b4ed673e 100644
--- a/bin/cassandra.in.sh
+++ b/bin/cassandra.in.sh
@@ -30,7 +30,7 @@ CLASSPATH="$CASSANDRA_CONF"
# compiled classes. NOTE: This isn't needed by the startup script,
# it's just used here in constructing the classpath.
if [ -d $CASSANDRA_HOME/build ] ; then
- jars_cnt="`ls -1 $CASSANDRA_HOME/build/apache-cassandra*.jar | grep -v 'javadoc.jar' | grep -v 'sources.jar' | wc -l | xargs echo`"
+ jars_cnt="`ls -1 $CASSANDRA_HOME/build/dse-db*.jar | grep -v 'javadoc.jar' | grep -v 'sources.jar' | wc -l | xargs echo`"
if [ "$jars_cnt" -gt 1 ]; then
dir="`cd $CASSANDRA_HOME/build; pwd`"
echo "There are JAR artifacts for multiple versions in the $dir directory. Please clean the project with 'ant realclean' and build it again." 1>&2
@@ -38,8 +38,8 @@ if [ -d $CASSANDRA_HOME/build ] ; then
fi
if [ "$jars_cnt" = "1" ]; then
- cassandra_bin="`ls -1 $CASSANDRA_HOME/build/apache-cassandra*.jar | grep -v javadoc | grep -v sources`"
- CLASSPATH="$CLASSPATH:$cassandra_bin"
+ dse_db_bin="`ls -1 $CASSANDRA_HOME/build/dse-db*.jar | grep -v javadoc | grep -v sources`"
+ CLASSPATH="$CLASSPATH:$dse_db_bin"
fi
fi
@@ -122,11 +122,16 @@ jvmver=`echo "$java_ver_output" | grep '[openjdk|java] version' | awk -F'"' 'NR=
JVM_VERSION=${jvmver%_*}
short=$(echo "${jvmver}" | cut -c1-2)
-JAVA_VERSION=17
+JAVA_VERSION=22
if [ "$short" = "11" ] ; then
JAVA_VERSION=11
elif [ "$JVM_VERSION" \< "17" ] ; then
- echo "Cassandra 5.0 requires Java 11 or Java 17."
+ echo "DSE DB 5.0 requires Java 11 or higher."
+ exit 1;
+elif [ "$short" = "17" ] ; then
+ JAVA_VERSION=17
+elif [ "$JVM_VERSION" \< "22" ] ; then
+ echo "DSE DB 5.0 requires Java 11 or higher."
exit 1;
fi
@@ -151,7 +156,9 @@ esac
# Read user-defined JVM options from jvm-server.options file
JVM_OPTS_FILE=$CASSANDRA_CONF/jvm${jvmoptions_variant:--clients}.options
-if [ $JAVA_VERSION -ge 17 ] ; then
+if [ $JAVA_VERSION -ge 22 ] ; then
+ JVM_DEP_OPTS_FILE=$CASSANDRA_CONF/jvm22${jvmoptions_variant:--clients}.options
+elif [ $JAVA_VERSION -ge 17 ] ; then
JVM_DEP_OPTS_FILE=$CASSANDRA_CONF/jvm17${jvmoptions_variant:--clients}.options
elif [ $JAVA_VERSION -ge 11 ] ; then
JVM_DEP_OPTS_FILE=$CASSANDRA_CONF/jvm11${jvmoptions_variant:--clients}.options
diff --git a/bin/cqlsh.py b/bin/cqlsh.py
index 738f0aeeb716..87bbfa9da3c4 100755
--- a/bin/cqlsh.py
+++ b/bin/cqlsh.py
@@ -56,7 +56,7 @@ def find_zip(libprefix):
sys.path.insert(0, os.path.join(cql_zip, 'cassandra-driver-' + ver))
# the driver needs dependencies
-third_parties = ('pure_sasl-', 'wcwidth-')
+third_parties = ('pure_sasl-', 'wcwidth-', 'geomet-')
for lib in third_parties:
lib_zip = find_zip(lib)
diff --git a/build.properties.default b/build.properties.default
index 36676f5712d8..380270479620 100644
--- a/build.properties.default
+++ b/build.properties.default
@@ -21,3 +21,4 @@
artifact.remoteRepository.central: https://repo1.maven.org/maven2
artifact.remoteRepository.apache: https://repo.maven.apache.org/maven2
artifact.remoteRepository.apacheSnapshot: https://repository.apache.org/content/repositories/snapshots
+artifact.remoteRepository.datastax: https://repo.datastax.com/dse
diff --git a/build.xml b/build.xml
index fdebf135acb8..eae801283cf2 100644
--- a/build.xml
+++ b/build.xml
@@ -14,7 +14,7 @@
See the License for the specific language governing permissions and
limitations under the License.
-->
-
@@ -33,19 +33,18 @@
-
-
-
-
+
+
+
+
-
+
@@ -95,11 +94,12 @@
-
-
-
+
+
+
@@ -113,14 +113,14 @@
-
+
-
+
-
-
+
+
@@ -133,16 +133,24 @@
-
+
-
+
+
+
+
+
+
+
+
+
-
+
@@ -249,8 +257,15 @@
--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.transport.tcp=ALL-UNNAMED
--add-exports java.sql/java.sql=ALL-UNNAMED
+ --add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED
+ --add-exports jdk.unsupported/sun.misc=ALL-UNNAMED
+ --add-exports java.base/jdk.internal.util=ALL-UNNAMED
+ --add-opens java.base/java.io=ALL-UNNAMED
+ --add-opens java.base/java.util=ALL-UNNAMED
+ --add-opens java.base/java.lang=ALL-UNNAMED
--add-opens java.base/java.lang.module=ALL-UNNAMED
+ --add-opens java.base/java.lang.reflect=ALL-UNNAMED
--add-opens java.base/java.net=ALL-UNNAMED
--add-opens java.base/jdk.internal.loader=ALL-UNNAMED
--add-opens java.base/jdk.internal.ref=ALL-UNNAMED
@@ -258,6 +273,7 @@
--add-opens java.base/jdk.internal.math=ALL-UNNAMED
--add-opens java.base/jdk.internal.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED
+ --add-opens jdk.compiler/com.sun.tools.javac=ALL-UNNAMED
--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED
@@ -298,6 +314,7 @@
--add-opens java.base/java.lang=ALL-UNNAMED
--add-opens java.base/java.util=ALL-UNNAMED
--add-opens java.base/java.nio=ALL-UNNAMED
+ --add-opens java.base/java.nio.file.attribute=ALL-UNNAMED
--add-opens java.rmi/sun.rmi.transport.tcp=ALL-UNNAMED
@@ -306,6 +323,78 @@
+
+ -XX:+UnlockDiagnosticVMOptions
+ -Djdk.attach.allowAttachSelf=true
+ -XX:+UseG1GC
+ -XX:+ParallelRefProcEnabled
+
+
+ -XX:G1RSetUpdatingPauseTimePercent=5
+ -XX:MaxGCPauseMillis=100
+
+
+ -XX:-RestrictContended
+ -XX:+UseThreadPriorities
+ -XX:+DebugNonSafepoints
+ -XX:+UseStringDeduplication
+ -XX:StringTableSize=1000003
+ -XX:+PerfDisableSharedMem
+ -XX:+AlwaysPreTouch
+ -XX:+UseTLAB
+ -XX:+ResizeTLAB
+ -XX:+UseNUMA
+
+
+ --add-exports java.base/jdk.internal.misc=ALL-UNNAMED
+ --add-exports java.base/jdk.internal.ref=ALL-UNNAMED
+ --add-exports java.base/jdk.internal.perf=ALL-UNNAMED
+ --add-exports java.base/sun.nio.ch=ALL-UNNAMED
+ --add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED
+ --add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED
+ --add-exports java.rmi/sun.rmi.server=ALL-UNNAMED
+ --add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED
+ --add-exports jdk.naming.dns/com.sun.jndi.dns=java.naming
+ --add-exports jdk.unsupported/sun.misc=ALL-UNNAMED
+
+ --add-opens java.base/java.io=ALL-UNNAMED
+ --add-opens java.base/java.lang=ALL-UNNAMED
+ --add-opens java.base/java.lang.module=ALL-UNNAMED
+ --add-opens java.base/java.lang.ref=ALL-UNNAMED
+ --add-opens java.base/java.lang.reflect=ALL-UNNAMED
+ --add-opens java.base/java.math=ALL-UNNAMED
+ --add-opens java.base/java.net=ALL-UNNAMED
+ --add-opens java.base/java.nio=ALL-UNNAMED
+ --add-opens java.base/java.nio.charset=ALL-UNNAMED
+ --add-opens java.base/java.nio.file.spi=ALL-UNNAMED
+ --add-opens java.base/java.util=ALL-UNNAMED
+ --add-opens java.base/java.util.concurrent.locks=ALL-UNNAMED
+ --add-opens java.base/jdk.internal.loader=ALL-UNNAMED
+ --add-opens java.base/jdk.internal.math=ALL-UNNAMED
+ --add-opens java.base/jdk.internal.module=ALL-UNNAMED
+ --add-opens java.base/jdk.internal.ref=ALL-UNNAMED
+ --add-opens java.base/jdk.internal.reflect=ALL-UNNAMED
+ --add-opens java.base/jdk.internal.vm=ALL-UNNAMED
+ --add-opens java.base/sun.nio.ch=ALL-UNNAMED
+ --add-opens jdk.compiler/com.sun.tools.javac=ALL-UNNAMED
+ --add-opens jdk.management.jfr/jdk.management.jfr=ALL-UNNAMED
+ --add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED
+ --add-opens jdk.naming.dns/com.sun.jndi.dns=ALL-UNNAMED
+
+ --add-opens java.base/java.nio.file.attribute=ALL-UNNAMED
+
+
+ --add-opens java.base/java.util.concurrent=ALL-UNNAMED
+ --add-opens java.base/java.util.concurrent.atomic=ALL-UNNAMED
+ --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED
+
+
+
+
+
+
+ --add-modules jdk.incubator.vector
+
+
+
+
+
+
+
+
-
+
+ failonerror="true"
+ fork="true"
+ outputproperty="antlr.output"
+ errorproperty="antlr.error">
@@ -508,6 +621,7 @@
+
@@ -735,7 +849,7 @@
-->
+ description="Assemble DSE DB JAR files">
@@ -749,9 +863,9 @@
-
+
-
+
@@ -760,7 +874,7 @@
+ description="Assemble DSE DB JAR files">
@@ -775,7 +889,7 @@
+ description="Assemble DSE DB JAR files">
@@ -791,7 +905,7 @@
+ description="Assemble DSE DB JAR files">
@@ -826,13 +940,13 @@
+ description="Assemble DSE DB JAR files">
-
+
@@ -842,7 +956,7 @@
-
+
@@ -909,8 +1023,8 @@
-
+
@@ -1134,6 +1248,9 @@
+
+
+
@@ -1146,7 +1263,7 @@
-
+
@@ -1171,23 +1288,32 @@
+
+
+
+
+
-
+
-
+
+
+
+
+
@@ -1385,6 +1577,7 @@
+
@@ -1398,6 +1591,7 @@
+
@@ -1410,6 +1604,7 @@
+
@@ -1446,6 +1641,22 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -1454,22 +1665,41 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -1477,6 +1707,7 @@
+
@@ -1491,6 +1722,7 @@
timeout="${test.long.timeout}">
+
@@ -1498,6 +1730,7 @@
+
@@ -1516,6 +1749,7 @@
+
@@ -1562,6 +1796,7 @@
+
@@ -1705,6 +1940,13 @@
+
+
+
+
+
+
+
@@ -1712,6 +1954,13 @@
+
+
+
+
+
+
+
@@ -1773,6 +2022,7 @@
+
@@ -1810,6 +2060,7 @@
+
@@ -1826,6 +2077,7 @@
+
@@ -1944,10 +2196,10 @@
]]>
- IDE configuration in .idea/ updated for use with JDK${ant.java.version}.
+ IDE configuration in .idea/ updated for use with JDK${ant.java.version}.
- In IntelliJ verify that the SDK is ${ant.java.version}, and its path is valid.
- This can be verified in 'Project Structure/Project Setting/Project' and 'Project Structure/Platform Setting/SDKs'.
+ In IntelliJ verify that the SDK is ${ant.java.version}, and its path is valid.
+ This can be verified in 'Project Structure/Project Setting/Project' and 'Project Structure/Platform Setting/SDKs'.
@@ -2047,7 +2299,7 @@
file="${build.dir}/${final.name}-parent.pom"
packaging="pom"/>
-
+
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/conf/cassandra-env.sh b/conf/cassandra-env.sh
index ba9f9d459641..7d57e6762fcf 100644
--- a/conf/cassandra-env.sh
+++ b/conf/cassandra-env.sh
@@ -98,7 +98,7 @@ echo "$JVM_OPTS" | grep -qe "-[X]log:gc"
if [ "$?" = "1" ] ; then # [X] to prevent ccm from replacing this line
# only add -Xlog:gc if it's not mentioned in jvm-server.options file
mkdir -p ${CASSANDRA_LOG_DIR}
- JVM_OPTS="$JVM_OPTS -Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=${CASSANDRA_LOG_DIR}/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760"
+ JVM_OPTS="$JVM_OPTS -Xlog:gc=info,heap*=debug,age*=debug,safepoint=info,promotion*=debug:file=${CASSANDRA_LOG_DIR}/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760"
fi
# Check what parameters were defined on jvm-server.options file to avoid conflicts
@@ -210,9 +210,9 @@ JVM_ON_OUT_OF_MEMORY_ERROR_OPT="-XX:OnOutOfMemoryError=kill -9 %p"
# for more on configuring JMX through firewalls, etc. (Short version:
# get it working with no firewall first.)
#
-# Cassandra ships with JMX accessible *only* from localhost.
+# Cassandra ships with JMX accessible *only* from localhost.
# To enable remote JMX connections, uncomment lines below
-# with authentication and/or ssl enabled. See https://wiki.apache.org/cassandra/JmxSecurity
+# with authentication and/or ssl enabled. See https://wiki.apache.org/cassandra/JmxSecurity
#
if [ "x$LOCAL_JMX" = "x" ]; then
LOCAL_JMX=yes
diff --git a/conf/cassandra.yaml b/conf/cassandra.yaml
index 5562a2c1560d..7e0943aefa6c 100644
--- a/conf/cassandra.yaml
+++ b/conf/cassandra.yaml
@@ -156,6 +156,32 @@ auto_hints_cleanup_enabled: false
# Min unit: KiB
batchlog_replay_throttle: 1024KiB
+# Strategy to choose the batchlog storage endpoints.
+#
+# Available options:
+#
+# - random_remote
+# Default, purely random, avoids the local rack if possible.
+#
+# - prefer_local
+# Similar to random_remote. Random, except that one of the replicas will go to the local rack,
+# which means it offers a lower availability guarantee than random_remote or dynamic_remote.
+#
+# - dynamic_remote
+# Uses DynamicEndpointSnitch to select batchlog storage endpoints and avoids the
+# local rack if possible. This strategy offers the same availability guarantees
+# as random_remote but selects the fastest endpoints according to the DynamicEndpointSnitch.
+# (DynamicEndpointSnitch currently only tracks reads and not writes - i.e. write-only
+# (or mostly-write) workloads might not benefit from this strategy.)
+# Note: this strategy will fall back to random_remote, if dynamic_snitch is not enabled.
+#
+# - dynamic
+# Mostly the same as dynamic_remote, except that the local rack is not excluded, which means it offers a lower
+# availability guarantee than random_remote or dynamic_remote.
+# Note: this strategy will fall back to random_remote, if dynamic_snitch is not enabled.
+#
+# batchlog_endpoint_strategy: random_remote
+
# Authentication backend, implementing IAuthenticator; used to identify users
# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator,
# PasswordAuthenticator}.
@@ -369,6 +395,11 @@ partitioner: org.apache.cassandra.dht.Murmur3Partitioner
# data_file_directories:
# - /var/lib/cassandra/data
+# Metadata directory that holds information about the cluster, local node and its peers.
+# Currently, only a single subdirectory called 'nodes' will be used.
+# If not set, the default directory is $CASSANDRA_HOME/data/metadata.
+# metadata_directory: /var/lib/cassandra/metadata
+
 # Directory where Cassandra should store the data of the local system keyspaces.
# By default Cassandra will store the data of the local system keyspaces in the first of the data directories specified
# by data_file_directories.
@@ -661,6 +692,8 @@ commitlog_disk_access_mode: legacy
# none : Flush without compressing blocks but while still doing checksums.
# fast : Flush with a fast compressor. If the table is already using a
# fast compressor that compressor is used.
+# adaptive : Flush with a fast adaptive compressor. If the table is already using a
+# fast compressor that compressor is used.
# table: Always flush with the same compressor that the table uses. This
# was the pre 4.0 behavior.
#
@@ -796,7 +829,7 @@ memtable:
#
# offheap_objects
# off heap objects
-memtable_allocation_type: heap_buffers
+memtable_allocation_type: offheap_objects
# Limit memory usage for Merkle tree calculations during repairs of a certain
# table and common token range. Repair commands targetting multiple tables or
@@ -822,7 +855,7 @@ memtable_allocation_type: heap_buffers
# There isn't a limit by default for backwards compatibility, but this can
# produce OOM for commands repairing multiple tables or multiple virtual nodes.
# A limit of just 1 simultaneous Merkle tree request is generally recommended
-# with no virtual nodes so repair_session_space, and thereof the Merkle tree
+# with no virtual nodes so repair_session_space, and therefore the Merkle tree
# resolution, can be high. For virtual nodes a value of 1 with the default
# repair_session_space value will produce higher resolution Merkle trees
# at the expense of speed. Alternatively, when working with virtual nodes it
@@ -925,7 +958,7 @@ index_summary_resize_interval: 60m
# buffers. Enable this to avoid sudden dirty buffer flushing from
# impacting read latencies. Almost always a good idea on SSDs; not
# necessarily on platters.
-trickle_fsync: false
+trickle_fsync: true
# Min unit: KiB
trickle_fsync_interval: 10240KiB
@@ -1235,7 +1268,8 @@ sstable_preemptive_open_interval: 50MiB
# set to true, each newly created sstable will have a UUID based generation identifier and such files are
# not readable by previous Cassandra versions. At some point, this option will become true by default
# and eventually get removed from the configuration.
-uuid_sstable_identifiers_enabled: false
+# In Converged Cassandra, we enable this option by default
+uuid_sstable_identifiers_enabled: true
# When enabled, permits Cassandra to zero-copy stream entire eligible
# SSTables between nodes, including every component.
@@ -1318,6 +1352,16 @@ truncate_request_timeout: 60000ms
# Lowest acceptable value is 10 ms.
# Min unit: ms
request_timeout: 10000ms
+# Upper bound for how long any request received via native transport
+# should be considered live and serviceable by the system. This is
+# currently considered at two points: when the message is dequeued and
+# executed by the NATIVE_TRANSPORT_REQUESTS stage, and when the message
+# is dequeued and executed by an async stage if NATIVE_TRANSPORT_ASYNC_READ_WRITE_ENABLED
+# is set to true. If the request is not completed within this time, an
+# OverloadedException is thrown.
+# Min unit: ms
+native_transport_timeout: 12000ms
+
# Defensive settings for protecting Cassandra from true network partitions.
# See (CASSANDRA-14358) for details.
@@ -1728,12 +1772,6 @@ transparent_data_encryption_options:
store_type: JCEKS
key_password: cassandra
-# Storage Attached Indexing options.
-# sai_options:
- ## Total permitted memory allowed for writing SAI index segments. This memory
- ## is split between all SAI indexes being built so more indexes will mean smaller
- ## segment sizes.
- # segment_write_buffer_size: 1024MiB
#####################
# SAFETY THRESHOLDS #
@@ -1827,6 +1865,11 @@ unlogged_batch_across_partitions_warn_threshold: 10
# Audit logging - Logs every incoming CQL command request, authentication to a node. See the docs
# on audit_logging for full details about the various configuration options and production tips.
+# NOTE: Chronicle Queue has changed the enums used for log rolling (roll_cycle).
+# Older legacy options will still work for the foreseeable future, but you will see warnings in logs and future dependency
+# upgrades may break your log rolling param. The default log rolling param will be changed with the next major release
+# from HOURLY to FAST_HOURLY, primarily differing in how frequently indexes are built. For more info refer to:
+# net.openhft.chronicle.queue.RollCycles
audit_logging_options:
enabled: false
logger:
@@ -1909,6 +1952,10 @@ report_unconfirmed_repaired_data_mismatches: false
# Defaults to false to disable dynamic data masking.
# dynamic_data_masking_enabled: false
+# This is the page size used internally by aggregation queries. It aims to limit the memory used by aggregation
+# queries when there is a lot of data to aggregate.
+# aggregation_subpage_size_in_kb: 2048
+
#########################
# EXPERIMENTAL FEATURES #
#########################
@@ -1977,7 +2024,7 @@ drop_compact_storage_enabled: false
# columns_per_table_warn_threshold: -1
# columns_per_table_fail_threshold: -1
#
-# Guardrail to warn or fail when creating more secondary indexes per table than threshold.
+# Guardrail to warn or fail when creating more secondary indexes per table than threshold (does not apply to CUSTOM INDEX StorageAttachedIndex).
# The two thresholds default to -1 to disable.
# secondary_indexes_per_table_warn_threshold: -1
# secondary_indexes_per_table_fail_threshold: -1
@@ -1985,6 +2032,16 @@ drop_compact_storage_enabled: false
# Guardrail to enable or disable the creation of secondary indexes
# secondary_indexes_enabled: true
#
+# Failure threshold for number of StorageAttachedIndex per table (only applies to CUSTOM INDEX StorageAttachedIndex)
+# Default is 10 (same when apply_dbaas_defaults is enabled)
+# sai_indexes_per_table_warn_threshold: -1
+# sai_indexes_per_table_fail_threshold: 10
+#
+# Failure threshold for total number of StorageAttachedIndex across all keyspaces (only applies to CUSTOM INDEX StorageAttachedIndex)
+# Default is 100 (same when apply_dbaas_defaults is enabled)
+# sai_indexes_total_warn_threshold: -1
+# sai_indexes_total_fail_threshold: 100
+#
# Guardrail to warn or fail when creating more materialized views per table than threshold.
# The two thresholds default to -1 to disable.
# materialized_views_per_table_warn_threshold: -1
@@ -2113,7 +2170,7 @@ drop_compact_storage_enabled: false
# Guardrail to warn or fail when creating a vector column with more dimensions than threshold.
# Default -1 to disable.
# vector_dimensions_warn_threshold: -1
-# vector_dimensions_fail_threshold: -1
+# vector_dimensions_fail_threshold: 8192
#
# Guardrail to indicate whether or not users are allowed to use ALTER TABLE commands to make column changes to tables
# alter_table_enabled: true
@@ -2184,6 +2241,20 @@ drop_compact_storage_enabled: false
# sai_vector_term_size_warn_threshold: 16KiB
# sai_vector_term_size_fail_threshold: 32KiB
+# Guardrail to warn or fail when using LIMIT/OFFSET paging skipping more rows than threshold.
+# Default offset_rows_warn_threshold is 10000, may differ if emulate_dbaas_defaults is enabled
+# Default offset_rows_failure_threshold is 20000, may differ if emulate_dbaas_defaults is enabled
+# offset_rows_warn_threshold: 10000
+# offset_rows_failure_threshold: 20000
+
+# Guardrail to warn or fail when a SELECT query has more column value filters than threshold.
+# Note that restrictions on indexed columns can be expanded to multiple column filters if the indexes have an analyzer.
+# In that case, there will be a filter for every token produced by the analyzer for the queried column value. This can
+# prevent that productive analyzers such as n-gram explode the query to a large number of filtering operations.
+# Default -1 to disable, may differ if emulate_dbaas_defaults is enabled
+# query_filters_warn_threshold: -1
+# query_filters_fail_threshold: -1
+
# The default secondary index implementation when CREATE INDEX does not specify one via USING.
# ex. "legacy_local_table" - (default) legacy secondary index, implemented as a hidden table
# ex. "sai" - "storage-attched" index, implemented via optimized SSTable/Memtable-attached indexes
@@ -2246,4 +2317,16 @@ drop_compact_storage_enabled: false
# and ensures stability. If Cassandra was started at the previous version by accident, a node with disabled
# compatibility mode would no longer toggle behaviors as when it was running in the UPGRADING mode.
#
-storage_compatibility_mode: CASSANDRA_4
+storage_compatibility_mode: NONE
+
+# Emulates DataStax Constellation database-as-a-service defaults.
+#
+# When enabled, some defaults are modified to match those used by DataStax Constellation (DataStax cloud data
+# platform). This includes (but is not limited to) stricter guardrails defaults.
+#
+# This can be used as a convenience to develop and test applications meant to run on DataStax Constellation.
+#
+# Warning: when enabled, the updated defaults reflect those of DataStax Constellation _at the time_ of the currently
+# used DSE release. This is a best-effort emulation of said defaults. Further, all nodes must use the same
+# config value.
+# emulate_dbaas_defaults: false
diff --git a/conf/cassandra_latest.yaml b/conf/cassandra_latest.yaml
index fd86f149e617..38aa35878700 100644
--- a/conf/cassandra_latest.yaml
+++ b/conf/cassandra_latest.yaml
@@ -1302,6 +1302,15 @@ truncate_request_timeout: 60000ms
# Lowest acceptable value is 10 ms.
# Min unit: ms
request_timeout: 10000ms
+# Upper bound for how long any request received via native transport
+# should be considered live and serviceable by the system. This is
+# currently considered at two points: when the message is dequeued and
+# executed by the NATIVE_TRANSPORT_REQUESTS stage, and when the message
+# is dequeued and executed by an async stage if NATIVE_TRANSPORT_ASYNC_READ_WRITE_ENABLED
+# is set to true. If the request is not completed within this time, an
+# OverloadedException is thrown.
+# Min unit: ms
+native_transport_timeout: 12000ms
# Defensive settings for protecting Cassandra from true network partitions.
# See (CASSANDRA-14358) for details.
diff --git a/conf/cqlshrc.sample.cloud b/conf/cqlshrc.sample.cloud
new file mode 100644
index 000000000000..62528670c48b
--- /dev/null
+++ b/conf/cqlshrc.sample.cloud
@@ -0,0 +1,17 @@
+; Copyright DataStax, Inc.
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+; http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+;
+; Sample ~/.cqlshrc file with cloud configuration.
+[connection]
+secure_connect_bundle = /path/to/creds.zip
diff --git a/conf/jvm11-clients.options b/conf/jvm11-clients.options
index 3d59816c045f..08ce8f2a30f6 100644
--- a/conf/jvm11-clients.options
+++ b/conf/jvm11-clients.options
@@ -29,18 +29,28 @@
-Djdk.attach.allowAttachSelf=true
--add-exports java.base/jdk.internal.misc=ALL-UNNAMED
--add-exports java.base/jdk.internal.ref=ALL-UNNAMED
+--add-exports java.base/jdk.internal.util=ALL-UNNAMED
--add-exports java.base/sun.nio.ch=ALL-UNNAMED
--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED
--add-exports java.sql/java.sql=ALL-UNNAMED
+--add-exports jdk.compiler/com.sun.tools.javac=ALL-UNNAMED
+--add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED
+--add-exports jdk.unsupported/sun.misc=ALL-UNNAMED
+--add-opens java.base/java.io=ALL-UNNAMED
+--add-opens java.base/java.lang=ALL-UNNAMED
--add-opens java.base/java.lang.module=ALL-UNNAMED
+--add-opens java.base/java.lang.reflect=ALL-UNNAMED
+--add-opens java.base/java.util=ALL-UNNAMED
--add-opens java.base/jdk.internal.loader=ALL-UNNAMED
--add-opens java.base/jdk.internal.ref=ALL-UNNAMED
--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED
--add-opens java.base/jdk.internal.math=ALL-UNNAMED
--add-opens java.base/jdk.internal.module=ALL-UNNAMED
+--add-opens java.base/java.util=ALL-UNNAMED
+--add-opens java.base/jdk.internal.util=ALL-UNNAMED
--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED
--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED
--add-opens java.base/java.lang.reflect=ALL-UNNAMED
diff --git a/conf/jvm11-server.options b/conf/jvm11-server.options
index f71f6287ffb5..0915b2ca385d 100644
--- a/conf/jvm11-server.options
+++ b/conf/jvm11-server.options
@@ -30,6 +30,8 @@
# Disable biased locking as it does not benefit Cassandra.
-XX:-UseBiasedLocking
+-XX:ThreadPriorityPolicy=1
+-XX:+UseThreadPriorities
#################
# GC SETTINGS #
@@ -94,14 +96,21 @@
--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED
--add-exports java.sql/java.sql=ALL-UNNAMED
+--add-exports jdk.unsupported/sun.misc=ALL-UNNAMED
+--add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED
+--add-opens java.base/java.io=ALL-UNNAMED
+--add-opens java.base/java.lang=ALL-UNNAMED
--add-opens java.base/java.lang.module=ALL-UNNAMED
+--add-opens java.base/java.lang.reflect=ALL-UNNAMED
+--add-opens=java.base/java.util=ALL-UNNAMED
--add-opens java.base/jdk.internal.loader=ALL-UNNAMED
--add-opens java.base/jdk.internal.ref=ALL-UNNAMED
--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED
--add-opens java.base/jdk.internal.math=ALL-UNNAMED
--add-opens java.base/jdk.internal.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED
+--add-opens jdk.compiler/com.sun.tools.javac=ALL-UNNAMED
--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED
@@ -110,7 +119,7 @@
# Java 11 (and newer) GC logging options:
# See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax
# The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M
-#-Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=/var/log/cassandra/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760
+#-Xlog:gc=info,heap*=debug,age*=debug,safepoint=info,promotion*=debug:file=/var/log/cassandra/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760
# Notes for Java 8 migration:
#
diff --git a/conf/jvm17-clients.options b/conf/jvm17-clients.options
index 671d91b21f95..36e15c838fce 100644
--- a/conf/jvm17-clients.options
+++ b/conf/jvm17-clients.options
@@ -28,6 +28,8 @@
-Djdk.attach.allowAttachSelf=true
--add-exports java.base/jdk.internal.misc=ALL-UNNAMED
+--add-exports java.base/jdk.internal.ref=ALL-UNNAMED
+--add-exports java.base/sun.nio.ch=ALL-UNNAMED
--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED
diff --git a/conf/jvm17-server.options b/conf/jvm17-server.options
index 1a0f8f9c565f..567a9b76ea57 100644
--- a/conf/jvm17-server.options
+++ b/conf/jvm17-server.options
@@ -22,6 +22,9 @@
# See jvm-server.options. This file is specific for Java 17 and newer. #
###########################################################################
+-XX:ThreadPriorityPolicy=1
+-XX:+UseThreadPriorities
+
#################
# GC SETTINGS #
#################
@@ -43,7 +46,7 @@
## Main G1GC tunable: lowering the pause target will lower throughput and vise versa.
## 200ms is the JVM default and lowest viable setting
## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml.
--XX:MaxGCPauseMillis=300
+-XX:MaxGCPauseMillis=500
## Optional G1 Settings
# Save CPU time on large (>= 16GB) heaps by delaying region scanning
@@ -63,6 +66,8 @@
-Djdk.attach.allowAttachSelf=true
--add-exports java.base/jdk.internal.misc=ALL-UNNAMED
+--add-exports java.base/jdk.internal.ref=ALL-UNNAMED
+--add-exports java.base/sun.nio.ch=ALL-UNNAMED
--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED
--add-exports java.management/com.sun.jmx.remote.security=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED
@@ -70,8 +75,8 @@
--add-exports java.sql/java.sql=ALL-UNNAMED
--add-exports java.base/java.lang.ref=ALL-UNNAMED
--add-exports jdk.unsupported/sun.misc=ALL-UNNAMED
+--add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED
---add-opens java.base/java.lang.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.loader=ALL-UNNAMED
--add-opens java.base/jdk.internal.ref=ALL-UNNAMED
--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED
@@ -83,15 +88,20 @@
--add-opens java.base/java.io=ALL-UNNAMED
--add-opens java.base/java.lang.reflect=ALL-UNNAMED
--add-opens java.base/java.lang=ALL-UNNAMED
+--add-opens=java.base/java.nio.charset=ALL-UNNAMED
--add-opens java.base/java.util=ALL-UNNAMED
--add-opens java.base/java.nio=ALL-UNNAMED
+--add-opens jdk.compiler/com.sun.tools.javac=ALL-UNNAMED
+
+# required for org.apache.cassandra.Util.getSupportedMTimeGranularity
+--add-opens java.base/java.nio.file.attribute=ALL-UNNAMED
### GC logging options -- uncomment to enable
# Java 11 (and newer) GC logging options:
# See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax
# The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M
-#-Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=/var/log/cassandra/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760
+# -Xlog:gc=info,heap*=debug,age*=debug,safepoint=info,promotion*=debug:file=/var/log/cassandra/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760
# Notes for Java 8 migration:
#
@@ -114,5 +124,12 @@
# Revert changes in defaults introduced in https://netty.io/news/2022/03/10/4-1-75-Final.html
-Dio.netty.allocator.useCacheForAllThreads=true
-Dio.netty.allocator.maxOrder=11
+### Enable vector incubator feature (simd support)
+
+--add-modules jdk.incubator.vector
+
+### Compatibility Options
+--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens java.base/java.io=ALL-UNNAMED --add-opens java.base/sun.nio.ch=ALL-UNNAMED --add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/java.util=ALL-UNNAMED
+-Djava.security.manager=allow
# The newline in the end of file is intentional
diff --git a/conf/jvm22-clients.options b/conf/jvm22-clients.options
new file mode 100644
index 000000000000..81af895ed216
--- /dev/null
+++ b/conf/jvm22-clients.options
@@ -0,0 +1,50 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+###########################################################################
+# jvm22-clients.options #
+# #
+# See jvm-clients.options. This file is specific for Java 22 and newer. #
+###########################################################################
+
+###################
+# JPMS SETTINGS #
+###################
+
+-Djdk.attach.allowAttachSelf=true
+--add-exports java.base/jdk.internal.misc=ALL-UNNAMED
+--add-exports java.base/jdk.internal.ref=ALL-UNNAMED
+--add-exports java.base/sun.nio.ch=ALL-UNNAMED
+--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED
+--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED
+--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED
+--add-exports java.sql/java.sql=ALL-UNNAMED
+--add-exports jdk.attach/sun.tools.attach=ALL-UNNAMED
+
+--add-opens java.base/java.io=ALL-UNNAMED
+--add-opens java.base/java.lang.module=ALL-UNNAMED
+--add-opens java.base/java.lang.reflect=ALL-UNNAMED
+--add-opens java.base/jdk.internal.loader=ALL-UNNAMED
+--add-opens java.base/jdk.internal.math=ALL-UNNAMED
+--add-opens java.base/jdk.internal.module=ALL-UNNAMED
+--add-opens java.base/jdk.internal.ref=ALL-UNNAMED
+--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED
+--add-opens java.base/sun.nio.ch=ALL-UNNAMED
+--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED
+
+# The newline in the end of file is intentional
diff --git a/conf/jvm22-server.options b/conf/jvm22-server.options
new file mode 100644
index 000000000000..b836204660bb
--- /dev/null
+++ b/conf/jvm22-server.options
@@ -0,0 +1,128 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+###########################################################################
+# jvm22-server.options #
+# #
+# See jvm-server.options. This file is specific for Java 22 and newer. #
+###########################################################################
+
+#################
+# GC SETTINGS #
+#################
+
+### G1 Settings
+## Use the Hotspot garbage-first collector.
+-XX:+UseG1GC
+-XX:+ParallelRefProcEnabled
+
+#
+## Have the JVM do less remembered set work during STW, instead
+## preferring concurrent GC. Reduces p99.9 latency.
+-XX:G1RSetUpdatingPauseTimePercent=5
+#
+## Main G1GC tunable: lowering the pause target will lower throughput and vice versa.
+## 200ms is the JVM default and lowest viable setting
+## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml.
+-XX:MaxGCPauseMillis=500
+
+## Optional G1 Settings
+# Save CPU time on large (>= 16GB) heaps by delaying region scanning
+# until the heap is 70% full. The default in Hotspot 8u40 is 40%.
+#-XX:InitiatingHeapOccupancyPercent=70
+
+# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores.
+# Otherwise equal to the number of cores when 8 or less.
+# Machines with > 10 cores should try setting these to <= full cores.
+#-XX:ParallelGCThreads=16
+# By default, ConcGCThreads is 1/4 of ParallelGCThreads.
+# Setting both to the same value can reduce STW durations.
+#-XX:ConcGCThreads=16
+
+
+### JPMS
+
+-Djdk.attach.allowAttachSelf=true
+-Djava.security.manager=allow
+--add-exports java.base/jdk.internal.misc=ALL-UNNAMED
+--add-exports java.base/jdk.internal.ref=ALL-UNNAMED
+--add-exports java.base/jdk.internal.perf=ALL-UNNAMED
+--add-exports java.base/sun.nio.ch=ALL-UNNAMED
+--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED
+--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED
+--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED
+--add-exports jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED
+--add-exports jdk.naming.dns/com.sun.jndi.dns=java.naming
+--add-exports jdk.unsupported/sun.misc=ALL-UNNAMED
+
+--add-opens java.base/java.io=ALL-UNNAMED
+--add-opens java.base/java.lang.module=ALL-UNNAMED
+--add-opens java.base/java.lang=ALL-UNNAMED
+--add-opens java.base/java.lang.reflect=ALL-UNNAMED
+--add-opens java.base/java.nio.charset=ALL-UNNAMED
+--add-opens java.base/java.nio.file.spi=ALL-UNNAMED
+--add-opens java.base/java.nio=ALL-UNNAMED
+--add-opens java.base/java.net=ALL-UNNAMED
+--add-opens java.base/java.util=ALL-UNNAMED
+--add-opens java.base/java.util.concurrent.atomic=ALL-UNNAMED
+--add-opens java.base/java.util.concurrent.locks=ALL-UNNAMED
+--add-opens java.base/jdk.internal.loader=ALL-UNNAMED
+--add-opens java.base/jdk.internal.math=ALL-UNNAMED
+--add-opens java.base/jdk.internal.module=ALL-UNNAMED
+--add-opens java.base/jdk.internal.ref=ALL-UNNAMED
+--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED
+--add-opens java.base/jdk.internal.vm=ALL-UNNAMED
+--add-opens java.base/sun.nio.ch=ALL-UNNAMED
+--add-opens jdk.compiler/com.sun.tools.javac=ALL-UNNAMED
+--add-opens jdk.management.jfr/jdk.management.jfr=ALL-UNNAMED
+--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED
+--add-opens jdk.naming.dns/com.sun.jndi.dns=ALL-UNNAMED
+
+# required for org.apache.cassandra.Util.getSupportedMTimeGranularity
+--add-opens java.base/java.nio.file.attribute=ALL-UNNAMED
+
+### GC logging options -- uncomment to enable
+
+# Java 11 (and newer) GC logging options:
+# See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax
+# The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M
+# -Xlog:gc=info,heap*=debug,age*=debug,safepoint=info,promotion*=debug:file=/var/log/cassandra/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760
+
+# Notes for Java 8 migration:
+#
+# -XX:+PrintGCDetails maps to -Xlog:gc*:... - i.e. add a '*' after "gc"
+# -XX:+PrintGCDateStamps maps to decorator 'time'
+#
+# -XX:+PrintHeapAtGC maps to 'heap' with level 'trace'
+# -XX:+PrintTenuringDistribution maps to 'age' with level 'debug'
+# -XX:+PrintGCApplicationStoppedTime maps to 'safepoint' with level 'info'
+# -XX:+PrintPromotionFailure maps to 'promotion' with level 'trace'
+# -XX:PrintFLSStatistics=1 maps to 'freelist' with level 'trace'
+
+### Netty Options
+
+# On Java >= 9 Netty requires the io.netty.tryReflectionSetAccessible system property to be set to true to enable
+# creation of direct buffers using Unsafe. Without it, this falls back to ByteBuffer.allocateDirect which has
+# inferior performance and risks exceeding MaxDirectMemory
+-Dio.netty.tryReflectionSetAccessible=true
+
+### Enable vector incubator feature (simd support)
+
+--add-modules jdk.incubator.vector
+
+# The newline in the end of file is intentional
diff --git a/doc/cql3/CQL.textile b/doc/cql3/CQL.textile
index 959533f77186..314d55acbcf3 100644
--- a/doc/cql3/CQL.textile
+++ b/doc/cql3/CQL.textile
@@ -721,6 +721,8 @@ bc(syntax)..
 '(' <arg-name> <arg-type> ( ',' <arg-name> <arg-type> )* ')'
 ( CALLED | RETURNS NULL ) ON NULL INPUT
 RETURNS <type>
+ ( DETERMINISTIC )?
+ ( MONOTONIC ( ON <arg-name> )? )?
 LANGUAGE <language>
 AS <body>
p.
@@ -766,6 +768,10 @@ If the optional @IF NOT EXISTS@ keywords are used, the function will only be cre
@OR REPLACE@ and @IF NOT EXIST@ cannot be used together.
+The optional @DETERMINISTIC@ keyword specifies that the function is deterministic. This means that given a particular input, the function will always produce the same output.
+
+The optional @MONOTONIC@ keyword specifies that the function is monotonic. This means that it is either entirely nonincreasing or nondecreasing. Even if the function is not monotonic on all its arguments, it is possible to specify that it is monotonic @ON@ one of its arguments, meaning that partial applications of the function over that argument will be monotonic. Monotonicity is required to use the function in a @GROUP BY@ clause.
+
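As a concrete illustration of the keywords documented above, a hypothetical function (keyspace, name and body are not part of this patch) that is deterministic and monotonic on its single argument, and therefore usable in a `GROUP BY` clause:

```cql
-- Always returns the same output for the same input, and never decreases as value increases.
CREATE OR REPLACE FUNCTION ks.floor_to_hundred (value int)
    RETURNS NULL ON NULL INPUT
    RETURNS int
    DETERMINISTIC
    MONOTONIC
    LANGUAGE java
    AS 'return (value / 100) * 100;';
```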
 Functions belong to a keyspace. If no keyspace is specified in @<function-name>@, the current keyspace is used (i.e. the keyspace specified using the "@USE@":#useStmt statement). It is not possible to create a user-defined function in one of the system keyspaces.
See the section on "user-defined functions":#udfs for more information.
@@ -806,6 +812,7 @@ bc(syntax)..
 STYPE <state-type>
 ( FINALFUNC <final-function> )?
 ( INITCOND <init-cond> )?
+ ( DETERMINISTIC )?
p.
__Sample:__
@@ -826,6 +833,8 @@ See the section on "user-defined aggregates":#udas for a complete example.
@OR REPLACE@ and @IF NOT EXIST@ cannot be used together.
+The optional @DETERMINISTIC@ keyword specifies that the aggregate function is deterministic. This means that given a particular input, the function will always produce the same output.
+
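A hypothetical sketch of the keyword on an aggregate, assuming the referenced state and final functions exist and are themselves deterministic (none of these names come from this patch):

```cql
CREATE OR REPLACE AGGREGATE ks.deterministic_avg (int)
    SFUNC avg_state
    STYPE tuple<bigint, int>
    FINALFUNC avg_final
    INITCOND (0, 0)
    DETERMINISTIC;
```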
 Aggregates belong to a keyspace. If no keyspace is specified in @<aggregate-name>@, the current keyspace is used (i.e. the keyspace specified using the "@USE@":#useStmt statement). It is not possible to create a user-defined aggregate in one of the system keyspaces.
Signatures for user-defined aggregates follow the "same rules":#functionSignature as for user-defined functions.
@@ -1092,8 +1101,9 @@ bc(syntax)..
 ( GROUP BY <group-by> )?
 ( ORDER BY <order-by> )?
 ( PER PARTITION LIMIT <integer> )?
- ( LIMIT <integer> )?
+ ( LIMIT <integer> ( OFFSET <integer> )? )?
 ( ALLOW FILTERING )?
+ ( WITH ann_options = <map-literal> )?
::= DISTINCT?
@@ -1228,9 +1238,9 @@ Aggregate functions will produce a separate value for each group. If no @GROUP B
If a column is selected without an aggregate function, in a statement with a @GROUP BY@, the first value encounter in each group will be returned.
-h4(#selectLimit). @LIMIT@ and @PER PARTITION LIMIT@
+h4(#selectLimit). @LIMIT@, @OFFSET@ and @PER PARTITION LIMIT@
-The @LIMIT@ option to a @SELECT@ statement limits the number of rows returned by a query, while the @PER PARTITION LIMIT@ option limits the number of rows returned for a given partition by the query. Note that both type of limit can used in the same statement.
+The @LIMIT@ option in a @SELECT@ statement limits the number of rows returned by a query. The @LIMIT@ option can include an @OFFSET@ option to skip the first rows of the query result. The @PER PARTITION LIMIT@ option limits the number of rows returned for a given partition by the query. Note that both types of limit can be used in the same statement.
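For example (the table and values are hypothetical), the following skips the first 50 rows of the result for the queried partition and returns the next 10:

```cql
SELECT event_id, payload FROM ks.events WHERE account = 'a1' LIMIT 10 OFFSET 50;
```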
h4(#selectAllowFiltering). @ALLOW FILTERING@
diff --git a/doc/modules/cassandra/examples/BNF/create_aggregate_statement.bnf b/doc/modules/cassandra/examples/BNF/create_aggregate_statement.bnf
index c0126a23ffd8..1207ec06328c 100644
--- a/doc/modules/cassandra/examples/BNF/create_aggregate_statement.bnf
+++ b/doc/modules/cassandra/examples/BNF/create_aggregate_statement.bnf
@@ -4,3 +4,4 @@ create_aggregate_statement ::= CREATE [ OR REPLACE ] AGGREGATE [ IF NOT EXISTS ]
STYPE cql_type:
[ FINALFUNC function_name]
[ INITCOND term ]
+ [ DETERMINISTIC ]
diff --git a/doc/modules/cassandra/examples/BNF/create_function_statement.bnf b/doc/modules/cassandra/examples/BNF/create_function_statement.bnf
index 0da769a11fb0..82be39d42911 100644
--- a/doc/modules/cassandra/examples/BNF/create_function_statement.bnf
+++ b/doc/modules/cassandra/examples/BNF/create_function_statement.bnf
@@ -1,6 +1,8 @@
create_function_statement::= CREATE [ OR REPLACE ] FUNCTION [ IF NOT EXISTS]
function_name '(' arguments_declaration ')'
[ CALLED | RETURNS NULL ] ON NULL INPUT
- RETURNS cql_type
+ RETURNS cql_type
+ [ DETERMINISTIC ]
+ [ MONOTONIC [ ON arg_name ] ]
LANGUAGE identifier
AS string arguments_declaration: identifier cql_type ( ',' identifier cql_type )*
diff --git a/doc/modules/cassandra/examples/BNF/select_statement.bnf b/doc/modules/cassandra/examples/BNF/select_statement.bnf
index f53da41da57c..ff630209cbd0 100644
--- a/doc/modules/cassandra/examples/BNF/select_statement.bnf
+++ b/doc/modules/cassandra/examples/BNF/select_statement.bnf
@@ -4,8 +4,9 @@ select_statement::= SELECT [ JSON | DISTINCT ] ( select_clause | '*' )
[ GROUP BY `group_by_clause` ]
[ ORDER BY `ordering_clause` ]
[ PER PARTITION LIMIT (`integer` | `bind_marker`) ]
- [ LIMIT (`integer` | `bind_marker`) ]
+ [ LIMIT (`integer` | `bind_marker`) [ OFFSET (`integer` | `bind_marker`) ] ]
[ ALLOW FILTERING ]
+ [ WITH ann_options = map-literal ]
select_clause::= `selector` [ AS `identifier` ] ( ',' `selector` [ AS `identifier` ] )
selector::== `column_name`
| `term`
diff --git a/doc/modules/cassandra/examples/CQL/query_with_ann_options.cql b/doc/modules/cassandra/examples/CQL/query_with_ann_options.cql
new file mode 100644
index 000000000000..14ddcd47b938
--- /dev/null
+++ b/doc/modules/cassandra/examples/CQL/query_with_ann_options.cql
@@ -0,0 +1 @@
+SELECT * FROM embeddings ORDER BY vector ANN OF [1.2, 3.4] LIMIT 100 WITH ann_options = { 'rerank_k': 1000 }
diff --git a/doc/modules/cassandra/pages/developing/cql/cql_singlefile.adoc b/doc/modules/cassandra/pages/developing/cql/cql_singlefile.adoc
index 18dd52e13dd7..c21c0aa3e537 100644
--- a/doc/modules/cassandra/pages/developing/cql/cql_singlefile.adoc
+++ b/doc/modules/cassandra/pages/developing/cql/cql_singlefile.adoc
@@ -1155,6 +1155,8 @@ CREATE FUNCTION akeyspace.fname IF NOT EXISTS +
( someArg int ) +
CALLED ON NULL INPUT +
RETURNS text +
+( DETERMINISTIC )? +
+( MONOTONIC ( ON )? )? +
LANGUAGE java +
AS $$ +
// some Java code +
@@ -1194,6 +1196,17 @@ exist.
`OR REPLACE` and `IF NOT EXIST` cannot be used together.
+The optional `DETERMINISTIC` keyword specifies that the function is
+deterministic. This means that given a particular input, the function
+will always produce the same output.
+
+The optional `MONOTONIC` keyword specifies that the function is monotonic.
+This means that it is either entirely nonincreasing or nondecreasing.
+Even if the function is not monotonic on all its arguments, it is possible
+to specify that it is monotonic `ON` one of its arguments, meaning that
+partial applications of the function over that argument will be monotonic.
+Monotonicity is required to use the function in a `GROUP BY` clause.
+
Functions belong to a keyspace. If no keyspace is specified in
``, the current keyspace is used (i.e. the keyspace
specified using the link:#useStmt[`USE`] statement). It is not possible
@@ -1243,6 +1256,7 @@ SFUNC +
STYPE +
( FINALFUNC )? +
( INITCOND )? +
+( DETERMINISTIC )? +
p. +
_Sample:_
@@ -1268,6 +1282,10 @@ creates an aggregate if it does not already exist.
`OR REPLACE` and `IF NOT EXIST` cannot be used together.
+The optional `DETERMINISTIC` keyword specifies that the aggregate
+function is deterministic. This means that given a particular input,
+the function will always produce the same output.
+
Aggregates belong to a keyspace. If no keyspace is specified in
``, the current keyspace is used (i.e. the keyspace
specified using the link:#useStmt[`USE`] statement). It is not possible
@@ -1658,8 +1676,9 @@ FROM +
( GROUP BY )? +
( ORDER BY )? +
( PER PARTITION LIMIT )? +
-( LIMIT )? +
+( LIMIT ( OFFSET )? )? +
( ALLOW FILTERING )?
+( WITH ann_options = )?
::= DISTINCT?
@@ -1878,12 +1897,12 @@ with a `GROUP BY`, the first value encounter in each group will be
returned.
[[selectLimit]]
-===== `LIMIT` and `PER PARTITION LIMIT`
+===== `LIMIT`, `OFFSET` and `PER PARTITION LIMIT`
-The `LIMIT` option to a `SELECT` statement limits the number of rows
-returned by a query, while the `PER PARTITION LIMIT` option limits the
-number of rows returned for a given partition by the query. Note that
-both type of limit can used in the same statement.
+The `LIMIT` option in a `SELECT` statement limits the number of rows returned by a query.
+The `LIMIT` option can include an `OFFSET` option to skip the first rows of the query result.
+The `PER PARTITION LIMIT` option limits the number of rows returned for a given partition by the query.
+Note that both types of limit can be used in the same statement.
[[selectAllowFiltering]]
===== `ALLOW FILTERING`
diff --git a/doc/modules/cassandra/pages/developing/cql/dml.adoc b/doc/modules/cassandra/pages/developing/cql/dml.adoc
index ef76cdbb38ff..ba272a6415eb 100644
--- a/doc/modules/cassandra/pages/developing/cql/dml.adoc
+++ b/doc/modules/cassandra/pages/developing/cql/dml.adoc
@@ -214,9 +214,10 @@ or the reverse
[[limit-clause]]
=== Limiting results
-The `LIMIT` option to a `SELECT` statement limits the number of rows
-returned by a query. The `PER PARTITION LIMIT` option limits the
-number of rows returned for a given partition by the query. Both types of limits can used in the same statement.
+The `LIMIT` option in a `SELECT` statement limits the number of rows returned by a query.
+The `LIMIT` option can include an `OFFSET` option to skip the first rows of the query result.
+The `PER PARTITION LIMIT` option limits the number of rows returned for a given partition by the query.
+Note that both types of limit can be used in the same statement.
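+
+A minimal sketch of `LIMIT` with `OFFSET` (the `users` table and its columns are assumed purely for illustration):
+
+[source,cql]
+----
+-- Skip the first 50 rows of the result, then return at most 10 rows
+SELECT name, age FROM users LIMIT 10 OFFSET 50;
+----
+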
[[allow-filtering]]
=== Allowing filtering
@@ -264,6 +265,16 @@ execute:
include::cassandra:example$CQL/query_nofail_allow_filtering.cql[]
----
+[[ann-options]]
+=== ANN options
+
+`SELECT` queries using `ANN` ordering can provide a set of options to control the behavior of the ANN search:
+
+[source,cql]
+----
+include::example$CQL/query_with_ann_options.cql[]
+----
+
[[insert-statement]]
== INSERT
diff --git a/doc/modules/cassandra/pages/developing/cql/functions.adoc b/doc/modules/cassandra/pages/developing/cql/functions.adoc
index 75786de271a3..97c51fae6eb2 100644
--- a/doc/modules/cassandra/pages/developing/cql/functions.adoc
+++ b/doc/modules/cassandra/pages/developing/cql/functions.adoc
@@ -288,6 +288,43 @@ A number of functions allow to obtain the similarity score between vectors of fl
include::cassandra:partial$vector-search/vector_functions.adoc[]
+[[index-functions]]
+===== Index functions
+
+====== `sai_analyze`
+
+The `sai_analyze` function returns the tokens that a SAI index will generate for a certain text value. The arguments
+are the text value and the JSON configuration of the SAI analyzer. This JSON configuration is the same as the one used
+to create the SAI index. For example, this function call:
+
+[source,cql]
+----
+sai_analyze('johnny apples seedlings',
+ '{
+ "tokenizer": {"name": "whitespace"}
+ }')
+----
+will return `['johnny', 'apples', 'seedlings']`.
+
+This other function call:
+[source,cql]
+----
+sai_analyze('johnny apples seedlings',
+ '{
+ "tokenizer": {"name": "whitespace"},
+ "filters": [{"name": "porterstem"}]
+ }')
+----
+will return `['johnni', 'appl', 'seedl']`.
+
+
+[[vector-functions]]
+===== Vector functions
+
+A number of functions are available to operate on vectors of floats.
+
+include::cassandra:partial$vector-search/vector_functions.adoc[]
+
[[user-defined-scalar-functions]]
=== User-defined functions
@@ -378,6 +415,16 @@ If the optional `IF NOT EXISTS` keywords are used, the function will only be cre
exist.
`OR REPLACE` and `IF NOT EXISTS` cannot be used together.
+The optional `DETERMINISTIC` keyword specifies that the function is deterministic.
+This means that given a particular input, the function will always produce the same output.
+
+The optional `MONOTONIC` keyword specifies that the function is monotonic.
+This means that it is either entirely nonincreasing or nondecreasing.
+Even if the function is not monotonic on all its arguments, it is possible
+to specify that it is monotonic `ON` one of its arguments, meaning that
+partial applications of the function over that argument will be monotonic.
+Monotonicity is required to use the function in a `GROUP BY` clause.
+
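+A minimal sketch of both keywords in a `CREATE FUNCTION` statement (the function name, argument and body are purely illustrative):
+
+[source,cql]
+----
+CREATE FUNCTION IF NOT EXISTS plus_one (x int)
+    CALLED ON NULL INPUT
+    RETURNS int
+    DETERMINISTIC
+    MONOTONIC ON x
+    LANGUAGE java
+    AS 'return x == null ? null : x + 1;';
+----
+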
Behavior for `null` input values must be defined for each function:
* `RETURNS NULL ON NULL INPUT` declares that the function will always return `null` if any of the input arguments is `null`.
@@ -540,6 +587,9 @@ A `CREATE AGGREGATE` without `OR REPLACE` fails if an aggregate with the same si
The `CREATE AGGREGATE` command with the optional `IF NOT EXISTS` keywords creates an aggregate if it does not already exist.
The `OR REPLACE` and `IF NOT EXISTS` phrases cannot be used together.
+The optional `DETERMINISTIC` keyword specifies that the aggregate function is deterministic.
+This means that given a particular input, the function will always produce the same output.
+
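+A minimal sketch of the keyword on an aggregate (the state and final functions `avg_state` and `avg_final` are assumed to exist and are named only for illustration):
+
+[source,cql]
+----
+CREATE AGGREGATE IF NOT EXISTS average (int)
+    SFUNC avg_state
+    STYPE tuple<int, bigint>
+    FINALFUNC avg_final
+    INITCOND (0, 0)
+    DETERMINISTIC;
+----
+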
The `STYPE` value defines the type of the state value and must be specified.
The optional `INITCOND` defines the initial state value for the aggregate; the default value is `null`.
A non-null `INITCOND` must be specified for state functions that are declared with `RETURNS NULL ON NULL INPUT`.
diff --git a/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_queries.adoc b/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_queries.adoc
index 3a4fb8d54a2c..a31419fc12e9 100644
--- a/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_queries.adoc
+++ b/doc/modules/cassandra/pages/developing/data-modeling/data-modeling_queries.adoc
@@ -28,7 +28,7 @@ here, however, you'll want to think not only from the customer
perspective in terms of how the data is written, but also in terms of
how the data will be queried by downstream use cases.
-You natural tendency as might be to focus first on designing the tables
+Your natural tendency might be to focus first on designing the tables
to store reservation and guest records, and only then start thinking
about the queries that would access them. You may have felt a similar
tension already when discussing the shopping queries before, thinking
diff --git a/doc/modules/cassandra/pages/managing/operating/audit_logging.adoc b/doc/modules/cassandra/pages/managing/operating/audit_logging.adoc
index 63f4ba1a1130..f728bfd73001 100644
--- a/doc/modules/cassandra/pages/managing/operating/audit_logging.adoc
+++ b/doc/modules/cassandra/pages/managing/operating/audit_logging.adoc
@@ -150,8 +150,12 @@ auditlogviewer [...] [options]
waiting for more records
`-r,--roll_cycle`::
How often to roll the log file was rolled. May be;;
- necessary for Chronicle to correctly parse file names. (MINUTELY,
- HOURLY, DAILY). Default HOURLY.
+ necessary for Chronicle to correctly parse file names. Some available options are:
+FIVE_MINUTELY, FAST_HOURLY, FAST_DAILY, LargeRollCycles.LARGE_DAILY, LargeRollCycles.XLARGE_DAILY,
+LargeRollCycles.HUGE_DAILY. Deprecated options are still available but not recommended for new deployments:
+MINUTELY, HOURLY, DAILY.
+For more options, refer to net.openhft.chronicle.queue.RollCycles.
+The default is FAST_HOURLY.
`-h,--help`::
display this help message
diff --git a/doc/modules/cassandra/partials/vector-search/vector_functions.adoc b/doc/modules/cassandra/partials/vector-search/vector_functions.adoc
index daa4b2b8ce22..e73fc628b466 100644
--- a/doc/modules/cassandra/partials/vector-search/vector_functions.adoc
+++ b/doc/modules/cassandra/partials/vector-search/vector_functions.adoc
@@ -30,12 +30,38 @@ Examples:
Examples:
-`similarity_dot_product([0.1, 0.2], null)` -> `null`
+`similarity_dot_product([0.447214, 0.894427], null)` -> `null`
-`similarity_dot_product([0.1, 0.2], [0.1, 0.2])` -> `0.525`
+`similarity_dot_product([0.447214, 0.894427], [0.447214, 0.894427])` -> `1`
-`similarity_dot_product([0.1, 0.2], [-0.1, -0.2])` -> `0.475`
+`similarity_dot_product([0.447214, 0.894427], [-0.447214, -0.894427])` -> `0`
-`similarity_dot_product([0.1, 0.2], [0.9, 0.8])` -> `0.625`
+`similarity_dot_product([0.447214, 0.894427], [-0.447214, 0.894427])` -> `0.8`
+
+`similarity_dot_product([0.447214, 0.894427], [0.447214, -0.894427])` -> `0.2`
+
+| `random_float_vector(int, float, float)` | Returns a new vector of floats with the specified dimension and where
+all components will be in the specified min-max range.
+
+Examples:
+
+`random_float_vector(2, -1.0, 1.0)` -> `[-0.695395, -0.395755]`
+
+`random_float_vector(2, -1.0, 1.0)` -> `[-0.58795, 0.690014]`
+
+`random_float_vector(2, 0.0, 1.0)` -> `[0.423859, 0.630168]`
+
+`random_float_vector(2, 0.0, 1.0)` -> `[0.468159, 0.283808]`
+
+| `normalize_l2(vector)` | Applies L2 normalization to the input vector.
+The result is a vector with the same direction but with a magnitude of 1.
+
+Examples:
+
+`normalize_l2([0.1])` -> `[1]`
+
+`normalize_l2([-0.7])` -> `[1]`
+
+`normalize_l2([3.0, 4.0])` -> `[0.6, 0.8]`
|===
\ No newline at end of file
diff --git a/doc/native_protocol_v4.1.spec b/doc/native_protocol_v4.1.spec
new file mode 100644
index 000000000000..a10fd2404d8f
--- /dev/null
+++ b/doc/native_protocol_v4.1.spec
@@ -0,0 +1,1212 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+ CQL BINARY PROTOCOL v4.1
+
+
+Table of Contents
+
+ 1. Overview
+ 2. Frame header
+ 2.1. version
+ 2.2. flags
+ 2.3. stream
+ 2.4. opcode
+ 2.5. length
+ 3. Notations
+ 4. Messages
+ 4.1. Requests
+ 4.1.1. STARTUP
+ 4.1.2. AUTH_RESPONSE
+ 4.1.3. OPTIONS
+ 4.1.4. QUERY
+ 4.1.5. PREPARE
+ 4.1.6. EXECUTE
+ 4.1.7. BATCH
+ 4.1.8. REGISTER
+ 4.2. Responses
+ 4.2.1. ERROR
+ 4.2.2. READY
+ 4.2.3. AUTHENTICATE
+ 4.2.4. SUPPORTED
+ 4.2.5. RESULT
+ 4.2.5.1. Void
+ 4.2.5.2. Rows
+ 4.2.5.3. Set_keyspace
+ 4.2.5.4. Prepared
+ 4.2.5.5. Schema_change
+ 4.2.6. EVENT
+ 4.2.7. AUTH_CHALLENGE
+ 4.2.8. AUTH_SUCCESS
+ 5. Compression
+ 6. Data Type Serialization Formats
+ 7. User Defined Type Serialization
+ 8. Result paging
+ 9. Error codes
+ 10. Changes from v4
+
+
+1. Overview
+
+ The CQL binary protocol is a frame based protocol. Frames are defined as:
+
+ 0 8 16 24 32 40
+ +---------+---------+---------+---------+---------+
+ | version | flags | stream | opcode |
+ +---------+---------+---------+---------+---------+
+ | length |
+ +---------+---------+---------+---------+
+ | |
+ . ... body ... .
+ . .
+ . .
+ +----------------------------------------
+
+ The protocol is big-endian (network byte order).
+
+ Each frame contains a fixed size header (9 bytes) followed by a variable size
+ body. The header is described in Section 2. The content of the body depends
+ on the header opcode value (the body can in particular be empty for some
+ opcode values). The list of allowed opcodes is defined in Section 2.4 and the
+ details of each corresponding message are described in Section 4.
+
+ The protocol distinguishes two types of frames: requests and responses. Requests
+ are those frames sent by the client to the server. Responses are those frames sent
+ by the server to the client. Note, however, that the protocol supports server pushes
+ (events) so a response does not necessarily come right after a client request.
+
+ Note to client implementors: client libraries should always assume that the
+ body of a given frame may contain more data than what is described in this
+ document. It will however always be safe to ignore the remainder of the frame
+ body in such cases. The reason is that this may enable extending the protocol
+ with optional features without needing to change the protocol version.
+
+
+
+2. Frame header
+
+2.1. version
+
+ The version is a single byte that indicates both the direction of the message
+ (request or response) and the version of the protocol in use. The most
+ significant bit of version is used to define the direction of the message:
+ 0 indicates a request, 1 indicates a response. This can be useful for protocol
+ analyzers to distinguish the nature of the packet from the direction in which
+ it is moving. The rest of that byte is the protocol version (4 for the protocol
+ defined in this document). In other words, for this version of the protocol,
+ version will be one of:
+ 0x04 Request frame for this protocol version
+ 0x84 Response frame for this protocol version
+
+ Please note that while every message ships with the version, only one version
+ of messages is accepted on a given connection. In other words, the first message
+ exchanged (STARTUP) sets the version for the connection for the lifetime of this
+ connection.
+
+ This document describes version 4.1 of the protocol. For the changes made since
+ version 4, see Section 10.
+
+
+2.2. flags
+
+ Flags applying to this frame. The flags have the following meaning (described
+ by the mask that allows selecting them):
+ 0x01: Compression flag. If set, the frame body is compressed. The actual
+ compression to use should have been set up beforehand through the
+ Startup message (which thus cannot be compressed; Section 4.1.1).
+ 0x02: Tracing flag. For a request frame, this indicates the client requires
+ tracing of the request. Note that only QUERY, PREPARE and EXECUTE queries
+ support tracing. Other requests will simply ignore the tracing flag if
+ set. If a request supports tracing and the tracing flag is set, the response
+ to this request will have the tracing flag set and contain tracing
+ information.
+ If a response frame has the tracing flag set, its body contains
+ a tracing ID. The tracing ID is a [uuid] and is the first thing in
+ the frame body.
+ 0x04: Custom payload flag. For a request or response frame, this indicates
+ that a generic key-value custom payload for a custom QueryHandler
+ implementation is present in the frame. Such a custom payload is simply
+ ignored by the default QueryHandler implementation.
+ Currently, only QUERY, PREPARE, EXECUTE and BATCH requests support
+ payload.
+ Type of custom payload is [bytes map] (see below). If either or both
+ of the tracing and warning flags are set, the custom payload will follow
+ those indicated elements in the frame body. If neither are set, the custom
+ payload will be the first value in the frame body.
+ 0x08: Warning flag. The response contains warnings which were generated by the
+ server to go along with this response.
+ If a response frame has the warning flag set, its body will contain the
+ text of the warnings. The warnings are a [string list] and will be the
+ first value in the frame body if the tracing flag is not set, or directly
+ after the tracing ID if it is.
+
+ The remaining flags are currently unused and ignored.
+
+2.3. stream
+
+ A frame has a stream id (a [short] value). When sending request messages, this
+ stream id must be set by the client to a non-negative value (negative stream id
+ are reserved for streams initiated by the server; currently all EVENT messages
+ (section 4.2.6) have a streamId of -1). If a client sends a request message
+ with the stream id X, it is guaranteed that the stream id of the response to
+ that message will be X.
+
+ This helps to enable the asynchronous nature of the protocol. If a client
+ sends multiple messages simultaneously (without waiting for responses), there
+ is no guarantee on the order of the responses. For instance, if the client
+ writes REQ_1, REQ_2, REQ_3 on the wire (in that order), the server might
+ respond to REQ_3 (or REQ_2) first. Assigning different stream ids to these 3
+ requests allows the client to distinguish which request a received answer
+ responds to. As there can only be 32768 different simultaneous streams, it is up
+ to the client to reuse stream ids.
+
+ Note that clients are free to use the protocol synchronously (i.e. wait for
+ the response to REQ_N before sending REQ_N+1). In that case, the stream id
+ can be safely set to 0. Clients should also feel free to use only a subset of
+ the 32768 maximum possible stream ids if that is simpler for their implementation.
+
+2.4. opcode
+
+ An integer byte that distinguishes the actual message:
+ 0x00 ERROR
+ 0x01 STARTUP
+ 0x02 READY
+ 0x03 AUTHENTICATE
+ 0x05 OPTIONS
+ 0x06 SUPPORTED
+ 0x07 QUERY
+ 0x08 RESULT
+ 0x09 PREPARE
+ 0x0A EXECUTE
+ 0x0B REGISTER
+ 0x0C EVENT
+ 0x0D BATCH
+ 0x0E AUTH_CHALLENGE
+ 0x0F AUTH_RESPONSE
+ 0x10 AUTH_SUCCESS
+
+ Messages are described in Section 4.
+
+ (Note that there is no 0x04 message in this version of the protocol)
+
+
+2.5. length
+
+ A 4 byte integer representing the length of the body of the frame (note:
+ currently a frame is limited to 256MB in length).
+
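+ As a worked example (the 54-byte body length is an arbitrary illustration), the 9
+ header bytes of a client QUERY frame sent with stream id 1 and no flags would be:
+
+ 04 - version: request frame, protocol version 4
+ 00 - flags: none set
+ 00 01 - stream: 1
+ 07 - opcode: QUERY
+ 00 00 00 36 - length: 0x36 = 54 body bytes follow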
+
+3. Notations
+
+ To describe the layout of the frame body for the messages in Section 4, we
+ define the following:
+
+ [int] A 4 bytes integer
+ [long] A 8 bytes integer
+ [short] A 2 bytes unsigned integer
+ [string] A [short] n, followed by n bytes representing an UTF-8
+ string.
+ [long string] An [int] n, followed by n bytes representing an UTF-8 string.
+ [uuid] A 16 bytes long uuid.
+ [string list] A [short] n, followed by n [string].
+ [bytes] A [int] n, followed by n bytes if n >= 0. If n < 0,
+ no byte should follow and the value represented is `null`.
+ [value] A [int] n, followed by n bytes if n >= 0.
+ If n == -1 no byte should follow and the value represented is `null`.
+ If n == -2 no byte should follow and the value represented is
+ `not set` not resulting in any change to the existing value.
+ n < -2 is an invalid value and results in an error.
+ [short bytes] A [short] n, followed by n bytes if n >= 0.
+
+ [option] A pair of <id><value> where <id> is a [short] representing
+ the option id and <value> depends on that option (and can be
+ of size 0). The supported id (and the corresponding <value>)
+ will be described when this is used.
+ [option list] A [short] n, followed by n [option].
+ [inet] An address (ip and port) to a node. It consists of one
+ [byte] n, that represents the address size, followed by n
+ [byte] representing the IP address (in practice n can only be
+ either 4 (IPv4) or 16 (IPv6)), followed by one [int]
+ representing the port.
+ [consistency] A consistency level specification. This is a [short]
+ representing a consistency level with the following
+ correspondence:
+ 0x0000 ANY
+ 0x0001 ONE
+ 0x0002 TWO
+ 0x0003 THREE
+ 0x0004 QUORUM
+ 0x0005 ALL
+ 0x0006 LOCAL_QUORUM
+ 0x0007 EACH_QUORUM
+ 0x0008 SERIAL
+ 0x0009 LOCAL_SERIAL
+ 0x000A LOCAL_ONE
+
+ [string map] A [short] n, followed by n pair <k><v> where <k> and
+ <v> are [string].
+ [string multimap] A [short] n, followed by n pair <k><v> where <k> is a
+ [string] and <v> is a [string list].
+ [bytes map] A [short] n, followed by n pair <k><v> where <k> is a
+ [string] and <v> is a [bytes].
+
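+ As a worked example combining the notations above (the chosen option is only an
+ illustration), a [string map] holding the single pair "CQL_VERSION" -> "3.0.0"
+ is encoded as:
+
+ 00 01 - [short] n = 1 pair
+ 00 0B 43 51 4C 5F 56 45 52 53 49 4F 4E - [string] "CQL_VERSION" (11 bytes)
+ 00 05 33 2E 30 2E 30 - [string] "3.0.0" (5 bytes)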
+
+4. Messages
+
+ Dependent on the flags specified in the header, the layout of the message body must be:
+ [<tracing_id>][<warnings>][<custom_payload>]<message>
+ where:
+ - <tracing_id> is a UUID tracing ID, present if this is a request message and the Tracing flag is set.
+ - <warnings> is a string list of warnings (present if this is a request message and the Warning flag is set).
+ - <custom_payload> is a bytes map for the serialised custom payload, present if this is one of the message types
+ which support custom payloads (QUERY, PREPARE, EXECUTE and BATCH) and the Custom payload flag is set.
+ - <message> as defined below through sections 4 and 5.
+
+4.1. Requests
+
+ Note that outside of their normal responses (described below), all requests
+ can get an ERROR message (Section 4.2.1) as response.
+
+4.1.1. STARTUP
+
+ Initialize the connection. The server will respond by either a READY message
+ (in which case the connection is ready for queries) or an AUTHENTICATE message
+ (in which case credentials will need to be provided using AUTH_RESPONSE).
+
+ This must be the first message of the connection, except for OPTIONS that can
+ be sent before to find out the options supported by the server. Once the
+ connection has been initialized, a client should not send any more STARTUP
+ messages.
+
+ The body is a [string map] of options. Possible options are:
+ - "CQL_VERSION": the version of CQL to use. This option is mandatory and
+ currently the only version supported is "3.0.0". Note that this is
+ different from the protocol version.
+ - "COMPRESSION": the compression algorithm to use for frames (See section 5).
+ This is optional; if not specified no compression will be used.
+ - "NO_COMPACT": whether or not connection has to be established in compatibility
+ mode. This mode will make all Thrift and Compact Tables to be exposed as if
+ they were CQL Tables. This is optional; if not specified, the option will
+ not be used.
+ - "THROW_ON_OVERLOAD": In case of server overloaded with too many requests, by default the server puts
+ back pressure on the client connection. Instead, the server can send an OverloadedException error message back to
+ the client if this option is set to true.
+ - "PAGE_UNIT": a list of supported page units.
+
+
+4.1.2. AUTH_RESPONSE
+
+ Answers a server authentication challenge.
+
+ Authentication in the protocol is SASL based. The server sends authentication
+ challenges (a bytes token) to which the client answers with this message. Those
+ exchanges continue until the server accepts the authentication by sending a
+ AUTH_SUCCESS message after a client AUTH_RESPONSE. Note that the exchange
+ begins with the client sending an initial AUTH_RESPONSE in response to a
+ server AUTHENTICATE request.
+
+ The body of this message is a single [bytes] token. The details of what this
+ token contains (and when it can be null/empty, if ever) depends on the actual
+ authenticator used.
+
+ The response to a AUTH_RESPONSE is either a follow-up AUTH_CHALLENGE message,
+ an AUTH_SUCCESS message or an ERROR message.
+
+
+4.1.3. OPTIONS
+
+ Asks the server to return which STARTUP options are supported. The body of an
+ OPTIONS message should be empty and the server will respond with a SUPPORTED
+ message.
+
+
+4.1.4. QUERY
+
+ Performs a CQL query. The body of the message must be:
+ <query><query_parameters>
+ where <query> is a [long string] representing the query and
+ <query_parameters> must be
+ <consistency><flags>[<n>[name_1]<value_1>...[name_n]<value_n>][<result_page_size>][<paging_state>][<serial_consistency>][<timestamp>]
+ where:
+ - <consistency> is the [consistency] level for the operation.
+ - <flags> is a [byte] whose bits define the options for this query and
+ in particular influence what the remainder of the message contains.
+ A flag is set if the bit corresponding to its `mask` is set. Supported
+ flags are, given their mask:
+ 0x00000001: Values. If set, a [short] <n> followed by <n> [value]
+ values are provided. Those values are used for bound variables in
+ the query. Optionally, if the 0x40 flag is present, each value
+ will be preceded by a [string] name, representing the name of
+ the marker the value must be bound to.
+ 0x00000002: Skip_metadata. If set, the Result Set returned as a response
+ to the query (if any) will have the NO_METADATA flag (see
+ Section 4.2.5.2).
+ 0x00000004: Page_size. If set, <result_page_size> is an [int]
+ controlling the desired page size of the result (in CQL3 rows or bytes).
+ See the section on paging (Section 8) for more details.
+ 0x00000008: With_paging_state. If set, <paging_state> should be present.
+ <paging_state> is a [bytes] value that should have been returned
+ in a result set (Section 4.2.5.2). The query will be
+ executed but starting from a given paging state. This is also to
+ continue paging on a different node than the one where it
+ started (See Section 8 for more details).
+ 0x00000010: With serial consistency. If set, <serial_consistency> should be
+ present. <serial_consistency> is the [consistency] level for the
+ serial phase of conditional updates. That consistency can only be
+ either SERIAL or LOCAL_SERIAL and if not present, it defaults to
+ SERIAL. This option will be ignored for anything else other than a
+ conditional update/insert.
+ 0x00000020: With default timestamp. If set, <timestamp> should be present.
+ <timestamp> is a [long] representing the default timestamp for the query
+ in microseconds (negative values are forbidden). This will
+ replace the server side assigned timestamp as default timestamp.
+ Note that a timestamp in the query itself will still override
+ this timestamp. This is entirely optional.
+ 0x00000040: With names for values. This only makes sense if the 0x01 flag is set and
+ is ignored otherwise. If present, the values from the 0x01 flag will
+ be preceded by a name (see above). Note that this is only useful for
+ QUERY requests where named bind markers are used; for EXECUTE statements,
+ since the names for the expected values were returned during preparation,
+ a client can always provide values in the right order without any names
+ and using this flag, while supported, is almost surely inefficient.
+ 0x40000000: When set, the <result_page_size> is provided in bytes rather than in rows.
+
+
+ Note that the consistency is ignored by some queries (USE, CREATE, ALTER,
+ TRUNCATE, ...).
+
+ The server will respond to a QUERY message with a RESULT message, the content
+ of which depends on the query.
+
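+ As a worked sketch of <query_parameters> (the values are purely illustrative), a query
+ executed at consistency ONE, with no bound values and a requested page size of 5000
+ rows, would carry:
+
+ 00 01 - <consistency>: ONE
+ 04 - <flags>: 0x04 (Page_size)
+ 00 00 13 88 - <result_page_size>: 5000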
+
+4.1.5. PREPARE
+
+ Prepare a query for later execution (through EXECUTE). The body consists of
+ the CQL query to prepare as a [long string].
+
+ The server will respond with a RESULT message with a `prepared` kind (0x0004,
+ see Section 4.2.5).
+
+
+4.1.6. EXECUTE
+
+ Executes a prepared query. The body of the message must be:
+ <id><query_parameters>
+ where <id> is the prepared query ID. It's the [short bytes] returned as a
+ response to a PREPARE message. As for <query_parameters>, it has the exact
+ same definition as in QUERY (see Section 4.1.4).
+
+ The response from the server will be a RESULT message.
+
+
+4.1.7. BATCH
+
+ Allows executing a list of queries (prepared or not) as a batch (note that
+ only DML statements are accepted in a batch). The body of the message must
+ be:
+ ...[][]
+ where:
+ - is a [byte] indicating the type of batch to use:
+ - If == 0, the batch will be "logged". This is equivalent to a
+ normal CQL3 batch statement.
+ - If == 1, the batch will be "unlogged".
+ - If == 2, the batch will be a "counter" batch (and non-counter
+ statements will be rejected).
+ - is a [byte] whose bits define the options for this query and
+ in particular influence what the remainder of the message contains. It is similar
+ to the <flags> from QUERY and EXECUTE methods, except that the 4 rightmost
+ bits must always be 0 as their corresponding options do not make sense for
+ Batch. A flag is set if the bit corresponding to its `mask` is set. Supported
+ flags are, given their mask:
+ 0x10: With serial consistency. If set, should be
+ present. is the [consistency] level for the
+ serial phase of conditional updates. That consistency can only be
+ either SERIAL or LOCAL_SERIAL and if not present, it defaults to
+ SERIAL. This option will be ignored for anything else other than a
+ conditional update/insert.
+ 0x20: With default timestamp. If set, should be present.
+ is a [long] representing the default timestamp for the query
+ in microseconds. This will replace the server side assigned
+ timestamp as default timestamp. Note that a timestamp in the query itself
+ will still override this timestamp. This is entirely optional.
+ 0x40: With names for values. If set, then all values for all must be
+ preceded by a [string] that have the same meaning as in QUERY
+ requests [IMPORTANT NOTE: this feature does not work and should not be
+ used. It is specified in a way that makes it impossible for the server
+ to implement. This will be fixed in a future version of the native
+ protocol. See https://issues.apache.org/jira/browse/CASSANDRA-10246 for
+ more details].
+ - is a [short] indicating the number of following queries.
+ - ... are the queries to execute. A must be of the
+ form:
+ []...[]
+ where:
+ - is a [byte] indicating whether the following query is a prepared
+ one or not. value must be either 0 or 1.
+ - depends on the value of . If == 0, it should be
+ a [long string] query string (as in QUERY, the query string might contain
+ bind markers). Otherwise (that is, if == 1), it should be a
+ [short bytes] representing a prepared query ID.
+ - is a [short] indicating the number (possibly 0) of following values.
+ - is the optional name of the following . It must be present
+ if and only if the 0x40 flag is provided for the batch.
+ - is the [value] to use for bound variable i (of bound variable
+ if the 0x40 flag is used).
+ - is the [consistency] level for the operation.
+ - is only present if the 0x10 flag is set. In that case,
+ is the [consistency] level for the serial phase of
+ conditional updates. That consistency can only be either SERIAL or
+ LOCAL_SERIAL and if not present, defaults to SERIAL. This option will
+ be ignored for anything else other than a conditional update/insert.
+
+ The server will respond with a RESULT message.
+
+
+4.1.8. REGISTER
+
+ Register this connection to receive some types of events. The body of the
+ message is a [string list] representing the event types to register for. See
+ section 4.2.6 for the list of valid event types.
+
+ The response to a REGISTER message will be a READY message.
+
+ Please note that if a client driver maintains multiple connections to a
+ Cassandra node and/or connections to multiple nodes, it is advised to
+ dedicate a handful of connections to receive events, but to *not* register
+ for events on all connections, as this would only result in receiving
+ multiple times the same event messages, wasting bandwidth.
+
+
+4.2. Responses
+
+ This section describes the content of the frame body for the different
+ responses. Please note that to make room for future evolution, clients should
+ support extra information (that they should simply discard) at the end of the
+ frame body, beyond what is described in this document.
+
+4.2.1. ERROR
+
+ Indicates an error processing a request. The body of the message will be an
+ error code ([int]) followed by a [string] error message. Then, depending on
+ the exception, more content may follow. The error codes are defined in
+ Section 9, along with their additional content if any.
+
+
+4.2.2. READY
+
+ Indicates that the server is ready to process queries. This message will be
+ sent by the server either after a STARTUP message if no authentication is
+ required (if authentication is required, the server indicates readiness by
+ sending a AUTH_RESPONSE message).
+
+ The body of a READY message is empty.
+
+
+4.2.3. AUTHENTICATE
+
+ Indicates that the server requires authentication, and which authentication
+ mechanism to use.
+
+ The authentication is SASL based and thus consists of a number of server
+ challenges (AUTH_CHALLENGE, Section 4.2.7) followed by client responses
+ (AUTH_RESPONSE, Section 4.1.2). The initial exchange is however bootstrapped
+ by an initial client response. The details of that exchange (including how
+ many challenge-response pairs are required) are specific to the authenticator
+ in use. The exchange ends when the server sends an AUTH_SUCCESS message or
+ an ERROR message.
+
+ This message will be sent following a STARTUP message if authentication is
+ required and must be answered by a AUTH_RESPONSE message from the client.
+
+ The body consists of a single [string] indicating the full class name of the
+ IAuthenticator in use.
+
+
+4.2.4. SUPPORTED
+
+ Indicates which startup options are supported by the server. This message
+ comes as a response to an OPTIONS message.
+
+ The body of a SUPPORTED message is a [string multimap]. This multimap gives
+ for each of the supported STARTUP options, the list of supported values.
+
+
+4.2.5. RESULT
+
+ The result to a query (QUERY, PREPARE, EXECUTE or BATCH messages).
+
+ The first element of the body of a RESULT message is an [int] representing the
+ `kind` of result. The rest of the body depends on the kind. The kind can be
+ one of:
+ 0x0001 Void: for results carrying no information.
+ 0x0002 Rows: for results to select queries, returning a set of rows.
+ 0x0003 Set_keyspace: the result to a `use` query.
+ 0x0004 Prepared: result to a PREPARE message.
+ 0x0005 Schema_change: the result to a schema altering query.
+
+ The body for each kind (after the [int] kind) is defined below.
+
+
+4.2.5.1. Void
+
+ The rest of the body for a Void result is empty. It indicates that a query was
+ successful without providing more information.
+
+
+4.2.5.2. Rows
+
+ Indicates a set of rows. The rest of the body of a Rows result is:
+
+ where:
+ - is composed of:
+ [][?...]
+ where:
+ - is an [int]. The bits of provides information on the
+ formatting of the remaining information. A flag is set if the bit
+ corresponding to its `mask` is set. Supported flags are, given their
+ mask:
+ 0x0001 Global_tables_spec: if set, only one table spec (keyspace
+ and table name) is provided as . If not
+ set, is not present.
+ 0x0002 Has_more_pages: indicates whether this is not the last
+ page of results and more should be retrieved. If set, the
+ will be present. The is a
+ [bytes] value that should be used in QUERY/EXECUTE to
+ continue paging and retrieve the remainder of the result for
+ this query (See Section 8 for more details).
+ 0x0004 No_metadata: if set, the is only composed of
+ these , the and optionally the
+ (depending on the Has_more_pages flag) but
+ no other information (so no nor ).
+ This will only ever be the case if this was requested
+ during the query (see QUERY and RESULT messages).
+ - is an [int] representing the number of columns selected
+ by the query that produced this result. It defines the number of
+ elements in and the number of elements for each row in .
+ - is present if the Global_tables_spec is set in
+ . It is composed of two [string] representing the
+ (unique) keyspace name and table name the columns belong to.
+ - specifies the columns returned in the query. There are
+ such column specifications that are composed of:
+ ()?
+ The initial and are two [string] and are only present
+ if the Global_tables_spec flag is not set. The is a
+ [string] and is an [option] that corresponds to the description
+ (what this description is depends a bit on the context: in results to
+ selects, this will be either the user chosen alias or the selection used
+ (often a column name, but it can be a function call too). In results to
+ a PREPARE, this will be either the name of the corresponding bind variable
+ or the column name for the variable if it is "anonymous") and type of
+ the corresponding result. The option for is either a native
+ type (see below), in which case the option has no value, or a
+ 'custom' type, in which case the value is a [string] representing
+ the fully qualified class name of the type represented. Valid option
+ ids are:
+ 0x0000 Custom: the value is a [string], see above.
+ 0x0001 Ascii
+ 0x0002 Bigint
+ 0x0003 Blob
+ 0x0004 Boolean
+ 0x0005 Counter
+ 0x0006 Decimal
+ 0x0007 Double
+ 0x0008 Float
+ 0x0009 Int
+ 0x000B Timestamp
+ 0x000C Uuid
+ 0x000D Varchar
+ 0x000E Varint
+ 0x000F Timeuuid
+ 0x0010 Inet
+ 0x0011 Date
+ 0x0012 Time
+ 0x0013 Smallint
+ 0x0014 Tinyint
+ 0x0020 List: the value is an [option], representing the type
+ of the elements of the list.
+ 0x0021 Map: the value is two [option], representing the types of the
+ keys and values of the map
+ 0x0022 Set: the value is an [option], representing the type
+ of the elements of the set
+ 0x0030 UDT: the value is ...
+ where:
+ - is a [string] representing the keyspace name this
+ UDT is part of.
+ - is a [string] representing the UDT name.
+ - is a [short] representing the number of fields of
+ the UDT, and thus the number of pairs
+ following
+ - is a [string] representing the name of the
+ i_th field of the UDT.
+ - is an [option] representing the type of the
+ i_th field of the UDT.
+ 0x0031 Tuple: the value is ... where is a [short]
+ representing the number of values in the type, and
+ are [option] representing the type of the i_th component
+ of the tuple
+
+ - is an [int] representing the number of rows present in this
+ result. Those rows are serialized in the part.
+ - is composed of ... where m is .
+ Each is composed of ... where n is
+ and where is a [bytes] representing the value
+ returned for the jth column of the ith row. In other words,
+ is composed of ( * ) [bytes].
+
+
+4.2.5.3. Set_keyspace
+
+ The result to a `use` query. The body (after the kind [int]) is a single
+ [string] indicating the name of the keyspace that has been set.
+
+
+4.2.5.4. Prepared
+
+ The result to a PREPARE message. The body of a Prepared result is:
+
+ where:
+ - is [short bytes] representing the prepared query ID.
+ - is composed of:
+ [...][?...]
+ where:
+ - is an [int]. The bits of provides information on the
+ formatting of the remaining information. A flag is set if the bit
+ corresponding to its `mask` is set. Supported masks and their flags
+ are:
+ 0x0001 Global_tables_spec: if set, only one table spec (keyspace
+ and table name) is provided as . If not
+ set, is not present.
+ - is an [int] representing the number of bind markers
+ in the prepared statement. It defines the number of