Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
1b1d9a8
Added transaction isolation level to YSQL workloads and appended tests.
qvad Mar 11, 2026
b307de8
Rename RC_OL_TEST timeout variable and adjust concurrency for append-…
qvad Mar 12, 2026
a11e4e1
Updated `run-jepsen.py` to include `has_valid_unknown` support and ad…
qvad Mar 16, 2026
d6a5df6
Reduced YSQL serializable test coverage in `run-jepsen.py` to align w…
qvad Mar 17, 2026
289d198
Reduced YSQL serializable test coverage in `run-jepsen.py` to align w…
qvad Mar 17, 2026
26f2515
Removed `sz.pl.geo.append` tests from YSQL workloads in `run-jepsen.p…
qvad Mar 17, 2026
86f102f
Added dedicated table-specific workloads (`SI`, `RC`, `Serializable`)…
qvad Mar 20, 2026
9f2cf49
Updated `append-table` to use deterministic primary key for consisten…
qvad Mar 20, 2026
ced54a6
Added logging to indicate whether queries use `IndexOnlyScan` or `Seq…
qvad Mar 21, 2026
6c6fe33
Added detailed logging for table creation and indexing in `append_tab…
qvad Mar 22, 2026
6f23502
Added detailed logging for table creation and indexing in `append_tab…
qvad Mar 24, 2026
e8f1803
Added detailed logging for table creation and indexing in `append_tab…
qvad Mar 24, 2026
4af4f80
Added detailed logging for table creation and indexing in `append_tab…
qvad Mar 24, 2026
82ca522
Added detailed logging for table creation and indexing in `append_tab…
qvad Mar 25, 2026
764f57b
WIP
qvad Mar 25, 2026
7828e7c
WIP
qvad Mar 25, 2026
55ed8c4
WIP
qvad Mar 25, 2026
232c7c3
WIP
qvad Mar 25, 2026
1440b6a
WIP
qvad Mar 25, 2026
218c993
WIP
qvad Mar 25, 2026
be815e3
Expanded YSQL test coverage by adding new Serializable isolation tests.
qvad Mar 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion yugabyte/project.clj
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,6 @@
[version-clj "2.0.2"]
[clj-wallhack "1.0.1"]]
:main yugabyte.runner
:jvm-opts ["-Djava.awt.headless=true" "-Xms4g" "-Xmx8g"])
:jvm-opts ["-Djava.awt.headless=true" "-Xms4g" "-Xmx10g"])
; :aot [yugabyte.runner
; clojure.tools.logging.impl])
103 changes: 67 additions & 36 deletions yugabyte/run-jepsen.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@
'returncode',
'timed_out',
'everything_looks_good',
'cycle_search_timeout_only'])
'cycle_search_timeout_only',
'has_valid_unknown'])


def is_cycle_search_timeout_only(lines):
Expand Down Expand Up @@ -85,7 +86,7 @@ def is_cycle_search_timeout_only(lines):
# The set test might time out if you let it run for 10 minutes and leave 10 more
# minutes for analysis, so cut its running time in half.
SINGLE_TEST_RUN_TIME_FOR_SET_TEST = 300
SINGLE_TEST_RUN_TIME_FOR_RC_OL_TEST = 300
SINGLE_TEST_RUN_TIME_FOR_RC_APPEND_TEST = 300

TEST_AND_ANALYSIS_TIMEOUT_SEC = 1200 # Includes test results analysis.
DEFAULT_TARBALL_URL = "https://downloads.yugabyte.com/yugabyte-1.3.1.0-linux.tar.gz"
Expand All @@ -109,56 +110,56 @@ def is_cycle_search_timeout_only(lines):
# YSQL serializable
"ysql/sz.counter",
"ysql/sz.set",
"ysql/sz.bank",
"ysql/sz.bank-contention",
"ysql/sz.bank-multitable",
"ysql/sz.long-fork",
"ysql/sz.single-key-acid",
"ysql/sz.multi-key-acid",
"ysql/sz.default-value",
"ysql/sz.ol.append",

# YSQL snapshot isolation
"ysql/si.ol.append",
"ysql/si.bank",
"ysql/si.bank-contention",
"ysql/si.bank-multitable",
"ysql/si.counter",
"ysql/si.set",
]
},
{
"start_version": "2.13.1.0-b1",
"tests": [
# YSQL read committed
"ysql/rc.ol.append",
]
},
{
# RC pessimistic locking available since 2.15
"start_version": "2.15.0.0-b1",
"tests": [
"ysql/rc.pl.append",
"ysql/rc.append",
]
},
{
# SI pessimistic locking available since 2.17.2
"start_version": "2.17.2.0-b1",
"tests": [
"ysql/si.pl.append",
"ysql/si.append",
]
},
{
"start_version": "2.18.0.0-b1",
"tests": [
"ysql/rc.pl.geo.append",
"ysql/si.pl.geo.append",
"ysql/sz.pl.geo.append",
"ysql/rc.ol.geo.append",
"ysql/si.ol.geo.append",
"ysql/sz.ol.geo.append",
"ysql/rc.geo.append",
"ysql/si.geo.append",
"ysql/sz.geo.append",
]
},
{
# SZ pessimistic locking available since 2.20
"start_version": "2.20.0.0-b1",
"tests": [
"ysql/sz.pl.append",
"ysql/sz.append",
]
},
{
"start_version": "2.29.0.0-b500",
"start_version_stable": "2026.1.0.0-b1",
"tests": [
"ysql/sz.append-table",
"ysql/si.append-table",
"ysql/rc.append-table",
]
}
]
Expand All @@ -182,12 +183,24 @@ def is_cycle_search_timeout_only(lines):
child_processes = []


def get_workload_version(workload):
def is_stable_version(version):
    """Return True if ``version`` uses the stable/production numbering scheme.

    Master builds use a ``2.x`` scheme (e.g. ``2.29.0.0``), while stable
    releases are year-based (e.g. ``2025.2.0.0``).  Only the leading component
    of the version string is inspected; it is compared against 2024.

    Args:
        version: Version string whose components are separated by ``.`` and,
            optionally, a ``-b<build>`` suffix.

    Returns:
        True when the leading numeric component is 2024 or greater.

    Raises:
        ValueError: If the leading component is not an integer.
    """
    # Only the first component matters, so stop splitting after one separator.
    leading = re.split(r'\.|-b', version, maxsplit=1)[0]
    try:
        return int(leading) >= 2024
    except ValueError as err:
        # Same exception type as a bare int() failure, but name the offending
        # version so the failure is diagnosable from the log alone.
        raise ValueError(
            f"Cannot parse leading component of version {version!r}") from err


def get_workload_version(workload, target_version=None):
    """Return the minimum version required to run ``workload``.

    ``TEST_PER_VERSION`` is scanned in order.  An entry listing ``workload``
    exactly wins; if no entry does, the first entry with a test name that
    contains ``workload`` as a substring is used (the historical matching
    rule, kept for backward compatibility).

    Args:
        workload: Workload name, e.g. ``"ysql/sz.append"``.
        target_version: Optional version being tested.  When it is a
            stable/production version and the matching entry defines
            ``start_version_stable``, that value is returned instead of the
            master ``start_version``.

    Returns:
        The minimum version string for the workload.

    Raises:
        EnvironmentError: If no entry in ``TEST_PER_VERSION`` matches.
    """
    def _start_version(entry):
        # Stable releases use a separate, year-based minimum when one is given.
        if (target_version and is_stable_version(target_version)
                and "start_version_stable" in entry):
            return entry["start_version_stable"]
        return entry["start_version"]

    # Exact match first, so "x.append" can never be shadowed by an earlier
    # entry that merely contains it (e.g. "x.append-table").
    for entry in TEST_PER_VERSION:
        if workload in entry["tests"]:
            return _start_version(entry)

    # Fall back to the original substring match.
    for entry in TEST_PER_VERSION:
        if any(workload in test_name for test_name in entry["tests"]):
            return _start_version(entry)

    raise EnvironmentError(
        f"Unable to find workload {workload!r} in tests: {TESTS}")


def is_version_at_least(v_least, v_actual):
Expand Down Expand Up @@ -290,13 +303,16 @@ def run_cmd(cmd,
sys.exit(returncode)
everything_looks_good = False
cycle_search_timeout_only = False
has_valid_unknown = False
last_lines_of_output = []
if stdout_path is not None and os.path.exists(stdout_path):
last_lines_of_output, _ = get_last_lines(stdout_path, 50)
everything_looks_good = any(
line.startswith('Everything looks good!') for line in last_lines_of_output)
if not everything_looks_good:
cycle_search_timeout_only = is_cycle_search_timeout_only(last_lines_of_output)
has_valid_unknown = any(
':valid? :unknown' in line for line in last_lines_of_output)
if everything_looks_good:
keep_output_log_file = False
return CmdResult(
Expand All @@ -305,7 +321,8 @@ def run_cmd(cmd,
returncode=returncode,
timed_out=timed_out,
everything_looks_good=everything_looks_good,
cycle_search_timeout_only=cycle_search_timeout_only)
cycle_search_timeout_only=cycle_search_timeout_only,
has_valid_unknown=has_valid_unknown)

finally:
if stdout_file is not None:
Expand Down Expand Up @@ -395,6 +412,15 @@ def parse_args():
'--iterations',
type=int,
help='Run each workload repeatedly for this many iterations.')
parser.add_argument(
'--locking',
default=None,
choices=['mixed', 'optimistic', 'pessimistic'],
help='Locking mode for append workloads: mixed (default), optimistic, or pessimistic')
parser.add_argument(
'--stress-tuning',
action='store_true',
help='Enable stress-test flags with tiny thresholds for internal subsystems')
return parser.parse_args()


Expand Down Expand Up @@ -448,13 +474,16 @@ def main():
[os.path.join(os.environ["JAVA_HOME"], "bin", "java"), "-version"],
stderr=subprocess.STDOUT).decode().strip()
logging.info("Java version:\n%s", java_version)
locking_flag = f"--locking {args.locking}" if args.locking else ""
stress_flag = "--stress-tuning" if args.stress_tuning else ""
lein_cmd = " ".join(["lein run test",
"--os debian",
f"--url {url}",
f"--nemesis {nemeses}",
f"--nodes {get_ip_from_dns()}",
connection_manager_flag,
f"--concurrency {args.concurrency}"])
locking_flag,
stress_flag])

if args.iterations:
lein_cmd += " --test-count 1"
Expand All @@ -464,7 +493,7 @@ def main():

all_workloads = args.workloads.split(',')
workloads_to_evaluate = [workload for workload in all_workloads
if is_version_at_least(get_workload_version(workload),
if is_version_at_least(get_workload_version(workload, version),
version)]
workloads_to_skip = set(all_workloads) - set(workloads_to_evaluate)

Expand Down Expand Up @@ -496,11 +525,13 @@ def main():
test_start_time_sec = time.time()
if '/set' in test:
test_run_time_limit_no_analysis_sec = SINGLE_TEST_RUN_TIME_FOR_SET_TEST if args.test_time_sec == 0 else args.test_time_sec
elif '/rc.ol' in test:
test_run_time_limit_no_analysis_sec = SINGLE_TEST_RUN_TIME_FOR_RC_OL_TEST if args.test_time_sec == 0 else args.test_time_sec
elif '/rc.' in test and 'append' in test:
test_run_time_limit_no_analysis_sec = SINGLE_TEST_RUN_TIME_FOR_RC_APPEND_TEST if args.test_time_sec == 0 else args.test_time_sec
else:
test_run_time_limit_no_analysis_sec = SINGLE_TEST_RUN_TIME if args.test_time_sec == 0 else args.test_time_sec
concurrency = '3' if 'append-table' in test else args.concurrency
full_cmd = lein_cmd + \
f" --concurrency {concurrency}" + \
" --time-limit " + str(test_run_time_limit_no_analysis_sec) + \
" --workload " + test
result = run_cmd(
Expand Down Expand Up @@ -534,16 +565,16 @@ def main():
test_index, test_elapsed_time_sec, result.returncode,
result.everything_looks_good)

# For rc.ol workloads, accept cycle-search-timeout as valid (no actual anomalies found)
is_rc_ol_timeout_acceptable = (
'/rc.ol' in test and
result.cycle_search_timeout_only and
# For read committed workloads, accept valid-unknown results (e.g. cycle-search-timeout)
is_rc_unknown_acceptable = (
'/rc.' in test and
result.has_valid_unknown and
not result.timed_out
)

if result.everything_looks_good or is_rc_ol_timeout_acceptable:
if is_rc_ol_timeout_acceptable:
logging.info("Accepting rc.ol test with cycle-search-timeout (no anomalies found)")
if result.everything_looks_good or is_rc_unknown_acceptable:
if is_rc_unknown_acceptable:
logging.info("Accepting read committed test with valid-unknown result")
num_everything_looks_good += 1

if test_name not in test_cases:
Expand Down
4 changes: 2 additions & 2 deletions yugabyte/sort-results.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ find $STORE_DIR -name "jepsen.log" -printf "%T+\t%p\n" | sort | cut -f2 |
if grep -q ':valid? false' "$log_path"; then
category="invalid"
elif grep -q ':valid? :unknown' "$log_path"; then
# For rc.ol tests, :valid? :unknown with only cycle-search-timeout is acceptable
if [[ "$rel_dir_path" == *"rc.ol"* ]] && grep -q ':cycle-search-timeout' "$log_path" && ! grep -qE ':G0|:G1a|:G1b|:G1c|:G2' "$log_path"; then
# For rc tests, :valid? :unknown with only cycle-search-timeout is acceptable
if [[ "$rel_dir_path" == *"_rc."* ]] && grep -q ':cycle-search-timeout' "$log_path" && ! grep -qE ':G0|:G1a|:G1b|:G1c|:G2' "$log_path"; then
category="ok"
else
category="valid-unknown"
Expand Down
28 changes: 28 additions & 0 deletions yugabyte/src/yugabyte/append.clj
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,31 @@
; :consistency-models [:strict-serializable] ; default value
:additional-graphs [elle/realtime-graph]})))
; (update :generator (partial gen/stagger 1/5)))

; Append-table workloads use lower limits because each key is a separate table
; and every read fetches all rows — O(n) per read instead of O(1).
(defn workload-si-table
  "Append-table workload checked against snapshot isolation.
  Passes reduced key/transaction/write limits to `append/test`.
  `opts` is accepted for interface parity but is not read here."
  [opts]
  (let [test-opts {:key-count          16
                   :max-txn-length     4
                   :max-writes-per-key 128
                   :anomalies          [:internal :G-nonadjacent :G1 :G-SI]
                   :consistency-models [:snapshot-isolation]
                   :additional-graphs  [elle/realtime-graph]}]
    (append/test test-opts)))

(defn workload-rc-table
  "Append-table workload checked against read committed.
  Passes reduced key/transaction/write limits to `append/test` and
  looks only for the :G0, :G1a and :G1b anomalies.
  `opts` is accepted for interface parity but is not read here."
  [opts]
  (let [test-opts {:key-count          16
                   :max-txn-length     4
                   :max-writes-per-key 128
                   :anomalies          [:G0 :G1a :G1b]
                   :consistency-models [:read-committed]
                   :additional-graphs  [elle/realtime-graph]}]
    (append/test test-opts)))

(defn workload-serializable-table
  "Append-table workload for the serializable tests.
  Passes reduced key/transaction/write limits to `append/test` and looks
  for :G1 and :G2 anomalies. No :consistency-models entry is supplied, so
  whatever default `append/test` applies is used.
  `opts` is accepted for interface parity but is not read here."
  [opts]
  (let [test-opts {:key-count          16
                   :max-txn-length     4
                   :max-writes-per-key 128
                   :anomalies          [:G1 :G2]
                   :additional-graphs  [elle/realtime-graph]}]
    (append/test test-opts)))
43 changes: 43 additions & 0 deletions yugabyte/src/yugabyte/auto.clj
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,45 @@
:--rpc_connection_timeout_ms 1500]
[]))

(defn master-tserver-stress-flags
  "Stress-test flags shared by master and tserver.
  Returns an empty vector unless (:stress-tuning test) is truthy. Every flag
  is currently commented out (reason noted inline), so the result is empty
  in both cases — re-enable flags after verifying startup."
  [test]
  (if-not (:stress-tuning test)
    []
    [; WAL: 512KB segments — may be too small for catalog bootstrap
     ; :--log_segment_size_bytes 524288
     ; :--consensus_max_batch_size_bytes 65536 ; 64KB — smaller replication batches
     ; :--bg_superblock_flush_interval_secs 5
     ]))

(defn master-stress-flags
  "Master-only stress-test flags (tablet splitting).
  Returns an empty vector unless (:stress-tuning test) is truthy. All flags
  are currently disabled — tiny thresholds cause split storms during
  bootstrap — so the result is empty in both cases."
  [test]
  (if-not (:stress-tuning test)
    []
    [; :--enable_automatic_tablet_splitting true
     ; :--tablet_split_low_phase_size_threshold_bytes 1024
     ; :--tablet_split_high_phase_size_threshold_bytes 4096
     ; :--tablet_force_split_threshold_bytes 8192
     ]))

(defn tserver-stress-flags
  "Tserver-only stress-test flags — DocDB, RocksDB, MVCC, intent cleanup.
  Returns an empty vector unless (:stress-tuning test) is truthy; otherwise
  returns the enabled flag/value pairs below. Commented-out entries are
  disabled candidates."
  [test]
  (cond-> []
    (:stress-tuning test)
    (into [:--txn_max_apply_batch_records 5
           ; :--db_write_buffer_size 524288
           ; :--db_block_cache_size_bytes 8388608
           ; :--aborted_intent_cleanup_ms 1000
           ; :--timestamp_history_retention_interval_sec 5
           ; :--transaction_deadlock_detection_interval_usec 1000000
           :--backfill_index_write_batch_size 10
           ; :--cdc_stream_records_threshold_size_bytes 1024
           ])))

(def limits-conf
"Ulimits, in the format for /etc/security/limits.conf."
"
Expand Down Expand Up @@ -544,6 +583,8 @@
(master-tserver-wait-on-conflict-flags test)
(master-tserver-packed-columns test)
(master-tserver-geo-partitioning-flags test node (:nodes test))
(master-tserver-stress-flags test)
(master-stress-flags test)
(master-api-opts (:api test) node)
)))

Expand All @@ -565,6 +606,8 @@
(master-tserver-wait-on-conflict-flags test)
(master-tserver-packed-columns test)
(master-tserver-geo-partitioning-flags test node (:nodes test))
(master-tserver-stress-flags test)
(tserver-stress-flags test)
(tserver-api-opts test node)
(tserver-connection-manager-preview test)
(tserver-read-committed-flags test)
Expand Down
30 changes: 14 additions & 16 deletions yugabyte/src/yugabyte/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@
"A map of workload names to functions that can take option maps and construct workloads."
#:ysql{:none noop-test
:sleep sleep-test
:sz.counter (with-client counter/workload (yugabyte.ysql.counter/->YSQLCounterClient))
:sz.set (with-client set/workload (yugabyte.ysql.set/->YSQLSetClient))
:sz.counter (with-client counter/workload (yugabyte.ysql.counter/->YSQLCounterClient :serializable))
:sz.set (with-client set/workload (yugabyte.ysql.set/->YSQLSetClient :serializable))
; This one doesn't work because of https://github.com/YugaByte/yugabyte-db/issues/1554
; :set-index (with-client set/workload (yugabyte.ysql.set/->YSQLSetIndexClient))
; We'd rather allow negatives for now because it makes reproducing error easier
Expand All @@ -103,25 +103,23 @@
:sz.long-fork (with-client long-fork/workload (yugabyte.ysql.long-fork/->YSQLLongForkClient))
:sz.single-key-acid (with-client single-key-acid/workload (yugabyte.ysql.single-key-acid/->YSQLSingleKeyAcidClient))
:sz.multi-key-acid (with-client multi-key-acid/workload (yugabyte.ysql.multi-key-acid/->YSQLMultiKeyAcidClient))
:sz.ol.geo.append (with-client append/workload-serializable (ysql.append/->Client :serializable :optimistic :geo))
:sz.pl.geo.append (with-client append/workload-serializable (ysql.append/->Client :serializable :pessimistic :geo))
:sz.ol.append (with-client append/workload-serializable (ysql.append/->Client :serializable :optimistic :no-geo))
:sz.pl.append (with-client append/workload-serializable (ysql.append/->Client :serializable :pessimistic :no-geo))
:sz.append-table (with-client append/workload-serializable (ysql.append-table/->Client :serializable))
:sz.geo.append (with-client append/workload-serializable (ysql.append/->Client :serializable (or (:locking opts) :mixed) :geo))
:sz.append (with-client append/workload-serializable (ysql.append/->Client :serializable (or (:locking opts) :mixed) :no-geo))
:sz.append-table (with-client append/workload-serializable-table (ysql.append-table/->Client :serializable))
:sz.default-value (with-client default-value/workload (ysql.default-value/->Client))
:rc.ol.geo.append (with-client append/workload-rc (ysql.append/->Client :read-committed :optimistic :geo))
:rc.pl.geo.append (with-client append/workload-rc (ysql.append/->Client :read-committed :pessimistic :geo))
:rc.ol.append (with-client append/workload-rc (ysql.append/->Client :read-committed :optimistic :no-geo))
:rc.pl.append (with-client append/workload-rc (ysql.append/->Client :read-committed :pessimistic :no-geo))
:rc.geo.append (with-client append/workload-rc (ysql.append/->Client :read-committed (or (:locking opts) :mixed) :geo))
:rc.append (with-client append/workload-rc (ysql.append/->Client :read-committed (or (:locking opts) :mixed) :no-geo))
; See https://docs.yugabyte.com/latest/architecture/transactions/isolation-levels/
; :snapshot-isolation maps to :repeatable_read SQL
:si.ol.geo.append (with-client append/workload-si (ysql.append/->Client :repeatable-read :optimistic :geo))
:si.pl.geo.append (with-client append/workload-si (ysql.append/->Client :repeatable-read :pessimistic :geo))
:si.ol.append (with-client append/workload-si (ysql.append/->Client :repeatable-read :optimistic :no-geo))
:si.pl.append (with-client append/workload-si (ysql.append/->Client :repeatable-read :pessimistic :no-geo))
:si.geo.append (with-client append/workload-si (ysql.append/->Client :repeatable-read (or (:locking opts) :mixed) :geo))
:si.append (with-client append/workload-si (ysql.append/->Client :repeatable-read (or (:locking opts) :mixed) :no-geo))
:si.bank (with-client bank/workload-allow-neg (yugabyte.ysql.bank/->YSQLBankClient true :repeatable-read))
:si.bank-multitable (with-client bank/workload-allow-neg (yugabyte.ysql.bank/->YSQLBankClient true :repeatable-read))
:si.bank-contention (with-client bank-improved/workload-contention-keys (yugabyte.ysql.bank-improved/->YSQLBankContentionClient :repeatable-read))})
:si.bank-contention (with-client bank-improved/workload-contention-keys (yugabyte.ysql.bank-improved/->YSQLBankContentionClient :repeatable-read))
:si.append-table (with-client append/workload-si-table (ysql.append-table/->Client :repeatable-read))
:si.counter (with-client counter/workload (yugabyte.ysql.counter/->YSQLCounterClient :repeatable-read))
:si.set (with-client set/workload (yugabyte.ysql.set/->YSQLSetClient :repeatable-read))
:rc.append-table (with-client append/workload-rc-table (ysql.append-table/->Client :read-committed))})

(def workloads
(merge workloads-ycql workloads-ysql))
Expand Down
Loading
Loading