diff --git a/yugabyte/project.clj b/yugabyte/project.clj index efd4311b4..ce3bd3994 100644 --- a/yugabyte/project.clj +++ b/yugabyte/project.clj @@ -14,6 +14,6 @@ [version-clj "2.0.2"] [clj-wallhack "1.0.1"]] :main yugabyte.runner - :jvm-opts ["-Djava.awt.headless=true" "-Xms4g" "-Xmx8g"]) + :jvm-opts ["-Djava.awt.headless=true" "-Xms4g" "-Xmx10g"]) ; :aot [yugabyte.runner ; clojure.tools.logging.impl]) diff --git a/yugabyte/run-jepsen.py b/yugabyte/run-jepsen.py index c38411d42..623f2fdef 100755 --- a/yugabyte/run-jepsen.py +++ b/yugabyte/run-jepsen.py @@ -44,7 +44,8 @@ 'returncode', 'timed_out', 'everything_looks_good', - 'cycle_search_timeout_only']) + 'cycle_search_timeout_only', + 'has_valid_unknown']) def is_cycle_search_timeout_only(lines): @@ -85,7 +86,7 @@ def is_cycle_search_timeout_only(lines): # The set test might time out if you let it run for 10 minutes and leave 10 more # minutes for analysis, so cut its running time in half. SINGLE_TEST_RUN_TIME_FOR_SET_TEST = 300 -SINGLE_TEST_RUN_TIME_FOR_RC_OL_TEST = 300 +SINGLE_TEST_RUN_TIME_FOR_RC_APPEND_TEST = 300 TEST_AND_ANALYSIS_TIMEOUT_SEC = 1200 # Includes test results analysis. DEFAULT_TARBALL_URL = "https://downloads.yugabyte.com/yugabyte-1.3.1.0-linux.tar.gz" @@ -109,56 +110,56 @@ def is_cycle_search_timeout_only(lines): # YSQL serializable "ysql/sz.counter", "ysql/sz.set", - "ysql/sz.bank", "ysql/sz.bank-contention", "ysql/sz.bank-multitable", "ysql/sz.long-fork", "ysql/sz.single-key-acid", "ysql/sz.multi-key-acid", "ysql/sz.default-value", - "ysql/sz.ol.append", # YSQL snapshot isolation - "ysql/si.ol.append", - "ysql/si.bank", "ysql/si.bank-contention", "ysql/si.bank-multitable", + "ysql/si.counter", + "ysql/si.set", ] }, { - "start_version": "2.13.1.0-b1", - "tests": [ - # YSQL read committed - "ysql/rc.ol.append", - ] - }, - { + # RC pessimistic locking available since 2.15 "start_version": "2.15.0.0-b1", "tests": [ - "ysql/rc.pl.append", + "ysql/rc.append", ] }, { + # SI pessimistic locking available since 2.17.2 "start_version": "2.17.2.0-b1", "tests": [ - "ysql/si.pl.append", + "ysql/si.append", ] }, { "start_version": "2.18.0.0-b1", "tests": [ - "ysql/rc.pl.geo.append", - "ysql/si.pl.geo.append", - "ysql/sz.pl.geo.append", - "ysql/rc.ol.geo.append", - "ysql/si.ol.geo.append", - "ysql/sz.ol.geo.append", + "ysql/rc.geo.append", + "ysql/si.geo.append", + "ysql/sz.geo.append", ] }, { + # SZ pessimistic locking available since 2.20 "start_version": "2.20.0.0-b1", "tests": [ - "ysql/sz.pl.append", + "ysql/sz.append", + ] + }, + { + "start_version": "2.29.0.0-b500", + "start_version_stable": "2026.1.0.0-b1", + "tests": [ + "ysql/sz.append-table", + "ysql/si.append-table", + "ysql/rc.append-table", ] } ] @@ -182,12 +183,24 @@ def is_cycle_search_timeout_only(lines): child_processes = [] -def get_workload_version(workload): +def is_stable_version(version): + """Check if version uses the stable/production format (2024.x, 2025.x, etc.) + Master versions use 2.x format (e.g. 2.29.0.0), stable use year-based (e.g. 2025.2.0.0).""" + first = int(re.split(r'\.|-b', version)[0]) + return first >= 2024 + + +def get_workload_version(workload, target_version=None): + """Get the minimum version for a workload. When target_version is a stable/production + release and the workload has a start_version_stable, use that instead of the master + start_version.""" for el in TEST_PER_VERSION: for tests in el["tests"]: if workload in tests: + if target_version and is_stable_version(target_version) and "start_version_stable" in el: + return el["start_version_stable"] return el["start_version"] - raise EnvironmentError(f"Unanable to find workload in tests: {TESTS}") + raise EnvironmentError(f"Unable to find workload in tests: {TESTS}") def is_version_at_least(v_least, v_actual): @@ -290,6 +303,7 @@ def run_cmd(cmd, sys.exit(returncode) everything_looks_good = False cycle_search_timeout_only = False + has_valid_unknown = False last_lines_of_output = [] if stdout_path is not None and os.path.exists(stdout_path): last_lines_of_output, _ = get_last_lines(stdout_path, 50) @@ -297,6 +311,8 @@ def run_cmd(cmd, line.startswith('Everything looks good!') for line in last_lines_of_output) if not everything_looks_good: cycle_search_timeout_only = is_cycle_search_timeout_only(last_lines_of_output) + has_valid_unknown = any( + ':valid? :unknown' in line for line in last_lines_of_output) if everything_looks_good: keep_output_log_file = False return CmdResult( @@ -305,7 +321,8 @@ def run_cmd(cmd, returncode=returncode, timed_out=timed_out, everything_looks_good=everything_looks_good, - cycle_search_timeout_only=cycle_search_timeout_only) + cycle_search_timeout_only=cycle_search_timeout_only, + has_valid_unknown=has_valid_unknown) finally: if stdout_file is not None: @@ -395,6 +412,15 @@ def parse_args(): '--iterations', type=int, help='Run each workload repeatedly for this many iterations.') + parser.add_argument( + '--locking', + default=None, + choices=['mixed', 'optimistic', 'pessimistic'], + help='Locking mode for append workloads: mixed (default), optimistic, or pessimistic') + parser.add_argument( + '--stress-tuning', + action='store_true', + help='Enable stress-test flags with tiny thresholds for internal subsystems') return parser.parse_args() @@ -448,13 +474,16 @@ def main(): [os.path.join(os.environ["JAVA_HOME"], "bin", "java"), "-version"], stderr=subprocess.STDOUT).decode().strip() logging.info("Java version:\n%s", java_version) + locking_flag = f"--locking {args.locking}" if args.locking else "" + stress_flag = "--stress-tuning" if args.stress_tuning else "" lein_cmd = " ".join(["lein run test", "--os debian", f"--url {url}", f"--nemesis {nemeses}", f"--nodes {get_ip_from_dns()}", connection_manager_flag, - f"--concurrency {args.concurrency}"]) + locking_flag, + stress_flag]) if args.iterations: lein_cmd += " --test-count 1" @@ -464,7 +493,7 @@ def main(): all_workloads = args.workloads.split(',') workloads_to_evaluate = [workload for workload in all_workloads - if is_version_at_least(get_workload_version(workload), + if is_version_at_least(get_workload_version(workload, version), version)] workloads_to_skip = set(all_workloads) - set(workloads_to_evaluate) @@ -496,11 +525,13 @@ def main(): test_start_time_sec = time.time() if '/set' in test: test_run_time_limit_no_analysis_sec = SINGLE_TEST_RUN_TIME_FOR_SET_TEST if args.test_time_sec == 0 else args.test_time_sec - elif '/rc.ol' in test: - test_run_time_limit_no_analysis_sec = SINGLE_TEST_RUN_TIME_FOR_RC_OL_TEST if args.test_time_sec == 0 else args.test_time_sec + elif '/rc.' in test and 'append' in test: + test_run_time_limit_no_analysis_sec = SINGLE_TEST_RUN_TIME_FOR_RC_APPEND_TEST if args.test_time_sec == 0 else args.test_time_sec else: test_run_time_limit_no_analysis_sec = SINGLE_TEST_RUN_TIME if args.test_time_sec == 0 else args.test_time_sec + concurrency = '3' if 'append-table' in test else args.concurrency full_cmd = lein_cmd + \ + f" --concurrency {concurrency}" + \ " --time-limit " + str(test_run_time_limit_no_analysis_sec) + \ " --workload " + test result = run_cmd( @@ -534,16 +565,16 @@ def main(): test_index, test_elapsed_time_sec, result.returncode, result.everything_looks_good) - # For rc.ol workloads, accept cycle-search-timeout as valid (no actual anomalies found) - is_rc_ol_timeout_acceptable = ( - '/rc.ol' in test and - result.cycle_search_timeout_only and + # For read committed workloads, accept valid-unknown results (e.g. cycle-search-timeout) + is_rc_unknown_acceptable = ( + '/rc.' in test and + result.has_valid_unknown and not result.timed_out ) - if result.everything_looks_good or is_rc_ol_timeout_acceptable: - if is_rc_ol_timeout_acceptable: - logging.info("Accepting rc.ol test with cycle-search-timeout (no anomalies found)") + if result.everything_looks_good or is_rc_unknown_acceptable: + if is_rc_unknown_acceptable: + logging.info("Accepting read committed test with valid-unknown result") num_everything_looks_good += 1 if test_name not in test_cases: diff --git a/yugabyte/sort-results.sh b/yugabyte/sort-results.sh index 764ec3d2f..dd8d410d2 100755 --- a/yugabyte/sort-results.sh +++ b/yugabyte/sort-results.sh @@ -22,8 +22,8 @@ find $STORE_DIR -name "jepsen.log" -printf "%T+\t%p\n" | sort | cut -f2 | if grep -q ':valid? false' "$log_path"; then category="invalid" elif grep -q ':valid? :unknown' "$log_path"; then - # For rc.ol tests, :valid? :unknown with only cycle-search-timeout is acceptable - if [[ "$rel_dir_path" == *"rc.ol"* ]] && grep -q ':cycle-search-timeout' "$log_path" && ! grep -qE ':G0|:G1a|:G1b|:G1c|:G2' "$log_path"; then + # For rc tests, :valid? :unknown with only cycle-search-timeout is acceptable + if [[ "$rel_dir_path" == *"_rc."* ]] && grep -q ':cycle-search-timeout' "$log_path" && ! grep -qE ':G0|:G1a|:G1b|:G1c|:G2' "$log_path"; then category="ok" else category="valid-unknown" diff --git a/yugabyte/src/yugabyte/append.clj b/yugabyte/src/yugabyte/append.clj index f578c3ae9..e1211a15b 100644 --- a/yugabyte/src/yugabyte/append.clj +++ b/yugabyte/src/yugabyte/append.clj @@ -34,3 +34,31 @@ ; :consistency-models [:strict-serializable] ; default value :additional-graphs [elle/realtime-graph]}))) ; (update :generator (partial gen/stagger 1/5))) + +; Append-table workloads use lower limits because each key is a separate table +; and every read fetches all rows — O(n) per read instead of O(1). +(defn workload-si-table + [opts] + (-> (append/test {:key-count 16 + :max-txn-length 4 + :max-writes-per-key 128 + :anomalies [:internal :G-nonadjacent :G1 :G-SI] + :consistency-models [:snapshot-isolation] + :additional-graphs [elle/realtime-graph]}))) + +(defn workload-rc-table + [opts] + (-> (append/test {:key-count 16 + :max-txn-length 4 + :max-writes-per-key 128 + :anomalies [:G0 :G1a :G1b] + :consistency-models [:read-committed] + :additional-graphs [elle/realtime-graph]}))) + +(defn workload-serializable-table + [opts] + (-> (append/test {:key-count 16 + :max-txn-length 4 + :max-writes-per-key 128 + :anomalies [:G1 :G2] + :additional-graphs [elle/realtime-graph]}))) diff --git a/yugabyte/src/yugabyte/auto.clj b/yugabyte/src/yugabyte/auto.clj index ab0fb61d8..f333e0284 100644 --- a/yugabyte/src/yugabyte/auto.clj +++ b/yugabyte/src/yugabyte/auto.clj @@ -483,6 +483,45 @@ :--rpc_connection_timeout_ms 1500] [])) +(defn master-tserver-stress-flags + "Shared stress-test flags for master and tserver. + Disabled flags are commented with the reason — re-enable after verifying startup." + [test] + (if (:stress-tuning test) + [; WAL: 512KB segments — may be too small for catalog bootstrap + ; :--log_segment_size_bytes 524288 +; :--consensus_max_batch_size_bytes 65536 ; 64KB — smaller replication batches + ; :--bg_superblock_flush_interval_secs 5 + ] + [])) + +(defn master-stress-flags + "Stress-test flags for master: tablet splitting. + Disabled — tiny thresholds cause split storms during bootstrap." + [test] + (if (:stress-tuning test) + [; :--enable_automatic_tablet_splitting true + ; :--tablet_split_low_phase_size_threshold_bytes 1024 + ; :--tablet_split_high_phase_size_threshold_bytes 4096 + ; :--tablet_force_split_threshold_bytes 8192 + ] + [])) + +(defn tserver-stress-flags + "Stress-test flags for tserver — DocDB, RocksDB, MVCC, intent cleanup." + [test] + (if (:stress-tuning test) + [:--txn_max_apply_batch_records 5 + ; :--db_write_buffer_size 524288 + ; :--db_block_cache_size_bytes 8388608 +; :--aborted_intent_cleanup_ms 1000 +; :--timestamp_history_retention_interval_sec 5 +; :--transaction_deadlock_detection_interval_usec 1000000 + :--backfill_index_write_batch_size 10 + ; :--cdc_stream_records_threshold_size_bytes 1024 + ] + [])) + (def limits-conf "Ulimits, in the format for /etc/security/limits.conf." " @@ -544,6 +583,8 @@ (master-tserver-wait-on-conflict-flags test) (master-tserver-packed-columns test) (master-tserver-geo-partitioning-flags test node (:nodes test)) + (master-tserver-stress-flags test) + (master-stress-flags test) (master-api-opts (:api test) node) ))) @@ -565,6 +606,8 @@ (master-tserver-wait-on-conflict-flags test) (master-tserver-packed-columns test) (master-tserver-geo-partitioning-flags test node (:nodes test)) + (master-tserver-stress-flags test) + (tserver-stress-flags test) (tserver-api-opts test node) (tserver-connection-manager-preview test) (tserver-read-committed-flags test) diff --git a/yugabyte/src/yugabyte/core.clj b/yugabyte/src/yugabyte/core.clj index e8f4a337f..0d86beff9 100644 --- a/yugabyte/src/yugabyte/core.clj +++ b/yugabyte/src/yugabyte/core.clj @@ -92,8 +92,8 @@ "A map of workload names to functions that can take option maps and construct workloads." #:ysql{:none noop-test :sleep sleep-test - :sz.counter (with-client counter/workload (yugabyte.ysql.counter/->YSQLCounterClient)) - :sz.set (with-client set/workload (yugabyte.ysql.set/->YSQLSetClient)) + :sz.counter (with-client counter/workload (yugabyte.ysql.counter/->YSQLCounterClient :serializable)) + :sz.set (with-client set/workload (yugabyte.ysql.set/->YSQLSetClient :serializable)) ; This one doesn't work because of https://github.com/YugaByte/yugabyte-db/issues/1554 ; :set-index (with-client set/workload (yugabyte.ysql.set/->YSQLSetIndexClient)) ; We'd rather allow negatives for now because it makes reproducing error easier @@ -103,25 +103,23 @@ :sz.long-fork (with-client long-fork/workload (yugabyte.ysql.long-fork/->YSQLLongForkClient)) :sz.single-key-acid (with-client single-key-acid/workload (yugabyte.ysql.single-key-acid/->YSQLSingleKeyAcidClient)) :sz.multi-key-acid (with-client multi-key-acid/workload (yugabyte.ysql.multi-key-acid/->YSQLMultiKeyAcidClient)) - :sz.ol.geo.append (with-client append/workload-serializable (ysql.append/->Client :serializable :optimistic :geo)) - :sz.pl.geo.append (with-client append/workload-serializable (ysql.append/->Client :serializable :pessimistic :geo)) - :sz.ol.append (with-client append/workload-serializable (ysql.append/->Client :serializable :optimistic :no-geo)) - :sz.pl.append (with-client append/workload-serializable (ysql.append/->Client :serializable :pessimistic :no-geo)) - :sz.append-table (with-client append/workload-serializable (ysql.append-table/->Client :serializable)) + :sz.geo.append (with-client append/workload-serializable (ysql.append/->Client :serializable (or (:locking opts) :mixed) :geo)) + :sz.append (with-client append/workload-serializable (ysql.append/->Client :serializable (or (:locking opts) :mixed) :no-geo)) + :sz.append-table (with-client append/workload-serializable-table (ysql.append-table/->Client :serializable)) :sz.default-value (with-client default-value/workload (ysql.default-value/->Client)) - :rc.ol.geo.append (with-client append/workload-rc (ysql.append/->Client :read-committed :optimistic :geo)) - :rc.pl.geo.append (with-client append/workload-rc (ysql.append/->Client :read-committed :pessimistic :geo)) - :rc.ol.append (with-client append/workload-rc (ysql.append/->Client :read-committed :optimistic :no-geo)) - :rc.pl.append (with-client append/workload-rc (ysql.append/->Client :read-committed :pessimistic :no-geo)) + :rc.geo.append (with-client append/workload-rc (ysql.append/->Client :read-committed (or (:locking opts) :mixed) :geo)) + :rc.append (with-client append/workload-rc (ysql.append/->Client :read-committed (or (:locking opts) :mixed) :no-geo)) ; See https://docs.yugabyte.com/latest/architecture/transactions/isolation-levels/ ; :snapshot-isolation maps to :repeatable_read SQL - :si.ol.geo.append (with-client append/workload-si (ysql.append/->Client :repeatable-read :optimistic :geo)) - :si.pl.geo.append (with-client append/workload-si (ysql.append/->Client :repeatable-read :pessimistic :geo)) - :si.ol.append (with-client append/workload-si (ysql.append/->Client :repeatable-read :optimistic :no-geo)) - :si.pl.append (with-client append/workload-si (ysql.append/->Client :repeatable-read :pessimistic :no-geo)) + :si.geo.append (with-client append/workload-si (ysql.append/->Client :repeatable-read (or (:locking opts) :mixed) :geo)) + :si.append (with-client append/workload-si (ysql.append/->Client :repeatable-read (or (:locking opts) :mixed) :no-geo)) :si.bank (with-client bank/workload-allow-neg (yugabyte.ysql.bank/->YSQLBankClient true :repeatable-read)) :si.bank-multitable (with-client bank/workload-allow-neg (yugabyte.ysql.bank/->YSQLBankClient true :repeatable-read)) - :si.bank-contention (with-client bank-improved/workload-contention-keys (yugabyte.ysql.bank-improved/->YSQLBankContentionClient :repeatable-read))}) + :si.bank-contention (with-client bank-improved/workload-contention-keys (yugabyte.ysql.bank-improved/->YSQLBankContentionClient :repeatable-read)) + :si.append-table (with-client append/workload-si-table (ysql.append-table/->Client :repeatable-read)) + :si.counter (with-client counter/workload (yugabyte.ysql.counter/->YSQLCounterClient :repeatable-read)) + :si.set (with-client set/workload (yugabyte.ysql.set/->YSQLSetClient :repeatable-read)) + :rc.append-table (with-client append/workload-rc-table (ysql.append-table/->Client :read-committed))}) (def workloads (merge workloads-ycql workloads-ysql)) diff --git a/yugabyte/src/yugabyte/runner.clj b/yugabyte/src/yugabyte/runner.clj index 3b6eadb49..b99d2cf2c 100644 --- a/yugabyte/src/yugabyte/runner.clj +++ b/yugabyte/src/yugabyte/runner.clj @@ -121,7 +121,15 @@ [nil "--random-seed SEED" "Random seed for deterministic test execution. If not provided, a random seed is generated." :default nil - :parse-fn parse-long]]) + :parse-fn parse-long] + + [nil "--locking MODE" "Locking mode for append workloads: mixed (default), optimistic, or pessimistic" + :default nil + :parse-fn keyword + :validate [#{:mixed :optimistic :pessimistic} "Must be one of: mixed, optimistic, pessimistic"]] + + [nil "--stress-tuning" "Enable stress-test flags that use tiny thresholds for internal subsystems (batching, compaction, WAL, cache, splitting, etc.) to trigger edge cases more frequently" + :default true]]) (def test-all-opts "CLI options for testing everything." diff --git a/yugabyte/src/yugabyte/utils.clj b/yugabyte/src/yugabyte/utils.clj index 88bd26703..04dac41d4 100644 --- a/yugabyte/src/yugabyte/utils.clj +++ b/yugabyte/src/yugabyte/utils.clj @@ -29,5 +29,8 @@ (clojure.string/includes? (name (:workload test)) "rc.")) (defn is-test-has-pessimistic-locs? + "Returns true if the test may use pessimistic locking. With mixed locking + (default), pessimistic is used randomly, so wait queues must be enabled. + Only returns false when locking is explicitly :optimistic." [test] - (clojure.string/includes? (name (:workload test)) "pl.")) + (not= :optimistic (:locking test))) diff --git a/yugabyte/src/yugabyte/ysql/append.clj b/yugabyte/src/yugabyte/ysql/append.clj index 393fabc0f..76e760c17 100644 --- a/yugabyte/src/yugabyte/ysql/append.clj +++ b/yugabyte/src/yugabyte/ysql/append.clj @@ -113,6 +113,21 @@ (str/split #",") (->> (mapv #(Long/parseLong %))))) +(defn read-via-index + "Reads a key using secondary index on k2" + [locking conn table row col] + (let [clause (if (= :pessimistic locking) + (random/nth ["" " for update" " for no key update" " for share" " for key share"]) + "")] + (some-> conn + (c/query [(str "select (" col ") from " table " where k2 = ?" clause) row]) + first + (get (keyword col)) + (str/split #",") + (->> + (remove str/blank?) + (mapv #(Long/parseLong %)))))) + (defn append-secondary! "Writes a key based on a predicate over a secondary key, k2. Returns v." [conn table row col v] @@ -136,7 +151,11 @@ col (col-for test k)] [f k (case f :r - (read-primary locking conn table row col) + (let [use-index? (and (not= geo-partitioning :geo) (zero? (random/long 2)))] + (info table (if use-index? "IndexScan(k2)" "PrimaryScan(k)") "row=" row) + (if use-index? + (read-via-index locking conn table row col) + (read-primary locking conn table row col))) :append (append-primary! locking geo-partitioning conn table row col v))])) @@ -165,6 +184,13 @@ ", PRIMARY KEY (k, geo_partition)) FOR VALUES IN ('" postfix "') " "TABLESPACE " tablespace-name "_" postfix))) +(defn resolve-locking + "Resolves locking mode for a transaction. :mixed randomly picks :optimistic or :pessimistic." + [locking] + (if (= :mixed locking) + (random/nth [:optimistic :pessimistic]) + locking)) + (defrecord InternalClient [isolation locking geo-partitioning] c/YSQLYbClient @@ -183,20 +209,21 @@ (range keys-per-row))) {:conditional? true :table-spec (get-table-spec geo-partitioning)})) - (if (= geo-partitioning :geo) - (do - (create-partitioning-table c table tablespace-name "1a") - (create-partitioning-table c table tablespace-name "2a"))) - )) + (when (not= geo-partitioning :geo) + (c/execute! c (str "CREATE INDEX idx_" table " ON " table " (k2)"))) + (when (= geo-partitioning :geo) + (create-partitioning-table c table tablespace-name "1a") + (create-partitioning-table c table tablespace-name "2a")))) dorun))) (invoke-op! [this test op c conn-wrapper] (let [txn (:value op) use-txn? (< 1 (count txn)) + resolved-locking (resolve-locking locking) txn' (if use-txn? (j/with-db-transaction [c c {:isolation isolation}] - (mapv (partial mop! geo-partitioning locking c test) txn)) - (mapv (partial mop! geo-partitioning locking c test) txn))] + (mapv (partial mop! geo-partitioning resolved-locking c test) txn)) + (mapv (partial mop! geo-partitioning resolved-locking c test) txn))] (assoc op :type :ok, :value txn')))) (c/defclient Client InternalClient) diff --git a/yugabyte/src/yugabyte/ysql/append_table.clj b/yugabyte/src/yugabyte/ysql/append_table.clj index a72f9f52d..2eeb311db 100644 --- a/yugabyte/src/yugabyte/ysql/append_table.clj +++ b/yugabyte/src/yugabyte/ysql/append_table.clj @@ -16,6 +16,7 @@ sure what to do here." (:require [clojure.java.jdbc :as j] [clojure.tools.logging :refer [info]] + [jepsen.random :as random] [yugabyte.ysql.client :as c])) (defn table-name @@ -56,8 +57,13 @@ (defn read-ordered "Reads every value in table ordered by k." [conn table] - (let [res (c/query conn [(str "select k, v from " table " order by k")])] - (info "table" table "has" (map (juxt :k :v) res)) + (let [idx (str "idx_" table) + use-index? (zero? (random/long 2)) + query-str (if use-index? + (str "/*+ IndexOnlyScan(" table " " idx ") */ select k, v from " table " order by k") + (str "select k, v from " table " order by k")) + res (c/query conn [query-str])] + (info table (if use-index? "IndexOnlyScan" "SeqScan") "→" (mapv :v res)) (mapv :v res))) (defn read-natural @@ -68,18 +74,28 @@ (defn create-table! "Creates a table for the given relation. Swallows already-exists errors, - because YB can't do `create ... if not exists` properly." + because YB can't do `create ... if not exists` properly. + Uses k INT PRIMARY KEY for deterministic ordering — concurrent inserts at + the same position conflict on the PK, ensuring the ordering reflects the + actual serialization order." [conn table-name] + (info "Creating table" table-name) (try (c/execute! conn (j/create-table-ddl table-name - [ - ;[:k :SERIAL] - ;[:k :int] - [:k :timestamp :default "NOW()"] + [[:k :int "PRIMARY KEY"] [:v :int]] {:conditional? true})) + (info "Created table" table-name) (catch com.yugabyte.util.PSQLException e - (when-not (re-find #"already exists" (.getMessage e)) + (if (re-find #"already exists" (.getMessage e)) + (info "Table" table-name "already exists") + (throw e)))) + (try + (c/execute! conn (str "CREATE INDEX idx_" table-name " ON " table-name " (k, v)")) + (info "Created index for" table-name) + (catch com.yugabyte.util.PSQLException e + (if (re-find #"already exists" (.getMessage e)) + (info "Index for" table-name "already exists") (throw e))))) (defn catch-dne @@ -119,22 +135,18 @@ (let [table (table-name k)] [f k (case f :r (read-ordered conn table) - :append (insert! conn table v))])) + :append (insert-using-count! conn table v))])) -(defrecord InternalClient [] +(defrecord InternalClient [isolation] c/YSQLYbClient (setup-cluster! [this test c conn-wrapper]) (invoke-op! [this test op c conn-wrapper] (with-table c - (let [txn (:value op) - use-txn? (< 1 (count txn)) - ; use-txn? false ; Just for making sure the checker actually works - txn' (if use-txn? - (c/with-txn c - (mapv (partial mop! c test) txn)) - (mapv (partial mop! c test) txn))] + (let [txn (:value op) + txn' (j/with-db-transaction [c c {:isolation isolation}] + (mapv (partial mop! c test) txn))] (assoc op :type :ok, :value txn'))))) (c/defclient Client InternalClient) diff --git a/yugabyte/src/yugabyte/ysql/bank.clj b/yugabyte/src/yugabyte/ysql/bank.clj index 5d0f4bafc..c71c0a4d9 100644 --- a/yugabyte/src/yugabyte/ysql/bank.clj +++ b/yugabyte/src/yugabyte/ysql/bank.clj @@ -5,6 +5,7 @@ [yugabyte.ysql.client :as c])) (def table-name "accounts") +(def index-name "idx_accounts") ; ; Single-table bank test @@ -13,10 +14,14 @@ (defn- read-accounts-map "Read {id balance} accounts map from a unified bank table" [op c] - (->> (str "SELECT id, balance FROM " table-name) - (c/query op c) - (map (juxt :id :balance)) - (into (sorted-map)))) + (let [use-index? (zero? (random/long 2))] + (info table-name (if use-index? "IndexOnlyScan" "SeqScan")) + (->> (str (when use-index? + (str "/*+ IndexOnlyScan(" table-name " " index-name ") */ ")) + "SELECT id, balance FROM " table-name) + (c/query op c) + (map (juxt :id :balance)) + (into (sorted-map))))) (defrecord YSQLBankYbClient [allow-negatives? isolation] c/YSQLYbClient @@ -24,6 +29,7 @@ (setup-cluster! [this test c conn-wrapper] (c/execute! c (j/create-table-ddl table-name [[:id :int "PRIMARY KEY"] [:balance :bigint]])) + (c/execute! c (str "CREATE INDEX " index-name " ON " table-name " (id, balance)")) (c/with-retry (info "Creating accounts") (c/insert! c table-name {:id (first (:accounts test)) @@ -71,12 +77,14 @@ (doseq [a (:accounts test)] (let [acc-table-name (str table-name a) + acc-index-name (str index-name a) balance (if (= a (first (:accounts test))) (:total-amount test) 0)] (info "Creating table" a) (c/execute! c (j/create-table-ddl acc-table-name [[:id :int "PRIMARY KEY"] [:balance :bigint]])) + (c/execute! c (str "CREATE INDEX " acc-index-name " ON " acc-table-name " (id, balance)")) (info "Populating account" a " (balance =" balance ")") (c/with-retry @@ -91,7 +99,14 @@ (let [accs (random/shuffle (:accounts test))] (->> accs (mapv (fn [a] - (c/select-single-value op c (str table-name a) :balance (str "id = " a)))) + (let [tbl (str table-name a) + idx (str index-name a) + use-index? (zero? (random/long 2))] + (info tbl (if use-index? "IndexOnlyScan" "SeqScan")) + (if use-index? + (-> (c/query op c (str "/*+ IndexOnlyScan(" tbl " " idx ") */ SELECT balance FROM " tbl " WHERE id = " a)) + first :balance) + (c/select-single-value op c tbl :balance (str "id = " a)))))) (zipmap accs) (assoc op :type :ok, :value)))) diff --git a/yugabyte/src/yugabyte/ysql/counter.clj b/yugabyte/src/yugabyte/ysql/counter.clj index 3db94c4ae..d104cc3c7 100644 --- a/yugabyte/src/yugabyte/ysql/counter.clj +++ b/yugabyte/src/yugabyte/ysql/counter.clj @@ -4,27 +4,36 @@ [clojure.string :as str] [clojure.tools.logging :refer [debug info warn]] [jepsen.client :as client] + [jepsen.random :as random] [jepsen.reconnect :as rc] [yugabyte.ysql.client :as c])) (def table-name "counter") +(def index-name "idx_counter") -(defrecord YSQLCounterYbClient [] +(defrecord YSQLCounterYbClient [isolation] c/YSQLYbClient (setup-cluster! [this test c conn-wrapper] (c/execute! c (j/create-table-ddl table-name [[:id :int "PRIMARY KEY"] [:count :int]])) + (c/execute! c (str "CREATE INDEX " index-name " ON " table-name " (id, count)")) (c/insert! c table-name {:id 0 :count 0})) (invoke-op! [this test op c conn-wrapper] - (case (:f op) - ; update! can't handle column references - :add (do (c/execute! op c [(str "UPDATE " table-name " SET count = count + ? WHERE id = 0") (:value op)]) - (assoc op :type :ok)) - - :read (let [value (c/select-single-value op c table-name :count "id = 0")] - (assoc op :type :ok :value value)))) + (j/with-db-transaction [c c {:isolation isolation}] + (case (:f op) + ; update! can't handle column references + :add (do (c/execute! op c [(str "UPDATE " table-name " SET count = count + ? WHERE id = 0") (:value op)]) + (assoc op :type :ok)) + + :read (let [use-index? (zero? (random/long 2)) + _ (info table-name (if use-index? "IndexOnlyScan" "SeqScan")) + value (if use-index? + (-> (c/query op c (str "/*+ IndexOnlyScan(" table-name " " index-name ") */ SELECT count FROM " table-name " WHERE id = 0")) + first :count) + (c/select-single-value op c table-name :count "id = 0"))] + (assoc op :type :ok :value value))))) (teardown-cluster! [this test c conn-wrapper] (c/drop-table c table-name))) diff --git a/yugabyte/src/yugabyte/ysql/multi_key_acid.clj b/yugabyte/src/yugabyte/ysql/multi_key_acid.clj index b6383e4f7..774570a1c 100644 --- a/yugabyte/src/yugabyte/ysql/multi_key_acid.clj +++ b/yugabyte/src/yugabyte/ysql/multi_key_acid.clj @@ -1,11 +1,14 @@ (ns yugabyte.ysql.multi-key-acid "This test uses INSERT ... ON CONFLICT DO UPDATE" (:require [clojure.java.jdbc :as j] + [clojure.tools.logging :refer [info]] [jepsen.independent :as independent] + [jepsen.random :as random] [jepsen.txn.micro-op :as mop] [yugabyte.ysql.client :as c])) (def table-name "multi_key_acid") +(def index-name "idx_multi_key_acid") (defrecord YSQLMultiKeyAcidYbClient [] c/YSQLYbClient @@ -14,15 +17,20 @@ (c/execute! c (j/create-table-ddl table-name [[:k1 :int] [:k2 :int] [:val :int] - ["PRIMARY KEY" "(k1, k2)"]]))) + ["PRIMARY KEY" "(k1, k2)"]])) + (c/execute! c (str "CREATE INDEX " index-name " ON " table-name " (k2, k1, val)"))) (invoke-op! [this test op c conn-wrapper] (let [[k2 ops] (:value op)] (case (:f op) :read (let [k1s (map mop/key ops) - ; Look up values - vs (->> (str "SELECT k1, val FROM " table-name " WHERE k2 = " k2 " AND k1 " (c/in k1s)) + ; Look up values, randomly using secondary index + use-index? (zero? (random/long 2)) + _ (info table-name (if use-index? "IndexOnlyScan" "SeqScan") "k2=" k2) + vs (->> (str (when use-index? + (str "/*+ IndexOnlyScan(" table-name " " index-name ") */ ")) + "SELECT k1, val FROM " table-name " WHERE k2 = " k2 " AND k1 " (c/in k1s)) (c/query op c) (map (juxt :k1 :val)) (into {})) diff --git a/yugabyte/src/yugabyte/ysql/set.clj b/yugabyte/src/yugabyte/ysql/set.clj index ac79a1c3c..84dc0c042 100644 --- a/yugabyte/src/yugabyte/ysql/set.clj +++ b/yugabyte/src/yugabyte/ysql/set.clj @@ -1,5 +1,6 @@ (ns yugabyte.ysql.set (:require [clojure.java.jdbc :as j] + [clojure.tools.logging :refer [info]] [jepsen.random :as random] [yugabyte.ysql.client :as c])) @@ -9,23 +10,29 @@ ; Regular set test ; -(defrecord YSQLSetYbClient [] +(def regular-index-name "idx_elements") + +(defrecord YSQLSetYbClient [isolation] c/YSQLYbClient (setup-cluster! [this test c conn-wrapper] - (c/execute! c (j/create-table-ddl table-name [[:val :int "PRIMARY KEY"]]))) - + (c/execute! c (j/create-table-ddl table-name [[:val :int "PRIMARY KEY"]])) + (c/execute! c (str "CREATE INDEX " regular-index-name " ON " table-name " (val)"))) (invoke-op! [this test op c conn-wrapper] - (case (:f op) - :add (do (c/insert! c table-name {:val (:value op)}) - (assoc op :type :ok)) - - :read (let [value (->> (str "SELECT val FROM " table-name) - (c/query c) - (mapv :val))] - (assoc op :type :ok, :value value)))) - + (j/with-db-transaction [c c {:isolation isolation}] + (case (:f op) + :add (do (c/insert! c table-name {:val (:value op)}) + (assoc op :type :ok)) + + :read (let [use-index? (zero? (random/long 2)) + value (->> (str (when use-index? + (str "/*+ IndexOnlyScan(" table-name " " regular-index-name ") */ ")) + "SELECT val FROM " table-name) + (c/query c) + (mapv :val))] + (info table-name (if use-index? "IndexOnlyScan" "SeqScan")) + (assoc op :type :ok, :value value))))) (teardown-cluster! [this test c conn-wrapper] (c/drop-table c table-name))) diff --git a/yugabyte/src/yugabyte/ysql/single_key_acid.clj b/yugabyte/src/yugabyte/ysql/single_key_acid.clj index a69ab7fc1..7118d2d25 100644 --- a/yugabyte/src/yugabyte/ysql/single_key_acid.clj +++ b/yugabyte/src/yugabyte/ysql/single_key_acid.clj @@ -1,10 +1,13 @@ (ns yugabyte.ysql.single-key-acid (:require [clojure.java.jdbc :as j] + [clojure.tools.logging :refer [info]] [jepsen.independent :as independent] + [jepsen.random :as random] [yugabyte.single-key-acid :as ska] [yugabyte.ysql.client :as c])) (def table-name "single_key_acid") +(def index-name "idx_single_key_acid") (defrecord YSQLSingleKeyAcidYbClient [] c/YSQLYbClient @@ -12,6 +15,7 @@ (setup-cluster! [this test c conn-wrapper] (c/execute! c (j/create-table-ddl table-name [[:id :int "PRIMARY KEY"] [:val :int]])) + (c/execute! c (str "CREATE INDEX " index-name " ON " table-name " (id, val)")) (doseq [id (range ska/keys-count)] (c/insert! c table-name {:id id :val 0}))) @@ -31,7 +35,12 @@ (assoc op :type (if applied :ok :fail))) :read - (let [value (c/select-single-value c table-name :val (str "id = " id))] + (let [use-index? (zero? (random/long 2)) + _ (info table-name (if use-index? "IndexOnlyScan" "SeqScan") "id=" id) + value (if use-index? + (-> (c/query op c (str "/*+ IndexOnlyScan(" table-name " " index-name ") */ SELECT val FROM " table-name " WHERE id = " id)) + first :val) + (c/select-single-value c table-name :val (str "id = " id)))] (assoc op :type :ok :value (independent/tuple id value)))))) (teardown-cluster! [this test c conn-wrapper]