From 6c1e7796c5051c93aea2db7b9b12d8f10bc4b2e1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 18 Mar 2026 09:23:37 +0000 Subject: [PATCH] feature(jenkins): add cluster reuse support in Jenkins pipelines Add `reuse_cluster` parameter to Jenkins pipelines. When set to an existing test ID, pipeline reuses the original SCT runner and skips provisioning. --- docs/reuse_cluster.md | 66 ++++++++++ sct.py | 52 +++++++- sdcm/cluster.py | 18 ++- sdcm/cluster_aws.py | 1 + sdcm/cluster_azure.py | 1 + sdcm/cluster_gce.py | 1 + sdcm/cluster_oci.py | 1 + sdcm/provision/common/configuration_script.py | 13 +- sdcm/sct_runner.py | 3 - unit_tests/test_sct_runner.py | 115 ++++++++++++++++-- vars/cleanSctRunners.groovy | 17 ++- vars/createArgusTestRun.groovy | 4 + vars/createSctRunner.groovy | 22 ++-- vars/finishArgusTestRun.groovy | 5 + vars/jepsenPipeline.groovy | 3 + vars/longevityPipeline.groovy | 8 +- vars/managerPipeline.groovy | 4 + vars/perfRegressionParallelPipeline.groovy | 5 + vars/provisionResources.groovy | 5 + vars/rollingUpgradePipeline.groovy | 5 + vars/runCleanupResource.groovy | 7 +- vars/runCollectLogs.groovy | 4 + vars/runSctTest.groovy | 4 + vars/runSendEmail.groovy | 4 + 24 files changed, 332 insertions(+), 36 deletions(-) diff --git a/docs/reuse_cluster.md b/docs/reuse_cluster.md index 6b0e9e761ff..0ef115c9f7f 100644 --- a/docs/reuse_cluster.md +++ b/docs/reuse_cluster.md @@ -70,6 +70,72 @@ hydra update-scylla-packages --test-id $SCT_REUSE_CLUSTER -p ~/new_scylla_packag hydra run-test longevity_test.LongevityTest.test_custom_time --backend aws --config test-cases/longevity/longevity-10gb-3h.yaml --config configurations/network_config/test_communication_public.yaml ``` +### Jenkins pipeline reuse + +Cluster reuse is also supported in Jenkins pipelines. This allows re-running a test against an existing cluster directly from the Jenkins UI, without provisioning new infrastructure. + +#### How it works + +When a Jenkins build runs with `post_behavior_*` parameters set to `keep`, the test cluster and the SCT runner VM are both preserved after the build completes. A subsequent build can then reuse the preserved cluster and runner by specifying the original run's `test_id` in the `reuse_cluster` parameter. + +The reuse flow: +1. **SCT runner**: instead of creating a new runner VM, the pipeline looks up the runner from the original test by its `test_id` and reuses it +2. **Provisioning**: the `Provision Resources` stage is skipped entirely +3. **Test execution**: the test runs against the existing cluster using `SCT_REUSE_CLUSTER` +4. **Cleanup**: post-test cleanup respects the current run's `post_behavior` settings — set them to `keep` again to preserve the cluster for another reuse, or `destroy` to tear everything down + +#### Step-by-step + +1. **Run the initial build with `post_behavior_*=keep`** + + In the Jenkins build parameters, set: + - `post_behavior_db_nodes` = `keep` + - `post_behavior_loader_nodes` = `keep` + - `post_behavior_monitor_nodes` = `keep` + + This preserves both the test cluster and the SCT runner after the build finishes. + +2. **Find the test ID** + + The `test_id` of the completed build can be found in: + - The Argus link in the Jenkins build description (the UUID in the URL) + - Jenkins build logs — search for `test_id` + - The `SCT_TEST_ID` value shown in the build's environment variables + +3. **Start a reuse build** + + Trigger a new build of the same pipeline (or a compatible one). In the build parameters: + - Set `reuse_cluster` to the `test_id` from step 2 + - Set `post_behavior_*` to `keep` if you plan to reuse again, or `destroy` to clean up + +4. **Verify the reuse** + + In the build log, look for: + - `"Reuse mode: looking up existing SCT runner"` — confirms runner reuse + - `"Cluster reuse mode: skipping resource provisioning"` — confirms provisioning was skipped + +#### Supported pipelines + +The `reuse_cluster` parameter is available in the following pipeline types: +- Longevity (`longevityPipeline`) +- Manager (`managerPipeline`) +- Rolling Upgrade (`rollingUpgradePipeline`) +- Jepsen (`jepsenPipeline`) +- Performance Regression (`perfRegressionParallelPipeline`) + +#### Safety: runner expiry + +Preserved runners are tagged with a numeric `keep` value (hours from VM launch time) that acts as a safety ceiling. The existing cleanup logic automatically terminates runners once the elapsed time since launch exceeds this value (default: 120 hours / 5 days). + +If a runner expires between builds, the reuse build will fail with an error indicating no runner was found. In that case, run a fresh build without `reuse_cluster`. + +#### Limitations + +- **Same backend required**: the reuse build must use the same cloud backend as the original build +- **Same pipeline type recommended**: while cross-pipeline reuse is technically possible, it is only reliable when both pipelines use compatible test configurations +- **No partial reuse**: you cannot reuse only the DB nodes and create new loaders — the entire test environment is reused +- **Runner state**: a reused runner may have artifacts from the previous run; if the reuse build fails due to runner issues, run a fresh build + ### Scylla Cloud (xcloud) backend When reusing clusters deployed in Scylla Cloud backend, the test environment consists of: diff --git a/sct.py b/sct.py index faa8a1cff1a..8276ae5869d 100755 --- a/sct.py +++ b/sct.py @@ -1894,8 +1894,9 @@ def send_email( email_recipients = email_recipients.split(",") sct_config = SCTConfiguration() - LOGGER.info("Sending email for test %s...", test_id) - client = init_argus_client(os.environ.get("SCT_TEST_ID")) + argus_test_id = os.environ.get("SCT_REUSE_CLUSTER") or os.environ.get("SCT_TEST_ID") + LOGGER.info("Sending email for test %s...", argus_test_id) + client = init_argus_client(argus_test_id) run = client.get_run() title_template_data = {**dict(sct_config), **run} @@ -2202,6 +2203,7 @@ def create_runner_instance( @cli.command("set-runner-tags") @click.argument("runner-ip", type=str) +@click.option("-b", "--backend", type=click.Choice(available_backends), help="Cloud backend to use") @click.option( "-t", "--tags", @@ -2209,9 +2211,51 @@ def create_runner_instance( help="Space separated key value pair to add as a new tag to the runner", multiple=True, ) -def set_runner_tags(runner_ip, tags): +def set_runner_tags(runner_ip, backend, tags): add_file_logger() - update_sct_runner_tags(test_runner_ip=runner_ip, tags=dict(tags)) + update_sct_runner_tags(test_runner_ip=runner_ip, backend=backend, tags=dict(tags)) + + +@cli.command("find-runner-instance", help="Find an existing SCT runner by test ID and write its IP to sct_runner_ip") +@click.option("-t", "--test-id", required=True, type=str, help="Test ID to find the runner for") +@click.option("-b", "--backend", type=click.Choice(available_backends), help="Cloud backend to use") +@click.option( + "-d", "--duration", required=False, type=int, default=0, help="New test duration in minutes (extends keep tag)" +) +def find_runner_instance(test_id, backend, duration): + add_file_logger() + sct_runner_ip_path = Path("sct_runner_ip") + sct_runner_ip_path.unlink(missing_ok=True) + + runners = list_sct_runners(backend=backend, test_id=test_id) + if not runners: + LOGGER.error("No SCT runner found for test_id: %s", test_id) + sys.exit(1) + + runner = runners[0] + if not runner.public_ips: + LOGGER.error("SCT runner %s has no public IPs", runner.instance_name) + sys.exit(1) + + runner_ip = runner.public_ips[0] + LOGGER.info( + "Found SCT runner %s at %s for test %s", + runner.instance_name, + runner_ip, + test_id, + ) + + if duration: + # keep is relative to launch_time, so add elapsed hours to give the test enough time + elapsed_hours = ( + int((datetime.now(UTC) - runner.launch_time).total_seconds() / 3600) if runner.launch_time else 0 + ) + tags_to_update = {"keep": str(elapsed_hours + int(duration / 60) + 6), "keep_action": "terminate"} + update_sct_runner_tags(backend=backend, test_runner_ip=runner_ip, tags=tags_to_update) + LOGGER.info("Updated runner tags: %s", tags_to_update) + + sct_runner_ip_path.write_text(runner_ip) + LOGGER.info("SCT Runner IP written to %s: %s", sct_runner_ip_path, runner_ip) @cli.command("clean-runner-instances", help="Clean all unused SCT runner instances") diff --git a/sdcm/cluster.py b/sdcm/cluster.py index 2a23330704e..aa383e73474 100644 --- a/sdcm/cluster.py +++ b/sdcm/cluster.py @@ -67,7 +67,7 @@ from sdcm.prometheus import start_metrics_server, PrometheusAlertManagerListener, AlertSilencer from sdcm.log import SDCMAdapter from sdcm.provision.common.configuration_script import ConfigurationScriptBuilder -from sdcm.provision.common.utils import disable_daily_apt_triggers +from sdcm.provision.common.utils import configure_vector_target_script, disable_daily_apt_triggers from sdcm.provision.scylla_yaml import ScyllaYamlNodeAttrBuilder from sdcm.provision.scylla_yaml.certificate_builder import ScyllaYamlCertificateAttrBuilder from sdcm.provision.scylla_yaml.cluster_builder import ScyllaYamlClusterAttrBuilder @@ -316,6 +316,18 @@ class NodeCleanedAfterDecommissionAborted(Exception): """raise after decommission aborted and node cleaned from group0(Raft)""" +def reconfigure_vector_on_node(node, params, test_config): + """Update vector config on a reused node to point to the current runner's vector container.""" + if params.get("logs_transport") != "vector" or not test_config.VECTOR_ADDRESS: + return + + host, port = test_config.VECTOR_ADDRESS + node.log.info("Reconfiguring vector to %s:%s", host, port) + script = configure_vector_target_script(host=host, port=port) + node.remoter.sudo(shell_script_cmd(script, quote="'")) + node.remoter.sudo("systemctl restart vector.service") + + def prepend_user_prefix(user_prefix: str, base_name: str): return "%s-%s" % (user_prefix, base_name) @@ -6066,7 +6078,7 @@ def _scylla_post_install(node: BaseNode, new_scylla_installed: bool, devname: st node.scylla_setup(disks, devname) def _reuse_cluster_setup(self, node): - pass + reconfigure_vector_on_node(node, self.params, self.test_config) def _generate_db_node_certs(self, node): """Generate per-node SSL certificates for a DB node""" @@ -6445,6 +6457,7 @@ def node_setup(self, node, verbose=False, **kwargs): if TestConfig().REUSE_CLUSTER: self.kill_stress_thread() + reconfigure_vector_on_node(node, self.params, TestConfig()) if self.params.get("client_encrypt") and not (node.ssl_conf_dir / TLSAssets.CLIENT_CERT).exists(): self._generate_loader_certs(node) install_client_certificate(node.remoter, node.ip_address, force=True) @@ -6697,6 +6710,7 @@ def node_setup(self, node, **kwargs): self.mgmt_auth_token = self.monitor_id if self.test_config.REUSE_CLUSTER: + reconfigure_vector_on_node(node, self.params, self.test_config) self.configure_scylla_monitoring(node) self.restart_scylla_monitoring(sct_metrics=True) set_grafana_url(f"http://{normalize_ipv6_url(node.external_address)}:{self.grafana_port}") diff --git a/sdcm/cluster_aws.py b/sdcm/cluster_aws.py index cc9ec078217..c5280af661a 100644 --- a/sdcm/cluster_aws.py +++ b/sdcm/cluster_aws.py @@ -1190,6 +1190,7 @@ def _scylla_post_install(self, node: AWSNode, new_scylla_installed: bool, devnam node.set_web_listen_address() def _reuse_cluster_setup(self, node): + super()._reuse_cluster_setup(node) node.run_startup_script() # Reconfigure syslog-ng. def destroy(self): diff --git a/sdcm/cluster_azure.py b/sdcm/cluster_azure.py index 7b87858eb2c..81d354d8be7 100644 --- a/sdcm/cluster_azure.py +++ b/sdcm/cluster_azure.py @@ -342,6 +342,7 @@ def _wait_for_preinstalled_scylla(node): node.wait_for_machine_image_configured() def _reuse_cluster_setup(self, node: AzureNode) -> None: + super()._reuse_cluster_setup(node) node.run_startup_script() diff --git a/sdcm/cluster_gce.py b/sdcm/cluster_gce.py index 06f0c966cd8..3e951f876bb 100644 --- a/sdcm/cluster_gce.py +++ b/sdcm/cluster_gce.py @@ -564,6 +564,7 @@ def _wait_for_preinstalled_scylla(node): node.wait_for_machine_image_configured() def _reuse_cluster_setup(self, node: GCENode) -> None: + super()._reuse_cluster_setup(node) node.run_startup_script() diff --git a/sdcm/cluster_oci.py b/sdcm/cluster_oci.py index 0eec728ad6e..8080f2ade79 100644 --- a/sdcm/cluster_oci.py +++ b/sdcm/cluster_oci.py @@ -403,6 +403,7 @@ def _wait_for_preinstalled_scylla(node): node.wait_for_machine_image_configured() def _reuse_cluster_setup(self, node: OciNode) -> None: + super()._reuse_cluster_setup(node) node.run_startup_script() diff --git a/sdcm/provision/common/configuration_script.py b/sdcm/provision/common/configuration_script.py index bbac874bae6..a191e903500 100644 --- a/sdcm/provision/common/configuration_script.py +++ b/sdcm/provision/common/configuration_script.py @@ -75,19 +75,24 @@ def _skip_if_already_run_syslogng() -> str: fi """) - @staticmethod - def _skip_if_already_run_vector() -> str: + def _skip_if_already_run_vector(self) -> str: """ If a node was configured before sct-runner, skip vector installation. Just ensure that logging destination is updated in the configuration and the service is restarted, to retrigger sending logs. """ - return dedent(f""" + host, port = self.syslog_host_port + vector_config = configure_vector_target_script(host=host, port=port) + return ( + dedent(f""" if [ -f {CLOUD_INIT_SCRIPTS_PATH}/done ] && command -v vector >/dev/null 2>&1; then - sudo systemctl restart vector + """) + + vector_config + + dedent(""" exit 0 fi """) + ) @staticmethod def _mark_script_as_done() -> str: diff --git a/sdcm/sct_runner.py b/sdcm/sct_runner.py index 83b725e5008..a40e03fb91e 100644 --- a/sdcm/sct_runner.py +++ b/sdcm/sct_runner.py @@ -2063,9 +2063,6 @@ def clean_sct_runners( ) if not force and sct_runner_info.keep: - if "alive" in str(sct_runner_info.keep): - LOGGER.info("Skip %s because `keep' == `alive. No runners have been terminated'", sct_runner_info) - continue if sct_runner_info.keep_action != "terminate": LOGGER.info("Skip %s because keep_action `keep_action' != `terminate'", sct_runner_info) continue diff --git a/unit_tests/test_sct_runner.py b/unit_tests/test_sct_runner.py index fb1e7f2de6e..98e3e1bdb74 100644 --- a/unit_tests/test_sct_runner.py +++ b/unit_tests/test_sct_runner.py @@ -11,12 +11,15 @@ # # Copyright (c) 2025 ScyllaDB +from datetime import datetime, timedelta, timezone from unittest.mock import MagicMock, patch -from datetime import datetime, timezone + +import pytest from sdcm.sct_runner import ( list_sct_runners, clean_sct_runners, + update_sct_runner_tags, SctRunnerInfo, AwsSctRunner, GceSctRunner, @@ -286,23 +289,19 @@ def test_clean_sct_runners_force(self, mock_list_runners, mock_ssh_cmd): @patch("sdcm.sct_runner.ssh_run_cmd") @patch("sdcm.sct_runner.list_sct_runners") def test_clean_sct_runners_respect_keep_tags(self, mock_list_runners, mock_ssh_cmd): - """Test keep tags are respected when clean is not forced.""" - # runner with keep 'alive' tag - mock_runner_keep_alive = MagicMock( - keep="alive", + """Test numeric keep tags are respected when clean is not forced.""" + # runner with numeric keep value and recent launch (not expired) + mock_runner = MagicMock( + keep="120", keep_action="terminate", launch_time=datetime.now(timezone.utc), - public_ips=["1.2.3.4"], - cloud_provider="aws", - instance_name="test-runner-alive", - region_az="us-east-1a", ) - mock_list_runners.return_value = [mock_runner_keep_alive] + mock_list_runners.return_value = [mock_runner] mock_ssh_cmd.return_value = MagicMock(stdout="") clean_sct_runners(test_status="", user="test_user", force=False, dry_run=False) - mock_runner_keep_alive.terminate.assert_not_called() + mock_runner.terminate.assert_not_called() @patch("sdcm.sct_runner.list_sct_runners") def test_clean_sct_runners_no_runners_found(self, mock_list_runners): @@ -311,3 +310,97 @@ def test_clean_sct_runners_no_runners_found(self, mock_list_runners): clean_sct_runners(test_status="", user="nonexistent_user", dry_run=True) mock_list_runners.assert_called_once() + + @patch("sdcm.sct_runner.ssh_run_cmd") + @patch("sdcm.sct_runner.list_sct_runners") + def test_clean_sct_runners_terminates_expired_runner(self, mock_list_runners, mock_ssh_cmd): + """Test that runner past its numeric keep hours is terminated.""" + mock_runner = MagicMock( + keep="24", + keep_action="terminate", + launch_time=datetime.now(timezone.utc) - timedelta(hours=25), + ) + mock_list_runners.return_value = [mock_runner] + mock_ssh_cmd.return_value = MagicMock(stdout="") + + clean_sct_runners(test_status="", force=False, dry_run=False) + mock_runner.terminate.assert_called_once() + + @patch("sdcm.sct_runner.ssh_run_cmd") + @patch("sdcm.sct_runner.list_sct_runners") + def test_clean_sct_runners_skips_runner_without_terminate_action(self, mock_list_runners, mock_ssh_cmd): + """Test that runner with keep_action != 'terminate' is not terminated.""" + mock_runner = MagicMock( + keep="120", + keep_action="none", + launch_time=datetime.now(timezone.utc), + ) + mock_list_runners.return_value = [mock_runner] + mock_ssh_cmd.return_value = MagicMock(stdout="") + + clean_sct_runners(test_status="", force=False, dry_run=False) + mock_runner.terminate.assert_not_called() + + +class TestFindRunnerInstance: + """Test the find-runner-instance logic (list_sct_runners + update_sct_runner_tags orchestration).""" + + @pytest.fixture + def runner_info(self): + return SctRunnerInfo( + sct_runner_class=AwsSctRunner, + cloud_service_instance=None, + region_az="us-east-1a", + instance=MagicMock(), + instance_name="reuse-runner-1", + public_ips=["10.0.0.1"], + test_id="original-test-id", + keep="120", + keep_action="terminate", + ) + + @patch("sdcm.sct_runner.AwsSctRunner.list_sct_runners") + def test_find_runner_by_test_id(self, mock_aws_list, runner_info): + """Test that list_sct_runners finds a runner by test_id for reuse.""" + mock_aws_list.return_value = [runner_info] + + runners = list_sct_runners(backend="aws", test_id="original-test-id", verbose=False) + + assert len(runners) == 1 + assert runners[0].public_ips == ["10.0.0.1"] + assert runners[0].instance_name == "reuse-runner-1" + + @patch("sdcm.sct_runner.AwsSctRunner.list_sct_runners") + def test_find_runner_returns_empty_for_unknown_test_id(self, mock_aws_list, runner_info): + """Test that no runner is found for an unknown test_id.""" + mock_aws_list.return_value = [runner_info] + runners = list_sct_runners(backend="aws", test_id="nonexistent-test-id", verbose=False) + assert len(runners) == 0 + + @patch("sdcm.sct_runner.list_sct_runners") + def test_update_tags_on_reuse(self, mock_list, runner_info): + """Test that runner keep/keep_action tags can be updated on reuse.""" + mock_list.return_value = [runner_info] + runner_info.sct_runner_class = MagicMock() + + update_sct_runner_tags( + backend="aws", + test_runner_ip="10.0.0.1", + tags={"keep": "12", "keep_action": "terminate"}, + ) + + runner_info.sct_runner_class.set_tags.assert_called_once_with( + runner_info, + tags={"keep": "12", "keep_action": "terminate"}, + ) + + @pytest.mark.parametrize( + "elapsed_hours,duration_minutes,expected", + [ + (48, 360, "60"), + (2, 120, "10"), + ], + ) + def test_keep_tag_calculation_from_duration(self, elapsed_hours, duration_minutes, expected): + """Test the keep tag value: elapsed_hours + duration_minutes / 60 + 6 hours buffer.""" + assert str(elapsed_hours + int(duration_minutes / 60) + 6) == expected diff --git a/vars/cleanSctRunners.groovy b/vars/cleanSctRunners.groovy index ae26f76b959..3defc7c89c1 100644 --- a/vars/cleanSctRunners.groovy +++ b/vars/cleanSctRunners.groovy @@ -11,6 +11,10 @@ def call(Map params, RunWrapper currentBuild){ def test_status = currentBuild.currentResult + def shouldKeepRunner = ['db_nodes', 'loader_nodes', 'monitor_nodes'].every { + params."post_behavior_${it}" == 'keep' + } + sh """#!/bin/bash set -xe @@ -21,9 +25,16 @@ def call(Map params, RunWrapper currentBuild){ echo "Starting to clean runner instances" if [[ "$cloud_provider" == "aws" || "$cloud_provider" == "gce" || "$cloud_provider" == "azure" || "$cloud_provider" == "oci" ]]; then export RUNNER_IP=\$(cat sct_runner_ip||echo "") - ./docker/env/hydra.sh clean-runner-instances \ - --test-status "$test_status" --runner-ip \${RUNNER_IP} --backend "$cloud_provider" - + if [[ "${shouldKeepRunner}" == "true" ]]; then + echo "All post_behavior_* settings are 'keep'. Preserving SCT runner for cluster reuse." + # keep=120 (5 days from launch) acts as a safety ceiling; + # the existing cleanup logic terminates runners once elapsed hours exceed the keep value + ./docker/env/hydra.sh set-runner-tags \${RUNNER_IP} --backend "$cloud_provider" \ + --tags keep 120 --tags keep_action terminate + else + ./docker/env/hydra.sh clean-runner-instances \ + --test-status "$test_status" --runner-ip \${RUNNER_IP} --backend "$cloud_provider" + fi else echo "Not running on AWS, GCP nor Azure. Skipping cleaning runner instances." fi diff --git a/vars/createArgusTestRun.groovy b/vars/createArgusTestRun.groovy index 16fb7474412..257f95e2207 100644 --- a/vars/createArgusTestRun.groovy +++ b/vars/createArgusTestRun.groovy @@ -13,6 +13,10 @@ def call(Map params) { export SCT_CLUSTER_BACKEND="${params.backend}" export SCT_CONFIG_FILES=${test_config} + if [[ -n "${params.reuse_cluster ?: ''}" ]] ; then + export SCT_REUSE_CLUSTER="${params.reuse_cluster}" + fi + if [[ "${params.backend}" == "xcloud" ]] ; then export SCT_XCLOUD_PROVIDER="${params.xcloud_provider}" export SCT_XCLOUD_ENV="${params.xcloud_env}" diff --git a/vars/createSctRunner.groovy b/vars/createSctRunner.groovy index 110907a2004..8efdb6605b8 100644 --- a/vars/createSctRunner.groovy +++ b/vars/createSctRunner.groovy @@ -53,13 +53,21 @@ def call(Map params, Integer test_duration, String region) { export BUILD_USER_REQUESTED_BY=${params.requested_by_user} fi - ./docker/env/hydra.sh create-runner-instance \ - --cloud-provider ${cloud_provider} \ - $region_zone_arg \ - $availability_zone_arg \ - --test-id \${SCT_TEST_ID} \ - --duration ${test_duration} \ - --test-name ${test_name} + if [[ -n "${params.reuse_cluster ?: ''}" ]] ; then + echo "Reuse mode: looking up existing SCT runner for test_id=${params.reuse_cluster}" + ./docker/env/hydra.sh find-runner-instance \ + --test-id "${params.reuse_cluster}" \ + --backend "${params.backend}" \ + --duration ${test_duration} + else + ./docker/env/hydra.sh create-runner-instance \ + --cloud-provider ${cloud_provider} \ + $region_zone_arg \ + $availability_zone_arg \ + --test-id \${SCT_TEST_ID} \ + --duration ${test_duration} \ + --test-name ${test_name} + fi else echo "Currently, <$cloud_provider> not supported to. Will run on regular builder." diff --git a/vars/finishArgusTestRun.groovy b/vars/finishArgusTestRun.groovy index cf8c8b83888..19fe1ecc05e 100644 --- a/vars/finishArgusTestRun.groovy +++ b/vars/finishArgusTestRun.groovy @@ -12,6 +12,11 @@ def call(Map params, RunWrapper currentBuild) { export SCT_CLUSTER_BACKEND="${params.backend}" export SCT_CONFIG_FILES=${test_config} + + if [[ -n "${params.reuse_cluster ?: ''}" ]] ; then + export SCT_REUSE_CLUSTER="${params.reuse_cluster}" + fi + if [[ "${params.backend}" == "xcloud" ]] ; then export SCT_XCLOUD_PROVIDER="${params.xcloud_provider}" export SCT_XCLOUD_ENV="${params.xcloud_env}" diff --git a/vars/jepsenPipeline.groovy b/vars/jepsenPipeline.groovy index fc1fb677fcf..2eb1d46f1bf 100644 --- a/vars/jepsenPipeline.groovy +++ b/vars/jepsenPipeline.groovy @@ -55,6 +55,9 @@ def call(Map pipelineParams) { string(defaultValue: "${pipelineParams.get('post_behavior_monitor_nodes', 'destroy')}", description: 'keep|keep-on-failure|destroy', name: 'post_behavior_monitor_nodes') + string(defaultValue: '', + description: 'Test ID of an existing cluster to reuse. When set, provisioning is skipped and the existing cluster is used.', + name: 'reuse_cluster') string(defaultValue: "${pipelineParams.get('provision_type', 'on_demand')}", description: 'spot|on_demand|spot_fleet', name: 'provision_type') diff --git a/vars/longevityPipeline.groovy b/vars/longevityPipeline.groovy index 695855e9f35..b014f4c9481 100644 --- a/vars/longevityPipeline.groovy +++ b/vars/longevityPipeline.groovy @@ -103,6 +103,12 @@ def call(Map pipelineParams) { description: 'keep|keep-on-failure|destroy', name: 'post_behavior_vector_store_nodes') + // Cluster Reuse + separator(name: 'CLUSTER_REUSE', sectionHeader: 'Cluster Reuse') + string(defaultValue: '', + description: 'Test ID of an existing cluster to reuse. When set, provisioning is skipped and the existing cluster is used.', + name: 'reuse_cluster') + // SSH Configuration separator(name: 'SSH_CONFIG', sectionHeader: 'SSH Configuration') string(defaultValue: "${pipelineParams.get('ip_ssh_connections', 'private')}", @@ -319,7 +325,7 @@ def call(Map pipelineParams) { script { wrap([$class: 'BuildUser']) { dir('scylla-cluster-tests') { - timeout(time: 5, unit: 'MINUTES') { + timeout(time: params.reuse_cluster ? 10 : 5, unit: 'MINUTES') { createSctRunner(params, runnerTimeout , builder.region) } } diff --git a/vars/managerPipeline.groovy b/vars/managerPipeline.groovy index 9770a94d185..27bb273ff38 100644 --- a/vars/managerPipeline.groovy +++ b/vars/managerPipeline.groovy @@ -114,6 +114,10 @@ def call(Map pipelineParams) { string(defaultValue: "${pipelineParams.get('post_behavior_monitor_nodes', 'destroy')}", description: 'keep|keep-on-failure|destroy', name: 'post_behavior_monitor_nodes') + separator(name: 'CLUSTER_REUSE', sectionHeader: 'Cluster Reuse') + string(defaultValue: '', + description: 'Test ID of an existing cluster to reuse. When set, provisioning is skipped and the existing cluster is used.', + name: 'reuse_cluster') separator(name: 'SSH_CONFIG', sectionHeader: 'SSH Configuration') string(defaultValue: "${pipelineParams.get('ip_ssh_connections', 'private')}", description: 'private|public|ipv6', diff --git a/vars/perfRegressionParallelPipeline.groovy b/vars/perfRegressionParallelPipeline.groovy index 5cd1147199b..17412564aaa 100644 --- a/vars/perfRegressionParallelPipeline.groovy +++ b/vars/perfRegressionParallelPipeline.groovy @@ -85,6 +85,11 @@ def call(Map pipelineParams) { string(defaultValue: "${pipelineParams.get('post_behavior_k8s_cluster', 'destroy')}", description: 'keep|keep-on-failure|destroy', name: 'post_behavior_k8s_cluster') + // Cluster Reuse + separator(name: 'CLUSTER_REUSE', sectionHeader: 'Cluster Reuse') + string(defaultValue: '', + description: 'Test ID of an existing cluster to reuse. When set, provisioning is skipped and the existing cluster is used.', + name: 'reuse_cluster') // Performance Test Configuration separator(name: 'PERF_TEST', sectionHeader: 'Performance Test Configuration') string(defaultValue: "false", diff --git a/vars/provisionResources.groovy b/vars/provisionResources.groovy index 1389a8b257d..511a6d8ef21 100644 --- a/vars/provisionResources.groovy +++ b/vars/provisionResources.groovy @@ -1,6 +1,11 @@ #!groovy def call(Map params, String region){ + if (params.reuse_cluster) { + echo "Cluster reuse mode: skipping resource provisioning" + return + } + def current_region = initAwsRegionParam(params.region, region) def test_config = groovy.json.JsonOutput.toJson(params.test_config) def cloud_provider = getCloudProviderFromBackend(params.backend) diff --git a/vars/rollingUpgradePipeline.groovy b/vars/rollingUpgradePipeline.groovy index 7ada0295f26..f4dad36f474 100644 --- a/vars/rollingUpgradePipeline.groovy +++ b/vars/rollingUpgradePipeline.groovy @@ -72,6 +72,11 @@ def call(Map pipelineParams) { description: 'keep|keep-on-failure|destroy', name: 'post_behavior_k8s_cluster') string(defaultValue: '', description: 'scylla option: internode_compression', name: 'internode_compression') + // Cluster Reuse + separator(name: 'CLUSTER_REUSE', sectionHeader: 'Cluster Reuse') + string(defaultValue: '', + description: 'Test ID of an existing cluster to reuse. When set, provisioning is skipped and the existing cluster is used.', + name: 'reuse_cluster') separator(name: 'EMAIL_TEST', sectionHeader: 'Email and Test Configuration') string(defaultValue: "${pipelineParams.get('email_recipients', 'qa@scylladb.com')}", description: 'email recipients of email report', diff --git a/vars/runCleanupResource.groovy b/vars/runCleanupResource.groovy index 8b101d72d10..f5263e13404 100644 --- a/vars/runCleanupResource.groovy +++ b/vars/runCleanupResource.groovy @@ -17,6 +17,10 @@ def call(Map params, String region){ export SCT_CONFIG_FILES=${test_config} export SCT_CLUSTER_BACKEND="${params.backend}" + if [[ -n "${params.reuse_cluster ?: ''}" ]] ; then + export SCT_REUSE_CLUSTER="${params.reuse_cluster}" + fi + if [[ "${params.backend}" == "xcloud" ]] ; then export SCT_XCLOUD_PROVIDER="${params.xcloud_provider}" export SCT_XCLOUD_ENV="${params.xcloud_env}" @@ -54,7 +58,8 @@ def call(Map params, String region){ echo "Starting to clean resources ..." RUNNER_IP=\$(cat sct_runner_ip||echo "") if [[ -n "\${RUNNER_IP}" ]] ; then - ./docker/env/hydra.sh --execute-on-runner \${RUNNER_IP} clean-resources --post-behavior --test-id \$SCT_TEST_ID + CLEANUP_TEST_ID=\${SCT_REUSE_CLUSTER:-\$SCT_TEST_ID} + ./docker/env/hydra.sh --execute-on-runner \${RUNNER_IP} clean-resources --post-behavior --test-id \$CLEANUP_TEST_ID else ./docker/env/hydra.sh clean-resources --post-behavior --logdir "`pwd`" fi diff --git a/vars/runCollectLogs.groovy b/vars/runCollectLogs.groovy index 00053e409be..e181a22f8e0 100644 --- a/vars/runCollectLogs.groovy +++ b/vars/runCollectLogs.groovy @@ -30,6 +30,10 @@ def call(Map params, String region){ export SCT_CONFIG_FILES=${test_config} + if [[ -n "${params.reuse_cluster ?: ''}" ]] ; then + export SCT_REUSE_CLUSTER="${params.reuse_cluster}" + fi + echo "start collect logs ..." RUNNER_IP=\$(cat sct_runner_ip||echo "") if [[ -n "\${RUNNER_IP}" ]] ; then diff --git a/vars/runSctTest.groovy b/vars/runSctTest.groovy index 33d6926e16a..d9b14593a33 100644 --- a/vars/runSctTest.groovy +++ b/vars/runSctTest.groovy @@ -31,6 +31,10 @@ def call(Map params, String region, functional_test = false, Map pipelineParams export SCT_CONFIG_FILES=${test_config} export SCT_COLLECT_LOGS=false + if [[ -n "${params.reuse_cluster ?: ''}" ]] ; then + export SCT_REUSE_CLUSTER="${params.reuse_cluster}" + fi + if [[ "${params.backend}" == "xcloud" ]] ; then export SCT_XCLOUD_PROVIDER="${params.xcloud_provider}" export SCT_XCLOUD_ENV="${params.xcloud_env}" diff --git a/vars/runSendEmail.groovy b/vars/runSendEmail.groovy index 63be8f75ae3..e04b454255f 100644 --- a/vars/runSendEmail.groovy +++ b/vars/runSendEmail.groovy @@ -23,6 +23,10 @@ def call(Map params, RunWrapper currentBuild){ RUNNER_IP=\$(cat sct_runner_ip||echo "") export SCT_CONFIG_FILES=${test_config} + if [[ -n "${params.reuse_cluster ?: ''}" ]] ; then + export SCT_REUSE_CLUSTER="${params.reuse_cluster}" + fi + if [[ -z "${email_recipients}" ]]; then echo "Email was not sent because no recipient addresses were provided" else