Merged
66 changes: 66 additions & 0 deletions docs/reuse_cluster.md
@@ -70,6 +70,72 @@ hydra update-scylla-packages --test-id $SCT_REUSE_CLUSTER -p ~/new_scylla_packag
hydra run-test longevity_test.LongevityTest.test_custom_time --backend aws --config test-cases/longevity/longevity-10gb-3h.yaml --config configurations/network_config/test_communication_public.yaml
```

### Jenkins pipeline reuse

Cluster reuse is also supported in Jenkins pipelines. This allows re-running a test against an existing cluster directly from the Jenkins UI, without provisioning new infrastructure.

#### How it works

When a Jenkins build runs with `post_behavior_*` parameters set to `keep`, the test cluster and the SCT runner VM are both preserved after the build completes. A subsequent build can then reuse the preserved cluster and runner by specifying the original run's `test_id` in the `reuse_cluster` parameter.

The reuse flow:
1. **SCT runner**: instead of creating a new runner VM, the pipeline looks up the runner from the original test by its `test_id` and reuses it
2. **Provisioning**: the `Provision Resources` stage is skipped entirely
3. **Test execution**: the test runs against the existing cluster using `SCT_REUSE_CLUSTER`
4. **Cleanup**: post-test cleanup respects the current run's `post_behavior` settings — set them to `keep` again to preserve the cluster for another reuse, or `destroy` to tear everything down
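The flow above can be sketched as a small stage planner. This is a hypothetical illustration in Python, not the actual Jenkins pipeline code; `plan_build` and the stage names are invented here, while the parameter names mirror the build parameters described in this section:

```python
def plan_build(params: dict) -> list[str]:
    """Return the stages a reuse-aware build would run (illustrative only)."""
    stages = []
    if params.get("reuse_cluster"):
        # Reuse mode: look up the existing runner by test_id and skip
        # the "Provision Resources" stage entirely.
        stages.append("find-runner-instance")
    else:
        stages.append("create-runner-instance")
        stages.append("provision-resources")
    stages.append("run-test")
    # Post-test cleanup respects the current run's post_behavior settings.
    if params.get("post_behavior_db_nodes") != "keep":
        stages.append("cleanup")
    return stages
```

With `reuse_cluster` set, provisioning disappears from the plan; with `post_behavior_db_nodes=keep`, cleanup is skipped so the cluster survives for the next build.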

#### Step-by-step

1. **Run the initial build with `post_behavior_*=keep`**

In the Jenkins build parameters, set:
- `post_behavior_db_nodes` = `keep`
- `post_behavior_loader_nodes` = `keep`
- `post_behavior_monitor_nodes` = `keep`

This preserves both the test cluster and the SCT runner after the build finishes.

2. **Find the test ID**

The `test_id` of the completed build can be found in:
- The Argus link in the Jenkins build description (the UUID in the URL)
- Jenkins build logs — search for `test_id`
- The `SCT_TEST_ID` value shown in the build's environment variables

3. **Start a reuse build**

Trigger a new build of the same pipeline (or a compatible one). In the build parameters:
- Set `reuse_cluster` to the `test_id` from step 2
- Set `post_behavior_*` to `keep` if you plan to reuse again, or `destroy` to clean up

4. **Verify the reuse**

In the build log, look for:
- `"Reuse mode: looking up existing SCT runner"` — confirms runner reuse
- `"Cluster reuse mode: skipping resource provisioning"` — confirms provisioning was skipped

#### Supported pipelines

The `reuse_cluster` parameter is available in the following pipeline types:
- Longevity (`longevityPipeline`)
- Manager (`managerPipeline`)
- Rolling Upgrade (`rollingUpgradePipeline`)
- Jepsen (`jepsenPipeline`)
- Performance Regression (`perfRegressionParallelPipeline`)

#### Safety: runner expiry

Preserved runners are tagged with a numeric `keep` value (hours from VM launch time) that acts as a safety ceiling. The existing cleanup logic automatically terminates runners once the elapsed time since launch exceeds this value (default: 120 hours / 5 days).

If a runner expires between builds, the reuse build will fail with an error indicating no runner was found. In that case, run a fresh build without `reuse_cluster`.
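The ceiling check amounts to comparing elapsed time since launch against the `keep` value. A minimal sketch, assuming a timezone-aware launch time; `runner_expired` is an illustrative name, the real logic lives in SCT's runner cleanup:

```python
from datetime import datetime, timedelta, timezone

def runner_expired(launch_time: datetime, keep_hours: int = 120) -> bool:
    """True once elapsed time since launch exceeds the keep ceiling."""
    elapsed = datetime.now(timezone.utc) - launch_time
    return elapsed > timedelta(hours=keep_hours)

# A runner launched 6 days ago exceeds the default 120-hour (5-day) ceiling:
print(runner_expired(datetime.now(timezone.utc) - timedelta(days=6)))  # True
```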

#### Limitations

- **Same backend required**: the reuse build must use the same cloud backend as the original build
- **Same pipeline type recommended**: while cross-pipeline reuse is technically possible, it is only reliable when both pipelines use compatible test configurations
- **No partial reuse**: you cannot reuse only the DB nodes and create new loaders — the entire test environment is reused
- **Runner state**: a reused runner may have artifacts from the previous run; if the reuse build fails due to runner issues, run a fresh build

### Scylla Cloud (xcloud) backend

When reusing clusters deployed in Scylla Cloud backend, the test environment consists of:
52 changes: 48 additions & 4 deletions sct.py
@@ -1894,8 +1894,9 @@ def send_email(
email_recipients = email_recipients.split(",")
sct_config = SCTConfiguration()

-    LOGGER.info("Sending email for test %s...", test_id)
-    client = init_argus_client(os.environ.get("SCT_TEST_ID"))
+    argus_test_id = os.environ.get("SCT_REUSE_CLUSTER") or os.environ.get("SCT_TEST_ID")
+    LOGGER.info("Sending email for test %s...", argus_test_id)
+    client = init_argus_client(argus_test_id)
run = client.get_run()
title_template_data = {**dict(sct_config), **run}

@@ -2202,16 +2203,59 @@ def create_runner_instance(

@cli.command("set-runner-tags")
@click.argument("runner-ip", type=str)
@click.option("-b", "--backend", type=click.Choice(available_backends), help="Cloud backend to use")
@click.option(
"-t",
"--tags",
type=(str, str),
help="Space separated key value pair to add as a new tag to the runner",
multiple=True,
)
-def set_runner_tags(runner_ip, tags):
+def set_runner_tags(runner_ip, backend, tags):
     add_file_logger()
-    update_sct_runner_tags(test_runner_ip=runner_ip, tags=dict(tags))
+    update_sct_runner_tags(test_runner_ip=runner_ip, backend=backend, tags=dict(tags))


@cli.command("find-runner-instance", help="Find an existing SCT runner by test ID and write its IP to sct_runner_ip")
@click.option("-t", "--test-id", required=True, type=str, help="Test ID to find the runner for")
@click.option("-b", "--backend", type=click.Choice(available_backends), help="Cloud backend to use")
@click.option(
"-d", "--duration", required=False, type=int, default=0, help="New test duration in minutes (extends keep tag)"
)
def find_runner_instance(test_id, backend, duration):
add_file_logger()
sct_runner_ip_path = Path("sct_runner_ip")
sct_runner_ip_path.unlink(missing_ok=True)

runners = list_sct_runners(backend=backend, test_id=test_id)
if not runners:
LOGGER.error("No SCT runner found for test_id: %s", test_id)
sys.exit(1)

runner = runners[0]
if not runner.public_ips:
LOGGER.error("SCT runner %s has no public IPs", runner.instance_name)
sys.exit(1)

runner_ip = runner.public_ips[0]
LOGGER.info(
"Found SCT runner %s at %s for test %s",
runner.instance_name,
runner_ip,
test_id,
)

if duration:
# keep is relative to launch_time, so add elapsed hours to give the test enough time
elapsed_hours = (
int((datetime.now(UTC) - runner.launch_time).total_seconds() / 3600) if runner.launch_time else 0
)
tags_to_update = {"keep": str(elapsed_hours + int(duration / 60) + 6), "keep_action": "terminate"}
update_sct_runner_tags(backend=backend, test_runner_ip=runner_ip, tags=tags_to_update)
LOGGER.info("Updated runner tags: %s", tags_to_update)

sct_runner_ip_path.write_text(runner_ip)
LOGGER.info("SCT Runner IP written to %s: %s", sct_runner_ip_path, runner_ip)


@cli.command("clean-runner-instances", help="Clean all unused SCT runner instances")
18 changes: 16 additions & 2 deletions sdcm/cluster.py
@@ -67,7 +67,7 @@
from sdcm.prometheus import start_metrics_server, PrometheusAlertManagerListener, AlertSilencer
from sdcm.log import SDCMAdapter
from sdcm.provision.common.configuration_script import ConfigurationScriptBuilder
-from sdcm.provision.common.utils import disable_daily_apt_triggers
+from sdcm.provision.common.utils import configure_vector_target_script, disable_daily_apt_triggers
from sdcm.provision.scylla_yaml import ScyllaYamlNodeAttrBuilder
from sdcm.provision.scylla_yaml.certificate_builder import ScyllaYamlCertificateAttrBuilder
from sdcm.provision.scylla_yaml.cluster_builder import ScyllaYamlClusterAttrBuilder
@@ -316,6 +316,18 @@ class NodeCleanedAfterDecommissionAborted(Exception):
"""raise after decommission aborted and node cleaned from group0(Raft)"""


def reconfigure_vector_on_node(node, params, test_config):
"""Update vector config on a reused node to point to the current runner's vector container."""
if params.get("logs_transport") != "vector" or not test_config.VECTOR_ADDRESS:
return

host, port = test_config.VECTOR_ADDRESS
node.log.info("Reconfiguring vector to %s:%s", host, port)
script = configure_vector_target_script(host=host, port=port)
node.remoter.sudo(shell_script_cmd(script, quote="'"))
node.remoter.sudo("systemctl restart vector.service")


def prepend_user_prefix(user_prefix: str, base_name: str):
return "%s-%s" % (user_prefix, base_name)

@@ -6066,7 +6078,7 @@ def _scylla_post_install(node: BaseNode, new_scylla_installed: bool, devname: st
node.scylla_setup(disks, devname)

def _reuse_cluster_setup(self, node):
-        pass
+        reconfigure_vector_on_node(node, self.params, self.test_config)

def _generate_db_node_certs(self, node):
"""Generate per-node SSL certificates for a DB node"""
@@ -6445,6 +6457,7 @@ def node_setup(self, node, verbose=False, **kwargs):

if TestConfig().REUSE_CLUSTER:
self.kill_stress_thread()
reconfigure_vector_on_node(node, self.params, TestConfig())
if self.params.get("client_encrypt") and not (node.ssl_conf_dir / TLSAssets.CLIENT_CERT).exists():
self._generate_loader_certs(node)
install_client_certificate(node.remoter, node.ip_address, force=True)
@@ -6697,6 +6710,7 @@ def node_setup(self, node, **kwargs):
self.mgmt_auth_token = self.monitor_id

if self.test_config.REUSE_CLUSTER:
reconfigure_vector_on_node(node, self.params, self.test_config)
self.configure_scylla_monitoring(node)
self.restart_scylla_monitoring(sct_metrics=True)
set_grafana_url(f"http://{normalize_ipv6_url(node.external_address)}:{self.grafana_port}")
1 change: 1 addition & 0 deletions sdcm/cluster_aws.py
@@ -1190,6 +1190,7 @@ def _scylla_post_install(self, node: AWSNode, new_scylla_installed: bool, devnam
node.set_web_listen_address()

def _reuse_cluster_setup(self, node):
super()._reuse_cluster_setup(node)
node.run_startup_script() # Reconfigure syslog-ng.

def destroy(self):
1 change: 1 addition & 0 deletions sdcm/cluster_azure.py
@@ -342,6 +342,7 @@ def _wait_for_preinstalled_scylla(node):
node.wait_for_machine_image_configured()

def _reuse_cluster_setup(self, node: AzureNode) -> None:
super()._reuse_cluster_setup(node)
node.run_startup_script()


1 change: 1 addition & 0 deletions sdcm/cluster_gce.py
@@ -564,6 +564,7 @@ def _wait_for_preinstalled_scylla(node):
node.wait_for_machine_image_configured()

def _reuse_cluster_setup(self, node: GCENode) -> None:
super()._reuse_cluster_setup(node)
node.run_startup_script()


1 change: 1 addition & 0 deletions sdcm/cluster_oci.py
@@ -403,6 +403,7 @@ def _wait_for_preinstalled_scylla(node):
node.wait_for_machine_image_configured()

def _reuse_cluster_setup(self, node: OciNode) -> None:
super()._reuse_cluster_setup(node)
node.run_startup_script()


13 changes: 9 additions & 4 deletions sdcm/provision/common/configuration_script.py
@@ -75,19 +75,24 @@ def _skip_if_already_run_syslogng() -> str:
fi
""")

-    @staticmethod
-    def _skip_if_already_run_vector() -> str:
+    def _skip_if_already_run_vector(self) -> str:
"""
If a node was configured before sct-runner, skip vector installation. Just ensure
that logging destination is updated in the configuration and the service is
restarted, to retrigger sending logs.
"""
-        return dedent(f"""
+        host, port = self.syslog_host_port
+        vector_config = configure_vector_target_script(host=host, port=port)
+        return (
+            dedent(f"""
             if [ -f {CLOUD_INIT_SCRIPTS_PATH}/done ] && command -v vector >/dev/null 2>&1; then
-                sudo systemctl restart vector
+            """)
+            + vector_config
+            + dedent("""
                 exit 0
             fi
             """)
+        )

@staticmethod
def _mark_script_as_done() -> str:
3 changes: 0 additions & 3 deletions sdcm/sct_runner.py
@@ -2063,9 +2063,6 @@ def clean_sct_runners(
)

if not force and sct_runner_info.keep:
-        if "alive" in str(sct_runner_info.keep):
-            LOGGER.info("Skip %s because `keep' == `alive. No runners have been terminated'", sct_runner_info)
-            continue
if sct_runner_info.keep_action != "terminate":
LOGGER.info("Skip %s because keep_action `keep_action' != `terminate'", sct_runner_info)
continue
Expand Down