
Commit 96be26c

refactor: update dbsync snapshot test and service usage
- Refactor `get_test_id` to accept either ClusterLib or ClusterManager.
- Update `TestDBSyncSnapshot` to use ClusterManager and instantiate DBSyncSnapshotService directly, removing the fixture.
- Move imports to use module import for dbsync_snapshot_service.
- Use `dataclasses` and `typing` consistently in dbsync_snapshot_service.py.
- Minor docstring and formatting improvements.
Parent: e522a5d

File tree: 3 files changed (+44, -62 lines)


cardano_node_tests/tests/common.py

Lines changed: 14 additions & 6 deletions
@@ -198,22 +198,30 @@ def unique_time_str() -> str:
     return str(time.time()).replace(".", "")[-8:]
 
 
-def get_test_id(cluster_obj: clusterlib.ClusterLib) -> str:
+def get_test_id(
+    cluster_or_manager: clusterlib.ClusterLib | cluster_management.ClusterManager,
+) -> str:
     """Return unique test ID - function name + assigned cluster instance + random string.
 
     Log the test ID into cluster manager log file.
     """
+    if isinstance(cluster_or_manager, clusterlib.ClusterLib):
+        cid_part = f"_ci{cluster_or_manager.cluster_id}"
+        cm: cluster_management.ClusterManager = cluster_or_manager._cluster_manager  # type: ignore
+    else:
+        cid_part = ""
+        cm = cluster_or_manager
+
+    cinstance = str(cm._cluster_instance_num) if cm._cluster_instance_num != -1 else ""
+
     curr_test = pytest_utils.get_current_test()
     rand_str = clusterlib.get_rand_str(6)
-    test_id = (
-        f"{curr_test.test_function}{curr_test.test_params}_ci{cluster_obj.cluster_id}_{rand_str}"
-    )
+    test_id = f"{curr_test.test_function}{curr_test.test_params}{cid_part}_{rand_str}"
 
     # Log test ID to cluster manager log file - getting test ID happens early
     # after the start of a test, so the log entry can be used for determining
     # time of the test start
-    cm: cluster_management.ClusterManager = cluster_obj._cluster_manager  # type: ignore
-    cm.log(f"c{cm.cluster_instance_num}: got ID `{test_id}` for '{curr_test.full}'")
+    cm.log(f"c{cinstance}: got ID `{test_id}` for '{curr_test.full}'")
 
     return test_id
 
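For context, a minimal sketch of how the reworked helper can be called from a test. Import paths and fixture wiring are assumptions based on the repo's conventions, not part of this commit:

```python
# Sketch only: import paths and fixtures are assumed, not shown in this commit.
from cardano_clusterlib import clusterlib

from cardano_node_tests import cluster_management
from cardano_node_tests.tests import common


def test_with_cluster(cluster: clusterlib.ClusterLib) -> None:
    # ClusterLib path: the assigned cluster_id ends up in the ID as "_ci<cluster_id>".
    temp_template = common.get_test_id(cluster)
    print(temp_template)


def test_without_cluster(cluster_manager: cluster_management.ClusterManager) -> None:
    # ClusterManager path (what TestDBSyncSnapshot now uses): no "_ci" part is added.
    temp_template = common.get_test_id(cluster_manager)
    print(temp_template)
```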
cardano_node_tests/tests/test_dbsync.py

Lines changed: 16 additions & 16 deletions
@@ -18,11 +18,10 @@
 from cardano_node_tests.utils import cluster_nodes
 from cardano_node_tests.utils import clusterlib_utils
 from cardano_node_tests.utils import dbsync_queries
+from cardano_node_tests.utils import dbsync_snapshot_service
 from cardano_node_tests.utils import dbsync_utils
 from cardano_node_tests.utils import helpers
 from cardano_node_tests.utils import logfiles
-from cardano_node_tests.utils.dbsync_snapshot_service import DBSyncSnapshotService
-from cardano_node_tests.utils.dbsync_snapshot_service import SnapshotFile
 from cardano_node_tests.utils.versions import VERSIONS
 
 LOGGER = logging.getLogger(__name__)
@@ -290,8 +289,7 @@ def test_reconnect_dbsync(
         cluster_manager: cluster_management.ClusterManager,
         worker_id: str,
     ):
-        """
-        Check that db-sync reconnects to the node after the node is restarted.
+        """Check that db-sync reconnects to the node after the node is restarted.
 
         * restart all nodes of the running cluster
         * submit a transaction
@@ -391,38 +389,40 @@ def test_epoch(self, cluster: clusterlib.ClusterLib):
 class TestDBSyncSnapshot:
     """Tests for db-sync snapshot availability and freshness."""
 
-    @pytest.fixture()
-    def db_sync_snapshots(
-        self,
-    ) -> DBSyncSnapshotService | None:
-        return DBSyncSnapshotService()
-
     @allure.link(helpers.get_vcs_link())
     @pytest.mark.smoke
-    def test_latest_snapshot_freshness(self, db_sync_snapshots: DBSyncSnapshotService):
-        """
-        Check that the latest db-sync snapshot is not older than 5 days.
+    def test_latest_snapshot_freshness(
+        self,
+        cluster_manager: cluster_management.ClusterManager,
+    ):
+        """Check that the latest db-sync snapshot is not older than 5 days.
 
         This test uses the S3 REST API to query the Cardano mainnet snapshot repository
         and verifies that the most recent snapshot is fresh.
         """
+        common.get_test_id(cluster_manager)
+        db_sync_snapshots = dbsync_snapshot_service.DBSyncSnapshotService()
+
         # 1. Find latest version
         latest_version = db_sync_snapshots.get_latest_version()
         LOGGER.info(f"Latest db-sync version: {latest_version}")
 
         # 2. Get latest snapshot for that version
-        latest_snapshot: SnapshotFile = db_sync_snapshots.get_latest_snapshot(latest_version)
+        latest_snapshot: dbsync_snapshot_service.SnapshotFile = (
+            db_sync_snapshots.get_latest_snapshot(latest_version)
+        )
 
         LOGGER.info(f"Latest snapshot: {latest_snapshot.name}")
         LOGGER.info(f"Snapshot date: {latest_snapshot.last_modified.isoformat()}")
         LOGGER.info(f"Snapshot size: {latest_snapshot.size_gb:.2f} GB")
 
         # 3. Perform freshness check
-        five_days_ago = datetime.now(timezone.utc) - timedelta(days=5)
+        now_utc = datetime.now(timezone.utc)
+        five_days_ago = now_utc - timedelta(days=5)
 
         assert latest_snapshot.last_modified >= five_days_ago, (
             f"The latest snapshot is too old. "
-            f"Age: {(datetime.now(timezone.utc) - latest_snapshot.last_modified).days} days. "
+            f"Age: {(now_utc - latest_snapshot.last_modified).days} days. "
             f"Snapshot date: {latest_snapshot.last_modified.strftime('%Y-%m-%d %H:%M:%S UTC')}, "
            f"Limit: 5 days ago ({five_days_ago.strftime('%Y-%m-%d %H:%M:%S UTC')})."
         )

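The same service calls the test exercises can also be driven ad hoc outside pytest. A sketch using only the methods and attributes visible in this diff; running it as a standalone script is an assumption:

```python
"""Ad-hoc freshness check mirroring TestDBSyncSnapshot.test_latest_snapshot_freshness."""
from datetime import datetime
from datetime import timedelta
from datetime import timezone

from cardano_node_tests.utils import dbsync_snapshot_service


def main() -> None:
    service = dbsync_snapshot_service.DBSyncSnapshotService()

    # Same two calls as the test: latest version, then its latest snapshot file.
    latest_version = service.get_latest_version()
    latest_snapshot = service.get_latest_snapshot(latest_version)

    age_days = (datetime.now(timezone.utc) - latest_snapshot.last_modified).days
    print(f"{latest_snapshot.name}: {latest_snapshot.size_gb:.2f} GB, {age_days} days old")

    # Same 5-day threshold as the test assertion.
    if latest_snapshot.last_modified < datetime.now(timezone.utc) - timedelta(days=5):
        raise SystemExit("Latest db-sync snapshot is older than 5 days.")


if __name__ == "__main__":
    main()
```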
cardano_node_tests/utils/dbsync_snapshot_service.py

Lines changed: 14 additions & 40 deletions
@@ -1,12 +1,9 @@
1+
import dataclasses
12
import logging
23
import re
34
import xml.etree.ElementTree as ET
4-
from dataclasses import dataclass
5-
from dataclasses import field
65
from datetime import datetime
76
from datetime import timezone
8-
from typing import List
9-
from typing import Tuple
107

118
import requests
129

@@ -16,15 +13,15 @@
1613
S3_NS_URL = "http://s3.amazonaws.com/doc/2006-03-01/"
1714

1815

19-
@dataclass
16+
@dataclasses.dataclass
2017
class SnapshotFile:
2118
"""Dataclass to hold parsed snapshot file information."""
2219

2320
key: str
2421
name: str
2522
last_modified: datetime # Timezone-aware datetime object
2623
size: int
27-
size_gb: float = field(init=False)
24+
size_gb: float = dataclasses.field(init=False)
2825

2926
def __post_init__(self) -> None:
3027
self.size_gb = self.size / (1024**3)
@@ -40,11 +37,11 @@ def _get_s3_objects(self, prefix: str = "", delimiter: str = "") -> bytes:
4037
"""Fetch XML content from the S3 bucket using REST API."""
4138
params = {"list-type": "2", "prefix": prefix, "delimiter": delimiter}
4239

43-
response = requests.get(self.BUCKET_URL, params=params)
40+
response = requests.get(self.BUCKET_URL, params=params, timeout=30)
4441
response.raise_for_status()
4542
return response.content
4643

47-
def _parse_s3_xml(self, xml_content: bytes) -> Tuple[List[str], List[SnapshotFile]]:
44+
def _parse_s3_xml(self, xml_content: bytes) -> tuple[list[str], list[SnapshotFile]]:
4845
"""Parse S3 XML response using exact namespace search paths with None checks."""
4946
root = ET.fromstring(xml_content)
5047
ns_tag = f"{{{S3_NS_URL}}}"
@@ -66,39 +63,16 @@ def _parse_s3_xml(self, xml_content: bytes) -> Tuple[List[str], List[SnapshotFil
6663
modified_tag = content.find(f"{ns_tag}LastModified")
6764
size_tag = content.find(f"{ns_tag}Size")
6865

69-
if not all(
70-
[
71-
key_tag is not None and key_tag.text,
72-
modified_tag is not None and modified_tag.text,
73-
size_tag is not None and size_tag.text,
74-
]
66+
if (key_tag is None or modified_tag is None or size_tag is None) or not (
67+
key_tag.text and modified_tag.text and size_tag.text
7568
):
7669
logger.warning(
77-
"Skipping malformed S3 object entry: Missing Key, LastModified, or Size."
70+
"Skipping malformed S3 object entry: Missing Key, LastModified, or Size tag."
7871
)
79-
continue # Skip this entry if critical data is missing
72+
continue # Skip this entry if critical tags are missing
8073

81-
# Use explicit variables to store the text content only if it exists
82-
key_text = key_tag.text if key_tag is not None else None
83-
modified_text = modified_tag.text if modified_tag is not None else None
84-
size_text = size_tag.text if size_tag is not None else None
85-
86-
# Ensure all three critical tags and their text content exist
87-
if not all([key_text, modified_text, size_text]):
88-
logger.warning(
89-
"Skipping malformed S3 object entry: Missing Key, LastModified, or Size."
90-
)
91-
continue # Skip this entry if critical data is missing
92-
93-
key = key_text
94-
last_modified_str = modified_text
95-
size_str = size_text
96-
97-
if last_modified_str is None:
98-
continue
99-
100-
if key is None:
101-
continue
74+
key = key_tag.text or ""
75+
last_modified_str = modified_tag.text or ""
10276

10377
file_date = datetime.strptime(last_modified_str, "%Y-%m-%dT%H:%M:%S.%fZ").replace(
10478
tzinfo=timezone.utc
@@ -109,7 +83,7 @@ def _parse_s3_xml(self, xml_content: bytes) -> Tuple[List[str], List[SnapshotFil
10983
key=key,
11084
name=key.split("/")[-1],
11185
last_modified=file_date,
112-
size=int(size_str) if size_str else 0,
86+
size=int(size_tag.text or 0),
11387
)
11488
)
11589

@@ -137,7 +111,6 @@ def get_latest_snapshot(self, version: str) -> SnapshotFile:
137111
xml_content = self._get_s3_objects(prefix=version_prefix)
138112
_, files = self._parse_s3_xml(xml_content)
139113

140-
# Filter: Revert to the original working filter (.tgz AND 'snapshot')
141114
snapshot_files = [
142115
f for f in files if f.name.endswith(".tgz") and "snapshot" in f.name.lower()
143116
]
@@ -146,7 +119,8 @@ def get_latest_snapshot(self, version: str) -> SnapshotFile:
146119
file_names = [f.name for f in files]
147120
logger.warning(f"Files found in S3 response for {version_prefix}: {file_names}")
148121
error_msg = (
149-
f"No snapshot files found for version {version}. Filtered files: {file_names}"
122+
f"No snapshot files found for version {version}."
123+
f" All files in response: {file_names}"
150124
)
151125
raise RuntimeError(error_msg)
152126

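For reference, the listing pattern the service relies on is plain S3 ListObjectsV2 over REST plus namespace-aware XML parsing. A self-contained sketch with a placeholder bucket URL; the real `BUCKET_URL` lives in `DBSyncSnapshotService` and is not shown in this diff:

```python
"""Sketch of the S3 ListObjectsV2 REST pattern used by dbsync_snapshot_service."""
import xml.etree.ElementTree as ET

import requests

S3_NS_URL = "http://s3.amazonaws.com/doc/2006-03-01/"
BUCKET_URL = "https://example-bucket.s3.amazonaws.com/"  # placeholder, not the real bucket


def list_bucket(prefix: str = "", delimiter: str = "") -> None:
    # ListObjectsV2 via plain REST - same params as DBSyncSnapshotService._get_s3_objects.
    params = {"list-type": "2", "prefix": prefix, "delimiter": delimiter}
    response = requests.get(BUCKET_URL, params=params, timeout=30)
    response.raise_for_status()

    root = ET.fromstring(response.content)
    ns_tag = f"{{{S3_NS_URL}}}"

    # "Directories" show up as CommonPrefixes when a delimiter is used.
    for common_prefix in root.findall(f"{ns_tag}CommonPrefixes/{ns_tag}Prefix"):
        print("prefix:", common_prefix.text)

    # Individual objects are listed under Contents with Key/LastModified/Size.
    for content in root.findall(f"{ns_tag}Contents"):
        key_tag = content.find(f"{ns_tag}Key")
        size_tag = content.find(f"{ns_tag}Size")
        if key_tag is not None and size_tag is not None:
            print(key_tag.text, size_tag.text)


if __name__ == "__main__":
    list_bucket(prefix="", delimiter="/")
```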