
Commit 96be26c

refactor: update dbsync snapshot test and service usage
- Refactor `get_test_id` to accept either ClusterLib or ClusterManager.
- Update `TestDBSyncSnapshot` to use ClusterManager and instantiate DBSyncSnapshotService directly, removing the fixture.
- Move imports to use module import for dbsync_snapshot_service.
- Use `dataclasses` and `typing` consistently in dbsync_snapshot_service.py.
- Minor docstring and formatting improvements.
Parent: e522a5d

File tree: 3 files changed (+44, -62 lines)


cardano_node_tests/tests/common.py

Lines changed: 14 additions & 6 deletions
@@ -198,22 +198,30 @@ def unique_time_str() -> str:
     return str(time.time()).replace(".", "")[-8:]
 
 
-def get_test_id(cluster_obj: clusterlib.ClusterLib) -> str:
+def get_test_id(
+    cluster_or_manager: clusterlib.ClusterLib | cluster_management.ClusterManager,
+) -> str:
     """Return unique test ID - function name + assigned cluster instance + random string.
 
     Log the test ID into cluster manager log file.
     """
+    if isinstance(cluster_or_manager, clusterlib.ClusterLib):
+        cid_part = f"_ci{cluster_or_manager.cluster_id}"
+        cm: cluster_management.ClusterManager = cluster_or_manager._cluster_manager  # type: ignore
+    else:
+        cid_part = ""
+        cm = cluster_or_manager
+
+    cinstance = str(cm._cluster_instance_num) if cm._cluster_instance_num != -1 else ""
+
     curr_test = pytest_utils.get_current_test()
     rand_str = clusterlib.get_rand_str(6)
-    test_id = (
-        f"{curr_test.test_function}{curr_test.test_params}_ci{cluster_obj.cluster_id}_{rand_str}"
-    )
+    test_id = f"{curr_test.test_function}{curr_test.test_params}{cid_part}_{rand_str}"
 
     # Log test ID to cluster manager log file - getting test ID happens early
     # after the start of a test, so the log entry can be used for determining
     # time of the test start
-    cm: cluster_management.ClusterManager = cluster_obj._cluster_manager  # type: ignore
-    cm.log(f"c{cm.cluster_instance_num}: got ID `{test_id}` for '{curr_test.full}'")
+    cm.log(f"c{cinstance}: got ID `{test_id}` for '{curr_test.full}'")
 
     return test_id
 
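For context, a minimal sketch of how the reworked helper can be called from a test. Import paths and fixture wiring are assumptions based on the repo's conventions, not part of this commit:

```python
# Sketch only: import paths and fixtures are assumed, not shown in this commit.
from cardano_clusterlib import clusterlib

from cardano_node_tests import cluster_management
from cardano_node_tests.tests import common


def test_with_cluster(cluster: clusterlib.ClusterLib) -> None:
    # ClusterLib path: the assigned cluster_id ends up in the ID as "_ci<cluster_id>".
    temp_template = common.get_test_id(cluster)
    print(temp_template)


def test_without_cluster(cluster_manager: cluster_management.ClusterManager) -> None:
    # ClusterManager path (what TestDBSyncSnapshot now uses): no "_ci" part is added.
    temp_template = common.get_test_id(cluster_manager)
    print(temp_template)
```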
cardano_node_tests/tests/test_dbsync.py

Lines changed: 16 additions & 16 deletions
@@ -18,11 +18,10 @@
 from cardano_node_tests.utils import cluster_nodes
 from cardano_node_tests.utils import clusterlib_utils
 from cardano_node_tests.utils import dbsync_queries
+from cardano_node_tests.utils import dbsync_snapshot_service
 from cardano_node_tests.utils import dbsync_utils
 from cardano_node_tests.utils import helpers
 from cardano_node_tests.utils import logfiles
-from cardano_node_tests.utils.dbsync_snapshot_service import DBSyncSnapshotService
-from cardano_node_tests.utils.dbsync_snapshot_service import SnapshotFile
 from cardano_node_tests.utils.versions import VERSIONS
 
 LOGGER = logging.getLogger(__name__)
@@ -290,8 +289,7 @@ def test_reconnect_dbsync(
         cluster_manager: cluster_management.ClusterManager,
         worker_id: str,
     ):
-        """
-        Check that db-sync reconnects to the node after the node is restarted.
+        """Check that db-sync reconnects to the node after the node is restarted.
 
         * restart all nodes of the running cluster
         * submit a transaction
@@ -391,38 +389,40 @@ def test_epoch(self, cluster: clusterlib.ClusterLib):
 class TestDBSyncSnapshot:
     """Tests for db-sync snapshot availability and freshness."""
 
-    @pytest.fixture()
-    def db_sync_snapshots(
-        self,
-    ) -> DBSyncSnapshotService | None:
-        return DBSyncSnapshotService()
-
     @allure.link(helpers.get_vcs_link())
     @pytest.mark.smoke
-    def test_latest_snapshot_freshness(self, db_sync_snapshots: DBSyncSnapshotService):
-        """
-        Check that the latest db-sync snapshot is not older than 5 days.
+    def test_latest_snapshot_freshness(
+        self,
+        cluster_manager: cluster_management.ClusterManager,
+    ):
+        """Check that the latest db-sync snapshot is not older than 5 days.
 
         This test uses the S3 REST API to query the Cardano mainnet snapshot repository
         and verifies that the most recent snapshot is fresh.
         """
+        common.get_test_id(cluster_manager)
+        db_sync_snapshots = dbsync_snapshot_service.DBSyncSnapshotService()
+
         # 1. Find latest version
         latest_version = db_sync_snapshots.get_latest_version()
         LOGGER.info(f"Latest db-sync version: {latest_version}")
 
         # 2. Get latest snapshot for that version
-        latest_snapshot: SnapshotFile = db_sync_snapshots.get_latest_snapshot(latest_version)
+        latest_snapshot: dbsync_snapshot_service.SnapshotFile = (
+            db_sync_snapshots.get_latest_snapshot(latest_version)
+        )
 
         LOGGER.info(f"Latest snapshot: {latest_snapshot.name}")
         LOGGER.info(f"Snapshot date: {latest_snapshot.last_modified.isoformat()}")
         LOGGER.info(f"Snapshot size: {latest_snapshot.size_gb:.2f} GB")
 
         # 3. Perform freshness check
-        five_days_ago = datetime.now(timezone.utc) - timedelta(days=5)
+        now_utc = datetime.now(timezone.utc)
+        five_days_ago = now_utc - timedelta(days=5)
 
         assert latest_snapshot.last_modified >= five_days_ago, (
             f"The latest snapshot is too old. "
-            f"Age: {(datetime.now(timezone.utc) - latest_snapshot.last_modified).days} days. "
+            f"Age: {(now_utc - latest_snapshot.last_modified).days} days. "
             f"Snapshot date: {latest_snapshot.last_modified.strftime('%Y-%m-%d %H:%M:%S UTC')}, "
            f"Limit: 5 days ago ({five_days_ago.strftime('%Y-%m-%d %H:%M:%S UTC')})."
         )

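The same service calls the test exercises can also be driven ad hoc outside pytest. A sketch using only the methods and attributes visible in this diff; running it as a standalone script is an assumption:

```python
"""Ad-hoc freshness check mirroring TestDBSyncSnapshot.test_latest_snapshot_freshness."""
from datetime import datetime
from datetime import timedelta
from datetime import timezone

from cardano_node_tests.utils import dbsync_snapshot_service


def main() -> None:
    service = dbsync_snapshot_service.DBSyncSnapshotService()

    # Same two calls as the test: latest version, then its latest snapshot file.
    latest_version = service.get_latest_version()
    latest_snapshot = service.get_latest_snapshot(latest_version)

    age_days = (datetime.now(timezone.utc) - latest_snapshot.last_modified).days
    print(f"{latest_snapshot.name}: {latest_snapshot.size_gb:.2f} GB, {age_days} days old")

    # Same 5-day threshold as the test assertion.
    if latest_snapshot.last_modified < datetime.now(timezone.utc) - timedelta(days=5):
        raise SystemExit("Latest db-sync snapshot is older than 5 days.")


if __name__ == "__main__":
    main()
```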
cardano_node_tests/utils/dbsync_snapshot_service.py

Lines changed: 14 additions & 40 deletions
@@ -1,12 +1,9 @@
1+
import dataclasses
12
import logging
23
import re
34
import xml.etree.ElementTree as ET
4-
from dataclasses import dataclass
5-
from dataclasses import field
65
from datetime import datetime
76
from datetime import timezone
8-
from typing import List
9-
from typing import Tuple
107

118
import requests
129

@@ -16,15 +13,15 @@
1613
S3_NS_URL = "http://s3.amazonaws.com/doc/2006-03-01/"
1714

1815

19-
@dataclass
16+
@dataclasses.dataclass
2017
class SnapshotFile:
2118
"""Dataclass to hold parsed snapshot file information."""
2219

2320
key: str
2421
name: str
2522
last_modified: datetime # Timezone-aware datetime object
2623
size: int
27-
size_gb: float = field(init=False)
24+
size_gb: float = dataclasses.field(init=False)
2825

2926
def __post_init__(self) -> None:
3027
self.size_gb = self.size / (1024**3)
@@ -40,11 +37,11 @@ def _get_s3_objects(self, prefix: str = "", delimiter: str = "") -> bytes:
4037
"""Fetch XML content from the S3 bucket using REST API."""
4138
params = {"list-type": "2", "prefix": prefix, "delimiter": delimiter}
4239

43-
response = requests.get(self.BUCKET_URL, params=params)
40+
response = requests.get(self.BUCKET_URL, params=params, timeout=30)
4441
response.raise_for_status()
4542
return response.content
4643

47-
def _parse_s3_xml(self, xml_content: bytes) -> Tuple[List[str], List[SnapshotFile]]:
44+
def _parse_s3_xml(self, xml_content: bytes) -> tuple[list[str], list[SnapshotFile]]:
4845
"""Parse S3 XML response using exact namespace search paths with None checks."""
4946
root = ET.fromstring(xml_content)
5047
ns_tag = f"{{{S3_NS_URL}}}"
@@ -66,39 +63,16 @@ def _parse_s3_xml(self, xml_content: bytes) -> Tuple[List[str], List[SnapshotFil
6663
modified_tag = content.find(f"{ns_tag}LastModified")
6764
size_tag = content.find(f"{ns_tag}Size")
6865

69-
if not all(
70-
[
71-
key_tag is not None and key_tag.text,
72-
modified_tag is not None and modified_tag.text,
73-
size_tag is not None and size_tag.text,
74-
]
66+
if (key_tag is None or modified_tag is None or size_tag is None) or not (
67+
key_tag.text and modified_tag.text and size_tag.text
7568
):
7669
logger.warning(
77-
"Skipping malformed S3 object entry: Missing Key, LastModified, or Size."
70+
"Skipping malformed S3 object entry: Missing Key, LastModified, or Size tag."
7871
)
79-
continue # Skip this entry if critical data is missing
72+
continue # Skip this entry if critical tags are missing
8073

81-
# Use explicit variables to store the text content only if it exists
82-
key_text = key_tag.text if key_tag is not None else None
83-
modified_text = modified_tag.text if modified_tag is not None else None
84-
size_text = size_tag.text if size_tag is not None else None
85-
86-
# Ensure all three critical tags and their text content exist
87-
if not all([key_text, modified_text, size_text]):
88-
logger.warning(
89-
"Skipping malformed S3 object entry: Missing Key, LastModified, or Size."
90-
)
91-
continue # Skip this entry if critical data is missing
92-
93-
key = key_text
94-
last_modified_str = modified_text
95-
size_str = size_text
96-
97-
if last_modified_str is None:
98-
continue
99-
100-
if key is None:
101-
continue
74+
key = key_tag.text or ""
75+
last_modified_str = modified_tag.text or ""
10276

10377
file_date = datetime.strptime(last_modified_str, "%Y-%m-%dT%H:%M:%S.%fZ").replace(
10478
tzinfo=timezone.utc
@@ -109,7 +83,7 @@ def _parse_s3_xml(self, xml_content: bytes) -> Tuple[List[str], List[SnapshotFil
10983
key=key,
11084
name=key.split("/")[-1],
11185
last_modified=file_date,
112-
size=int(size_str) if size_str else 0,
86+
size=int(size_tag.text or 0),
11387
)
11488
)
11589

@@ -137,7 +111,6 @@ def get_latest_snapshot(self, version: str) -> SnapshotFile:
137111
xml_content = self._get_s3_objects(prefix=version_prefix)
138112
_, files = self._parse_s3_xml(xml_content)
139113

140-
# Filter: Revert to the original working filter (.tgz AND 'snapshot')
141114
snapshot_files = [
142115
f for f in files if f.name.endswith(".tgz") and "snapshot" in f.name.lower()
143116
]
@@ -146,7 +119,8 @@ def get_latest_snapshot(self, version: str) -> SnapshotFile:
146119
file_names = [f.name for f in files]
147120
logger.warning(f"Files found in S3 response for {version_prefix}: {file_names}")
148121
error_msg = (
149-
f"No snapshot files found for version {version}. Filtered files: {file_names}"
122+
f"No snapshot files found for version {version}."
123+
f" All files in response: {file_names}"
150124
)
151125
raise RuntimeError(error_msg)
152126

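For reference, the listing pattern the service relies on is plain S3 ListObjectsV2 over REST plus namespace-aware XML parsing. A self-contained sketch with a placeholder bucket URL; the real `BUCKET_URL` lives in `DBSyncSnapshotService` and is not shown in this diff:

```python
"""Sketch of the S3 ListObjectsV2 REST pattern used by dbsync_snapshot_service."""
import xml.etree.ElementTree as ET

import requests

S3_NS_URL = "http://s3.amazonaws.com/doc/2006-03-01/"
BUCKET_URL = "https://example-bucket.s3.amazonaws.com/"  # placeholder, not the real bucket


def list_bucket(prefix: str = "", delimiter: str = "") -> None:
    # ListObjectsV2 via plain REST - same params as DBSyncSnapshotService._get_s3_objects.
    params = {"list-type": "2", "prefix": prefix, "delimiter": delimiter}
    response = requests.get(BUCKET_URL, params=params, timeout=30)
    response.raise_for_status()

    root = ET.fromstring(response.content)
    ns_tag = f"{{{S3_NS_URL}}}"

    # "Directories" show up as CommonPrefixes when a delimiter is used.
    for common_prefix in root.findall(f"{ns_tag}CommonPrefixes/{ns_tag}Prefix"):
        print("prefix:", common_prefix.text)

    # Individual objects are listed under Contents with Key/LastModified/Size.
    for content in root.findall(f"{ns_tag}Contents"):
        key_tag = content.find(f"{ns_tag}Key")
        size_tag = content.find(f"{ns_tag}Size")
        if key_tag is not None and size_tag is not None:
            print(key_tag.text, size_tag.text)


if __name__ == "__main__":
    list_bucket(prefix="", delimiter="/")
```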