Skip to content

Commit 047e302

Browse files
authored
feat: health check (#104)
1 parent b41068e commit 047e302

File tree

8 files changed

+598
-3
lines changed

8 files changed

+598
-3
lines changed

tests/test_api.py

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -134,10 +134,19 @@ class BaseAppTestCase(AioHTTPTestCase):
134134

135135
@unittest_run_loop
136136
async def test_health_check(self):
137-
resp = await self.client.request("GET", "/health-check")
137+
health_check_result = MagicMock()
138+
health_check_result.get_http_status_code.return_value = 200
139+
health_check_result.to_json.return_value = {"status": "pass"}
140+
141+
async def side_effect():
142+
return health_check_result
143+
144+
self.healthcheck.get_health_check.side_effect = side_effect
145+
146+
resp = await self.client.request("GET", "/health")
138147
assert resp.status == 200
139148
data = await resp.json()
140-
self.assertTrue(data["success"])
149+
self.assertEqual(data, {"status": "pass"})
141150

142151
@unittest_run_loop
143152
async def test_mining_status(self):
@@ -709,7 +718,8 @@ class AppTestCase(BaseAppTestCase):
709718

710719
async def get_application(self):
711720
self.manager = TxMiningManager(backend=None, pubsub=MagicMock(), address=None)
712-
self.myapp = App(self.manager)
721+
self.healthcheck = MagicMock()
722+
self.myapp = App(self.manager, self.healthcheck)
713723
self.version_check = False
714724
return self.myapp.app
715725

@@ -719,8 +729,10 @@ class AppVersionCheckTestCase(BaseAppTestCase):
719729

720730
async def get_application(self):
721731
self.manager = TxMiningManager(backend=None, pubsub=MagicMock(), address=None)
732+
self.healthcheck = MagicMock()
722733
self.myapp = App(
723734
self.manager,
735+
self.healthcheck,
724736
min_wallet_desktop_version="0.23.0",
725737
min_wallet_mobile_version="1.18.3",
726738
min_wallet_headless_version="0.14.88",

tests/test_healthcheck.py

Lines changed: 244 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,244 @@
1+
from datetime import datetime
2+
from unittest.mock import MagicMock, patch
3+
4+
import asynctest # type: ignore[import]
5+
6+
from txstratum.healthcheck.healthcheck import (
7+
ComponentType,
8+
FullnodeHealthCheck,
9+
HealthCheck,
10+
HealthCheckStatus,
11+
MiningHealthCheck,
12+
)
13+
14+
15+
class TestFullnodeHealthCheck(asynctest.TestCase): # type: ignore[misc]
16+
def setUp(self) -> None:
17+
self.mock_hathor_client = MagicMock()
18+
self.fullnode_health_check = FullnodeHealthCheck(
19+
backend=self.mock_hathor_client
20+
)
21+
22+
async def test_get_health_check_with_a_healthy_fullnode(self):
23+
"""Test the response we should generate for a healthy fullnode."""
24+
# Mock the implementation of the hathor_client.version.
25+
async def side_effect():
26+
return {"version": "1.0.0"}
27+
28+
self.mock_hathor_client.version.side_effect = side_effect
29+
self.mock_hathor_client._base_url = "http://localhost:8080"
30+
31+
result = await self.fullnode_health_check.get_health_check()
32+
self.assertEqual(result.component_name, "fullnode")
33+
self.assertEqual(result.component_type, ComponentType.FULLNODE)
34+
self.assertEqual(result.status, HealthCheckStatus.PASS)
35+
self.assertEqual(result.output, "Fullnode is responding correctly")
36+
self.assertEqual(result.component_id, "http://localhost:8080")
37+
38+
async def test_get_health_check_with_an_unhealthy_fullnode(self):
39+
"""Test the response we should generate for an unhealthy fullnode."""
40+
self.mock_hathor_client.version.side_effect = Exception("error")
41+
self.mock_hathor_client._base_url = "http://localhost:8080"
42+
43+
result = await self.fullnode_health_check.get_health_check()
44+
self.assertEqual(result.component_name, "fullnode")
45+
self.assertEqual(result.component_type, ComponentType.FULLNODE)
46+
self.assertEqual(result.status, HealthCheckStatus.FAIL)
47+
self.assertEqual(result.output, "Couldn't connect to fullnode: error")
48+
self.assertEqual(result.component_id, "http://localhost:8080")
49+
50+
51+
class TestMiningHealthCheck(asynctest.TestCase): # type: ignore[misc]
52+
def setUp(self):
53+
self.manager = MagicMock()
54+
self.mining_health_check = MiningHealthCheck(manager=self.manager)
55+
56+
async def test_get_health_check_no_miners(self):
57+
# Preparation
58+
self.manager.has_any_miner.return_value = False
59+
# Execution
60+
result = await self.mining_health_check.get_health_check()
61+
# Assertion
62+
self.assertEqual(result.component_name, "manager")
63+
self.assertEqual(result.component_type, ComponentType.INTERNAL)
64+
self.assertEqual(result.status, HealthCheckStatus.FAIL)
65+
self.assertEqual(result.output, "No miners connected")
66+
67+
async def test_get_health_check_no_submitted_job_in_period(self):
68+
# Preparation
69+
self.manager.has_any_miner.return_value = True
70+
self.manager.has_any_submitted_job_in_period.return_value = False
71+
# Execution
72+
result = await self.mining_health_check.get_health_check()
73+
# Assertion
74+
self.assertEqual(result.component_name, "manager")
75+
self.assertEqual(result.component_type, ComponentType.INTERNAL)
76+
self.assertEqual(result.status, HealthCheckStatus.FAIL)
77+
self.assertEqual(result.output, "No miners submitted a job in the last 1 hour")
78+
79+
async def test_get_health_check_failed_job(self):
80+
# Preparation
81+
self.manager.has_any_miner.return_value = True
82+
self.manager.has_any_submitted_job_in_period.return_value = True
83+
job = MagicMock()
84+
job.is_failed.return_value = True
85+
job.total_time = 9
86+
self.manager.tx_jobs = {"job_id": job}
87+
# Execution
88+
result = await self.mining_health_check.get_health_check()
89+
# Assertion
90+
self.assertEqual(result.component_name, "manager")
91+
self.assertEqual(result.component_type, ComponentType.INTERNAL)
92+
self.assertEqual(result.status, HealthCheckStatus.FAIL)
93+
self.assertEqual(
94+
result.output,
95+
"We had 1 failed jobs and 0 long running jobs in the last 5 minutes",
96+
)
97+
98+
async def test_get_health_check_slow_job(self):
99+
# Preparation
100+
self.manager.has_any_miner.return_value = True
101+
self.manager.has_any_submitted_job_in_period.return_value = True
102+
job = MagicMock()
103+
job.is_failed.return_value = False
104+
job.total_time = 11
105+
self.manager.tx_jobs = {"job_id": job}
106+
# Execution
107+
result = await self.mining_health_check.get_health_check()
108+
# Assertion
109+
self.assertEqual(result.component_name, "manager")
110+
self.assertEqual(result.component_type, ComponentType.INTERNAL)
111+
self.assertEqual(result.status, HealthCheckStatus.WARN)
112+
self.assertEqual(
113+
result.output,
114+
"We had 0 failed jobs and 1 long running jobs in the last 5 minutes",
115+
)
116+
117+
async def test_get_health_check_ok(self):
118+
# Preparation
119+
self.manager.has_any_miner.return_value = True
120+
self.manager.has_any_submitted_job_in_period.return_value = True
121+
job = MagicMock()
122+
job.is_failed.return_value = False
123+
job.total_time = 9
124+
self.manager.tx_jobs = {"job_id": job}
125+
# Execution
126+
result = await self.mining_health_check.get_health_check()
127+
# Assertion
128+
self.assertEqual(result.component_name, "manager")
129+
self.assertEqual(result.component_type, ComponentType.INTERNAL)
130+
self.assertEqual(result.status, HealthCheckStatus.PASS)
131+
self.assertEqual(result.output, "Everything is ok")
132+
133+
# Patch datetime.utcnow() to return a fixed value
134+
@patch("txstratum.healthcheck.models.datetime")
135+
async def test_return_last_status(self, datetime_mock):
136+
"""
137+
This tests the case where we have no tx_jobs in the last 5 minutes, but we had a previous status of failure.
138+
139+
We should return the previous status and include its output in the new output.
140+
"""
141+
# Preparation
142+
self.manager.has_any_miner.return_value = True
143+
self.manager.has_any_submitted_job_in_period.return_value = True
144+
self.manager.tx_jobs = {}
145+
mock_date = datetime(2021, 1, 1, 0, 0, 0)
146+
datetime_mock.utcnow.return_value = mock_date
147+
self.mining_health_check.last_manager_status.update(
148+
status=HealthCheckStatus.FAIL,
149+
output="We had 1 failed jobs and 0 long running jobs in the last 5 minutes",
150+
)
151+
# Execution
152+
result = await self.mining_health_check.get_health_check()
153+
# Assertion
154+
self.assertEqual(result.component_name, "manager")
155+
self.assertEqual(result.component_type, ComponentType.INTERNAL)
156+
self.assertEqual(result.status, HealthCheckStatus.FAIL)
157+
self.assertEqual(
158+
result.output,
159+
(
160+
"We had no tx_jobs in the last 5 minutes, so we are just returning the last observed status from"
161+
f" {mock_date.strftime('%Y-%m-%dT%H:%M:%SZ')}. The output was: We had 1 failed jobs and 0 long"
162+
" running jobs in the last 5 minutes"
163+
),
164+
)
165+
166+
167+
class TestHealthCheck(asynctest.TestCase): # type: ignore[misc]
168+
def setUp(self):
169+
self.mock_hathor_client = MagicMock()
170+
self.mock_manager = MagicMock()
171+
172+
self.health_check = HealthCheck(
173+
manager=self.mock_manager, backend=self.mock_hathor_client
174+
)
175+
176+
async def test_get_health_check_success(self):
177+
"""Tests the response we should generate when everything is ok"""
178+
# Mock the implementation of the hathor_client.version.
179+
async def side_effect():
180+
return {"version": "1.0.0"}
181+
182+
self.mock_hathor_client.version.side_effect = side_effect
183+
self.mock_hathor_client._base_url = "http://localhost:8080"
184+
185+
self.mock_manager.has_any_miner.return_value = True
186+
self.mock_manager.has_any_submitted_job_in_period.return_value = True
187+
job = MagicMock()
188+
job.is_failed.return_value = False
189+
job.total_time = 9
190+
self.mock_manager.tx_jobs = {"job_id": job}
191+
192+
result = await self.health_check.get_health_check()
193+
self.assertEqual(result.checks["manager"][0].status, HealthCheckStatus.PASS)
194+
self.assertEqual(result.checks["manager"][0].output, "Everything is ok")
195+
self.assertEqual(result.checks["fullnode"][0].status, HealthCheckStatus.PASS)
196+
self.assertEqual(
197+
result.checks["fullnode"][0].output, "Fullnode is responding correctly"
198+
)
199+
self.assertEqual(result.status, HealthCheckStatus.PASS)
200+
201+
async def test_get_health_check_fullnode_failure(self):
202+
"""Tests the response we should generate when the fullnode is unhealthy"""
203+
self.mock_hathor_client.version.side_effect = Exception("error")
204+
self.mock_hathor_client._base_url = "http://localhost:8080"
205+
206+
self.mock_manager.has_any_miner.return_value = True
207+
self.mock_manager.has_any_submitted_job_in_period.return_value = True
208+
job = MagicMock()
209+
job.is_failed.return_value = False
210+
job.total_time = 9
211+
self.mock_manager.tx_jobs = {"job_id": job}
212+
213+
result = await self.health_check.get_health_check()
214+
self.assertEqual(result.checks["manager"][0].status, HealthCheckStatus.PASS)
215+
self.assertEqual(result.checks["manager"][0].output, "Everything is ok")
216+
self.assertEqual(result.checks["fullnode"][0].status, HealthCheckStatus.FAIL)
217+
self.assertEqual(
218+
result.checks["fullnode"][0].output, "Couldn't connect to fullnode: error"
219+
)
220+
self.assertEqual(result.status, HealthCheckStatus.FAIL)
221+
222+
async def test_get_health_check_mining_failure(self):
223+
"""Tests the response we should generate when the mining is unhealthy"""
224+
# Mock the implementation of the hathor_client.version.
225+
async def side_effect():
226+
return {"version": "1.0.0"}
227+
228+
self.mock_hathor_client.version.side_effect = side_effect
229+
self.mock_hathor_client._base_url = "http://localhost:8080"
230+
231+
self.mock_manager.has_any_miner.return_value = True
232+
self.mock_manager.has_any_submitted_job_in_period.return_value = False
233+
234+
result = await self.health_check.get_health_check()
235+
self.assertEqual(result.checks["manager"][0].status, HealthCheckStatus.FAIL)
236+
self.assertEqual(
237+
result.checks["manager"][0].output,
238+
"No miners submitted a job in the last 1 hour",
239+
)
240+
self.assertEqual(result.checks["fullnode"][0].status, HealthCheckStatus.PASS)
241+
self.assertEqual(
242+
result.checks["fullnode"][0].output, "Fullnode is responding correctly"
243+
)
244+
self.assertEqual(result.status, HealthCheckStatus.FAIL)

txstratum/api.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515

1616
import txstratum.time
1717
from txstratum.exceptions import JobAlreadyExists, NewJobRefused
18+
from txstratum.healthcheck.healthcheck import HealthCheck
1819
from txstratum.jobs import JobStatus, TxJob
1920
from txstratum.middleware import create_middleware_version_check
2021

@@ -45,6 +46,7 @@ class App:
4546
def __init__(
4647
self,
4748
manager: "TxMiningManager",
49+
health_check: "HealthCheck",
4850
*,
4951
max_tx_weight: Optional[float] = None,
5052
max_timestamp_delta: Optional[int] = None,
@@ -61,6 +63,7 @@ def __init__(
6163
super().__init__()
6264
self.log = logger.new()
6365
self.manager = manager
66+
self.health_check_manager = health_check
6467
self.max_tx_weight: float = max_tx_weight or MAX_TX_WEIGHT
6568
self.max_output_script_size = max_output_script_size or MAX_OUTPUT_SCRIPT_SIZE
6669
self.max_timestamp_delta: float = max_timestamp_delta or MAX_TIMESTAMP_DELTA
@@ -77,13 +80,22 @@ def __init__(
7780
]
7881
)
7982
self.app.router.add_get("/health-check", self.health_check)
83+
self.app.router.add_get("/health", self.health)
8084
self.app.router.add_get("/mining-status", self.mining_status)
8185
self.app.router.add_get("/job-status", self.job_status)
8286
self.app.router.add_post("/submit-job", self.submit_job)
8387
self.app.router.add_post("/cancel-job", self.cancel_job)
8488

8589
self.fix_invalid_timestamp: bool = fix_invalid_timestamp
8690

91+
async def health(self, request: web.Request) -> web.Response:
92+
"""Return the health check status for the tx-mining-service."""
93+
health_check_result = await self.health_check_manager.get_health_check()
94+
http_status = health_check_result.get_http_status_code()
95+
96+
return web.json_response(health_check_result.to_json(), status=http_status)
97+
98+
# XXX: DEPRECATED. Use /health instead.
8799
async def health_check(self, request: web.Request) -> web.Response:
88100
"""Return that the service is running."""
89101
return web.json_response({"success": True})

txstratum/cli.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717

1818
from txstratum.api import App
1919
from txstratum.filters import FileFilter, TOIFilter, TXFilter
20+
from txstratum.healthcheck.healthcheck import HealthCheck
2021
from txstratum.manager import TxMiningManager
2122
from txstratum.pubsub import PubSubManager
2223
from txstratum.toi_client import TOIAsyncClient
@@ -151,6 +152,7 @@ class RunService:
151152
manager: TxMiningManager
152153
loop: AbstractEventLoop
153154
tx_filters: List[TXFilter]
155+
health_check: HealthCheck
154156

155157
def __init__(self, args: Namespace) -> None:
156158
"""Initialize the service."""
@@ -167,6 +169,7 @@ def __init__(self, args: Namespace) -> None:
167169
pubsub=self.pubsub,
168170
address=args.address,
169171
)
172+
self.health_check = HealthCheck(self.manager, self.backend)
170173

171174
def configure_logging(self, args: Namespace) -> None:
172175
"""Configure logging."""
@@ -253,6 +256,7 @@ def execute(self) -> None:
253256

254257
api_app = App(
255258
self.manager,
259+
self.health_check,
256260
max_tx_weight=self.args.max_tx_weight,
257261
max_timestamp_delta=self.args.max_timestamp_delta,
258262
tx_timeout=self.args.tx_timeout,

0 commit comments

Comments
 (0)