Skip to content

Commit 19eca04

Browse files
authored
Merge pull request #379 from yungwine/bot
Alerting via Telegram Bot
2 parents f8aa2f3 + bf84a53 commit 19eca04

File tree

9 files changed

+382
-46
lines changed

9 files changed

+382
-46
lines changed

modules/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,16 @@
88
from modules.validator import ValidatorModule
99
from modules.controller import ControllerModule
1010
from modules.liteserver import LiteserverModule
11+
from modules.alert_bot import AlertBotModule
1112

1213

1314
MODES = {
1415
'validator': ValidatorModule,
1516
'nominator-pool': NominatorPoolModule,
1617
'single-nominator': SingleNominatorModule,
1718
'liquid-staking': ControllerModule,
18-
'liteserver': LiteserverModule
19+
'liteserver': LiteserverModule,
20+
'alert-bot': AlertBotModule
1921
}
2022

2123

@@ -55,6 +57,8 @@ class Setting:
5557
'defaultCustomOverlaysUrl': Setting(None, 'https://ton-blockchain.github.io/fallback_custom_overlays.json', 'Default custom overlays config url'),
5658
'debug': Setting(None, False, 'Debug mtc console mode. Prints Traceback on errors'),
5759
'subscribe_tg_channel': Setting('validator', False, 'Disables warning about subscribing to the `TON STATUS` channel'),
60+
'BotToken': Setting('alert-bot', None, 'Alerting Telegram bot token'),
61+
'ChatId': Setting('alert-bot', None, 'Alerting Telegram chat id')
5862
}
5963

6064

modules/alert_bot.py

Lines changed: 274 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,274 @@
1+
import dataclasses
2+
import time
3+
import requests
4+
5+
from modules.module import MtcModule
6+
from mypylib.mypylib import get_timestamp, print_table, color_print
7+
from mytoncore import get_hostname
8+
from mytonctrl.utils import timestamp2utcdatetime
9+
10+
11+
@dataclasses.dataclass
class Alert:
    """Static description of one alert kind stored in ``ALERTS``."""

    # Severity label shown verbatim in the Telegram message
    # ("low", "high" and "critical" are the values used in ALERTS).
    severity: str
    # Message template; placeholders are filled with ``str.format`` when sending.
    text: str
    # Minimum number of seconds between two sends of the same alert.
    timeout: int
16+
17+
18+
HOUR = 3600  # seconds

# Default durations in seconds. `set_global_vars` rebinds both names from
# config15 ("validatorsElectedFor" / "stakeHeldFor") for the current network.
VALIDATION_PERIOD = 65536
FREEZE_PERIOD = 32768


# Registry of every alert the bot can send, keyed by alert name.
# Timeouts are evaluated when this dict is built, i.e. with the default
# values of the constants above.
ALERTS = {
    "low_wallet_balance": Alert(
        severity="low",
        text="Validator wallet {wallet} balance is low: {balance} TON.",
        timeout=18*HOUR,
    ),
    "db_usage_80": Alert(
        severity="high",
        text="""TON DB usage > 80%. Clean the TON database:
https://docs.ton.org/participate/nodes/node-maintenance-and-security#database-grooming
or (and) set node\'s archive ttl to lower value.""",
        timeout=24*HOUR,
    ),
    "db_usage_95": Alert(
        severity="critical",
        text="""TON DB usage > 95%. Disk is almost full, clean the TON database immediately:
https://docs.ton.org/participate/nodes/node-maintenance-and-security#database-grooming
or (and) set node\'s archive ttl to lower value.""",
        timeout=6*HOUR,
    ),
    "low_efficiency": Alert(
        severity="high",
        text="""Validator efficiency is low: {efficiency}%.""",
        timeout=VALIDATION_PERIOD // 3,
    ),
    "out_of_sync": Alert(
        severity="critical",
        text="Node is out of sync on {sync} sec.",
        timeout=0,  # no throttling: sent on every failed check
    ),
    "service_down": Alert(
        severity="critical",
        text="validator.service is down.",
        timeout=0,  # no throttling: sent on every failed check
    ),
    "adnl_connection_failed": Alert(
        severity="high",
        text="ADNL connection to node failed",
        timeout=3*HOUR,
    ),
    "zero_block_created": Alert(
        severity="critical",
        text="Validator has not created any blocks in the last {hours} hours.",
        timeout=VALIDATION_PERIOD // 3,
    ),
    "validator_slashed": Alert(
        severity="high",
        text="Validator has been slashed in previous round for {amount} TON",
        timeout=FREEZE_PERIOD,
    ),
}
74+
75+
76+
class AlertBotModule(MtcModule):
77+
78+
description = 'Telegram bot alerts'
79+
default_value = False
80+
81+
def __init__(self, ton, local, *args, **kwargs):
    """Set up the module and read the bot credentials from the local DB.

    ``BotToken`` and ``ChatId`` are looked up with ``db.get`` and are left
    as ``None`` when absent; the remaining state is filled in by ``init``.
    """
    super().__init__(ton, local, *args, **kwargs)
    settings = self.ton.local.db
    self.validator_module = None
    self.inited = False
    self.hostname = None
    self.token = settings.get("BotToken")
    self.chat_id = settings.get("ChatId")
88+
89+
def send_message(self, text: str):
    """Post ``text`` to the configured chat through the Telegram ``sendMessage`` endpoint.

    The message is sent with ``parse_mode`` set to ``HTML`` and a 3 second
    request timeout.

    Raises:
        Exception: if the token or chat id is missing, the HTTP status is
            not 200, or the JSON reply has a falsy ``ok`` field.
    """
    if self.token is None:
        raise Exception("send_message error: token is not initialized")
    if self.chat_id is None:
        raise Exception("send_message error: chat_id is not initialized")

    endpoint = f"https://api.telegram.org/bot{self.token}/sendMessage"
    payload = {'chat_id': self.chat_id, 'text': text, 'parse_mode': 'HTML'}
    http_response = requests.post(endpoint, data=payload, timeout=3)
    if http_response.status_code != 200:
        raise Exception(f"send_message error: {http_response.text}")

    result = http_response.json()
    if not result['ok']:
        raise Exception(f"send_message error: {result}")
102+
103+
def send_alert(self, alert_name: str, *args, **kwargs):
    """Send the alert ``alert_name`` unless it is disabled or throttled.

    Args:
        alert_name: key of the alert in ``ALERTS``.
        *args, **kwargs: values for the placeholders of the alert's text
            template (passed to ``str.format``).

    Raises:
        Exception: if ``alert_name`` is not a key of ``ALERTS``.
    """
    # Validate the name first so that an unknown alert does not get an
    # entry created for it in the alerts DB before the error is raised.
    alert = ALERTS.get(alert_name)
    if alert is None:
        raise Exception(f"Alert {alert_name} not found")
    if not self.alert_is_enabled(alert_name):
        return

    # Read the clock once so the throttling check and both timestamps
    # shown in the message agree with each other.
    now = time.time()
    if not now - self.get_alert_sent(alert_name) > alert.timeout:
        return  # the same alert was sent less than `timeout` seconds ago

    timestamp = int(now)
    time_ = timestamp2utcdatetime(timestamp)
    text = f'''
❗️ <b>MyTonCtrl Alert {alert_name}</b> ❗️

Hostname: <code>{self.hostname}</code>
Time: <code>{time_}</code> (<code>{timestamp}</code>)
Severity: <code>{alert.severity}</code>

Alert text:
<blockquote> {alert.text.format(*args, **kwargs)} </blockquote>
'''
    self.send_message(text)
    self.set_alert_sent(alert_name)
124+
125+
def set_global_vars(self):
    """Load the current network's round durations and apply them to the alert timeouts.

    Rebinds the module-level ``VALIDATION_PERIOD`` and ``FREEZE_PERIOD``
    from config15 (``validatorsElectedFor`` / ``stakeHeldFor``).
    """
    # set global vars for correct alerts timeouts for current network
    global VALIDATION_PERIOD, FREEZE_PERIOD
    config15 = self.ton.GetConfig15()
    VALIDATION_PERIOD = config15["validatorsElectedFor"]
    FREEZE_PERIOD = config15["stakeHeldFor"]

    # ALERTS is built at import time from the default constants, so
    # rebinding the globals alone leaves the already computed timeouts
    # untouched. Refresh the entries that are derived from them.
    ALERTS["low_efficiency"].timeout = VALIDATION_PERIOD // 3
    ALERTS["zero_block_created"].timeout = VALIDATION_PERIOD // 3
    ALERTS["validator_slashed"].timeout = FREEZE_PERIOD
131+
132+
def init(self):
    """Finish initialisation; does nothing while the 'alert-bot' mode is off.

    Raises:
        Exception: if the mode is on but ``BotToken`` or ``ChatId`` is unset.
    """
    mode_enabled = self.ton.get_mode_value('alert-bot')
    if not mode_enabled:
        return

    credentials_missing = self.token is None or self.chat_id is None
    if credentials_missing:
        raise Exception("BotToken or ChatId is not set")

    # Imported here rather than at module level
    # (presumably to avoid a circular import - TODO confirm).
    from modules.validator import ValidatorModule
    self.validator_module = ValidatorModule(self.ton, self.local)
    self.hostname = get_hostname()
    self.set_global_vars()
    self.inited = True
142+
143+
def get_alert_from_db(self, alert_name: str):
    """Return the stored state of ``alert_name``, creating it on first access.

    A new entry starts as ``{'sent': 0, 'enabled': True}``. The missing
    ``'alerts'`` container is created in the local DB as well.
    """
    db = self.ton.local.db
    if 'alerts' not in db:
        db['alerts'] = {}
    alerts = db['alerts']
    if alert_name not in alerts:
        alerts[alert_name] = {'sent': 0, 'enabled': True}
    return alerts[alert_name]
149+
150+
def set_alert_sent(self, alert_name: str):
    """Record the current Unix time (whole seconds) as the last send time of ``alert_name``."""
    state = self.get_alert_from_db(alert_name)
    now = int(time.time())
    state['sent'] = now
153+
154+
def get_alert_sent(self, alert_name: str):
    """Return the stored last send time of ``alert_name`` (0 when none is stored)."""
    return self.get_alert_from_db(alert_name).get('sent', 0)
157+
158+
def alert_is_enabled(self, alert_name: str):
    """Return the stored ``enabled`` flag of ``alert_name``."""
    # default is True
    return self.get_alert_from_db(alert_name).get('enabled', True)
161+
162+
def set_alert_enabled(self, alert_name: str, enabled: bool):
    """Store the ``enabled`` flag of ``alert_name`` and save the local DB."""
    self.get_alert_from_db(alert_name)['enabled'] = enabled
    self.ton.local.save()
166+
167+
def enable_alert(self, args):
    """Console command: enable the alert named by the single item in ``args``.

    Raises:
        Exception: with a usage string unless exactly one argument is given.
    """
    if len(args) != 1:
        raise Exception("Usage: enable_alert <alert_name>")
    (alert_name,) = args
    self.set_alert_enabled(alert_name, True)
    color_print("enable_alert - {green}OK{endc}")
173+
174+
def disable_alert(self, args):
    """Console command: disable the alert named by the single item in ``args``.

    Raises:
        Exception: with a usage string unless exactly one argument is given.
    """
    if len(args) != 1:
        raise Exception("Usage: disable_alert <alert_name>")
    (alert_name,) = args
    self.set_alert_enabled(alert_name, False)
    color_print("disable_alert - {green}OK{endc}")
180+
181+
def print_alerts(self, args):
    """Console command: print name, enabled flag and last send time of every alert in ``ALERTS``.

    ``args`` is accepted for the console callback signature and not used.
    """
    rows = []
    for name in ALERTS:
        state = self.get_alert_from_db(name)
        rows.append([name, state['enabled'], state['sent']])
    print_table([['Name', 'Enabled', 'Last sent'], *rows])
187+
188+
def test_alert(self, args):
    """Console command: send a fixed test message to the configured chat.

    ``args`` is accepted for the console callback signature and not used.
    """
    self.send_message('Test alert')
190+
191+
def check_db_usage(self):
    """Alert when the DB usage reported by ``GetDbUsage`` exceeds 80 or 95."""
    usage = self.ton.GetDbUsage()
    # The stricter threshold wins; at most one of the two alerts is sent.
    if usage > 95:
        alert_name = "db_usage_95"
    elif usage > 80:
        alert_name = "db_usage_80"
    else:
        return
    self.send_alert(alert_name)
197+
198+
def check_validator_wallet_balance(self):
    """Alert when the validator wallet balance is below 10 (validator mode only)."""
    if not self.ton.using_validator():
        return
    wallet = self.ton.GetValidatorWallet()
    account = self.ton.GetAccount(wallet.addrB64)
    if account.balance < 10:
        self.send_alert(
            "low_wallet_balance",
            wallet=wallet.addrB64,
            balance=account.balance,
        )
205+
206+
def check_efficiency(self):
    """Alert when this validator's efficiency is below 90 late in the round.

    Skipped when the node is not in validator mode, when the validator is
    not found in the list, when its ``is_masterchain`` is ``False``, when
    no efficiency value is available, or while less than 80% of the
    current round has passed.
    """
    if not self.ton.using_validator():
        return

    me = self.validator_module.find_myself(self.ton.GetValidatorsList(fast=True))
    if me is None:
        return
    if me.is_masterchain is False or me.efficiency is None:
        return

    round_cfg = self.ton.GetConfig34()
    elapsed = time.time() - round_cfg.startWorkTime
    round_length = round_cfg.endWorkTime - round_cfg.startWorkTime
    if elapsed / round_length < 0.8:
        return  # less than 80% of round passed

    if me.efficiency < 90:
        self.send_alert("low_efficiency", efficiency=me.efficiency)
217+
218+
def check_validator_working(self):
    """Send ``service_down`` when the validator status reports it is not working."""
    status = self.ton.GetValidatorStatus()
    if status.is_working:
        return
    self.send_alert("service_down")
222+
223+
def check_sync(self):
    """Send ``out_of_sync`` when a working validator reports ``out_of_sync`` of 20 or more."""
    status = self.ton.GetValidatorStatus()
    if not status.is_working:
        return  # a stopped validator is reported by check_validator_working
    if status.out_of_sync >= 20:
        self.send_alert("out_of_sync", sync=status.out_of_sync)
227+
228+
def check_zero_blocks_created(self):
    """Alert when this validator created no blocks during the last third of a validation period.

    Skipped when the node is not in validator mode, when the inspected
    window would start before the current round did, or when the validator
    is not found in the list for that window.
    """
    if not self.ton.using_validator():
        return
    ts = get_timestamp()
    period = VALIDATION_PERIOD // 3  # 6h for mainnet, 40m for testnet
    start, end = ts - period, ts - 60
    config34 = self.ton.GetConfig34()
    if start < config34.startWorkTime:  # round started recently
        return
    validators = self.ton.GetValidatorsList(start=start, end=end)
    validator = self.validator_module.find_myself(validators)
    if validator is None or validator.blocks_created > 0:
        return
    # True division: floor-dividing first would turn a 40 minute window
    # into "0 hours" in the message.
    self.send_alert("zero_block_created", hours=round(period / 3600, 1))
242+
243+
def check_slashed(self):
    """Send ``validator_slashed`` when ``get_my_complaint`` returns a complaint (validator mode only)."""
    if not self.ton.using_validator():
        return
    complaint = self.validator_module.get_my_complaint()
    if complaint is None:
        return
    fine = int(complaint['suggestedFine'])
    self.send_alert("validator_slashed", amount=fine)
249+
250+
def check_adnl_connection_failed(self):
    """Send ``adnl_connection_failed`` when the ADNL connection check does not succeed."""
    # Imported here rather than at module level
    # (presumably to avoid a circular import - TODO confirm).
    from modules.utilities import UtilitiesModule
    checker = UtilitiesModule(self.ton, self.local)
    ok, _error = checker.check_adnl_connection()  # the error text is not used here
    if ok:
        return
    self.send_alert("adnl_connection_failed")
256+
257+
def check_status(self):
    """Run every health check once, initialising the module first if needed.

    Each check goes through ``self.local.try_function`` (presumably so a
    failing check does not stop the others - TODO confirm).
    """
    if not self.inited:
        self.init()

    checks = (
        self.check_db_usage,
        self.check_validator_wallet_balance,
        self.check_efficiency,  # todo: alert if validator is going to be slashed
        self.check_validator_working,
        self.check_zero_blocks_created,
        self.check_sync,
        self.check_slashed,
        self.check_adnl_connection_failed,
    )
    for check in checks:
        self.local.try_function(check)
269+
270+
def add_console_commands(self, console):
    """Register the alert management commands on ``console``."""
    # (command name, handler, translation key of the description)
    commands = (
        ("enable_alert", self.enable_alert, "enable_alert_cmd"),
        ("disable_alert", self.disable_alert, "disable_alert_cmd"),
        ("list_alerts", self.print_alerts, "list_alerts_cmd"),
        ("test_alert", self.test_alert, "test_alert_cmd"),
    )
    for name, handler, description_key in commands:
        console.AddItem(name, handler, self.local.translate(description_key))

modules/utilities.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
import base64
21
import json
3-
import os
2+
import random
43
import subprocess
54
import time
65

6+
import requests
7+
78
from mypylib.mypylib import color_print, print_table, color_text, timeago, bcolors
89
from modules.module import MtcModule
910

@@ -335,6 +336,35 @@ def print_validator_list(self, args):
335336
print_table(table)
336337
# end define
337338

339+
def check_adnl_connection(self):
    """Ask remote checker hosts whether the local node is reachable over ADNL.

    The check is controlled by the ``checkAdnl`` DB setting, which defaults
    to the value of ``sendTelemetry``; when it is off the check is skipped
    and reported as successful. Otherwise the local ADNL data is posted to
    three randomly chosen hosts, stopping at the first one that answers
    with a truthy ``ok`` field.

    Returns:
        ``(ok, error)``. ``error`` is an empty string on success, otherwise
        the message of the last failed attempt wrapped in ``{red}`` /
        ``{endc}`` placeholders (presumably meant for ``color_print`` -
        TODO confirm).
    """
    telemetry = self.ton.local.db.get("sendTelemetry", False)
    check_adnl = self.ton.local.db.get("checkAdnl", telemetry)
    if not check_adnl:
        return True, ''
    self.local.add_log('Checking ADNL connection to local node', 'info')
    hosts = ['45.129.96.53', '5.154.181.153', '2.56.126.137', '91.194.11.68', '45.12.134.214', '138.124.184.27',
             '103.106.3.171']
    hosts = random.sample(hosts, k=3)
    data = self.ton.get_local_adnl_data()
    error = ''
    for host in hosts:
        url = f'http://{host}/adnl_check'
        try:
            response = requests.post(url, json=data, timeout=5).json()
        except Exception as e:
            # Best effort: an unreachable or misbehaving checker host must
            # not abort the whole check, so try the next one.
            error = f'{{red}}Failed to check ADNL connection to local node: {type(e)}: {e}{{endc}}'
            continue
        if response.get("ok"):
            # One positive answer is enough; do not report the errors of
            # hosts that failed earlier.
            return True, ''
        error = f'{{red}}Failed to check ADNL connection to local node: {response.get("message")}{{endc}}'
    return False, error
367+
338368
def get_pool_data(self, args):
339369
try:
340370
pool_name = args[0]

modules/validator.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,17 @@ def check_efficiency(self, args):
9191
print("Couldn't find this validator in the current round")
9292
# end define
9393

94+
def get_my_complaint(self):
    """Return the passed complaint filed against this node, or ``None``.

    Looks up the saved complaints stored under the ``startWorkTime`` of
    config32 and returns the first one whose ``adnl`` equals this node's
    ADNL address and whose ``isPassed`` is truthy.
    """
    config32 = self.ton.GetConfig32()
    save_complaints = self.ton.GetSaveComplaints()
    complaints = save_complaints.get(str(config32['startWorkTime']))
    if not complaints:
        return None
    # The node's own address does not depend on the complaint, so fetch it
    # once instead of once per loop iteration.
    my_adnl = self.ton.GetAdnlAddr()
    for complaint in complaints.values():
        if complaint["adnl"] == my_adnl and complaint["isPassed"]:
            return complaint
    return None
# end define
104+
94105
def add_console_commands(self, console):
95106
console.AddItem("vo", self.vote_offer, self.local.translate("vo_cmd"))
96107
console.AddItem("ve", self.vote_election_entry, self.local.translate("ve_cmd"))

mytoncore/functions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,10 @@ def General(local):
569569
from modules.custom_overlays import CustomOverlayModule
570570
local.start_cycle(CustomOverlayModule(ton, local).custom_overlays, sec=60, args=())
571571

572+
if ton.get_mode_value('alert-bot'):
573+
from modules.alert_bot import AlertBotModule
574+
local.start_cycle(AlertBotModule(ton, local).check_status, sec=1000, args=())
575+
572576
thr_sleep()
573577
# end define
574578

0 commit comments

Comments
 (0)