Skip to content

Commit e0ead70

Browse files
authored
Merge pull request #382 from ton-blockchain/dev
merge dev into master
2 parents a467af5 + 2777801 commit e0ead70

File tree

16 files changed

+697
-171
lines changed

16 files changed

+697
-171
lines changed

modules/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,16 @@
88
from modules.validator import ValidatorModule
99
from modules.controller import ControllerModule
1010
from modules.liteserver import LiteserverModule
11+
from modules.alert_bot import AlertBotModule
1112

1213

1314
MODES = {
1415
'validator': ValidatorModule,
1516
'nominator-pool': NominatorPoolModule,
1617
'single-nominator': SingleNominatorModule,
1718
'liquid-staking': ControllerModule,
18-
'liteserver': LiteserverModule
19+
'liteserver': LiteserverModule,
20+
'alert-bot': AlertBotModule
1921
}
2022

2123

@@ -55,6 +57,8 @@ class Setting:
5557
'defaultCustomOverlaysUrl': Setting(None, 'https://ton-blockchain.github.io/fallback_custom_overlays.json', 'Default custom overlays config url'),
5658
'debug': Setting(None, False, 'Debug mtc console mode. Prints Traceback on errors'),
5759
'subscribe_tg_channel': Setting('validator', False, 'Disables warning about subscribing to the `TON STATUS` channel'),
60+
'BotToken': Setting('alert-bot', None, 'Alerting Telegram bot token'),
61+
'ChatId': Setting('alert-bot', None, 'Alerting Telegram chat id')
5862
}
5963

6064

modules/alert_bot.py

Lines changed: 285 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,285 @@
1+
import dataclasses
2+
import time
3+
import requests
4+
5+
from modules.module import MtcModule
6+
from mypylib.mypylib import get_timestamp, print_table, color_print
7+
from mytoncore import get_hostname
8+
from mytonctrl.utils import timestamp2utcdatetime
9+
10+
11+
@dataclasses.dataclass()
class Alert:
    """Static description of one kind of alert.

    Instances are deliberately left mutable (not frozen) so that a field
    can be reassigned after construction.
    """

    # Severity label that is shown verbatim in the alert message.
    severity: str
    # Message template; rendered with ``str.format`` when the alert is sent.
    text: str
    # Minimum number of seconds between two sends of the same alert.
    timeout: int
16+
17+
18+
# One hour, in seconds.
HOUR = 3600
# Module-level defaults. AlertBotModule.set_global_vars() rebinds both names
# from the network's config15 once the module is initialised.
VALIDATION_PERIOD = 65536
FREEZE_PERIOD = 32768


# Registry of every alert the bot can send, keyed by alert name.
# NOTE: the timeout expressions below are evaluated once, when this module
# is imported, using the values the constants above have at that moment.
ALERTS = {
    "low_wallet_balance": Alert(
        severity="low",
        text="Validator wallet {wallet} balance is low: {balance} TON.",
        timeout=18 * HOUR,
    ),
    "db_usage_80": Alert(
        severity="high",
        text="""TON DB usage > 80%. Clean the TON database:
https://docs.ton.org/participate/nodes/node-maintenance-and-security#database-grooming
or (and) set node\'s archive ttl to lower value.""",
        timeout=24 * HOUR,
    ),
    "db_usage_95": Alert(
        severity="critical",
        text="""TON DB usage > 95%. Disk is almost full, clean the TON database immediately:
https://docs.ton.org/participate/nodes/node-maintenance-and-security#database-grooming
or (and) set node\'s archive ttl to lower value.""",
        timeout=6 * HOUR,
    ),
    "low_efficiency": Alert(
        severity="high",
        text="""Validator efficiency is low: {efficiency}%.""",
        timeout=VALIDATION_PERIOD // 3,
    ),
    "out_of_sync": Alert(
        severity="critical",
        text="Node is out of sync on {sync} sec.",
        timeout=0,
    ),
    "service_down": Alert(
        severity="critical",
        text="validator.service is down.",
        timeout=0,
    ),
    "adnl_connection_failed": Alert(
        severity="high",
        text="ADNL connection to node failed",
        timeout=3 * HOUR,
    ),
    "zero_block_created": Alert(
        severity="critical",
        text="Validator has not created any blocks in the last {hours} hours.",
        timeout=VALIDATION_PERIOD // 3,
    ),
    "validator_slashed": Alert(
        severity="high",
        text="Validator has been slashed in previous round for {amount} TON",
        timeout=FREEZE_PERIOD,
    ),
}
74+
75+
76+
class AlertBotModule(MtcModule):
    """Mode module that reports node/validator problems to a Telegram chat.

    Messages are posted through the Telegram Bot API ``sendMessage`` method
    using the ``BotToken`` and ``ChatId`` values stored in the local db.
    Per-alert state (last send time, enabled flag) is kept under the
    ``alerts`` key of the local db.
    """

    description = 'Telegram bot alerts'
    default_value = False

    def __init__(self, ton, local, *args, **kwargs):
        super().__init__(ton, local, *args, **kwargs)
        self.validator_module = None
        self.inited = False
        self.hostname = None
        self.token = None
        self.chat_id = None
        # Timestamp of the last DB usage check; see check_db_usage().
        self.last_db_check = 0

    def send_message(self, text: str):
        """Post ``text`` (HTML parse mode) to the configured Telegram chat.

        Raises:
            Exception: if the token or chat id is not initialized, if the
                HTTP status is not 200, or if the API reply has a falsy
                ``ok`` field.
        """
        if self.token is None:
            raise Exception("send_message error: token is not initialized")
        if self.chat_id is None:
            raise Exception("send_message error: chat_id is not initialized")
        request_url = f"https://api.telegram.org/bot{self.token}/sendMessage"
        data = {'chat_id': self.chat_id, 'text': text, 'parse_mode': 'HTML'}
        response = requests.post(request_url, data=data, timeout=3)
        if response.status_code != 200:
            raise Exception(f"send_message error: {response.text}")
        response = response.json()
        if not response['ok']:
            raise Exception(f"send_message error: {response}")

    def send_alert(self, alert_name: str, *args, **kwargs):
        """Send the alert ``alert_name`` unless it is disabled or rate-limited.

        ``args``/``kwargs`` are substituted into the alert's text template.
        The alert is skipped when it is disabled in the db or when the time
        elapsed since its last send does not exceed its timeout.

        Raises:
            Exception: if ``alert_name`` is not a key of ``ALERTS``.
        """
        # Validate the name first so that an unknown alert never creates a
        # state entry in the db.
        alert = ALERTS.get(alert_name)
        if alert is None:
            raise Exception(f"Alert {alert_name} not found")
        if not self.alert_is_enabled(alert_name):
            return
        last_sent = self.get_alert_sent(alert_name)
        if time.time() - last_sent <= alert.timeout:
            return  # still inside the cooldown window for this alert
        # Read the clock once so both renderings of the time agree.
        now = int(time.time())
        time_ = timestamp2utcdatetime(now)
        text = f'''
❗️ <b>MyTonCtrl Alert {alert_name}</b> ❗️

Hostname: <code>{self.hostname}</code>
Time: <code>{time_}</code> (<code>{now}</code>)
Severity: <code>{alert.severity}</code>

Alert text:
<blockquote> {alert.text.format(*args, **kwargs)} </blockquote>
'''
        self.send_message(text)
        self.set_alert_sent(alert_name)

    def set_global_vars(self):
        """Load the network's round timings and apply them to alert timeouts.

        Rebinds the module-level ``VALIDATION_PERIOD`` and ``FREEZE_PERIOD``
        from config15 and refreshes the timeouts of the alerts derived from
        them.
        """
        global VALIDATION_PERIOD, FREEZE_PERIOD
        config15 = self.ton.GetConfig15()
        VALIDATION_PERIOD = config15["validatorsElectedFor"]
        FREEZE_PERIOD = config15["stakeHeldFor"]
        # ALERTS is built when the module is imported, so its timeouts were
        # computed from the default constants. Rebinding the globals alone
        # does not change them; update the affected entries explicitly.
        ALERTS["low_efficiency"].timeout = VALIDATION_PERIOD // 3
        ALERTS["zero_block_created"].timeout = VALIDATION_PERIOD // 3
        ALERTS["validator_slashed"].timeout = FREEZE_PERIOD

    def init(self):
        """Load credentials and helpers; does nothing if the mode is off.

        Raises:
            Exception: if ``BotToken`` or ``ChatId`` is missing from the db.
        """
        if not self.ton.get_mode_value('alert-bot'):
            return
        self.token = self.ton.local.db.get("BotToken")
        self.chat_id = self.ton.local.db.get("ChatId")
        if self.token is None or self.chat_id is None:
            raise Exception("BotToken or ChatId is not set")
        # Imported locally, as in the original code (presumably to avoid a
        # circular import at module load time - TODO confirm).
        from modules.validator import ValidatorModule
        self.validator_module = ValidatorModule(self.ton, self.local)
        self.hostname = get_hostname()
        self.set_global_vars()
        self.inited = True

    def get_alert_from_db(self, alert_name: str):
        """Return the state dict of ``alert_name``, creating it if absent."""
        if 'alerts' not in self.ton.local.db:
            self.ton.local.db['alerts'] = {}
        if alert_name not in self.ton.local.db['alerts']:
            self.ton.local.db['alerts'][alert_name] = {'sent': 0, 'enabled': True}
        return self.ton.local.db['alerts'][alert_name]

    def set_alert_sent(self, alert_name: str):
        """Record the current time as the last send time of the alert."""
        alert = self.get_alert_from_db(alert_name)
        alert['sent'] = int(time.time())

    def get_alert_sent(self, alert_name: str):
        """Return the last send timestamp of the alert (0 if never sent)."""
        alert = self.get_alert_from_db(alert_name)
        return alert.get('sent', 0)

    def alert_is_enabled(self, alert_name: str):
        """Return whether the alert is enabled; alerts default to enabled."""
        alert = self.get_alert_from_db(alert_name)
        return alert.get('enabled', True)  # default is True

    def set_alert_enabled(self, alert_name: str, enabled: bool):
        """Store the enabled flag of the alert and save the local db."""
        alert = self.get_alert_from_db(alert_name)
        alert['enabled'] = enabled
        self.ton.local.save()

    def enable_alert(self, args):
        """Console command: ``enable_alert <alert_name>``.

        Raises:
            Exception: on a wrong argument count or an unknown alert name.
        """
        if len(args) != 1:
            raise Exception("Usage: enable_alert <alert_name>")
        alert_name = args[0]
        # Reject typos instead of silently creating a db entry for them.
        if alert_name not in ALERTS:
            raise Exception(f"Alert {alert_name} not found")
        self.set_alert_enabled(alert_name, True)
        color_print("enable_alert - {green}OK{endc}")

    def disable_alert(self, args):
        """Console command: ``disable_alert <alert_name>``.

        Raises:
            Exception: on a wrong argument count or an unknown alert name.
        """
        if len(args) != 1:
            raise Exception("Usage: disable_alert <alert_name>")
        alert_name = args[0]
        # Reject typos instead of silently creating a db entry for them.
        if alert_name not in ALERTS:
            raise Exception(f"Alert {alert_name} not found")
        self.set_alert_enabled(alert_name, False)
        color_print("disable_alert - {green}OK{endc}")

    def print_alerts(self, args):
        """Console command: print every known alert with its stored state."""
        table = [['Name', 'Enabled', 'Last sent']]
        for alert_name in ALERTS:
            alert = self.get_alert_from_db(alert_name)
            # Same defaults as alert_is_enabled() / get_alert_sent(), so an
            # entry that lacks a key does not break the listing.
            table.append([alert_name, alert.get('enabled', True), alert.get('sent', 0)])
        print_table(table)

    def test_alert(self, args):
        """Console command: send a fixed test message to the chat."""
        self.send_message('Test alert')

    def check_db_usage(self):
        """Alert when DB usage exceeds 80% or 95%; runs at most every 600 s."""
        if time.time() - self.last_db_check < 600:
            return
        self.last_db_check = time.time()
        usage = self.ton.GetDbUsage()
        if usage > 95:
            self.send_alert("db_usage_95")
        elif usage > 80:
            self.send_alert("db_usage_80")

    def check_validator_wallet_balance(self):
        """Alert when the validator wallet balance is below 10."""
        if not self.ton.using_validator():
            return
        validator_wallet = self.ton.GetValidatorWallet()
        validator_account = self.ton.GetAccount(validator_wallet.addrB64)
        if validator_account.balance < 10:
            self.send_alert("low_wallet_balance", wallet=validator_wallet.addrB64, balance=validator_account.balance)

    def check_efficiency(self):
        """Alert on low validator efficiency late in the current round."""
        if not self.ton.using_validator():
            return
        validator = self.validator_module.find_myself(self.ton.GetValidatorsList())
        if validator is None or validator.efficiency is None:
            return
        config34 = self.ton.GetConfig34()
        round_length = config34.endWorkTime - config34.startWorkTime
        if (time.time() - config34.startWorkTime) / round_length < 0.8:
            return  # less than 80% of round passed
        # Validators flagged as not masterchain are only reported when their
        # efficiency is exactly zero.
        if validator.is_masterchain is False and validator.efficiency != 0:
            return
        if validator.efficiency < 90:
            self.send_alert("low_efficiency", efficiency=validator.efficiency)

    def check_validator_working(self):
        """Alert when the validator status reports it is not working."""
        validator_status = self.ton.GetValidatorStatus()
        if not validator_status.is_working:
            self.send_alert("service_down")

    def check_sync(self):
        """Alert when a working node is out of sync by 20 or more."""
        validator_status = self.ton.GetValidatorStatus()
        if validator_status.is_working and validator_status.out_of_sync >= 20:
            self.send_alert("out_of_sync", sync=validator_status.out_of_sync)

    def check_zero_blocks_created(self):
        """Alert when no blocks were created during the last third of a period."""
        if not self.ton.using_validator():
            return
        ts = get_timestamp()
        period = VALIDATION_PERIOD // 3  # 6h for mainnet, 40m for testnet
        start, end = ts - period, ts - 60
        config34 = self.ton.GetConfig34()
        if start < config34.startWorkTime:  # round started recently
            return
        validators = self.ton.GetValidatorsList(start=start, end=end)
        validator = self.validator_module.find_myself(validators)
        if validator is None or validator.blocks_created > 0:
            return
        # True division: floor division reported "0 hours" for periods
        # shorter than one hour and dropped the fractional part otherwise.
        self.send_alert("zero_block_created", hours=round(period / 3600, 1))

    def check_slashed(self):
        """Alert with the suggested fine when a complaint against us exists."""
        if not self.ton.using_validator():
            return
        c = self.validator_module.get_my_complaint()
        if c is not None:
            self.send_alert("validator_slashed", amount=int(c['suggestedFine']))

    def check_adnl_connection_failed(self):
        """Alert when the ADNL connection check reports a failure."""
        from modules.utilities import UtilitiesModule
        utils_module = UtilitiesModule(self.ton, self.local)
        ok, _ = utils_module.check_adnl_connection()
        if not ok:
            self.send_alert("adnl_connection_failed")

    def check_status(self):
        """Run every check once; initialises the module on first use.

        Each check is invoked through ``self.local.try_function``.
        """
        if not self.ton.using_alert_bot():
            return
        if not self.inited:
            self.init()

        self.local.try_function(self.check_db_usage)
        self.local.try_function(self.check_validator_wallet_balance)
        self.local.try_function(self.check_efficiency)  # todo: alert if validator is going to be slashed
        self.local.try_function(self.check_validator_working)
        self.local.try_function(self.check_zero_blocks_created)
        self.local.try_function(self.check_sync)
        self.local.try_function(self.check_slashed)
        self.local.try_function(self.check_adnl_connection_failed)

    def add_console_commands(self, console):
        """Register the alert management commands in the console."""
        console.AddItem("enable_alert", self.enable_alert, self.local.translate("enable_alert_cmd"))
        console.AddItem("disable_alert", self.disable_alert, self.local.translate("disable_alert_cmd"))
        console.AddItem("list_alerts", self.print_alerts, self.local.translate("list_alerts_cmd"))
        console.AddItem("test_alert", self.test_alert, self.local.translate("test_alert_cmd"))

modules/utilities.py

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
import base64
21
import json
3-
import os
2+
import random
43
import subprocess
54
import time
65

6+
import requests
7+
78
from mypylib.mypylib import color_print, print_table, color_text, timeago, bcolors
89
from modules.module import MtcModule
910

@@ -335,6 +336,50 @@ def print_validator_list(self, args):
335336
print_table(table)
336337
# end define
337338

339+
def check_adnl_connection(self):
    """Ask remote checker hosts whether the local node is reachable via ADNL.

    The check is skipped (and reported as successful) unless the
    ``checkAdnl`` db setting is truthy; when that setting is absent it
    falls back to ``sendTelemetry``. Three of the known checker hosts are
    picked at random and queried in turn; the first positive answer wins.

    Returns:
        tuple: ``(ok, error)``. ``error`` is an empty string when ``ok`` is
        True, otherwise a color-tagged message describing the last failure.
    """
    telemetry = self.ton.local.db.get("sendTelemetry", False)
    check_adnl = self.ton.local.db.get("checkAdnl", telemetry)
    if not check_adnl:
        return True, ''
    self.local.add_log('Checking ADNL connection to local node', 'info')
    hosts = ['45.129.96.53', '5.154.181.153', '2.56.126.137', '91.194.11.68', '45.12.134.214', '138.124.184.27',
             '103.106.3.171']
    hosts = random.sample(hosts, k=3)
    data = self.ton.get_local_adnl_data()
    error = ''
    for host in hosts:
        url = f'http://{host}/adnl_check'
        try:
            response = requests.post(url, json=data, timeout=5).json()
            # Read the reply inside the try block so that a malformed
            # (non-object) JSON body counts as a failed probe instead of
            # raising out of the loop.
            result = response.get("ok")
            message = response.get("message")
        except Exception as e:
            error = f'{{red}}Failed to check ADNL connection to local node: {type(e)}: {e}{{endc}}'
            continue
        if result:
            # Success: do not return an error left over from an earlier host.
            return True, ''
        error = f'{{red}}Failed to check ADNL connection to local node: {message}{{endc}}'
    return False, error
367+
368+
def get_pool_data(self, args):
    """Console command: print a pool's data as indented JSON.

    ``args[0]`` is either a pool address or the name of a local pool; a
    name is resolved to its address through ``self.ton.GetLocalPool``.
    Prints a usage hint and returns when no argument is given.
    """
    try:
        pool_name = args[0]
    except (IndexError, TypeError):
        # Only "no argument supplied" is a usage error; a bare ``except``
        # would also swallow unrelated failures such as KeyboardInterrupt.
        color_print("{red}Bad args. Usage:{endc} get_pool_data <pool-name | pool-addr>")
        return
    if self.ton.IsAddr(pool_name):
        pool_addr = pool_name
    else:
        pool = self.ton.GetLocalPool(pool_name)
        pool_addr = pool.addrB64
    pool_data = self.ton.GetPoolData(pool_addr)
    print(json.dumps(pool_data, indent=4))
# end define
382+
338383
def add_console_commands(self, console):
339384
console.AddItem("vas", self.view_account_status, self.local.translate("vas_cmd"))
340385
console.AddItem("vah", self.view_account_history, self.local.translate("vah_cmd"))
@@ -350,3 +395,4 @@ def add_console_commands(self, console):
350395
console.AddItem("vl", self.print_validator_list, self.local.translate("vl_cmd"))
351396
console.AddItem("cl", self.print_complaints_list, self.local.translate("cl_cmd"))
352397

398+
console.AddItem("get_pool_data", self.get_pool_data, self.local.translate("get_pool_data_cmd"))

0 commit comments

Comments
 (0)