Skip to content

Commit 987c076

Browse files
committed
fix(ns-api): ha, improve status output
Read the keepalived status from /tmp/keepalived.json: it is simpler to parse and avoids scanning /var/log/messages to find the current state. To generate the file, run: kill -37 $(cat /tmp/run/keepalived.pid)
1 parent 487ca72 commit 987c076

File tree

2 files changed

+67
-120
lines changed

2 files changed

+67
-120
lines changed

packages/ns-api/files/ns.ha

Lines changed: 50 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -241,23 +241,53 @@ def find_device_config(uci, device, config=None):
241241
return config
242242

243243
def parse_node_state():
    """Return the keepalived state, its statistics and the last sync info.

    The state and statistics are read from the JSON dump that keepalived
    writes to /tmp/keepalived.json after receiving signal 37.

    Returns:
        A 4-tuple ``(state, stats, last_sync_status, last_sync_time)``:

        * ``state``: one of "unknown", "init", "backup", "master", "fault".
        * ``stats``: the "stats" object of the first dumped instance, or an
          empty dict when no statistics could be read.
        * ``last_sync_status`` / ``last_sync_time``: the values stored in
          the ``keepalived.ha_peer`` section under /var/state/, as stripped
          strings (empty when uci prints nothing).

        When keepalived is not running, or its status cannot be queried,
        the result is ``("unknown", {}, -1, -1)``.
    """
    # Numeric state found in the JSON dump -> name exposed by the API
    state_map = {
        -1: "unknown",
        0: "init",
        1: "backup",
        2: "master",
        3: "fault",
    }

    # Check if keepalived is running
    try:
        proc = subprocess.run(["/etc/init.d/keepalived", "info"], check=True, capture_output=True, text=True)
        status = json.loads(proc.stdout)
        if not status['keepalived']['instances']['instance1']['running']:
            # NOTE(review): the sync fields are -1 here but strings everywhere
            # else; kept as-is for compatibility - confirm callers expect it.
            return "unknown", {}, -1, -1
    except Exception:
        # Missing init script, non-zero exit code, invalid JSON or unexpected
        # layout: the state cannot be determined.
        return "unknown", {}, -1, -1

    # Using uci to query the state, with EUci(confdir="/var/state/"), the library can't read the data
    last_sync_status = subprocess.run(["uci", "-P", "/var/state/", "get", "keepalived.ha_peer.last_sync_status"], capture_output=True, text=True).stdout.strip()
    last_sync_time = subprocess.run(["uci", "-P", "/var/state/", "get", "keepalived.ha_peer.last_sync_time"], capture_output=True, text=True).stdout.strip()

    # Integrate keepalived stats and state
    stats = {}
    state = -1
    pid_file = "/tmp/run/keepalived.pid"
    stats_file = "/tmp/keepalived.json"
    try:
        if os.path.isfile(pid_file):
            with open(pid_file, "r") as f:
                pid = int(f.read().strip())
            # Send signal 37 to keepalived to make it dump stats_file
            os.kill(pid, 37)
            # Wait a moment for stats to be written
            time.sleep(0.2)
            # The dump is read only after a fresh one has been requested
            if os.path.isfile(stats_file):
                with open(stats_file, "r") as f:
                    stats_content = json.load(f)
                # An empty dump leaves the defaults (no stats, unknown state)
                if stats_content:
                    first_instance = stats_content[0]
                    stats = first_instance.get("stats", {})
                    state = first_instance.get("data", {}).get("state", -1)
    except Exception:
        # Best effort: on any failure report whatever was collected so far
        # instead of breaking the whole status call.
        pass

    return state_map.get(state, "unknown"), stats, last_sync_status, last_sync_time
253290

254-
# Open the log file for reading
255-
with open('/var/log/messages', 'r') as file:
256-
for line in file:
257-
# Check if the line is something like "Keepalived_vrrp[10271]: (master) Entering MASTER STATE"
258-
if "Entering" in line and "STATE" in line:
259-
state = line.split("Entering")[1].split("STATE")[0].strip().lower()
260-
return state
261291

262292
### API functions
263293

@@ -684,7 +714,15 @@ def init_remote(ssh_password):
684714

685715

686716
def status():
687-
ret = {"role": "", "state": "unknown", "status": "only_primary_node_configured", "last_sync_status": "", "last_sync_time": "", "virtual_ips": []}
717+
ret = {
718+
"role": "unknown",
719+
"state": "unknown",
720+
"status": "unknown",
721+
"last_sync_status": "",
722+
"last_sync_time": "",
723+
"virtual_ips": [],
724+
"keepalived_stats": {}
725+
}
688726

689727
u = EUci()
690728
try:
@@ -700,100 +738,8 @@ def status():
700738
ret["status"] = "disabled"
701739

702740
if ret["status"] == "enabled":
703-
ret["state"] = parse_node_state()
741+
ret["state"], ret["keepalived_stats"], ret["last_sync_status"], ret["last_sync_time"]= parse_node_state()
704742

705-
# Using uci to query the state, with EUci(confdir="/var/state/"), the library can't read the data
706-
ret["last_sync_status"] = subprocess.run(["uci", "-P", "/var/state/", "get", "keepalived.ha_peer.last_sync_status"], capture_output=True, text=True).stdout.strip()
707-
ret["last_sync_time"] = subprocess.run(["uci", "-P", "/var/state/", "get", "keepalived.ha_peer.last_sync_time"], capture_output=True, text=True).stdout.strip()
708-
# Integrate keepalived stats
709-
try:
710-
# Send USR2 signal to keepalived to dump stats
711-
pid_file = "/tmp/run/keepalived.pid"
712-
stats_file = "/tmp/keepalived.stats"
713-
if os.path.isfile(pid_file):
714-
with open(pid_file, "r") as f:
715-
pid = int(f.read().strip())
716-
os.kill(pid, signal.SIGUSR2)
717-
# Wait a moment for stats to be written
718-
time.sleep(0.2)
719-
if os.path.isfile(stats_file):
720-
with open(stats_file, "r") as f:
721-
stats_content = f.read()
722-
# Parse stats into the required structure
723-
stats = {}
724-
current_instance = None
725-
last_section = None
726-
for line in stats_content.splitlines():
727-
line = line.rstrip()
728-
if line.startswith("VRRP Instance:"):
729-
current_instance = line.split(":", 1)[1].strip().lower()
730-
stats[current_instance] = {
731-
"advertisements": {"received": 0, "sent": 0},
732-
"became_master": 0,
733-
"released_master": 0,
734-
"packet_errors": {
735-
"length": 0,
736-
"ttl": 0,
737-
"invalid_type": 0,
738-
"advertisement_interval": 0,
739-
"address_list": 0
740-
},
741-
"authentication_errors": {
742-
"invalid_type": 0,
743-
"type_mismatch": 0,
744-
"failure": 0
745-
},
746-
"priority_zero": {"received": 0, "sent": 0}
747-
}
748-
last_section = None
749-
elif current_instance and line.strip():
750-
stripped_line = line.strip()
751-
# Section headers
752-
if stripped_line.lower() == "advertisements:":
753-
last_section = "advertisements"
754-
elif stripped_line.lower() == "packet errors:":
755-
last_section = "packet_errors"
756-
elif stripped_line.lower() == "authentication errors:":
757-
last_section = "authentication_errors"
758-
elif stripped_line.lower() == "priority zero:":
759-
last_section = "priority_zero"
760-
elif ":" in stripped_line:
761-
# Parse key-value pairs
762-
key, value = stripped_line.split(":", 1)
763-
key = key.strip().lower()
764-
value = value.strip()
765-
766-
try:
767-
value_int = int(value)
768-
if last_section == "advertisements":
769-
if key == "received":
770-
stats[current_instance]["advertisements"]["received"] = value_int
771-
elif key == "sent":
772-
stats[current_instance]["advertisements"]["sent"] = value_int
773-
elif last_section == "packet_errors":
774-
key_normalized = key.replace(" ", "_").replace("-", "_")
775-
if key_normalized in stats[current_instance]["packet_errors"]:
776-
stats[current_instance]["packet_errors"][key_normalized] = value_int
777-
elif last_section == "authentication_errors":
778-
key_normalized = key.replace(" ", "_").replace("-", "_")
779-
if key_normalized in stats[current_instance]["authentication_errors"]:
780-
stats[current_instance]["authentication_errors"][key_normalized] = value_int
781-
elif last_section == "priority_zero":
782-
if key == "received":
783-
stats[current_instance]["priority_zero"]["received"] = value_int
784-
elif key == "sent":
785-
stats[current_instance]["priority_zero"]["sent"] = value_int
786-
if key == "became master":
787-
stats[current_instance]["became_master"] = value_int
788-
elif key == "released master":
789-
stats[current_instance]["released_master"] = value_int
790-
except ValueError:
791-
pass
792-
ret["keepalived_stats"] = stats
793-
else:
794-
ret["keepalived_stats"] = {}
795-
except Exception as e:
796-
ret["keepalived_stats"] = {"error": str(e)}
797743
# List all configured virtual IPs
798744
try:
799745
virtual_ips = utils.get_all_by_type(u, 'keepalived', 'ipaddress')

packages/ns-ha/README.md

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -359,28 +359,29 @@ After few minutes, the status should be like this:
359359
Status: enabled
360360
Role: primary
361361
Current State: master
362-
Last Sync Status: Up to Date
363-
Last Sync Time: Fri Apr 18 13:09:08 UTC 2025
362+
Last Sync Status: Successful
363+
Last Sync Time: Mon Jun 9 07:21:15 UTC 2025
364364
365365
Virtual IPs:
366366
lan_ipaddress: 192.168.100.240/24 (br-lan)
367367
wan_ipaddress: 192.168.122.49/24 (eth1)
368368
369369
Keepalived Statistics:
370-
backup.advertisements.received: 1965
371-
backup.advertisements.sent: 0
372-
backup.became_master: 0
373-
backup.released_master: 0
374-
backup.packet_errors.length: 0
375-
backup.packet_errors.ttl: 0
376-
backup.packet_errors.invalid_type: 0
377-
backup.packet_errors.advertisement_interval: 0
378-
backup.packet_errors.address_list: 0
379-
backup.authentication_errors.invalid_type: 0
380-
backup.authentication_errors.type_mismatch: 0
381-
backup.authentication_errors.failure: 0
382-
backup.priority_zero.received: 0
383-
backup.priority_zero.sent: 0
370+
advert_rcvd: 0
371+
advert_sent: 1730
372+
become_master: 1
373+
release_master: 0
374+
packet_len_err: 0
375+
advert_interval_err: 0
376+
ip_ttl_err: 0
377+
invalid_type_rcvd: 0
378+
addr_list_err: 0
379+
invalid_authtype: 0
380+
authtype_mismatch: 0
381+
auth_failure: 0
382+
pri_zero_rcvd: 0
383+
pri_zero_sent: 0
384+
384385
```
385386

386387
## Alerting

0 commit comments

Comments
 (0)