Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
114 commits
Select commit Hold shift + click to select a range
4e3a096
Did the instrumentation for gnoi-reboot.service
rameshraghupathy May 13, 2025
4a7e6bf
Modified based on the Redis based IPC
rameshraghupathy May 21, 2025
c2f9cb8
Modified based on the Redis based IPC
rameshraghupathy May 21, 2025
db7848f
made check_platform.sh executable
rameshraghupathy May 21, 2025
f946e72
Did some cleanup
rameshraghupathy May 21, 2025
4434463
Draft version. Need to test again
rameshraghupathy Jul 7, 2025
91897ed
Fixing test failure
rameshraghupathy Jul 10, 2025
118a27a
Working on coverage
rameshraghupathy Jul 10, 2025
1654d44
Working on coverage
rameshraghupathy Jul 10, 2025
b1ca2a3
Merge branch 'sonic-net:master' into graceful-shutdown
rameshraghupathy Aug 12, 2025
f6936e5
refactored based on the revised HLD
rameshraghupathy Aug 12, 2025
4b709ea
refactored based on the revised HLD
rameshraghupathy Aug 14, 2025
d510290
Fixing ut
rameshraghupathy Aug 20, 2025
dfa9761
Fixing ut
rameshraghupathy Aug 20, 2025
380b5f9
Improving coverage
rameshraghupathy Aug 20, 2025
62450d6
Refactored for graceful shutdown
rameshraghupathy Aug 24, 2025
a7f1a39
Refactored for graceful shutdown
rameshraghupathy Aug 25, 2025
f45358a
Fixing ut
rameshraghupathy Aug 26, 2025
14f20e6
Fixing ut
rameshraghupathy Aug 26, 2025
8d647fa
Fixing ut
rameshraghupathy Aug 26, 2025
e2c2a71
Fixing ut
rameshraghupathy Aug 26, 2025
ada6883
Fixing ut
rameshraghupathy Aug 26, 2025
ca6d463
Fixing ut
rameshraghupathy Aug 26, 2025
e2bbe5f
Fixing ut
rameshraghupathy Aug 26, 2025
28bc69b
Fixing ut
rameshraghupathy Aug 26, 2025
29183bd
Fixing ut
rameshraghupathy Aug 26, 2025
e228ffb
working on coverage
rameshraghupathy Aug 26, 2025
37d73ce
working on coverage
rameshraghupathy Aug 26, 2025
601cb90
working on coverage
rameshraghupathy Aug 26, 2025
dfda223
working on coverage
rameshraghupathy Aug 26, 2025
fb51c33
Refactored for graceful shutdown, fixing UT - Final round of tweaks
rameshraghupathy Sep 8, 2025
4650d23
Refactored for graceful shutdown, fixing UT - Final round of tweaks
rameshraghupathy Sep 8, 2025
dece2a0
Refactored for graceful shutdown, fixing UT - Final round of tweaks
rameshraghupathy Sep 9, 2025
6a8524f
Refactored for graceful shutdown, fixing UT - Final round of tweaks
rameshraghupathy Sep 9, 2025
a381400
Refactored for graceful shutdown, fixing UT - Final round of tweaks
rameshraghupathy Sep 9, 2025
da39422
Refactored for graceful shutdown, fixing UT - Final round of tweaks
rameshraghupathy Sep 10, 2025
d5ab77b
Refactored for graceful shutdown, fixing UT - Final round of tweaks
rameshraghupathy Sep 10, 2025
78de30a
Refactored for graceful shutdown, fixing UT - Final round of tweaks
rameshraghupathy Sep 10, 2025
39db631
Refactored for graceful shutdown, fixing UT - Final round of tweaks
rameshraghupathy Sep 10, 2025
ee497b9
Refactored for graceful shutdown, fixing UT - Final round of tweaks
rameshraghupathy Sep 10, 2025
e5558b6
Refactored for graceful shutdown, fixing UT - Final round of tweaks
rameshraghupathy Sep 10, 2025
05571bb
Refactored for graceful shutdown, fixing UT - Final round of tweaks
rameshraghupathy Sep 10, 2025
7285eda
Refactored for graceful shutdown, fixing UT - Final round of tweaks
rameshraghupathy Sep 10, 2025
2009207
Refactored for graceful shutdown, fixing UT - Final round of tweaks
rameshraghupathy Sep 10, 2025
2470888
Addressed copilot PR comments
rameshraghupathy Sep 15, 2025
c62e79f
Made the timeout logic common
rameshraghupathy Sep 20, 2025
2106099
working on coverage
rameshraghupathy Sep 20, 2025
ffe85ec
working on coverage
rameshraghupathy Sep 20, 2025
22654c8
working on coverage
rameshraghupathy Sep 20, 2025
cac4b67
Addressed PR comments
rameshraghupathy Sep 26, 2025
6d46f60
Addressed review comments related to refactoring
rameshraghupathy Oct 1, 2025
4b092dc
Fixing test failures
rameshraghupathy Oct 1, 2025
b0bfd18
Fixing test failures
rameshraghupathy Oct 1, 2025
aeac810
Addressed review comments related to refactoring
rameshraghupathy Oct 1, 2025
5c98c46
Addressing review comments
rameshraghupathy Oct 21, 2025
8d829cc
Addressing review comments
rameshraghupathy Oct 21, 2025
942874c
Addressing review comments
rameshraghupathy Oct 21, 2025
d1533a8
Addressing review comments
rameshraghupathy Oct 21, 2025
8454a37
Addressing review comments
rameshraghupathy Oct 21, 2025
7e3bf57
Addressing review comments
rameshraghupathy Oct 21, 2025
3c93891
Addressing review comments
rameshraghupathy Oct 21, 2025
b1f6139
Update scripts/wait-for-sonic-core.sh
rameshraghupathy Oct 21, 2025
6a76f95
Update scripts/wait-for-sonic-core.sh
rameshraghupathy Oct 21, 2025
6005650
Merge branch 'sonic-net:master' into graceful-shutdown
rameshraghupathy Nov 7, 2025
39c5889
Aligning with the new changes in module_base.py PR:#608
rameshraghupathy Nov 7, 2025
4e46ef1
Fixing imports in test
rameshraghupathy Nov 7, 2025
8fa0d79
Fixing test issue
rameshraghupathy Nov 7, 2025
dd66d4c
Cleaned up the _handle_successful_reboot function, as the current imp…
rameshraghupathy Nov 7, 2025
74dfe3d
Increasing coverage
rameshraghupathy Nov 7, 2025
aa03811
Increasing coverage
rameshraghupathy Nov 7, 2025
e379e9e
Doing UT
rameshraghupathy Nov 11, 2025
e5a564e
Tested version with the recent module_base changes
rameshraghupathy Nov 12, 2025
693f3a5
Fixed test issue
rameshraghupathy Nov 12, 2025
7b658d2
Fixed test issue
rameshraghupathy Nov 12, 2025
ddff999
Fixed test issue
rameshraghupathy Nov 12, 2025
68ce97a
Update scripts/gnoi_shutdown_daemon.py
rameshraghupathy Nov 12, 2025
ba36f56
Update scripts/gnoi_shutdown_daemon.py
rameshraghupathy Nov 12, 2025
96f8d99
Update scripts/gnoi_shutdown_daemon.py
rameshraghupathy Nov 12, 2025
4bd5631
Update scripts/gnoi_shutdown_daemon.py
rameshraghupathy Nov 12, 2025
a04ccc7
Update scripts/gnoi_shutdown_daemon.py
rameshraghupathy Nov 12, 2025
3a22b62
Update setup.py
rameshraghupathy Nov 12, 2025
22e5684
Update scripts/gnoi_shutdown_daemon.py
rameshraghupathy Nov 12, 2025
6660cc8
Update scripts/gnoi_shutdown_daemon.py
rameshraghupathy Nov 12, 2025
0ef829c
Merge branch 'sonic-net:master' into graceful-shutdown
rameshraghupathy Nov 12, 2025
83ca4a1
Addressed review comments
rameshraghupathy Nov 12, 2025
8da027b
Aligning tests with addressed review comments
rameshraghupathy Nov 12, 2025
e326d70
Fixing a syntax issue
rameshraghupathy Nov 12, 2025
42d3d49
Fixing tests and coverage
rameshraghupathy Nov 13, 2025
dc9ad31
Fixing tests and coverage
rameshraghupathy Nov 13, 2025
74125be
Fixing tests and coverage
rameshraghupathy Nov 13, 2025
0f50662
testing the import approach in ut
rameshraghupathy Nov 13, 2025
50fe6ea
added tests
rameshraghupathy Nov 13, 2025
8d2b58f
fixing test issue
rameshraghupathy Nov 13, 2025
8a3bfa3
Update scripts/wait-for-sonic-core.sh
rameshraghupathy Nov 13, 2025
755c8b9
Update scripts/wait-for-sonic-core.sh
rameshraghupathy Nov 13, 2025
910f19d
Update setup.py
rameshraghupathy Nov 13, 2025
e146d52
addressed some cosmetic changes suggested by copilot
rameshraghupathy Nov 13, 2025
a8567b8
improving coverage
rameshraghupathy Nov 13, 2025
8dbad2e
improving coverage
rameshraghupathy Nov 13, 2025
5e6ccbb
improving coverage
rameshraghupathy Nov 13, 2025
6ab850d
triggering a run
rameshraghupathy Nov 13, 2025
f438f3d
triggering a run
rameshraghupathy Nov 13, 2025
a20a9f4
triggering a run
rameshraghupathy Nov 13, 2025
5e52daa
cleaning up
rameshraghupathy Nov 13, 2025
c6013f3
adding one more test
rameshraghupathy Nov 13, 2025
59521cf
adding one more test
rameshraghupathy Nov 13, 2025
d25029a
Update scripts/gnoi_shutdown_daemon.py
rameshraghupathy Nov 13, 2025
0410028
Update scripts/gnoi_shutdown_daemon.py
rameshraghupathy Nov 13, 2025
e6c71c4
Update scripts/check_platform.py
rameshraghupathy Nov 13, 2025
7f86b98
addressed review comments
rameshraghupathy Nov 13, 2025
183c337
Fix test failures - add returncode mocking and fix exception test
rameshraghupathy Nov 13, 2025
6ff92ad
addressed review comments
rameshraghupathy Nov 14, 2025
fd0037b
added gnoi_shutdown_daemon.py to setup
rameshraghupathy Nov 15, 2025
565294c
Addressed review comments
rameshraghupathy Nov 15, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions data/debian/rules
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,6 @@ override_dh_installsystemd:
dh_installsystemd --no-start --name=procdockerstatsd
dh_installsystemd --no-start --name=determine-reboot-cause
dh_installsystemd --no-start --name=process-reboot-cause
dh_installsystemd --no-start --name=gnoi-reboot
dh_installsystemd $(HOST_SERVICE_OPTS) --name=sonic-hostservice

13 changes: 13 additions & 0 deletions data/debian/sonic-host-services-data.gnoi-reboot.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# systemd unit for the gNOI reboot daemon.
[Unit]
Description=gNOI based DPU Graceful Shutdown Daemon
After=rc-local.service

[Service]
Type=simple
# The platform gate is a condition, not a start-up step: a non-zero exit
# (1-254) from ExecCondition= skips the unit without marking it failed,
# whereas a failing ExecStartPre= combined with Restart=always would make
# systemd retry the unit every RestartSec on every unsupported platform.
# NOTE(review): ExecCondition= requires systemd >= 243 - confirm for the
# target image.
ExecCondition=/usr/local/bin/check_platform.sh
ExecStart=/usr/bin/env python3 /usr/local/bin/gnoi-reboot-daemon
# Restart the daemon if it ever exits; it is written as an endless loop.
Restart=always
RestartSec=5

[Install]
WantedBy=multi-user.target
11 changes: 11 additions & 0 deletions scripts/check_platform.sh
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this need to be bash? Can it be python? Inline script is difficult to maintain.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hdwhdw Fixed

Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
#
# Platform gate for gnoi-reboot-daemon.
#
# Exits 0 only when CONFIG_DB reports the device subtype "SmartSwitch" AND
# utilities_common.chassis.is_dpu() explicitly reports False. Exits 1 in
# every other case, including when either probe fails to produce output.

subtype=$(sonic-cfggen -d -v DEVICE_METADATA.localhost.subtype)
is_dpu=$(python3 -c "from utilities_common.chassis import is_dpu; print(is_dpu())")

# Require an explicit "False": if the python probe fails (e.g. the import
# errors out) $is_dpu is empty, and an empty value must not be mistaken
# for "this is not a DPU".
if [[ "$subtype" == "SmartSwitch" && "$is_dpu" == "False" ]]; then
    exit 0
fi

# Diagnostics belong on stderr so they are not confused with command output.
echo "gnoi-reboot-daemon is intended for SmartSwitch platforms only." >&2
exit 1
165 changes: 165 additions & 0 deletions scripts/gnoi-reboot-daemon
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
#!/usr/bin/env python3
#
# gnoi-reboot-daemon
#
# This daemon facilitates gNOI-based reboot operations for DPU subcomponents within the SONiC platform.
# It monitors RedisDB for reboot requests and executes the corresponding gNOI Reboot RPCs.
#
# It is designed to operate on SmartSwitch platforms and not on DPU modules.

try:
import os
import json
import subprocess
import time
from swsssdk import SonicV2Connector
from sonic_py_common import syslogger

except ImportError as err:
raise ImportError("%s - required module not found" % str(err))

SYSLOG_IDENTIFIER = "gnoi-reboot-daemon"

FIFO_PATH = "/var/run/gnoi_reboot.pipe"

# Global logger class instance
logger = syslogger.SysLogger(SYSLOG_IDENTIFIER)

def execute_gnoi_command(command_args, timeout=60):
    """Run an external command and capture its outcome.

    Args:
        command_args: Argument vector handed to ``subprocess.run`` (no shell).
        timeout: Seconds to wait before giving up on the command.
            Defaults to 60.

    Returns:
        A ``(returncode, stdout, stderr)`` tuple with both streams stripped
        of surrounding whitespace. ``returncode`` is ``-1`` and ``stdout`` is
        empty when the command timed out or could not be launched at all;
        ``stderr`` then carries a short description of what went wrong.
    """
    try:
        result = subprocess.run(
            command_args, capture_output=True, text=True, timeout=timeout
        )
    except subprocess.TimeoutExpired:
        return -1, "", "Command timed out."
    except OSError as err:
        # E.g. the executable is missing or not executable. Report it like
        # any other failed command instead of letting it escape to callers
        # that only inspect the return code.
        return -1, "", f"Command could not be executed: {err}"
    return result.returncode, result.stdout.strip(), result.stderr.strip()

def get_dpu_ip(dpu_name):
    """Look up the midplane IP address configured for a DPU.

    Reads the ``bridge-midplane|<dpu_name>`` entry of the
    ``DHCP_SERVER_IPV4_PORT`` table in CONFIG_DB.

    Args:
        dpu_name: DPU name used as the second half of the table key.

    Returns:
        The DPU's IP address as a string (the first one when several are
        configured).

    Raises:
        ValueError: If no IP address is configured for ``dpu_name``.
    """
    # The file-level imports only bring in SonicV2Connector, so the
    # CONFIG_DB connector is imported here to keep this function usable.
    from swsssdk import ConfigDBConnector

    config_db = ConfigDBConnector()
    config_db.connect()
    key = f"bridge-midplane|{dpu_name}"
    entry = config_db.get_entry("DHCP_SERVER_IPV4_PORT", key) or {}

    # NOTE(review): ConfigDBConnector is understood to expose list-typed
    # fields (stored in redis as "ips@") under the bare name "ips" with a
    # Python list as the value - confirm. Both spellings are accepted so the
    # lookup works either way.
    dpu_ip = entry.get("ips") or entry.get("ips@")
    if isinstance(dpu_ip, (list, tuple)):
        dpu_ip = dpu_ip[0] if dpu_ip else None
    elif isinstance(dpu_ip, str):
        # A raw list field is a comma-separated string; use the first item.
        dpu_ip = dpu_ip.split(",")[0].strip()

    if not dpu_ip:
        raise ValueError(f"DPU IP not found for {dpu_name}")
    return dpu_ip

def get_gnmi_port(dpu_name):
    """Return the gNMI port configured for a DPU.

    Reads the ``gnmi_port`` field of the ``DPU_PORT|<dpu_name>`` entry in
    CONFIG_DB.

    Args:
        dpu_name: DPU name used as the ``DPU_PORT`` table key.

    Returns:
        The configured port, or ``"8080"`` when the entry or the field is
        absent.
    """
    # The file-level imports only bring in SonicV2Connector, so the
    # CONFIG_DB connector is imported here to keep this function usable.
    from swsssdk import ConfigDBConnector

    config_db = ConfigDBConnector()
    config_db.connect()
    # Treat a missing entry like an empty one so the default below applies.
    entry = config_db.get_entry("DPU_PORT", dpu_name) or {}
    return entry.get("gnmi_port", "8080")  # Default to 8080 if not specified

def get_reboot_timeout():
    """Return how long, in seconds, to wait for a DPU reboot to complete.

    The platform name is read from ``DEVICE_METADATA|localhost`` in
    CONFIG_DB and used to locate ``platform.json``; the value of its
    ``dpu_halt_services_timeout`` key is returned as an int.

    Returns:
        The configured timeout, or 60 when ``platform.json`` cannot be read,
        is not valid JSON, lacks the key, or holds a non-integer value.

    Raises:
        ValueError: If CONFIG_DB holds no platform name.
    """
    default_timeout = 60

    db = SonicV2Connector()
    db.connect(db.CONFIG_DB)

    # get_all() is the hash reader this file already uses on a
    # SonicV2Connector; get_entry() is a ConfigDBConnector-style call.
    # NOTE(review): assumes CONFIG_DB uses "|" as its key separator - confirm.
    metadata = db.get_all(db.CONFIG_DB, "DEVICE_METADATA|localhost") or {}
    platform = metadata.get("platform")
    if not platform:
        raise ValueError("Platform information not found in CONFIG_DB.")

    platform_json_path = f"/usr/share/sonic/device/{platform}/platform.json"

    try:
        with open(platform_json_path, "r") as f:
            data = json.load(f)
        timeout = data.get("dpu_halt_services_timeout")
        return default_timeout if timeout is None else int(timeout)
    except (OSError, ValueError, TypeError, AttributeError) as err:
        # Unreadable file, malformed JSON (JSONDecodeError is a ValueError),
        # a non-object top level, or a non-numeric value: fall back to the
        # default rather than failing the reboot flow.
        logger.log_info(
            f"Using default reboot timeout of {default_timeout}s "
            f"({platform_json_path}: {err})"
        )
        return default_timeout

def _open_keyspace_pubsub(db):
    """Return a pubsub handle for STATE_DB on the given connector."""
    # NOTE(review): SonicV2Connector is not known to expose pubsub() itself.
    # Use it when present, otherwise go through the underlying redis client.
    factory = getattr(db, "pubsub", None)
    if callable(factory):
        return factory()
    return db.get_redis_client(db.STATE_DB).pubsub()


def _write_state_entry(db, table, key, fields):
    """Write ``fields`` to the ``<table>|<key>`` hash in STATE_DB."""
    # NOTE(review): set_entry() is a ConfigDBConnector-style call. Use it when
    # the connector has one, otherwise write field by field with set().
    setter = getattr(db, "set_entry", None)
    if callable(setter):
        setter(table, key, fields)
        return
    redis_key = f"{table}|{key}"
    for field, value in fields.items():
        db.set(db.STATE_DB, redis_key, field, value)


def _parse_request_message(message):
    """Return ``(redis_key, dpu_name)`` for a reboot-request event, else None."""
    if not message or message.get("type") != "pmessage":
        return None
    channel = message.get("channel")
    if isinstance(channel, bytes):
        channel = channel.decode("utf-8", errors="replace")
    if not channel:
        return None
    # Channel looks like "__keyspace@6__:GNOI_REBOOT_REQUEST|<dpu>".
    key = channel.partition(":")[2]
    table, sep, dpu_name = key.partition("|")
    # The subscription pattern ends in "*", so reject look-alike tables and
    # keys that carry no DPU name instead of indexing blindly.
    if table != "GNOI_REBOOT_REQUEST" or not sep or not dpu_name:
        return None
    return key, dpu_name


def _gnoi_system_cmd(dpu_ip, port, rpc, json_in=None):
    """Build the gnoi_client argument vector for a System-module RPC."""
    cmd = [
        "docker", "exec", "gnmi", "gnoi_client",
        f"-target={dpu_ip}:{port}",
        "-logtostderr", "-notls",
        "-module", "System",
        "-rpc", rpc,
    ]
    if json_in is not None:
        cmd += ["-jsonin", json.dumps(json_in)]
    return cmd


def _publish_result(db, dpu_name, status, text):
    """Record the outcome for ``dpu_name`` and clear its request flag."""
    result_entry = {
        "start": "true",
        "status": status,
        "message": text,
        "timestamp": str(int(time.time())),
    }
    _write_state_entry(db, "GNOI_REBOOT_RESULT", dpu_name, result_entry)
    _write_state_entry(db, "GNOI_REBOOT_REQUEST", dpu_name, {"start": "false"})


def _wait_for_reboot(dpu_ip, port, timeout, interval=5):
    """Poll RebootStatus until it reports completion or ``timeout`` elapses."""
    status_cmd = _gnoi_system_cmd(dpu_ip, port, "RebootStatus")
    # A monotonic deadline also accounts for the time each status command
    # takes, not only the sleeps in between.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        returncode, stdout, _stderr = execute_gnoi_command(status_cmd)
        if returncode == 0 and "reboot complete" in stdout.lower():
            return True
        time.sleep(interval)
    return False


def _handle_request(db, key, dpu_name):
    """Process one GNOI_REBOOT_REQUEST entry end to end."""
    request = db.get_all(db.STATE_DB, key)
    if not request or request.get("start") != "true":
        # Includes the "start": "false" write-back this daemon does itself.
        return

    message_text = request.get("message", "User initiated reboot")
    try:
        method = int(request.get("method", "3"))
        dpu_ip = get_dpu_ip(dpu_name)
        port = get_gnmi_port(dpu_name)
    except ValueError as e:
        # Bad method value or unknown DPU: report it so the requester is not
        # left waiting on a request that will never be served.
        logger.log_error(str(e))
        _publish_result(db, dpu_name, "failure", str(e))
        return

    logger.log_info(f"Processing reboot request for {dpu_name} at {dpu_ip}:{port}")

    # Step 1: Send Reboot Command
    reboot_cmd = _gnoi_system_cmd(
        dpu_ip, port, "Reboot", {"method": method, "message": message_text}
    )
    returncode, _stdout, stderr = execute_gnoi_command(reboot_cmd)
    if returncode != 0:
        logger.log_error(f"Reboot command failed: {stderr}")
        _publish_result(db, dpu_name, "failure", stderr)
        return

    # Step 2: Poll for Reboot Status
    try:
        timeout = get_reboot_timeout()
    except Exception as e:
        # The reboot has already been issued; a failed timeout lookup must
        # not abandon the request, so fall back to a fixed wait.
        logger.log_error(f"Could not determine reboot timeout, using 60s: {e}")
        timeout = 60
    reboot_successful = _wait_for_reboot(dpu_ip, port, timeout)

    # Step 3: Update Result Table
    if reboot_successful:
        _publish_result(db, dpu_name, "success", "Reboot completed successfully.")
    else:
        _publish_result(db, dpu_name, "timeout", "Reboot status polling timed out.")


def main():
    """Serve gNOI reboot requests posted to STATE_DB, forever.

    Subscribes to keyspace notifications for ``GNOI_REBOOT_REQUEST*`` keys.
    For every entry whose ``start`` field is ``"true"`` it sends a gNOI
    ``System.Reboot`` RPC to the DPU through ``gnoi_client`` in the ``gnmi``
    container, polls ``System.RebootStatus`` until completion or timeout,
    writes the outcome to ``GNOI_REBOOT_RESULT|<dpu>`` and resets the
    request's ``start`` field to ``"false"``.
    """
    db = SonicV2Connector()
    db.connect(db.STATE_DB)
    pubsub = _open_keyspace_pubsub(db)
    pubsub.psubscribe("__keyspace@6__:GNOI_REBOOT_REQUEST*")

    logger.log_info("gnoi-reboot-daemon started and listening for reboot requests.")

    while True:
        message = pubsub.get_message()
        if not message:
            # Nothing pending: idle briefly instead of spinning.
            time.sleep(1)
            continue

        parsed = _parse_request_message(message)
        if parsed is None:
            continue
        key, dpu_name = parsed

        try:
            _handle_request(db, key, dpu_name)
        except Exception as e:
            # Daemon boundary: one bad request must not stop the service
            # from handling the next one.
            logger.log_error(f"Unexpected error handling reboot request for {dpu_name}: {e}")

if __name__ == "__main__":
main()
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
'scripts/procdockerstatsd',
'scripts/determine-reboot-cause',
'scripts/process-reboot-cause',
'scripts/check_platform.sh',
'scripts/gnoi-reboot-daemon',
'scripts/sonic-host-server',
'scripts/ldap.py'
],
Expand Down
Loading