Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
77 commits
Select commit Hold shift + click to select a range
0409850
Added pre_shutdown_hook() function to module_base.py
rameshraghupathy May 13, 2025
69fc737
Modified based on the Redis based IPC
rameshraghupathy May 21, 2025
52fed94
Did some cleanup
rameshraghupathy May 21, 2025
13ccb54
Modified set_admin_state API to handle DPU Graceful Shutdown
rameshraghupathy May 21, 2025
f80d453
Draft version. Need to test again
rameshraghupathy Jul 7, 2025
075f9fe
Merge branch 'master' into graceful-shutdown
rameshraghupathy Jul 10, 2025
5c76c4f
Merge branch 'sonic-net:master' into graceful-shutdown
rameshraghupathy Aug 12, 2025
954d205
refactored based on the revised HLD
rameshraghupathy Aug 12, 2025
dda9062
refactored based on the revised HLD
rameshraghupathy Aug 13, 2025
f736e7b
Fixing ut
rameshraghupathy Aug 20, 2025
9dd80f6
Fixing ut
rameshraghupathy Aug 20, 2025
9b59745
Fixing ut
rameshraghupathy Aug 20, 2025
44b44f7
Fixing ut
rameshraghupathy Aug 20, 2025
6ad3a4c
Improving coverage
rameshraghupathy Aug 21, 2025
d2c5010
Improving coverage
rameshraghupathy Aug 21, 2025
b53413c
Improving coverage
rameshraghupathy Aug 21, 2025
d64f1c8
Improving coverage
rameshraghupathy Aug 21, 2025
f75f7e2
Refactored for graceful shutdown
rameshraghupathy Aug 24, 2025
24c5eaa
Refactored for graceful shutdown, fixing UT
rameshraghupathy Aug 31, 2025
3d3c431
Refactored for graceful shutdown, fixing UT
rameshraghupathy Sep 1, 2025
337baa1
Refactored for graceful shutdown, fixing UT
rameshraghupathy Sep 1, 2025
f2302e8
Refactored for graceful shutdown, fixing UT
rameshraghupathy Sep 1, 2025
71668a8
Refactored for graceful shutdown, fixing UT - Final round of tweaks
rameshraghupathy Sep 3, 2025
dd3e462
Refactored for graceful shutdown, fixing UT - Final round of tweaks
rameshraghupathy Sep 3, 2025
6ea46bc
Refactored for graceful shutdown, fixing UT - Final round of tweaks
rameshraghupathy Sep 8, 2025
2b99de1
Refactored for graceful shutdown, fixing UT - Final round of tweaks
rameshraghupathy Sep 9, 2025
6f1e7a2
Refactored for graceful shutdown, fixing UT - Final round of tweaks
rameshraghupathy Sep 9, 2025
0cdc5eb
Refactored for graceful shutdown, fixing UT - Final round of tweaks
rameshraghupathy Sep 9, 2025
258a1e3
Refactored for graceful shutdown, fixing UT - Final round of tweaks
rameshraghupathy Sep 9, 2025
bf55d0c
Remove SMARTSWITCH build flag across platforms
rameshraghupathy Sep 11, 2025
4ffa284
Made the timeout logic common
rameshraghupathy Sep 20, 2025
c6e7c20
working on coverage
rameshraghupathy Sep 20, 2025
194010b
restoring pci and sensor related tests
rameshraghupathy Sep 20, 2025
14333dc
fixing an indent issue
rameshraghupathy Sep 20, 2025
0e4d7ca
Addressed PR comments
rameshraghupathy Sep 26, 2025
c80fa7c
Addressed PR comments
rameshraghupathy Sep 26, 2025
35c92f2
Addressed PR comments
rameshraghupathy Sep 26, 2025
5211b46
Did a minor cleanup
rameshraghupathy Sep 28, 2025
ec98ff3
Did some clean up to address the review comments
rameshraghupathy Sep 30, 2025
273ac84
Did some clean up to address the review comments
rameshraghupathy Sep 30, 2025
38e93ba
Did some clean up to address the review comments
rameshraghupathy Sep 30, 2025
2be697b
Did some clean up to address the review comments
rameshraghupathy Sep 30, 2025
d9208ab
Addressed review comments and included transition in progress check i…
rameshraghupathy Sep 30, 2025
0a8610e
Fixing test failure
rameshraghupathy Sep 30, 2025
a4464a5
Fixing test failure
rameshraghupathy Sep 30, 2025
97835fd
Fixing test failure
rameshraghupathy Sep 30, 2025
1eb15ce
Fixing test failure
rameshraghupathy Sep 30, 2025
46ed271
Addressed review comments related to refactoring
rameshraghupathy Oct 1, 2025
f72c96d
Fixing test failures
rameshraghupathy Oct 1, 2025
0197e54
Fixing test failures
rameshraghupathy Oct 1, 2025
ae65492
Addressed review comments related to refactoring
rameshraghupathy Oct 1, 2025
937658f
Merge branch 'master' into graceful-shutdown
rameshraghupathy Oct 1, 2025
5973578
Did some cleanup of the comments
rameshraghupathy Oct 2, 2025
e9485bf
Did some cleanup based on review comments
rameshraghupathy Oct 2, 2025
7cb3872
Fixed test failure
rameshraghupathy Oct 2, 2025
82a983f
Addressing review comments
rameshraghupathy Oct 8, 2025
fc9c331
Addressing review comments
rameshraghupathy Oct 19, 2025
22fdade
Fix graceful shutdown implementation and clean up whitespace
rameshraghupathy Oct 19, 2025
c479203
Revert unrelated sonic_xcvr changes
rameshraghupathy Oct 19, 2025
fe70485
Aligning tests with the changes in module_base.py
rameshraghupathy Oct 19, 2025
05f786f
fixed whitespace
rameshraghupathy Oct 19, 2025
545457d
fixed test issues
rameshraghupathy Oct 19, 2025
8009ca7
fixed test issues
rameshraghupathy Oct 19, 2025
55a4c6d
fixed test issues
rameshraghupathy Oct 19, 2025
1c862cd
fixed test issues
rameshraghupathy Oct 19, 2025
897562f
fixed test issues
rameshraghupathy Oct 19, 2025
6c8a306
fixed test issues
rameshraghupathy Oct 19, 2025
d2dd8c8
Revert "Revert unrelated sonic_xcvr changes"
rameshraghupathy Oct 21, 2025
2f8e72d
Update tests/module_base_test.py
rameshraghupathy Oct 21, 2025
ee58019
t rebase --abort
rameshraghupathy Oct 21, 2025
3e1dd0a
Addressing review comments
rameshraghupathy Oct 23, 2025
61a091b
Fixing test failures
rameshraghupathy Oct 23, 2025
3c09b88
Update sonic_platform_base/module_base.py
rameshraghupathy Oct 23, 2025
da94d73
Update sonic_platform_base/module_base.py
rameshraghupathy Oct 23, 2025
41f05ce
Update sonic_platform_base/module_base.py
rameshraghupathy Oct 23, 2025
6e0e7cb
Merge branch 'sonic-net:master' into graceful-shutdown
rameshraghupathy Oct 23, 2025
ab9680e
Addressing review comments
rameshraghupathy Oct 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 88 additions & 11 deletions sonic_platform_base/module_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,17 @@
to interact with a module (as used in a modular chassis) SONiC.
"""

import sys
import os
import json
import time
import errno
import sys
import select
from swsssdk import SonicV2Connector
from utilities_common.chassis import is_dpu
from sonic_py_common import device_info
import fcntl
from . import device_base
import json
import threading
import contextlib
import shutil
Expand Down Expand Up @@ -187,6 +193,77 @@ def get_oper_status(self):
"""
raise NotImplementedError

def get_reboot_timeout(self):
    """
    Retrieves the time budget, in seconds, for a DPU graceful shutdown.

    The platform name is read from DEVICE_METADATA|localhost in CONFIG_DB and
    used to locate that platform's platform.json, whose
    "dpu_halt_services_timeout" field holds the value.

    Returns:
        int: The configured timeout, or 60 when platform.json cannot be read
        or parsed, is not a JSON object, lacks the field, or holds a value
        that cannot be converted to an integer.

    Raises:
        ValueError: If CONFIG_DB does not provide a platform name.
    """
    default_timeout = 60

    db = SonicV2Connector()
    db.connect(db.CONFIG_DB)

    # NOTE(review): get_entry() is a ConfigDBConnector-style call; confirm
    # that the SonicV2Connector imported by this module actually provides it.
    platform = db.get_entry('DEVICE_METADATA', 'localhost').get('platform')
    if not platform:
        raise ValueError("Platform information not found in CONFIG_DB.")

    platform_json_path = f"/usr/share/sonic/device/{platform}/platform.json"

    # Only I/O and parse failures fall back to the default; anything else is
    # a programming error and is allowed to propagate.
    try:
        with open(platform_json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return default_timeout

    # platform.json is expected to be an object; any other top-level JSON
    # value carries no timeout field.
    if not isinstance(data, dict):
        return default_timeout

    timeout = data.get("dpu_halt_services_timeout")
    if timeout is None:
        return default_timeout

    try:
        return int(timeout)
    except (TypeError, ValueError, OverflowError):
        # Non-numeric strings, containers, or an infinite float.
        return default_timeout

def graceful_shutdown_handler(self):
    """
    Graceful shutdown handler for SmartSwitch DPU modules.

    Marks a shutdown transition as in progress in CHASSIS_MODULE_INFO_TABLE
    and then polls until either:
        1. the entry's state_transition_in_progress field reads "False", or
        2. get_oper_status() returns "Offline" (compared case-insensitively),
           in which case this handler writes the "False" flag itself.

    The first condition that occurs is accepted as completion of the
    graceful shutdown. The state is always checked once more when the time
    budget runs out, so a completion that lands during the last sleep is
    not misreported as a timeout.

    Raises:
        TimeoutError: If neither condition is observed within the number of
            seconds returned by get_reboot_timeout(). The in-progress flag
            written in step 1 is left as-is in that case.
    """
    dpu_name = self.name
    db = SonicV2Connector()
    db.connect(db.STATE_DB)

    key = f"CHASSIS_MODULE_INFO_TABLE|{dpu_name}"

    # Step 1: Set transition flag
    # NOTE(review): set_entry() is a ConfigDBConnector-style call; confirm
    # that the SonicV2Connector imported by this module actually provides it.
    transition_info = {
        "state_transition_in_progress": "True",
        "transition_type": "shutdown",
        "transition_start_time": str(int(time.time()))
    }
    db.set_entry("CHASSIS_MODULE_INFO_TABLE", dpu_name, transition_info)

    # Step 2: Wait for either completion event
    timeout = self.get_reboot_timeout()
    interval = 2  # seconds between polls
    elapsed = 0   # seconds slept so far; time spent inside the polls is not counted

    while True:
        result = db.get_all(db.STATE_DB, key)
        if result and result.get("state_transition_in_progress") == "False":
            return

        op_state = self.get_oper_status()
        if op_state and op_state.lower() == "offline":
            # Mark transition complete
            db.set_entry("CHASSIS_MODULE_INFO_TABLE", dpu_name, {
                "state_transition_in_progress": "False",
                "transition_type": "shutdown"
            })
            return

        # The deadline is tested only after a poll, so the state is always
        # re-read after the final sleep.
        remaining = timeout - elapsed
        if remaining <= 0:
            raise TimeoutError(f"Graceful shutdown timeout for {dpu_name}")

        # Never sleep past the deadline when the timeout is not a multiple
        # of the poll interval.
        pause = min(interval, remaining)
        time.sleep(pause)
        elapsed += pause

def reboot(self, reboot_type):
"""
Request to reboot the module
Expand All @@ -207,20 +284,20 @@ def reboot(self, reboot_type):

def set_admin_state(self, up):
"""
Request to keep the card in administratively up/down state.
The down state will power down the module and the status should show
MODULE_STATUS_OFFLINE.
The up state will take the module to MODULE_STATUS_FAULT or
MODULE_STATUS_ONLINE states.
Request to set the module's administrative state.

Args:
up: A boolean, True to set the admin-state to UP. False to set the
admin-state to DOWN.
up (bool): True to set the admin-state to UP; False to set it to DOWN.

Returns:
bool: True if the request has been issued successfully, False if not
bool: True if the request has been issued successfully; False otherwise.
"""
raise NotImplementedError
if not up:
subtype = device_info.get_device_subtype()
if subtype == "SmartSwitch" and not is_dpu():
self.graceful_shutdown_handler()
# Proceed to set the admin state using the platform-specific implementation
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is this supposed to work? super here will call set_admin_state of the base class, not of the derived one.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the refactored implementation, the platform will call graceful_shutdown_handler().

return super().set_admin_state(up)

def get_maximum_consumed_power(self):
"""
Expand Down
53 changes: 53 additions & 0 deletions tests/module_base_test.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import unittest
from unittest.mock import patch, MagicMock
from sonic_platform_base.module_base import ModuleBase
import pytest
import json
Expand Down Expand Up @@ -70,6 +72,57 @@ def test_sensors(self):
assert(module.get_all_current_sensors() == ["s1"])
assert(module.get_current_sensor(0) == "s1")


class DummyModule(ModuleBase):
    """Minimal ModuleBase stand-in used by the graceful shutdown tests."""

    def __init__(self, name="DPU0"):
        # ModuleBase.__init__ is not invoked here; only `name` is populated.
        self.name = name

    def set_admin_state(self, up):
        """Dummy override that reports success for any requested state."""
        return True


class TestModuleBaseGracefulShutdown:
    """Tests for get_reboot_timeout() and graceful_shutdown_handler()."""

    @patch("sonic_platform_base.module_base.SonicV2Connector")
    def test_get_reboot_timeout_default(self, mock_db):
        """A platform.json without the timeout field yields the default."""
        mock_instance = mock_db.return_value
        mock_instance.get_entry.return_value = {'platform': 'x86_64-foo'}
        with patch("builtins.open", unittest.mock.mock_open(read_data='{}')):
            module = DummyModule()
            timeout = module.get_reboot_timeout()
        assert timeout == 60

    @patch("sonic_platform_base.module_base.SonicV2Connector")
    def test_get_reboot_timeout_configured(self, mock_db):
        """A configured dpu_halt_services_timeout is returned as an int."""
        mock_instance = mock_db.return_value
        mock_instance.get_entry.return_value = {'platform': 'x86_64-foo'}
        platform_json = '{"dpu_halt_services_timeout": 120}'
        with patch("builtins.open", unittest.mock.mock_open(read_data=platform_json)):
            module = DummyModule()
            timeout = module.get_reboot_timeout()
        assert timeout == 120

    @patch("sonic_platform_base.module_base.SonicV2Connector")
    def test_graceful_shutdown_handler_success(self, mock_db):
        """The handler returns once the DB flag reads "False"."""
        dpu_name = "DPU0"
        mock_instance = mock_db.return_value
        mock_instance.get_all.side_effect = [
            {"state_transition_in_progress": "True"},   # First poll: still running
            {"state_transition_in_progress": "False"},  # Second poll: done
        ]

        module = DummyModule(name=dpu_name)

        # get_oper_status() is patched because DummyModule does not override it.
        with patch.object(module, "get_reboot_timeout", return_value=10), \
             patch.object(module, "get_oper_status", return_value="Online"), \
             patch("time.sleep"):
            module.graceful_shutdown_handler()

        # Only the initial "in progress" marker is written on this path.
        mock_instance.set_entry.assert_called_once()
        table, name, info = mock_instance.set_entry.call_args_list[0][0]
        assert (table, name) == ("CHASSIS_MODULE_INFO_TABLE", dpu_name)
        assert info["state_transition_in_progress"] == "True"
        assert info["transition_type"] == "shutdown"
        assert mock_instance.get_all.call_count == 2

    @patch("sonic_platform_base.module_base.SonicV2Connector")
    def test_graceful_shutdown_handler_offline(self, mock_db):
        """An "Offline" oper status completes the transition."""
        dpu_name = "DPU0"
        mock_instance = mock_db.return_value
        mock_instance.get_all.return_value = {}

        module = DummyModule(name=dpu_name)

        with patch.object(module, "get_reboot_timeout", return_value=10), \
             patch.object(module, "get_oper_status", return_value="Offline"), \
             patch("time.sleep"):
            module.graceful_shutdown_handler()

        mock_instance.set_entry.assert_any_call(
            "CHASSIS_MODULE_INFO_TABLE",
            dpu_name,
            {"state_transition_in_progress": "False", "transition_type": "shutdown"},
        )

    @patch("sonic_platform_base.module_base.SonicV2Connector")
    def test_graceful_shutdown_handler_timeout(self, mock_db):
        """TimeoutError is raised when neither completion event occurs."""
        dpu_name = "DPU1"
        mock_instance = mock_db.return_value
        mock_instance.get_all.return_value = {}

        module = DummyModule(name=dpu_name)

        # pytest.raises fails the test when no exception is raised, unlike a
        # bare try/except that would pass silently.
        with patch.object(module, "get_reboot_timeout", return_value=5), \
             patch.object(module, "get_oper_status", return_value="Online"), \
             patch("time.sleep"):
            with pytest.raises(TimeoutError, match="timeout"):
                module.graceful_shutdown_handler()

def test_pci_entry_state_db(self):
module = ModuleBase()
mock_connector = MagicMock()
Expand Down
Loading