From c26da7dca70fbade72ee4fa0e250ad935c71672c Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Tue, 28 Oct 2025 03:36:24 +0000 Subject: [PATCH 1/5] Replace set_admin_state with set_admin_state_gracefully --- sonic-chassisd/scripts/chassisd | 58 +-------------------------- sonic-chassisd/tests/mock_platform.py | 4 ++ sonic-chassisd/tests/test_chassisd.py | 29 +++++--------- 3 files changed, 16 insertions(+), 75 deletions(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index 42f4dee6f..ee6dc0310 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -254,16 +254,7 @@ class SmartSwitchModuleConfigUpdater(logger.Logger): self.log_warning("Invalid admin_state value: {}".format(admin_state)) def submit_callback(self, module_index, admin_state, key): - if admin_state == MODULE_ADMIN_DOWN: - # This is only valid on platforms which have pci_detach and sensord changes required. If it is not implemented, - # there are no actions taken during this function execution. - try_get(self.chassis.get_module(module_index).module_pre_shutdown, default=False) - try_get(self.chassis.get_module(module_index).set_admin_state, admin_state, default=False) - if admin_state == MODULE_ADMIN_UP: - # This is only valid on platforms which have pci_rescan sensord changes required. If it is not implemented, - # there are no actions taken during this function execution. - try_get(self.chassis.get_module(module_index).module_post_startup, default=False) - pass + try_get(self.chassis.get_module(module_index).set_admin_state_gracefully, admin_state, default=False) # # Module Updater ============================================================== @@ -723,7 +714,6 @@ class SmartSwitchModuleUpdater(ModuleUpdater): self.module_reboot_table = swsscommon.Table(self.chassis_state_db, CHASSIS_MODULE_REBOOT_INFO_TABLE) self.down_modules = {} self.chassis_app_db_clean_sha = None - self.module_transition_flag_helper = ModuleTransitionFlagHelper() self.midplane_initialized = try_get(chassis.init_midplane_switch, default=False) if not self.midplane_initialized: @@ -815,9 +805,6 @@ class SmartSwitchModuleUpdater(ModuleUpdater): # Persist dpu down time self.persist_dpu_reboot_time(key) # persist reboot cause - # Clear transition flag in STATE_DB - self.module_transition_flag_helper.clear_transition_flag(key) - reboot_cause = try_get(self.chassis.get_module(module_index).get_reboot_cause) self.persist_dpu_reboot_cause(reboot_cause, key) # publish reboot cause to db @@ -852,9 +839,6 @@ class SmartSwitchModuleUpdater(ModuleUpdater): self.persist_dpu_reboot_cause(reboot_cause, key) self.update_dpu_reboot_cause_to_db(key) - # Clear transition flag in STATE_DB - self.module_transition_flag_helper.clear_transition_flag(key) - def _get_module_info(self, module_index): """ Retrieves module info of this module @@ -1336,34 +1320,6 @@ class DpuStateUpdater(logger.Logger): self._update_dp_dpu_state('down') self._update_cp_dpu_state('down') -class ModuleTransitionFlagHelper(logger.Logger): - def __init__(self, log_identifier = SYSLOG_IDENTIFIER): - super(ModuleTransitionFlagHelper, self).__init__(log_identifier) - # Use new connector to avoid redis failures - """Create a helper function to get the module table, - since multiple threads updating with the same connector will cause redis failures""" - state_db = daemon_base.db_connect("STATE_DB") - self.module_table = swsscommon.Table(state_db, CHASSIS_MODULE_INFO_TABLE) - - def set_transition_flag(self, module_name): - try: - self.module_table.hset(module_name, 'state_transition_in_progress', 'True') - self.module_table.hset(module_name, 'transition_start_time', datetime.now(timezone.utc).replace(tzinfo=None).isoformat()) - except Exception as e: - self.log_error(f"Error setting transition flag for {module_name}: {e}") - - def clear_transition_flag(self, module_name): - try: - self.log_info(f"Clearing transition flag for {module_name}") - self.module_table.hdel(module_name, 'state_transition_in_progress') - self.module_table.hdel(module_name, 'transition_start_time') - except Exception as e: - self.log_error(f"Error clearing transition flag for {module_name}: {e}") - - def clear_all_transition_flags(self): - for module_name in self.module_table.getKeys(): - self.clear_transition_flag(module_name) - # # Daemon ======================================================================= # @@ -1406,8 +1362,7 @@ class ChassisdDaemon(daemon_base.DaemonBase): try_get(self.module_updater.chassis.get_module(module_index).module_pre_shutdown, default=False) # Set admin_state change in progress using the centralized method if admin_state == MODULE_ADMIN_DOWN: - ModuleTransitionFlagHelper().set_transition_flag(module_name) - try_get(self.module_updater.chassis.get_module(module_index).set_admin_state, admin_state, default=False) + try_get(self.module_updater.chassis.get_module(module_index).set_admin_state_gracefully, admin_state, default=False) def set_initial_dpu_admin_state(self): """Send admin_state trigger once to modules those are powered up""" @@ -1486,16 +1441,7 @@ class ChassisdDaemon(daemon_base.DaemonBase): # Set the initial DPU admin state for SmartSwitch if self.smartswitch: - # Clear all stale transition flags for SmartSwitch on startup - ModuleTransitionFlagHelper().clear_all_transition_flags() - self.set_initial_dpu_admin_state() - # Clear all transition flags for SmartSwitch after setting the initial DPU admin state - module_transition_flag_helper = ModuleTransitionFlagHelper() - # Clear all stale transition flags for SmartSwitch on startup - module_transition_flag_helper.clear_all_transition_flags() self.set_initial_dpu_admin_state() - # Clear all transition flags for SmartSwitch after setting the initial DPU admin state - module_transition_flag_helper.clear_all_transition_flags() while not self.stop.wait(CHASSIS_INFO_UPDATE_PERIOD_SECS): self.module_updater.module_db_update() diff --git a/sonic-chassisd/tests/mock_platform.py b/sonic-chassisd/tests/mock_platform.py index d555a24c6..a7bcf618c 100644 --- a/sonic-chassisd/tests/mock_platform.py +++ b/sonic-chassisd/tests/mock_platform.py @@ -78,6 +78,10 @@ def module_pre_shutdown(self): def module_post_startup(self): pass + def set_admin_state_gracefully(self, up): + """Mock implementation of set_admin_state_gracefully""" + return self.set_admin_state(up) + def is_midplane_reachable(self): return self.midplane_access diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index 0a5a92906..cacf2ed7f 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -628,19 +628,15 @@ def test_smartswitch_configupdater_check_admin_state(): # Test setting admin state to down admin_state = 0 - with patch.object(module, 'module_pre_shutdown') as mock_module_pre_shutdown, \ - patch.object(module, 'set_admin_state') as mock_set_admin_state: + with patch.object(module, 'set_admin_state_gracefully') as mock_set_admin_state_gracefully: config_updater.module_config_update(name, admin_state) - mock_module_pre_shutdown.assert_called_once() - mock_set_admin_state.assert_called_once_with(admin_state) + mock_set_admin_state_gracefully.assert_called_once_with(admin_state) # Test setting admin state to up admin_state = 1 - with patch.object(module, 'set_admin_state') as mock_set_admin_state, \ - patch.object(module, 'module_post_startup') as mock_module_post_startup: + with patch.object(module, 'set_admin_state_gracefully') as mock_set_admin_state_gracefully: config_updater.module_config_update(name, admin_state) - mock_set_admin_state.assert_called_once_with(admin_state) - mock_module_post_startup.assert_called_once() + mock_set_admin_state_gracefully.assert_called_once_with(admin_state) @patch("chassisd.glob.glob") @@ -1966,24 +1962,19 @@ def test_submit_dpu_callback(): # Test MODULE_ADMIN_DOWN scenario with patch.object(module, 'module_pre_shutdown') as mock_pre_shutdown, \ - patch.object(module, 'set_admin_state') as mock_set_admin_state, \ - patch.object(module, 'module_post_startup') as mock_post_startup: + patch.object(module, 'set_admin_state_gracefully') as mock_set_admin_state_gracefully: daemon_chassisd.submit_dpu_callback(index, MODULE_ADMIN_DOWN, name) # Verify correct functions are called for admin down mock_pre_shutdown.assert_called_once() - mock_set_admin_state.assert_called_once_with(MODULE_ADMIN_DOWN) - mock_post_startup.assert_not_called() + mock_set_admin_state_gracefully.assert_called_once_with(MODULE_ADMIN_DOWN) - - # Reset mocks for next test + # Test non-MODULE_ADMIN_DOWN scenario (e.g., MODULE_PRE_SHUTDOWN) with patch.object(module, 'module_pre_shutdown') as mock_pre_shutdown, \ - patch.object(module, 'set_admin_state') as mock_set_admin_state, \ - patch.object(module, 'module_post_startup') as mock_post_startup: + patch.object(module, 'set_admin_state_gracefully') as mock_set_admin_state_gracefully: module_updater.module_table.get = MagicMock(return_value=(True, [])) daemon_chassisd.submit_dpu_callback(index, MODULE_PRE_SHUTDOWN, name) - # Verify correct functions are called for admin up + # Verify only pre_shutdown is called for non-admin-down states mock_pre_shutdown.assert_called_once() - mock_set_admin_state.assert_not_called() - mock_post_startup.assert_not_called() + mock_set_admin_state_gracefully.assert_not_called() From 24b5bd8d4256ebccf63a3ad86c7386c94beb0ba8 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Fri, 31 Oct 2025 17:19:27 +0000 Subject: [PATCH 2/5] thermalctld test fixes --- sonic-thermalctld/tests/mock_swsscommon.py | 32 ++----------------- .../mocked_libs/swsscommon/swsscommon.py | 4 +++ sonic-thermalctld/tests/test_thermalctld.py | 22 ++++++++----- 3 files changed, 21 insertions(+), 37 deletions(-) diff --git a/sonic-thermalctld/tests/mock_swsscommon.py b/sonic-thermalctld/tests/mock_swsscommon.py index ade0d3541..102063b6b 100644 --- a/sonic-thermalctld/tests/mock_swsscommon.py +++ b/sonic-thermalctld/tests/mock_swsscommon.py @@ -2,33 +2,7 @@ Mock implementation of swsscommon package for unit testing ''' -STATE_DB = '' -CHASSIS_STATE_DB = '' +from .mocked_libs.swsscommon import swsscommon - -class Table: - def __init__(self, db, table_name): - self.table_name = table_name - self.mock_dict = {} - - def _del(self, key): - del self.mock_dict[key] - pass - - def set(self, key, fvs): - self.mock_dict[key] = fvs.fv_dict - pass - - def get(self, key): - if key in self.mock_dict: - return self.mock_dict[key] - return None - - def get_size(self): - return (len(self.mock_dict)) - - -class FieldValuePairs: - def __init__(self, fvs): - self.fv_dict = dict(fvs) - pass +Table = swsscommon.Table +FieldValuePairs = swsscommon.FieldValuePairs diff --git a/sonic-thermalctld/tests/mocked_libs/swsscommon/swsscommon.py b/sonic-thermalctld/tests/mocked_libs/swsscommon/swsscommon.py index 13c49dec1..41e5e0fda 100644 --- a/sonic-thermalctld/tests/mocked_libs/swsscommon/swsscommon.py +++ b/sonic-thermalctld/tests/mocked_libs/swsscommon/swsscommon.py @@ -5,6 +5,7 @@ from swsssdk import ConfigDBConnector, SonicDBConfig, SonicV2Connector STATE_DB = '' +CHASSIS_STATE_DB = '' class Table: @@ -28,6 +29,9 @@ def get(self, key): def get_size(self): return (len(self.mock_dict)) + def getKeys(self): + return list(self.mock_dict.keys()) + class FieldValuePairs: fv_dict = {} diff --git a/sonic-thermalctld/tests/test_thermalctld.py b/sonic-thermalctld/tests/test_thermalctld.py index 729b80e43..b4fbcbfa2 100644 --- a/sonic-thermalctld/tests/test_thermalctld.py +++ b/sonic-thermalctld/tests/test_thermalctld.py @@ -1,13 +1,10 @@ import os import sys import multiprocessing -from imp import load_source # TODO: Replace with importlib once we no longer need to support Python 2 +import importlib.machinery +import importlib.util -# TODO: Clean this up once we no longer need to support Python 2 -if sys.version_info.major == 3: - from unittest import mock -else: - import mock +from unittest import mock import pytest tests_path = os.path.dirname(os.path.abspath(__file__)) @@ -34,6 +31,15 @@ scripts_path = os.path.join(modules_path, 'scripts') sys.path.insert(0, modules_path) +# Replacement for imp.load_source from the Python3.12 docs: +# https://docs.python.org/3/whatsnew/3.12.html#imp +def load_source(modname, filename): + loader = importlib.machinery.SourceFileLoader(modname, filename) + spec = importlib.util.spec_from_file_location(modname, filename, loader=loader) + module = importlib.util.module_from_spec(spec) + sys.modules[module.__name__] = module + loader.exec_module(module) + load_source('thermalctld', os.path.join(scripts_path, 'thermalctld')) import thermalctld @@ -813,7 +819,7 @@ def test_get_chassis_exception(self): # ThermalControlDaemon should raise SystemExit with CHASSIS_GET_ERROR code when chassis initialization fails with pytest.raises(SystemExit) as exc_info: - daemon_thermalctld = thermalctld.ThermalControlDaemon() + daemon_thermalctld = thermalctld.ThermalControlDaemon(5, 60, 30) # Verify it exits with the correct error code assert exc_info.value.code == thermalctld.CHASSIS_GET_ERROR @@ -836,7 +842,7 @@ def test_get_chassis_success(self): mock_platform_instance.get_chassis.return_value = mock_chassis mock_platform_class.return_value = mock_platform_instance - daemon_thermalctld = thermalctld.ThermalControlDaemon() + daemon_thermalctld = thermalctld.ThermalControlDaemon(5, 60, 30) # Verify chassis was set correctly assert daemon_thermalctld.chassis is mock_chassis From 4bd906d5fc374dc4e34b2e9b3edafe57d87b8edd Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sat, 8 Nov 2025 16:32:14 +0000 Subject: [PATCH 3/5] Clear all DPU transitions flags in set_initial_dpu_admin_state() --- sonic-chassisd/scripts/chassisd | 17 +- sonic-chassisd/tests/test_chassisd.py | 228 +++++++++++++++++++------- 2 files changed, 181 insertions(+), 64 deletions(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index ee6dc0310..686614c7f 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -1356,21 +1356,28 @@ class ChassisdDaemon(daemon_base.DaemonBase): else: self.log_warning("Caught unhandled signal '{}' - ignoring...".format(SIGNALS_TO_NAMES_DICT[sig])) - def submit_dpu_callback(self, module_index, admin_state, module_name): + def submit_dpu_callback(self, module_index, admin_state): # This is only valid on platforms which have pci_detach and sensord changes required. If it is not implemented, # there are no actions taken during this function execution. - try_get(self.module_updater.chassis.get_module(module_index).module_pre_shutdown, default=False) + if admin_state == MODULE_PRE_SHUTDOWN: + try_get(self.module_updater.chassis.get_module(module_index).module_pre_shutdown, default=False) # Set admin_state change in progress using the centralized method if admin_state == MODULE_ADMIN_DOWN: - try_get(self.module_updater.chassis.get_module(module_index).set_admin_state_gracefully, admin_state, default=False) + try_get(self.module_updater.chassis.get_module(module_index).set_admin_state_gracefully, + admin_state, default=False) def set_initial_dpu_admin_state(self): """Send admin_state trigger once to modules those are powered up""" threads = [] for module_index in range(0, self.module_updater.num_modules): - op = None - # Get operational state of DPU module_name = self.platform_chassis.get_module(module_index).get_name() + + # Clear any existing state transition flags + self.module_updater.chassis.get_module(module_index).clear_module_state_transition() + self.module_updater.chassis.get_module(module_index).clear_module_gnoi_halt_in_progress() + + # Get operational state of DPU + op = None operational_state = self.platform_chassis.get_module(module_index).get_oper_status() try: diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index cacf2ed7f..ccc72c163 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -1450,7 +1450,7 @@ def test_daemon_run_smartswitch(): daemon_chassisd.run() def test_set_initial_dpu_admin_state_down(): - # Test the chassisd run + """Test set_initial_dpu_admin_state when admin state is down""" chassis = MockSmartSwitchChassis() # DPU0 details @@ -1458,14 +1458,64 @@ def test_set_initial_dpu_admin_state_down(): name = "DPU0" desc = "DPU Module 0" slot = 0 - sup_slot = 0 serial = "DPU0-0000" module_type = ModuleBase.MODULE_TYPE_DPU module = MockModule(index, name, desc, module_type, slot, serial) module.set_midplane_ip() + + # Set initial state for DPU0 - OFFLINE + status = ModuleBase.MODULE_STATUS_OFFLINE + module.set_oper_status(status) + chassis.module_list.append(module) + + # Supervisor ModuleUpdater + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) + module_updater.module_db_update() + module_updater.modules_num_update() + + # ChassisdDaemon setup + daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER, chassis) + daemon_chassisd.module_updater = module_updater + daemon_chassisd.platform_chassis = chassis + daemon_chassisd.smartswitch = True + + # Mock the necessary methods + with patch.object(module_updater, 'get_module_admin_status', return_value='down'), \ + patch.object(module_updater, 'update_dpu_state') as mock_update_dpu_state, \ + patch.object(daemon_chassisd, 'submit_dpu_callback') as mock_submit_callback, \ + patch.object(module, 'clear_module_state_transition') as mock_clear_transition, \ + patch.object(module, 'clear_module_gnoi_halt_in_progress') as mock_clear_gnoi: + + # Run the function + daemon_chassisd.set_initial_dpu_admin_state() + + # Verify state transition flags were cleared + mock_clear_transition.assert_called_once() + mock_clear_gnoi.assert_called_once() + + # Verify DPU state was updated with 'down' since operational state is OFFLINE + mock_update_dpu_state.assert_called_once_with("DPU_STATE|DPU0", 'down') + + # Verify callback was submitted with MODULE_ADMIN_DOWN since admin state is 'down' and oper state is OFFLINE + mock_submit_callback.assert_called_once_with(0, MODULE_ADMIN_DOWN, "DPU0") + + +def test_set_initial_dpu_admin_state_up(): + """Test set_initial_dpu_admin_state when admin state is up""" + chassis = MockSmartSwitchChassis() - # Set initial state for DPU0 - status = ModuleBase.MODULE_STATUS_PRESENT + # DPU0 details + index = 0 + name = "DPU0" + desc = "DPU Module 0" + slot = 0 + serial = "DPU0-0000" + module_type = ModuleBase.MODULE_TYPE_DPU + module = MockModule(index, name, desc, module_type, slot, serial) + module.set_midplane_ip() + + # Set initial state for DPU0 - ONLINE + status = ModuleBase.MODULE_STATUS_ONLINE module.set_oper_status(status) chassis.module_list.append(module) @@ -1477,69 +1527,128 @@ def test_set_initial_dpu_admin_state_down(): # ChassisdDaemon setup daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER, chassis) daemon_chassisd.module_updater = module_updater - daemon_chassisd.stop = MagicMock() - daemon_chassisd.stop.wait.return_value = True + daemon_chassisd.platform_chassis = chassis daemon_chassisd.smartswitch = True + + # Mock the necessary methods + with patch.object(module_updater, 'get_module_admin_status', return_value='up'), \ + patch.object(module_updater, 'update_dpu_state') as mock_update_dpu_state, \ + patch.object(daemon_chassisd, 'submit_dpu_callback') as mock_submit_callback, \ + patch.object(module, 'clear_module_state_transition') as mock_clear_transition, \ + patch.object(module, 'clear_module_gnoi_halt_in_progress') as mock_clear_gnoi: + + # Run the function + daemon_chassisd.set_initial_dpu_admin_state() + + # Verify state transition flags were cleared + mock_clear_transition.assert_called_once() + mock_clear_gnoi.assert_called_once() + + # Verify DPU state was updated with 'up' since operational state is ONLINE + mock_update_dpu_state.assert_called_once_with("DPU_STATE|DPU0", 'up') + + # Verify callback was NOT submitted since admin state is 'up' + mock_submit_callback.assert_not_called() + + +def test_set_initial_dpu_admin_state_empty(): + """Test set_initial_dpu_admin_state when admin state is empty""" + chassis = MockSmartSwitchChassis() - # Import platform and use chassis as platform_chassis - import sonic_platform.platform - platform_chassis = chassis + # DPU0 details + index = 0 + name = "DPU0" + desc = "DPU Module 0" + slot = 0 + serial = "DPU0-0000" + module_type = ModuleBase.MODULE_TYPE_DPU + module = MockModule(index, name, desc, module_type, slot, serial) + module.set_midplane_ip() - # Mock objects - mock_chassis = MagicMock() - mock_module_updater = MagicMock() + # Set initial state for DPU0 - PRESENT + status = ModuleBase.MODULE_STATUS_PRESENT + module.set_oper_status(status) + chassis.module_list.append(module) - # Mock the module (DPU0) - mock_module = MagicMock() - mock_module.get_name.return_value = "DPU0" + # Supervisor ModuleUpdater + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) + module_updater.module_db_update() + module_updater.modules_num_update() - # Mock chassis.get_module to return the mock_module for DPU0 - def mock_get_module(index): - if index == 0: # For DPU0 - return mock_module - return None # No other modules available in this test case + # ChassisdDaemon setup + daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER, chassis) + daemon_chassisd.module_updater = module_updater + daemon_chassisd.platform_chassis = chassis + daemon_chassisd.smartswitch = True - # Apply the side effect for chassis.get_module - mock_chassis.get_module.side_effect = mock_get_module + # Mock the necessary methods - admin state is EMPTY + with patch.object(module_updater, 'get_module_admin_status', return_value=ModuleBase.MODULE_STATUS_EMPTY), \ + patch.object(module_updater, 'update_dpu_state') as mock_update_dpu_state, \ + patch.object(daemon_chassisd, 'submit_dpu_callback') as mock_submit_callback, \ + patch.object(module, 'clear_module_state_transition') as mock_clear_transition, \ + patch.object(module, 'clear_module_gnoi_halt_in_progress') as mock_clear_gnoi: - # Mock state_db - mock_state_db = MagicMock() - # fvs_mock = [True, {CHASSIS_MIDPLANE_INFO_ACCESS_FIELD: 'True'}] - # mock_state_db.get.return_value = fvs_mock + # Run the function + daemon_chassisd.set_initial_dpu_admin_state() - # Mock db_connect - mock_db_connect = MagicMock() - mock_db_connect.return_value = mock_state_db + # Verify state transition flags were cleared + mock_clear_transition.assert_called_once() + mock_clear_gnoi.assert_called_once() + + # Verify DPU state was updated with 'down' since operational state is PRESENT (not ONLINE) + mock_update_dpu_state.assert_called_once_with("DPU_STATE|DPU0", 'down') + + # Verify callback was submitted with MODULE_PRE_SHUTDOWN since admin state is EMPTY and oper state is not OFFLINE + mock_submit_callback.assert_called_once_with(0, MODULE_PRE_SHUTDOWN, "DPU0") + + +def test_set_initial_dpu_admin_state_exception(): + """Test set_initial_dpu_admin_state handles exceptions gracefully""" + chassis = MockSmartSwitchChassis() - # Mock admin_status - # mock_module_updater.get_module_admin_status.return_value = 'down' + # DPU0 details + index = 0 + name = "DPU0" + desc = "DPU Module 0" + slot = 0 + serial = "DPU0-0000" + module_type = ModuleBase.MODULE_TYPE_DPU + module = MockModule(index, name, desc, module_type, slot, serial) + module.set_midplane_ip() - # Set access of DPU0 Down - midplane_table = module_updater.midplane_table - module.set_midplane_reachable(True) - module_updater.check_midplane_reachability() - fvs = midplane_table.get(name) - assert fvs != None - if isinstance(fvs, list): - fvs = dict(fvs[-1]) - assert module.get_midplane_ip() == fvs[CHASSIS_MIDPLANE_INFO_IP_FIELD] - assert str(module.is_midplane_reachable()) == fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD] + # Set initial state for DPU0 + status = ModuleBase.MODULE_STATUS_ONLINE + module.set_oper_status(status) + chassis.module_list.append(module) - # Patching platform's Chassis object to return the mocked module - with patch.object(sonic_platform.platform.Chassis, 'is_smartswitch') as mock_is_smartswitch, \ - patch.object(sonic_platform.platform.Chassis, 'get_module', side_effect=mock_get_module): - - # Simulate that the system is a SmartSwitch - mock_is_smartswitch.return_value = True + # Supervisor ModuleUpdater + module_updater = SmartSwitchModuleUpdater(SYSLOG_IDENTIFIER, chassis) + module_updater.module_db_update() + module_updater.modules_num_update() - # Patch num_modules for the updater - with patch.object(daemon_chassisd.module_updater, 'num_modules', 1), \ - patch.object(daemon_chassisd.module_updater, 'get_module_admin_status', return_value='down'): - # Now run the function that sets the initial admin state - daemon_chassisd.set_initial_dpu_admin_state() + # ChassisdDaemon setup + daemon_chassisd = ChassisdDaemon(SYSLOG_IDENTIFIER, chassis) + daemon_chassisd.module_updater = module_updater + daemon_chassisd.platform_chassis = chassis + daemon_chassisd.smartswitch = True + # Mock the necessary methods to raise an exception + with patch.object(module_updater, 'get_module_admin_status', side_effect=Exception("Test error")), \ + patch.object(daemon_chassisd, 'log_error') as mock_log_error, \ + patch.object(module, 'clear_module_state_transition') as mock_clear_transition, \ + patch.object(module, 'clear_module_gnoi_halt_in_progress') as mock_clear_gnoi: + + # Run the function - should not raise exception + daemon_chassisd.set_initial_dpu_admin_state() + + # Verify state transition flags were cleared before exception + mock_clear_transition.assert_called_once() + mock_clear_gnoi.assert_called_once() + + # Verify error was logged + mock_log_error.assert_called_once() + assert "Error in run: Test error" in str(mock_log_error.call_args) -def test_set_initial_dpu_admin_state_up(): # Test the chassisd run chassis = MockSmartSwitchChassis() @@ -1963,18 +2072,19 @@ def test_submit_dpu_callback(): # Test MODULE_ADMIN_DOWN scenario with patch.object(module, 'module_pre_shutdown') as mock_pre_shutdown, \ patch.object(module, 'set_admin_state_gracefully') as mock_set_admin_state_gracefully: - daemon_chassisd.submit_dpu_callback(index, MODULE_ADMIN_DOWN, name) - # Verify correct functions are called for admin down - mock_pre_shutdown.assert_called_once() + daemon_chassisd.submit_dpu_callback(index, MODULE_ADMIN_DOWN) + # Verify pre_shutdown is not called for admin down + mock_pre_shutdown.assert_not_called() + # Verify set_admin_state_gracefully is called with MODULE_ADMIN_DOWN mock_set_admin_state_gracefully.assert_called_once_with(MODULE_ADMIN_DOWN) - # Test non-MODULE_ADMIN_DOWN scenario (e.g., MODULE_PRE_SHUTDOWN) + # Test MODULE_PRE_SHUTDOWN scenario with patch.object(module, 'module_pre_shutdown') as mock_pre_shutdown, \ patch.object(module, 'set_admin_state_gracefully') as mock_set_admin_state_gracefully: module_updater.module_table.get = MagicMock(return_value=(True, [])) - daemon_chassisd.submit_dpu_callback(index, MODULE_PRE_SHUTDOWN, name) + daemon_chassisd.submit_dpu_callback(index, MODULE_PRE_SHUTDOWN) - # Verify only pre_shutdown is called for non-admin-down states + # Verify only pre_shutdown is called for pre-shutdown state mock_pre_shutdown.assert_called_once() mock_set_admin_state_gracefully.assert_not_called() From 3364bb3a6cbf9821cc29765b6bcf7ace4e0d50b2 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Mon, 10 Nov 2025 19:36:24 +0000 Subject: [PATCH 4/5] Fix the call to clear_module_state_transition --- sonic-chassisd/scripts/chassisd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index 686614c7f..51a899e48 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -1373,7 +1373,7 @@ class ChassisdDaemon(daemon_base.DaemonBase): module_name = self.platform_chassis.get_module(module_index).get_name() # Clear any existing state transition flags - self.module_updater.chassis.get_module(module_index).clear_module_state_transition() + self.module_updater.chassis.get_module(module_index).clear_module_state_transition(module_name) self.module_updater.chassis.get_module(module_index).clear_module_gnoi_halt_in_progress() # Get operational state of DPU From 6949abfa1cad616e6499dd23569f2742b85a0381 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Wed, 12 Nov 2025 21:59:35 +0000 Subject: [PATCH 5/5] Fix set_initial_dpu_admin_state() and also test failures --- sonic-chassisd/scripts/chassisd | 9 ++++++--- sonic-chassisd/tests/mock_platform.py | 8 ++++++++ sonic-chassisd/tests/test_chassisd.py | 10 +++++----- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index 51a899e48..f6c3c8d83 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -1385,9 +1385,12 @@ class ChassisdDaemon(daemon_base.DaemonBase): admin_state = self.module_updater.get_module_admin_status(module_name) if admin_state == ModuleBase.MODULE_STATUS_EMPTY: op = MODULE_PRE_SHUTDOWN - if operational_state != ModuleBase.MODULE_STATUS_OFFLINE: - # shutdown DPU + if operational_state == ModuleBase.MODULE_STATUS_ONLINE: + # DPU is online and needs full shutdown op = MODULE_ADMIN_DOWN + elif admin_state == 'down': + # Admin state is explicitly set to down - issue shutdown + op = MODULE_ADMIN_DOWN # Initialize DPU_STATE DB table on bootup dpu_state_key = "DPU_STATE|" + module_name @@ -1399,7 +1402,7 @@ class ChassisdDaemon(daemon_base.DaemonBase): if op is not None: # Create and start a thread for the DPU logic - thread = threading.Thread(target=self.submit_dpu_callback, args=(module_index, op, module_name)) + thread = threading.Thread(target=self.submit_dpu_callback, args=(module_index, op)) thread.daemon = True # Set as a daemon thread thread.start() threads.append(thread) diff --git a/sonic-chassisd/tests/mock_platform.py b/sonic-chassisd/tests/mock_platform.py index a7bcf618c..d3b560ff7 100644 --- a/sonic-chassisd/tests/mock_platform.py +++ b/sonic-chassisd/tests/mock_platform.py @@ -82,6 +82,14 @@ def set_admin_state_gracefully(self, up): """Mock implementation of set_admin_state_gracefully""" return self.set_admin_state(up) + def clear_module_state_transition(self, module_name): + """Mock implementation of clear_module_state_transition""" + return True + + def clear_module_gnoi_halt_in_progress(self): + """Mock implementation of clear_module_gnoi_halt_in_progress""" + return True + def is_midplane_reachable(self): return self.midplane_access diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index ccc72c163..660b90113 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -1479,8 +1479,8 @@ def test_set_initial_dpu_admin_state_down(): daemon_chassisd.platform_chassis = chassis daemon_chassisd.smartswitch = True - # Mock the necessary methods - with patch.object(module_updater, 'get_module_admin_status', return_value='down'), \ + # Mock the necessary methods - admin state is EMPTY (not 'down') + with patch.object(module_updater, 'get_module_admin_status', return_value=ModuleBase.MODULE_STATUS_EMPTY), \ patch.object(module_updater, 'update_dpu_state') as mock_update_dpu_state, \ patch.object(daemon_chassisd, 'submit_dpu_callback') as mock_submit_callback, \ patch.object(module, 'clear_module_state_transition') as mock_clear_transition, \ @@ -1496,8 +1496,8 @@ def test_set_initial_dpu_admin_state_down(): # Verify DPU state was updated with 'down' since operational state is OFFLINE mock_update_dpu_state.assert_called_once_with("DPU_STATE|DPU0", 'down') - # Verify callback was submitted with MODULE_ADMIN_DOWN since admin state is 'down' and oper state is OFFLINE - mock_submit_callback.assert_called_once_with(0, MODULE_ADMIN_DOWN, "DPU0") + # Verify callback was submitted with MODULE_PRE_SHUTDOWN since admin state is EMPTY and oper state is OFFLINE + mock_submit_callback.assert_called_once_with(0, MODULE_PRE_SHUTDOWN) def test_set_initial_dpu_admin_state_up(): @@ -1599,7 +1599,7 @@ def test_set_initial_dpu_admin_state_empty(): mock_update_dpu_state.assert_called_once_with("DPU_STATE|DPU0", 'down') # Verify callback was submitted with MODULE_PRE_SHUTDOWN since admin state is EMPTY and oper state is not OFFLINE - mock_submit_callback.assert_called_once_with(0, MODULE_PRE_SHUTDOWN, "DPU0") + mock_submit_callback.assert_called_once_with(0, MODULE_PRE_SHUTDOWN) def test_set_initial_dpu_admin_state_exception():