From 8365d5f6b9131a93304d67aea9c5b8710985b81c Mon Sep 17 00:00:00 2001 From: Sonic Build Admin Date: Mon, 13 Oct 2025 18:23:22 +0000 Subject: [PATCH] Fixing state_db not having delete_field attribute causing a crash when DPUs in bad state ### What I did Fixed the AttributeError caused by missing [delete_field](vscode-file://vscode-app/Applications/Visual%20Studio%20Code%205.app/Contents/Resources/app/out/vs/code/electron-browser/workbench/workbench.html) method in the [StateDBHelper](vscode-file://vscode-app/Applications/Visual%20Studio%20Code%205.app/Contents/Resources/app/out/vs/code/electron-browser/workbench/workbench.html) class when managing DPU state transitions. The code was attempting to call [state_db.delete_field()](vscode-file://vscode-app/Applications/Visual%20Studio%20Code%205.app/Contents/Resources/app/out/vs/code/electron-browser/workbench/workbench.html) to remove the 'transition_start_time' field from the database, but this method didn't exist, causing crashes when DPUs were in bad state. ### How I did it Added the missing [delete_field](vscode-file://vscode-app/Applications/Visual%20Studio%20Code%205.app/Contents/Resources/app/out/vs/code/electron-browser/workbench/workbench.html) method to the [StateDBHelper](vscode-file://vscode-app/Applications/Visual%20Studio%20Code%205.app/Contents/Resources/app/out/vs/code/electron-browser/workbench/workbench.html) class that properly removes fields from Redis using the [hdel](vscode-file://vscode-app/Applications/Visual%20Studio%20Code%205.app/Contents/Resources/app/out/vs/code/electron-browser/workbench/workbench.html) command Maintained the existing logic in [set_state_transition_in_progress](vscode-file://vscode-app/Applications/Visual%20Studio%20Code%205.app/Contents/Resources/app/out/vs/code/electron-browser/workbench/workbench.html) that removes 'transition_start_time' from both local state and database when transitioning to 'False' Ensured consistency between local dictionary state and database state by properly implementing field deletion The fix addresses the root cause of the AttributeError while preserving the intended behavior of cleaning up transition timestamps when state transitions complete. Code Changes: Added [delete_field(self, table, key, field)](vscode-file://vscode-app/Applications/Visual%20Studio%20Code%205.app/Contents/Resources/app/out/vs/code/electron-browser/workbench/workbench.html) method to [StateDBHelper](vscode-file://vscode-app/Applications/Visual%20Studio%20Code%205.app/Contents/Resources/app/out/vs/code/electron-browser/workbench/workbench.html) class Method uses [client.hdel(redis_key, field)](vscode-file://vscode-app/Applications/Visual%20Studio%20Code%205.app/Contents/Resources/app/out/vs/code/electron-browser/workbench/workbench.html) to properly delete fields from Redis database Existing call to [state_db.delete_field()](vscode-file://vscode-app/Applications/Visual%20Studio%20Code%205.app/Contents/Resources/app/out/vs/code/electron-browser/workbench/workbench.html) on line 85 now works correctly ### How to verify it Make the DPU midplane unreachable for one of the DPUs Toggle the DPU ON/OFF state a couple of times using config chassis modules startup/shutdown DPUx Verify that the commands complete without AttributeError crashes Confirm that 'transition_start_time' field is properly removed from STATE_DB when transitions complete Check that both local state and database state remain synchronized Why this approach vs. removing the line: This fix maintains the original intended behavior of cleaning up transition timestamps, ensuring database consistency and preventing stale data accumulation, while properly implementing the missing functionality that was causing the crash. Which release branch to backport (provide reason below if selected) [x] 202505 [x] 202506 --- config/chassis_modules.py | 20 +++++++++++++++++--- tests/chassis_modules_test.py | 30 +++++++++++++++++++++++++----- 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/config/chassis_modules.py b/config/chassis_modules.py index 56768a5eb..ee8d6abd6 100755 --- a/config/chassis_modules.py +++ b/config/chassis_modules.py @@ -6,7 +6,7 @@ import subprocess import utilities_common.cli as clicommon from utilities_common.chassis import is_smartswitch, get_all_dpus -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone TIMEOUT_SECS = 10 TRANSITION_TIMEOUT = timedelta(seconds=240) # 4 minutes @@ -27,6 +27,12 @@ def set_entry(self, table, key, entry): for field, value in entry.items(): self.db.set("STATE_DB", redis_key, field, value) + def delete_field(self, table, key, field): + """Delete a specific field from table|key.""" + redis_key = f"{table}|{key}" + client = self.db.get_redis_client("STATE_DB") + return client.hdel(redis_key, field) + # # 'chassis_modules' group ('config chassis_modules ...') # @@ -72,8 +78,9 @@ def set_state_transition_in_progress(db, chassis_module_name, value): entry = state_db.get_entry('CHASSIS_MODULE_TABLE', chassis_module_name) or {} entry['state_transition_in_progress'] = value if value == 'True': - entry['transition_start_time'] = datetime.utcnow().isoformat() + entry['transition_start_time'] = datetime.now(timezone.utc).isoformat() else: + # Remove transition_start_time from both local entry and database entry.pop('transition_start_time', None) state_db.delete_field('CHASSIS_MODULE_TABLE', chassis_module_name, 'transition_start_time') state_db.set_entry('CHASSIS_MODULE_TABLE', chassis_module_name, entry) @@ -92,7 +99,14 @@ def is_transition_timed_out(db, chassis_module_name): start_time = datetime.fromisoformat(start_time_str) except ValueError: return False - return datetime.utcnow() - start_time > TRANSITION_TIMEOUT + + # Use UTC everywhere for consistent comparison + current_time = datetime.now(timezone.utc) + if start_time.tzinfo is None: + # If stored time is naive, assume it's UTC + start_time = start_time.replace(tzinfo=timezone.utc) + + return current_time - start_time > TRANSITION_TIMEOUT # # Name: check_config_module_state_with_timeout diff --git a/tests/chassis_modules_test.py b/tests/chassis_modules_test.py index c1fd653ec..91c51b483 100755 --- a/tests/chassis_modules_test.py +++ b/tests/chassis_modules_test.py @@ -1,7 +1,7 @@ import sys import os from click.testing import CliRunner -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from config.chassis_modules import ( set_state_transition_in_progress, is_transition_timed_out, @@ -489,7 +489,7 @@ def test_shutdown_triggers_transition_in_progress(self): fvs = { 'admin_status': 'up', 'state_transition_in_progress': 'True', - 'transition_start_time': datetime.utcnow().isoformat() + 'transition_start_time': datetime.now(timezone.utc).isoformat() } db.cfgdb.set_entry('CHASSIS_MODULE', "DPU0", fvs) @@ -516,7 +516,7 @@ def test_shutdown_triggers_transition_timeout(self): fvs = { 'admin_status': 'up', 'state_transition_in_progress': 'True', - 'transition_start_time': (datetime.utcnow() - timedelta(minutes=30)).isoformat() + 'transition_start_time': (datetime.now(timezone.utc) - timedelta(minutes=30)).isoformat() } db.cfgdb.set_entry('CHASSIS_MODULE', "DPU0", fvs) @@ -592,15 +592,35 @@ def test_is_transition_timed_out_all_paths(self): assert is_transition_timed_out(db, "DPU0") is False # Case 4: Timed out - old_time = (datetime.utcnow() - TRANSITION_TIMEOUT - timedelta(seconds=1)).isoformat() + old_time = (datetime.now(timezone.utc) - TRANSITION_TIMEOUT - timedelta(seconds=1)).isoformat() db.statedb.get_entry.return_value = {"transition_start_time": old_time} assert is_transition_timed_out(db, "DPU0") is True # Case 5: Not timed out yet - now = datetime.utcnow().isoformat() + now = datetime.now(timezone.utc).isoformat() db.statedb.get_entry.return_value = {"transition_start_time": now} assert is_transition_timed_out(db, "DPU0") is False + def test_delete_field(self): + """Single test to cover missing delete_field and timezone handling lines""" + from config.chassis_modules import StateDBHelper + from datetime import timezone + + # Test delete_field method (covers lines 32-34) + mock_sonic_db = mock.MagicMock() + mock_redis_client = mock.MagicMock() + mock_sonic_db.get_redis_client.return_value = mock_redis_client + helper = StateDBHelper(mock_sonic_db) + helper.delete_field('TEST_TABLE', 'test_key', 'test_field') + mock_redis_client.hdel.assert_called_once_with("TEST_TABLE|test_key", "test_field") + + # Test timezone-aware datetime handling (covers line 109) + db = mock.MagicMock() + db.statedb = mock.MagicMock() + tz_time = (datetime.now(timezone.utc) - TRANSITION_TIMEOUT - timedelta(seconds=1)).isoformat() + db.statedb.get_entry.return_value = {"transition_start_time": tz_time} + assert is_transition_timed_out(db, "DPU0") is True + @classmethod def teardown_class(cls): print("TEARDOWN")