Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
cf713d9
add exception handling in add_exception_psu
LinJin23 Aug 11, 2025
6063a6e
address review comments
LinJin23 Aug 26, 2025
188652c
Fix tests and add handler to platform_psuutil
LinJin23 Sep 23, 2025
455572d
Fix comment
LinJin23 Sep 23, 2025
048fc91
Fix test
LinJin23 Sep 23, 2025
bf4dede
Fix test
LinJin23 Sep 23, 2025
cf7161e
Add tests
LinJin23 Sep 23, 2025
5bdf678
add init value when hit exception
LinJin23 Sep 24, 2025
2fcc059
Revert "add init value when hit exception"
LinJin23 Sep 25, 2025
9a11b05
fix format
LinJin23 Oct 7, 2025
3995d5f
remove unused logger
LinJin23 Oct 8, 2025
98bee9b
refactor: rename _wrapper_get_psu_status to _wrapper_get_psu_presence…
LinJin23 Oct 10, 2025
a7e21b5
address copilot comments
LinJin23 Oct 10, 2025
8a52b30
Revert to the previous behavior to keep exit when failed in get psuutil
LinJin23 Oct 23, 2025
508d208
enhance PSU wrapper functions to accept logger for improved error han…
LinJin23 Oct 23, 2025
0a37102
Revert to the original psuutil exception handling logic
LinJin23 Oct 24, 2025
23efbcf
add docstring
LinJin23 Oct 24, 2025
1ae9698
add error handling and logging for PSU presence and status retrieval
LinJin23 Nov 4, 2025
5058880
add null check for chassis_tbl before deleting keys
LinJin23 Nov 4, 2025
101d8fe
enhance error logging in PSU presence and status retrieval functions
LinJin23 Nov 5, 2025
d30d02a
fix unit test
LinJin23 Nov 6, 2025
1dea21c
remove redundant exception
LinJin23 Nov 6, 2025
9c89a65
format whitespace
LinJin23 Nov 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 65 additions & 28 deletions sonic-psud/scripts/psud
Original file line number Diff line number Diff line change
Expand Up @@ -90,31 +90,65 @@ exit_code = 0
# temporary wrappers that are compatible with both the new platform API and the old-style plugin mode


def _wrapper_get_num_psus():
def _wrapper_get_num_psus(logger):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is logger being passed in here? It's not used.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for pointing it out. I’ve removed it.

if platform_chassis is not None:
try:
return platform_chassis.get_num_psus()
except NotImplementedError:
pass
return platform_psuutil.get_num_psus()
if platform_psuutil is not None:
return platform_psuutil.get_num_psus()
return 0
Copy link

Copilot AI Oct 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function should maintain consistent fallback behavior. When platform_psuutil is None, returning 0 may not accurately represent the actual number of PSUs on the system. Consider returning a default value that makes sense for the platform or raising an appropriate exception.

Suggested change
return 0
raise RuntimeError("Unable to determine number of PSUs: neither platform_chassis nor platform_psuutil is available")

Copilot uses AI. Check for mistakes.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a warning log, something like the following, if we have to return 0:

if logger:
    logger.log_warning("No PSU provider available; assuming 0 PSUs")

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done



def _wrapper_get_psu_presence(psu_index):
def _wrapper_get_psu(logger, psu_index):
"""
Get PSU object from platform chassis
Copy link

Copilot AI Oct 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The docstring is missing the logger parameter description. Add ':param logger: Logger instance for error/warning messages' to document the logger parameter.

Suggested change
Get PSU object from platform chassis
Get PSU object from platform chassis
:param logger: Logger instance for error/warning messages

Copilot uses AI. Check for mistakes.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@LinJin23 Pls fix this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

:param psu_index: PSU index (1-based)
:return: PSU object if available, None otherwise
"""
if platform_chassis is not None:
try:
return platform_chassis.get_psu(psu_index - 1).get_presence()
except NotImplementedError:
pass
return platform_psuutil.get_psu_presence(psu_index)
return platform_chassis.get_psu(psu_index - 1)
except NotImplementedError as e:
if logger:
logger.log_warning("get_psu() not implemented by platform chassis: {}".format(str(e)))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should be less than a warning imo, maybe a notice

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @vvolam, you mentioned in an earlier comment that this should use a warning level, so I’m a bit unsure which one to use here. Could you please help confirm which level is appropriate?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the NotImplementedError case could be logged as a notice, since we are already logging a warning for the failure case.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

notice is not available, so you can leave this as warning for now.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, got it, I’ll keep it as warning for now.

return None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"return None" at line 121 is sufficient, and lines 116 and 120 are redundant.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

except Exception as e:
if logger:
logger.log_warning("Failed to get PSU {} from platform chassis: {}".format(psu_index, str(e)))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be an error-level log.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

return None
return None


def _wrapper_get_psu_status(psu_index):
def _wrapper_get_psu_presence(logger, psu_index):
if platform_chassis is not None:
try:
return platform_chassis.get_psu(psu_index - 1).get_powergood_status()
except NotImplementedError:
pass
return platform_psuutil.get_psu_status(psu_index)
psu = _wrapper_get_psu(logger, psu_index)
if psu:
try:
return psu.get_presence()
except NotImplementedError:
pass
if platform_psuutil is not None:
return platform_psuutil.get_psu_presence(psu_index)
Copy link

Copilot AI Oct 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fallback logic should handle potential exceptions from platform_psuutil.get_psu_presence(). Consider wrapping this call in a try-except block to prevent unhandled exceptions from propagating up.

Suggested change
return platform_psuutil.get_psu_presence(psu_index)
try:
return platform_psuutil.get_psu_presence(psu_index)
except Exception as e:
if logger:
logger.log_error("Exception in platform_psuutil.get_psu_presence({}): {}".format(psu_index, str(e)))
return False

Copilot uses AI. Check for mistakes.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please wrap this call in a try/except block and log a warning message if it is not implemented.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

if logger:
logger.log_error("Failed to get PSU {} presence".format(psu_index))
return False


def _wrapper_get_psu_status(logger, psu_index):
if platform_chassis is not None:
psu = _wrapper_get_psu(logger, psu_index)
if psu:
try:
return psu.get_powergood_status()
except NotImplementedError:
pass
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment as above, add another condition as
"except Exception as e:
return False
"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

if platform_psuutil is not None:
return platform_psuutil.get_psu_status(psu_index)
Copy link

Copilot AI Oct 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fallback logic should handle potential exceptions from platform_psuutil.get_psu_status(). Consider wrapping this call in a try-except block to prevent unhandled exceptions from propagating up.

Suggested change
return platform_psuutil.get_psu_status(psu_index)
try:
return platform_psuutil.get_psu_status(psu_index)
except Exception as e:
if logger:
logger.log_error("Exception in platform_psuutil.get_psu_status({}): {}".format(psu_index, str(e)))
return False

Copilot uses AI. Check for mistakes.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add this exception

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done, and at the suggested line 152, I changed log_error to log_warning.

if logger:
logger.log_error("Failed to get PSU {} status".format(psu_index))
return False


#
Expand All @@ -123,13 +157,15 @@ def _wrapper_get_psu_status(psu_index):

def get_psu_key(psu_index):
if platform_chassis is not None:
Copy link

Copilot AI Nov 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The get_psu_key function passes None as the logger parameter to _wrapper_get_psu. While this is intentional to avoid logging in this context, the function should include a comment explaining why logging is suppressed here, as this is a deviation from other usages where self (the logger) is passed.

Suggested change
if platform_chassis is not None:
if platform_chassis is not None:
# Intentionally pass None as the logger to suppress logging in this context,
# as get_psu_key is only used for key generation and not for error reporting.
# This deviates from other usages where a logger is passed.

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don’t think it’s necessary.

try:
return platform_chassis.get_psu(psu_index - 1).get_name()
except NotImplementedError:
pass
except IndexError:
# some functionality is dependent on returning an expected key even if the psu object itself does not exist
pass
psu = _wrapper_get_psu(None, psu_index)
if psu:
try:
return psu.get_name()
except NotImplementedError:
pass
Copy link

Copilot AI Oct 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment explaining the IndexError exception handling was removed. This comment provided valuable context about why the exception is caught and should be preserved: 'some functionality is expectent on returning an expected key even if the psu object itself does not exist'.

Suggested change
pass
pass
# some functionality is expectent on returning an expected key even if the psu object itself does not exist

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

except IndexError:
# some functionality is dependent on returning an expected key even if the psu object itself does not exist
pass
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same comment as above

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

return PSU_INFO_KEY_TEMPLATE.format(psu_index)


Expand Down Expand Up @@ -398,8 +434,9 @@ class DaemonPsud(daemon_base.DaemonBase):
try:
platform_psuutil = self.load_platform_util(PLATFORM_SPECIFIC_MODULE_NAME, PLATFORM_SPECIFIC_CLASS_NAME)
except Exception as e:
self.log_error("Failed to load psuutil: %s" % (str(e)), True)
sys.exit(PSUUTIL_LOAD_ERROR)
self.log_warning("Failed to load psuutil: %s" % (str(e)), True)
Copy link

Copilot AI Oct 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The log_warning method is being called with a second parameter True, but logger methods typically don't take a boolean parameter for console output. This could cause runtime errors if the logger doesn't support this parameter pattern.

Suggested change
self.log_warning("Failed to load psuutil: %s" % (str(e)), True)
self.log_warning("Failed to load psuutil: %s" % (str(e)))

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

# Don't expect the PSUD to exit just because psuutil is not available
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this behavior being changed? Is there a benefit to continuing when psuutil isn't present?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don’t want psud to crash when psuutil cannot be loaded. Instead, we prefer to let platform_psuutil continue with default behavior. Also, since platform_psuutil is a global variable, I didn’t set it to None after the error to avoid overwriting any existing value.

@vvolam , please correct me if I’m wrong.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@LinJin23, I also think we should stick to the earlier behavior, i.e., if the platform psuutil is not found, we need to exit.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Revert to the previous behavior

Copy link

Copilot AI Oct 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a grammatical error in the comment. It should be 'Don't expect PSUD to exit' instead of 'Don't expect the PSUD to exit'.

Suggested change
# Don't expect the PSUD to exit just because psuutil is not available
# Don't expect PSUD to exit just because psuutil is not available

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line has been removed.



Copy link

Copilot AI Sep 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's trailing whitespace on line 439. Consider removing it for cleaner code.

Suggested change

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

# Connect to STATE_DB and create psu/chassis info tables
state_db = daemon_base.db_connect("STATE_DB")
Expand All @@ -409,7 +446,7 @@ class DaemonPsud(daemon_base.DaemonBase):
self.phy_entity_tbl = swsscommon.Table(state_db, PHYSICAL_ENTITY_INFO_TABLE)

# Post psu number info to STATE_DB
self.num_psus = _wrapper_get_num_psus()
self.num_psus = _wrapper_get_num_psus(self)
fvs = swsscommon.FieldValuePairs([(CHASSIS_INFO_PSU_NUM_FIELD, str(self.num_psus))])
self.chassis_tbl.set(CHASSIS_INFO_KEY, fvs)

Expand Down Expand Up @@ -474,7 +511,7 @@ class DaemonPsud(daemon_base.DaemonBase):

def _update_single_psu_data(self, index, psu):
name = get_psu_key(index)
presence = _wrapper_get_psu_presence(index)
presence = _wrapper_get_psu_presence(self, index)
power_good = False
voltage = NOT_AVAILABLE
voltage_high_threshold = NOT_AVAILABLE
Expand All @@ -488,7 +525,7 @@ class DaemonPsud(daemon_base.DaemonBase):
in_current = NOT_AVAILABLE
max_power = NOT_AVAILABLE
if presence:
power_good = _wrapper_get_psu_status(index)
power_good = _wrapper_get_psu_status(self, index)
voltage = try_get(psu.get_voltage, NOT_AVAILABLE)
voltage_high_threshold = try_get(psu.get_voltage_high_threshold, NOT_AVAILABLE)
voltage_low_threshold = try_get(psu.get_voltage_low_threshold, NOT_AVAILABLE)
Expand Down Expand Up @@ -612,8 +649,8 @@ class DaemonPsud(daemon_base.DaemonBase):
(PSU_INFO_IN_CURRENT_FIELD, str(in_current)),
(PSU_INFO_IN_VOLTAGE_FIELD, str(in_voltage)),
(PSU_INFO_POWER_MAX_FIELD, str(max_power)),
(PSU_INFO_PRESENCE_FIELD, 'true' if _wrapper_get_psu_presence(index) else 'false'),
(PSU_INFO_STATUS_FIELD, 'true' if _wrapper_get_psu_status(index) else 'false'),
(PSU_INFO_PRESENCE_FIELD, 'true' if _wrapper_get_psu_presence(self, index) else 'false'),
(PSU_INFO_STATUS_FIELD, 'true' if _wrapper_get_psu_status(self, index) else 'false'),
])
self.psu_tbl.set(name, fvs)

Expand Down Expand Up @@ -642,7 +679,7 @@ class DaemonPsud(daemon_base.DaemonBase):
:return:
"""
psu_name = get_psu_key(psu_index)
presence = _wrapper_get_psu_presence(psu_index)
presence = _wrapper_get_psu_presence(self, psu_index)
fan_list = psu.get_all_fans()
for index, fan in enumerate(fan_list):
fan_name = try_get(fan.get_name, '{} FAN {}'.format(psu_name, index + 1))
Expand Down
Loading
Loading