Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 90 additions & 1 deletion sonic-xcvrd/tests/test_xcvrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -1792,7 +1792,8 @@ def handle_port_change_event(self, port_event):
def test_handle_port_config_change(self, mock_select, mock_sub_table):
mock_selectable = MagicMock()
mock_selectable.pop = MagicMock(
side_effect=[('Ethernet0', swsscommon.SET_COMMAND, (('index', '1'), )), (None, None, None)])
side_effect=[('Ethernet0', swsscommon.SET_COMMAND, (('index', '1'), ('speed', '200000'))),
(None, None, None)])
mock_select.return_value = (swsscommon.Select.OBJECT, mock_selectable)
mock_sub_table.return_value = mock_selectable

Expand All @@ -1809,6 +1810,25 @@ def test_handle_port_config_change(self, mock_select, mock_sub_table):
assert port_mapping.get_physical_to_logical(1) == ['Ethernet0']
assert port_mapping.get_logical_to_physical('Ethernet0') == [1]

mock_selectable.pop = MagicMock(
side_effect=[('Ethernet0', swsscommon.SET_COMMAND, (('index', '1'), ('speed', '100000'))),
(None, None, None)])
handle_port_config_change(sel, asic_context, stop_event, port_mapping,
logger, port_mapping.handle_port_change_event)
assert port_mapping.logical_port_list.count('Ethernet0')
assert port_mapping.get_asic_id_for_logical_port('Ethernet0') == 0
assert port_mapping.get_physical_to_logical(1) == ['Ethernet0']
assert port_mapping.get_logical_to_physical('Ethernet0') == [1]
expected_cache = {
('Ethernet0'): {
'port_name': 'Ethernet0',
'index': '1',
'asic_id': 0,
'speed': '100000',
}
}
assert port_mapping.port_event_cache == expected_cache

mock_selectable.pop = MagicMock(
side_effect=[('Ethernet0', swsscommon.DEL_COMMAND, (('index', '1'), )), (None, None, None)])
handle_port_config_change(sel, asic_context, stop_event, port_mapping,
Expand Down Expand Up @@ -3817,6 +3837,7 @@ def test_SfpStateUpdateTask_handle_port_change_event(self, mock_del_port_sfp_dom
mock_table_helper.get_int_tbl = MagicMock(return_value=mock_table)
mock_table_helper.get_dom_tbl = MagicMock(return_value=mock_table)
mock_table_helper.get_dom_threshold_tbl = MagicMock(return_value=mock_table)
mock_table_helper.get_state_port_tbl = MagicMock(return_value=mock_table)
stop_event = threading.Event()
sfp_error_event = threading.Event()
port_mapping = PortMapping()
Expand All @@ -3826,6 +3847,7 @@ def test_SfpStateUpdateTask_handle_port_change_event(self, mock_del_port_sfp_dom
task.xcvr_table_helper.get_status_tbl = mock_table_helper.get_status_tbl
task.xcvr_table_helper.get_intf_tbl = mock_table_helper.get_intf_tbl
task.xcvr_table_helper.get_dom_tbl = mock_table_helper.get_dom_tbl
task.xcvr_table_helper.get_state_port_tbl = mock_table_helper.get_state_port_tbl
port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_ADD)
wait_time = 5
while wait_time > 0:
Expand All @@ -3840,6 +3862,19 @@ def test_SfpStateUpdateTask_handle_port_change_event(self, mock_del_port_sfp_dom
assert task.port_mapping.get_logical_to_physical('Ethernet0') == [1]
assert mock_del_port_sfp_dom_info_from_db.call_count == 0

port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_SET)
wait_time = 5
while wait_time > 0:
task.on_port_config_change(port_change_event)
if task.port_mapping.logical_port_list:
break
wait_time -= 1
time.sleep(1)
assert task.port_mapping.logical_port_list.count('Ethernet0')
assert task.port_mapping.get_asic_id_for_logical_port('Ethernet0') == 0
assert task.port_mapping.get_physical_to_logical(1) == ['Ethernet0']
assert task.port_mapping.get_logical_to_physical('Ethernet0') == [1]

port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_REMOVE)
wait_time = 5
while wait_time > 0:
Expand Down Expand Up @@ -4120,6 +4155,60 @@ class MockTable:
mock_update_status.assert_called_with(
'Ethernet0', status_sw_tbl, task.sfp_error_dict[1][0], 'Blocking EEPROM from being read|Power budget exceeded')

@patch('xcvrd.xcvrd.XcvrTableHelper')
@patch('xcvrd.xcvrd._wrapper_get_presence')
@patch('xcvrd.xcvrd_utilities.media_settings_parser.notify_media_setting')
@patch('xcvrd.xcvrd.post_port_sfp_info_to_db')
def test_SfpStateUpdateTask_on_update_logical_port(self, mock_post_sfp_info,
                                                   mock_update_media_setting, mock_get_presence, mock_table_helper):
    """Check on_update_logical_port for a port whose transceiver is present.

    Two passes are made with the same port event:
      1. post_port_sfp_info_to_db reports SFP_EEPROM_NOT_READY: the port must
         be queued in retry_eeprom_set and media settings must not be notified.
      2. post_port_sfp_info_to_db returns None: media settings must be
         notified exactly once and the port must not be queued for retry.
    """
    class MockTable:
        pass

    def build_table(get_result):
        # Bare table stand-in exposing only mocked get()/set().
        table = MockTable()
        table.get = MagicMock(return_value=get_result)
        table.set = MagicMock()
        return table

    status_sw_tbl = build_table((True, (('status', SFP_STATUS_INSERTED),)))
    int_tbl = build_table((True, (('key2', 'value2'),)))
    state_port_tbl = build_table((True, (('key5', 'value5'),)))

    mock_table_helper.get_status_sw_tbl = MagicMock(return_value=status_sw_tbl)
    mock_table_helper.get_intf_tbl = MagicMock(return_value=int_tbl)
    mock_table_helper.get_state_port_tbl = MagicMock(return_value=state_port_tbl)

    task = SfpStateUpdateTask(DEFAULT_NAMESPACE, PortMapping(), MagicMock(),
                              threading.Event(), threading.Event())
    # Route the table getters used by the task to the mocked tables above.
    table_helper = XcvrTableHelper(DEFAULT_NAMESPACE)
    task.xcvr_table_helper = table_helper
    table_helper.get_status_sw_tbl = mock_table_helper.get_status_sw_tbl
    table_helper.get_intf_tbl = mock_table_helper.get_intf_tbl
    table_helper.get_state_port_tbl = mock_table_helper.get_state_port_tbl

    port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_ADD)
    task.port_mapping.handle_port_change_event(port_change_event)

    status_sw_tbl.get.return_value = (False, ())
    mock_get_presence.return_value = True

    # Pass 1: SFP is present but its EEPROM is not ready yet.
    mock_post_sfp_info.return_value = SFP_EEPROM_NOT_READY
    task.on_update_logical_port(port_change_event)
    mock_post_sfp_info.assert_called_once_with('Ethernet0', task.port_mapping, int_tbl, {})
    mock_update_media_setting.assert_not_called()
    assert 'Ethernet0' in task.retry_eeprom_set
    task.retry_eeprom_set.clear()

    # Pass 2: SFP is present and its info is posted successfully.
    mock_post_sfp_info.return_value = None
    mock_post_sfp_info.reset_mock()
    task.on_update_logical_port(port_change_event)
    mock_post_sfp_info.assert_called_once_with('Ethernet0', task.port_mapping, int_tbl, {})
    mock_update_media_setting.assert_called_once()
    assert 'Ethernet0' not in task.retry_eeprom_set

def test_sfp_insert_events(self):
from xcvrd.xcvrd import _wrapper_soak_sfp_insert_event
sfp_insert_events = {}
Expand Down
27 changes: 27 additions & 0 deletions sonic-xcvrd/xcvrd/xcvrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -1998,6 +1998,8 @@ def on_port_config_change(self , port_change_event):
elif port_change_event.event_type == port_event_helper.PortChangeEvent.PORT_ADD:
self.port_mapping.handle_port_change_event(port_change_event)
self.on_add_logical_port(port_change_event)
elif port_change_event.event_type == port_event_helper.PortChangeEvent.PORT_SET:
self.on_update_logical_port(port_change_event)

def on_remove_logical_port(self, port_change_event):
"""Called when a logical port is removed from CONFIG_DB.
Expand Down Expand Up @@ -2095,6 +2097,31 @@ def on_add_logical_port(self, port_change_event):
status = sfp_status_helper.SFP_STATUS_REMOVED if not status else status
update_port_transceiver_status_table_sw(port_change_event.port_name, status_sw_tbl, status, error_description)

def on_update_logical_port(self, port_change_event):
    """Called when a logical port is updated.

    Invoked to update media settings when the port speed is changed.
    The NPU SI settings sync status of the port is reset to its default
    value first; then, if the transceiver is present, its info is posted to
    the DB and the media settings are notified. If the EEPROM is not ready,
    the port is added to the EEPROM retry set instead.

    Args:
        port_change_event (object): port change event; its port_name,
            port_index and asic_id attributes are read.
    """
    port_name = port_change_event.port_name
    asic_id = port_change_event.asic_id

    int_tbl = self.xcvr_table_helper.get_intf_tbl(asic_id)
    state_port_table = self.xcvr_table_helper.get_state_port_tbl(asic_id)

    # Only the existence of the entry matters here; its field values are unused.
    found, _ = state_port_table.get(port_name)
    if not found:
        helper_logger.log_warning("Logical port {} not found in STATE_DB PORT_TABLE".format(port_name))
        return

    # Initialize the NPU_SI_SETTINGS_SYNC_STATUS to default value
    state_port_table.set(port_name, [(NPU_SI_SETTINGS_SYNC_STATUS_KEY, NPU_SI_SETTINGS_DEFAULT_VALUE)])

    if not _wrapper_get_presence(port_change_event.port_index):
        # No transceiver present: nothing to post or notify.
        return

    transceiver_dict = {}
    rc = post_port_sfp_info_to_db(port_name, self.port_mapping, int_tbl, transceiver_dict)
    if rc == SFP_EEPROM_NOT_READY:
        # Failed to read EEPROM, put it to retry set
        self.retry_eeprom_set.add(port_name)
        return

    media_settings_parser.notify_media_setting(port_name, transceiver_dict, self.xcvr_table_helper, self.port_mapping)

def retry_eeprom_reading(self):
"""Retry EEPROM reading, if retry succeed, remove the logical port from the retry set
"""
Expand Down
25 changes: 25 additions & 0 deletions sonic-xcvrd/xcvrd/xcvrd_utilities/port_event_helper.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pavannaregundi How are we ensuring NPU_SI_SETTINGS_SYNC_STATUS_KEY is set properly? @mihirpat1 FYI

Also, does the CmisManager reinitialize the port on speed change only after ensuring the following? Do you have the log to confirm?

  1. The host serdes has set the right media settings
  2. A valid host signal as per the new speed change (with newly notified media settings) is being sent to the module - This is ensured via host_tx_ready between swss and xcvrd

Copy link
Author

@pavannaregundi pavannaregundi Aug 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@prgeor Please find my answers inline. Also please refer to the logs
media_settings_with_speed_change_log.txt.txt

These logs were taken with the following commands:

# config interface speed Ethernet256 400000
# config interface speed Ethernet280 400000

@pavannaregundi How are we ensuring NPU_SI_SETTINGS_SYNC_STATUS_KEY is set properly? @mihirpat1 FYI

NPU_SI_SETTINGS_SYNC_STATUS_KEY is always set to NPU_SI_SETTINGS_DEFAULT_VALUE in on_update_logical_port() before calling notify_media_setting(), to make sure serdes values are newly calculated as per the current port speed. Finally it is set to NPU_SI_SETTINGS_NOTIFIED_VALUE by notify_media_setting() which indicates the serdes values are notified. State db dump of the ports under tests are attached in logs file to show value is NPU_SI_SETTINGS_NOTIFIED_VALUE.

Also, does the CmisManager reinitialize the port on speed change only after ensuring the following? Do you have the log to confirm?

  1. The host serdes has set the right media settings

In attached log file, sairedis.logs shows that old serdes object is removed and new values are applied to syncd SAI SDK. Media setting values are same as the one shown in the issue sonic-net/sonic-buildimage#23480

  1. A valid host signal as per the new speed change (with newly notified media settings) is being sent to the module - This is ensured via host_tx_ready between swss and xcvrd

In attached log file, sairedis.log/syslog show 'SAI_PORT_HOST_TX_READY_STATUS_READY' and corresponding 'host_tx_ready' in swss#orchagent. Also indicates CMIS reinit after host_tx_ready set to 'true'.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pavannaregundi I would suggest to use a port re-creation approach so that Xcvrd can delete the OLD port and create the new PORT even if only port speed attribute is changed. There are other attributes like lanes, fec etc which may be different with new speed. So a cleaner approach is to delete the old port from config DB. This way, when the new PORT is created, Xcvrd should publish the SI settings accordingly as here:-

media_settings_parser.notify_media_setting(port_change_event.port_name, transceiver_dict, self.xcvr_table_helper, self.port_mapping)

Copy link
Author

@pavannaregundi pavannaregundi Nov 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@prgeor This change is only about a speed change made with "config interface speed <interface_name> <speed>", which the current code treats as an update to CONFIG_DB. It is not a delete-and-create operation. Deleting and re-creating the port in CONFIG_DB for a speed change would be a much bigger change.

https://github.com/sonic-net/SONiC/blob/master/doc/port-si/Port_SI_Per_Speed.md HLD has this statement in this regard - "After applying the port SI per speed enhancements, it will also be carried out upon port speed change events: Whenever a port speed change is detected by listening to CONFIG_DB, Notify-Media-Settings-Process will be called to send the most applicable SI values in the JSON to SAI. Port speed changes require invoking the Notify-Media-Settings-Process because after such a change, the lane_speed_key used for lookup in the JSON changes accordingly, and the previously configured SI values in the ASIC are no longer relevant."
However, the change required to achieve this was missed in the code.

The number of lanes changes only when there is a breakout, and a breakout automatically triggers a port delete and port add in CONFIG_DB, so it ends up as a delete and add for xcvrd too. (SAI_PORT_ATTR_HW_LANE_LIST in SAI is a create-only attribute.)
FEC is not a parameter for media settings, so an FEC change should not trigger any change in media settings.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pavannaregundi The HLD can be updated to what I mentioned. I don't want to reinvent the wheel for each of the following scenarios. I would like to fix it once and for all.

  1. speed is changed but not lanes
  2. speed is changed along with fec and other port attributes.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pavannaregundi I don't know what you mean by bigger change. During breakout of the port speed, we delete the old port and create the new port anyways.

Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,13 @@ def __init__(self):
self.physical_to_logical = {}
# Logical port name to ASIC ID mapping
self.logical_to_asic = {}
self.port_event_cache = {}

def apply_filter_to_fvp(self, filter, fvp):
    """Prune ``fvp`` in place down to the filtered and bookkeeping fields.

    The 'index', 'port_name' and 'asic_id' keys are always kept, in addition
    to every key named in ``filter``.

    Args:
        filter: Iterable of field names to keep, or None to leave ``fvp``
            untouched. (The name shadows the builtin; it is kept so that
            existing keyword callers continue to work.)
        fvp (dict): Field/value pairs of a port event; modified in place.
    """
    if filter is None:
        return
    # Build the keep-set once: rebuilding it per key is wasteful and would
    # silently drop filtered keys if ``filter`` were a one-shot iterable.
    keep = set(filter) | {'index', 'port_name', 'asic_id'}
    # Snapshot the keys because entries are deleted while iterating.
    for key in list(fvp):
        if key not in keep:
            del fvp[key]

def handle_port_change_event(self, port_change_event):
if port_change_event.event_type == PortChangeEvent.PORT_ADD:
Expand Down Expand Up @@ -313,6 +320,12 @@ def read_port_config_change(asic_context, port_mapping, logger, port_change_even
fvp = dict(fvp)
if not multi_asic.is_front_panel_port(key, fvp.get(multi_asic.PORT_ROLE, None)):
continue

fvp['port_name'] = key
fvp['asic_id'] = asic_context[port_tbl]
filter = ['speed']
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One concern here:

Consider the following settings:
COPPER100 {
...
}
Copper50 {
}

If the breakout goes from Ethernet0 400G-8 (using Copper50 tuning values) to 2x400G-4 (Copper100 values)

I think that event will be missed based on speed alone, the number of lanes should also be a factor

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bobby-nexthop Thank you for the review. The number of lanes changes only due to a port breakout, and a breakout triggers a port remove and add. This scenario already works, as PORT ADD and PORT REMOVE are taken care of by the existing code base.

We do see an error w.r.t. NPU_SI_SETTINGS_SYNC_STATUS, and a change for this is already up for review in PR #622

port_mapping.apply_filter_to_fvp(filter, fvp)

if op == swsscommon.SET_COMMAND:
if 'index' not in fvp:
continue
Expand All @@ -333,6 +346,15 @@ def read_port_config_change(asic_context, port_mapping, logger, port_change_even

port_change_event = PortChangeEvent(key, new_physical_index, asic_context[port_tbl], PortChangeEvent.PORT_ADD)
port_change_event_handler(port_change_event)
else:
if key in port_mapping.port_event_cache:
# Compare current event with last event on this key, to see if
# there's really a need to update.
diff = set(fvp.items()) - set(port_mapping.port_event_cache[key].items())
# Create set event handler if there is a difference
if diff:
port_change_event = PortChangeEvent(key, new_physical_index, asic_context[port_tbl], PortChangeEvent.PORT_SET)
port_change_event_handler(port_change_event)
elif op == swsscommon.DEL_COMMAND:
if port_mapping.is_logical_port(key):
port_change_event = PortChangeEvent(key,
Expand All @@ -343,6 +365,9 @@ def read_port_config_change(asic_context, port_mapping, logger, port_change_even
else:
logger.log_warning('Invalid DB operation: {}'.format(op))

# Update the latest event to the cache
port_mapping.port_event_cache[key] = fvp

def get_port_mapping(namespaces):
"""Get port mapping from CONFIG_DB
"""
Expand Down
Loading