From fba9331b050873803a9903b63505721cb3682341 Mon Sep 17 00:00:00 2001 From: aditya-nexthop Date: Mon, 27 Oct 2025 23:33:30 +0000 Subject: [PATCH] Keep track of DPInitPending in software to work around transceiver firmware issues Certain transceiver firmwares clear DPInitPending on other datapaths when setting it for currently transitioning datapaths. This requires keeping track of DPInitPending state in software so that the config loop does not fail when two datapaths in a module are being configured in an interleaved manner. --- sonic-xcvrd/tests/test_xcvrd.py | 1 + sonic-xcvrd/xcvrd/xcvrd.py | 43 +++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/sonic-xcvrd/tests/test_xcvrd.py b/sonic-xcvrd/tests/test_xcvrd.py index 90de5c180..58cd837c2 100644 --- a/sonic-xcvrd/tests/test_xcvrd.py +++ b/sonic-xcvrd/tests/test_xcvrd.py @@ -2348,6 +2348,7 @@ def test_CmisManagerTask_update_port_transceiver_status_table_sw_cmis_state(self mock_get_status_tbl = MagicMock() mock_get_status_tbl.set = MagicMock() task.xcvr_table_helper.get_status_sw_tbl.return_value = mock_get_status_tbl + task.port_dict['Ethernet0'] = dict(asic_id=0) task.update_port_transceiver_status_table_sw_cmis_state("Ethernet0", CMIS_STATE_INSERTED) assert mock_get_status_tbl.set.call_count == 1 diff --git a/sonic-xcvrd/xcvrd/xcvrd.py b/sonic-xcvrd/xcvrd/xcvrd.py index fe6a33ea6..2d1acc5d8 100644 --- a/sonic-xcvrd/xcvrd/xcvrd.py +++ b/sonic-xcvrd/xcvrd/xcvrd.py @@ -338,6 +338,7 @@ def __init__(self, namespaces, port_mapping, main_thread_stop_event, skip_cmis_m self.skip_cmis_mgr = skip_cmis_mgr self.namespaces = namespaces self.xcvr_table_helper = XcvrTableHelper(self.namespaces) + self.dp_init_pending_dict = {} def log_debug(self, message): helper_logger.log_debug("CMIS: {}".format(message)) @@ -360,6 +361,9 @@ def update_port_transceiver_status_table_sw_cmis_state(self, lport, cmis_state_t fvs = swsscommon.FieldValuePairs([('cmis_state', cmis_state_to_set)]) 
status_table.set(lport, fvs) + if cmis_state_to_set in ( CMIS_STATE_INSERTED, CMIS_STATE_FAILED, ): + pport = self.port_dict[lport].get('index', "-1") + self.clear_data_path_init_pending(pport, lport) def on_port_update_event(self, port_change_event): if port_change_event.event_type not in [port_change_event.PORT_SET, port_change_event.PORT_DEL]: @@ -790,6 +794,31 @@ def check_datapath_init_pending(self, api, host_lanes_mask): return pending + def check_any_datapath_init_pending(self, pport, lport): + """ + Check if another logical port holds the datapath init pending flag for pport, claiming the flag for lport when no entry exists + + Args: + pport: port index used as the key in dp_init_pending_dict + lport: logical port that claims or already holds the pending flag + + Returns: + Boolean, true if a different logical port holds the pending flag for pport, otherwise false + """ + if pport not in self.dp_init_pending_dict: + self.log_notice("{}: Adding datapath init pending flag".format(lport)) + self.dp_init_pending_dict[pport] = lport + return not (self.dp_init_pending_dict[pport] == lport) + + def clear_data_path_init_pending(self, pport, lport): + """ + Clear the datapath init pending flag + """ + if pport in self.dp_init_pending_dict: + if lport == self.dp_init_pending_dict[pport]: + self.log_notice("{}: Clearing datapath init pending flag".format(lport)) + del self.dp_init_pending_dict[pport] + def check_datapath_state(self, api, host_lanes_mask, states): """ Check if the CMIS datapath states are in the specified state @@ -1298,6 +1327,19 @@ def task_worker(self): # Set Explicit control bit to apply Custom Host SI settings ec = 1 + # Some transceiver firmwares clear DpInitPending on unrelated datapaths when + # Apply_DataPathInit is written for a different datapath. So delay configuration + # until DpInitPending is cleared on all datapaths. 
+ if hasattr(api, 'get_cmis_rev'): + # Check datapath init pending on module that supports CMIS 5.x + majorRev = int(api.get_cmis_rev().split('.')[0]) + if majorRev >= 5 and self.check_any_datapath_init_pending(pport, lport): + self.log_notice("{}: datapath init was pending, delay config".format(lport)) + if self.is_timer_expired(expired): + self.log_notice("{}: timeout for clearing data path init pending".format(lport)) + self.force_cmis_reinit(lport, retries + 1) + continue + # D.1.3 Software Configuration and Initialization api.set_application(host_lanes_mask, appl, ec) if not api.scs_apply_datapath_init(host_lanes_mask): @@ -1340,6 +1382,7 @@ def task_worker(self): # D.1.3 Software Configuration and Initialization api.set_datapath_init(host_lanes_mask) + self.clear_data_path_init_pending(pport, lport) dpInitDuration = self.get_cmis_dp_init_duration_secs(api) self.log_notice("{}: DpInit duration {} secs".format(lport, dpInitDuration)) self.update_cmis_state_expiration_time(lport, dpInitDuration)