@@ -477,7 +477,6 @@ class CmisManagerTask(threading.Thread):
477477 CMIS_MODULE_TYPES = ['QSFP-DD' , 'QSFP_DD' , 'OSFP' , 'OSFP-8X' , 'QSFP+C' ]
478478 CMIS_MAX_HOST_LANES = 8
479479 CMIS_EXPIRATION_BUFFER_MS = 2
480- ALL_LANES_MASK = 0xff
481480
482481 def __init__ (self , namespaces , port_mapping , main_thread_stop_event , skip_cmis_mgr = False ):
483482 threading .Thread .__init__ (self )
@@ -698,98 +697,217 @@ def get_cmis_media_lanes_mask(self, api, appl, lport, subport):
698697
def clear_decomm_pending(self, lport):
    """
    Drop the decommission pending entry of a single logical port.

    When the logical port was the last one pending on its physical port,
    the physical port entry is removed from decomm_pending_dict as well.
    Unknown logical ports and physical ports without pending entries are
    silently ignored.

    Args:
        lport:
            String, logical port name
    """
    port_idx = self.port_dict.get(lport, {}).get('index')
    if port_idx in self.decomm_pending_dict:
        lanes_by_lport = self.decomm_pending_dict[port_idx]
        lanes_by_lport.pop(lport, None)
        # Do not keep an empty per-physical-port entry behind
        if not lanes_by_lport:
            del self.decomm_pending_dict[port_idx]
708714
def set_decomm_pending(self, lport, api):
    """
    Decide whether this logical port has to start (or wait for) decommissioning.

    Decommissioning is tracked per logical port: only the host lanes that
    must be reset for this logical port to apply its new appl code are
    decommissioned. The lanes are recorded as
        self.decomm_pending_dict[physical_port_idx][lport] = lanes_mask
    and the recorded mask may be wider than the logical port's own lanes.

    Three outcomes are possible:
      * another logical port is already decommissioning overlapping lanes:
        this port's own pending entry is cleared and the port waits (or is
        moved to CMIS_STATE_FAILED if the overlapping decommission failed);
      * this port needs lanes decommissioned: it is recorded as pending, its
        appl is set to 0, its host/media lane masks are set to the lanes to
        decommission, and it is moved to CMIS_STATE_DP_DEINIT;
      * nothing needs decommissioning: any stale pending entry is cleared.

    Args:
        lport:
            String, logical port name
        api:
            XcvrApi object
    Returns:
        Boolean, True if the caller should skip the rest of the processing
        of the current CMIS state, False otherwise
    """
    own_lanes_mask = self.port_dict[lport]['host_lanes_mask']
    decomm_lanes_mask = self.get_host_lanes_mask_requiring_decomm(lport, api)
    affected_lanes_mask = own_lanes_mask | decomm_lanes_mask

    # Lanes that other logical ports of this physical port are decommissioning
    others_pending_mask = self.get_decomm_pending_host_lanes_mask(lport, exclude_lports=[lport])
    if affected_lanes_mask & others_pending_mask:
        self.clear_decomm_pending(lport)

        if affected_lanes_mask & self.get_decomm_failed_host_lanes_mask(lport):
            # Any affected lane in decommissioning failed state fails this lport too
            self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_FAILED)
            status_text = "failed"
        else:
            status_text = "waiting for completion"

        self.log_notice("{}: DECOMM: decommission initiated by other lports is still in progress on "
                        "host lanes {:#x}, {}".format(lport, affected_lanes_mask, status_text))
        return True

    if not decomm_lanes_mask:
        # Nothing to decommission; drop a pending mark left from an earlier pass
        self.clear_decomm_pending(lport)
        return False

    physical_port_idx = self.port_dict[lport]['index']
    self.decomm_pending_dict.setdefault(physical_port_idx, {})[lport] = decomm_lanes_mask
    self.log_notice("{}: DECOMM: setting decomm_pending for host lanes "
                    "{:#x}".format(lport, decomm_lanes_mask))

    self.port_dict[lport].update({
        'appl': 0,
        'host_lanes_mask': decomm_lanes_mask,
        'media_lanes_mask': decomm_lanes_mask,
    })
    self.log_notice("{}: DECOMM: setting appl={} and host_lanes_mask/media_lanes_mask="
                    "{:#x}".format(lport, self.port_dict[lport]['appl'], decomm_lanes_mask))

    # The decommissioning lport goes straight to DP deinit; the caller skips
    # the rest of the deinit/pre-init handling for it
    self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_DP_DEINIT)
    return True
725776
def is_decomm_pending(self, lport):
    """
    Tell whether this logical port is recorded as decommission pending.

    Args:
        lport:
            String, logical port name
    Returns:
        Boolean, True if the logical port has an entry in decomm_pending_dict
        under its physical port, False otherwise
    """
    physical_port_idx = self.port_dict[lport]['index']
    pending_lports = self.decomm_pending_dict.get(physical_port_idx, {})
    return lport in pending_lports
737788
def get_decomm_pending_host_lanes_mask(self, lport, exclude_lports=None):
    """
    Get the host lanes in decommission pending status for the entire
    physical port the given logical port belongs to.

    Args:
        lport:
            String, logical port name
        exclude_lports:
            Optional collection of logical port names whose pending lanes
            are left out of the result. None (the default) excludes nothing.
    Returns:
        Integer, bitmask of host lanes that are decommission pending
        (0 when nothing is pending on the physical port)
    """
    # A None default avoids sharing one mutable list object across all calls
    excluded = () if exclude_lports is None else exclude_lports

    physical_port_idx = self.port_dict[lport]['index']
    decomm_ports = self.decomm_pending_dict.get(physical_port_idx, {})

    decomm_pending_mask = 0
    for logical_port, mask in decomm_ports.items():
        if logical_port not in excluded:
            decomm_pending_mask |= mask
    return decomm_pending_mask
814+
def get_decomm_failed_host_lanes_mask(self, lport):
    """
    Collect the host lanes whose decommissioning has failed on the physical
    port the given logical port belongs to.

    A pending logical port contributes its recorded lanes when its CMIS
    state read from the state DB is CMIS_STATE_FAILED.

    Args:
        lport:
            String, logical port name
    Returns:
        Integer, bitmask of host lanes in decommissioning failed state
    """
    physical_port_idx = self.port_dict[lport]['index']
    if physical_port_idx not in self.decomm_pending_dict:
        return 0

    failed_lanes = 0
    for pending_lport, lanes in self.decomm_pending_dict[physical_port_idx].items():
        status_tbl = self.xcvr_table_helper.get_status_sw_tbl(self.get_asic_id(pending_lport))
        if get_cmis_state_from_state_db(pending_lport, status_tbl) == CMIS_STATE_FAILED:
            failed_lanes |= lanes
    return failed_lanes
840+
def get_host_lanes_mask_requiring_decomm(self, lport, api):
    """
    Work out the smallest set of host lanes that must be decommissioned so
    the given logical port can apply its new appl code.

    Every host lane of the logical port whose active appl code is non-zero
    and differs from the new one marks its whole data path as conflicting.
    If all conflicting data paths lie inside the logical port's own lanes,
    no decommissioning is needed and 0 is returned; otherwise the union of
    the conflicting data paths is returned.

    Args:
        lport:
            String, logical port name
        api:
            XcvrApi object
    Returns:
        Integer, bitmask of host lanes that require decommissioning
    """
    lane_total = self.CMIS_MAX_HOST_LANES

    def data_path_mask_for(advertisement, appl_code, lane_idx):
        """
        Return the host lane mask of the data path that carries lane_idx
        for the given appl code, or 0 if it cannot be determined.

        Args:
            advertisement: The application advertisement dictionary
            appl_code: The application code
            lane_idx: The index of the lane this appl code is assigned to
        Returns:
            Integer, the host lane mask for this data path
        """
        appl_entry = advertisement.get(appl_code, {})
        start_options = appl_entry.get('host_lane_assignment_options')
        lane_count = appl_entry.get('host_lane_count')
        if not (start_options and lane_count):
            return 0

        lane_bit = 1 << lane_idx
        path_width_mask = (1 << lane_count) - 1
        # Try each advertised starting lane, lowest first
        candidate_paths = (
            path_width_mask << start_idx
            for start_idx in range(lane_total)
            if start_options & (1 << start_idx)
        )
        return next((path for path in candidate_paths if path & lane_bit), 0)

    own_lanes_mask = self.port_dict[lport]['host_lanes_mask']
    advertisement = api.get_application_advertisement()
    active_appls = api.get_active_apsel_hostlane()
    wanted_appl = self.port_dict[lport]['appl']

    # Union of the configured data paths that touch this logical port's lanes
    # with an appl code conflicting with the wanted one
    conflict_mask = 0
    for lane_idx in range(lane_total):
        if own_lanes_mask & (1 << lane_idx):
            active_appl = active_appls.get('ActiveAppSelLane{}'.format(lane_idx + 1), 0)
            if active_appl not in (0, wanted_appl):
                conflict_mask |= data_path_mask_for(advertisement, active_appl, lane_idx)

    # Conflicts fully inside this lport's own lanes can simply be overwritten
    # by the new appl code, so only spill-over forces a decommission
    if conflict_mask & ~own_lanes_mask:
        decomm_mask = conflict_mask
        emit = self.log_notice
    else:
        decomm_mask = 0
        emit = self.log_debug

    emit("{}: DECOMM: based on ActiveAppSel(lane 8->1) {}, to apply appl {} on {:#010b}, "
         "host lanes requiring decomm is {:#010b}".format(
             lport, list(reversed(active_appls.values())), wanted_appl, own_lanes_mask,
             decomm_mask))
    return decomm_mask
793911
794912 def is_cmis_application_update_required (self , api , app_new , host_lanes_mask ):
795913 """
@@ -1211,16 +1329,15 @@ def task_worker(self):
12111329 host_lanes_mask = self .port_dict [lport ].get ('host_lanes_mask' , 0 )
12121330 appl = self .port_dict [lport ].get ('appl' , 0 )
12131331 # appl can be 0 if this lport is in decommission state machine, which should not be considered as failed case.
1214- if state != CMIS_STATE_INSERTED and not self .is_decomm_lead_lport (lport ) and (host_lanes_mask <= 0 or appl < 1 ):
1332+ if state != CMIS_STATE_INSERTED and not self .is_decomm_pending (lport ) and (host_lanes_mask <= 0 or appl < 1 ):
12151333 self .log_error ("{}: Unexpected value for host_lanes_mask {} or appl {} in "
12161334 "{} state" .format (lport , host_lanes_mask , appl , state ))
12171335 self .update_port_transceiver_status_table_sw_cmis_state (lport , CMIS_STATE_FAILED )
12181336 continue
12191337
12201338 self .log_notice ("{}: {}G, lanemask=0x{:x}, CMIS state={}{}, Module state={}, DP state={}, appl {} host_lane_count {} "
12211339 "retries={}" .format (lport , int (speed / 1000 ), host_lanes_mask , state ,
1222- "(decommission" + ("*" if self .is_decomm_lead_lport (lport ) else "" ) + ")"
1223- if self .is_decomm_pending (lport ) else "" ,
1340+ "(decommission)" if self .is_decomm_pending (lport ) else "" ,
12241341 api .get_module_state (), api .get_datapath_state (), appl , host_lane_count , retries ))
12251342 if retries > self .CMIS_MAX_RETRIES :
12261343 self .log_error ("{}: FAILED" .format (lport ))
@@ -1264,27 +1381,7 @@ def task_worker(self):
12641381 media_lanes_mask = self .port_dict [lport ]['media_lanes_mask' ]
12651382 self .log_notice ("{}: Setting media_lanemask=0x{:x}" .format (lport , media_lanes_mask ))
12661383
1267- if self .is_decommission_required (api , appl ):
1268- self .set_decomm_pending (lport )
1269-
1270- if self .is_decomm_lead_lport (lport ):
1271- # Set all the DP lanes AppSel to unused(0) when non default app code needs to be configured
1272- self .port_dict [lport ]['appl' ] = appl = 0
1273- self .port_dict [lport ]['host_lanes_mask' ] = host_lanes_mask = self .ALL_LANES_MASK
1274- self .port_dict [lport ]['media_lanes_mask' ] = self .ALL_LANES_MASK
1275- self .log_notice ("{}: DECOMMISSION: setting appl={} and "
1276- "host_lanes_mask/media_lanes_mask={:#x}" .format (lport , appl , self .ALL_LANES_MASK ))
1277- # Skip rest of the deinit/pre-init when this is the lead logical port for decommission
1278- self .update_port_transceiver_status_table_sw_cmis_state (lport , CMIS_STATE_DP_DEINIT )
1279- continue
1280- elif self .is_decomm_pending (lport ):
1281- if self .is_decomm_failed (lport ):
1282- self .update_port_transceiver_status_table_sw_cmis_state (lport , CMIS_STATE_FAILED )
1283- decomm_status_str = "failed"
1284- else :
1285- decomm_status_str = "waiting for completion"
1286- self .log_notice ("{}: DECOMMISSION: decommission has already started for this physical port, "
1287- "{}" .format (lport , decomm_status_str ))
1384+ if self .set_decomm_pending (lport , api ):
12881385 continue
12891386
12901387 if self .port_dict [lport ]['host_tx_ready' ] != 'true' or \
@@ -1444,7 +1541,7 @@ def task_worker(self):
14441541
14451542 # Clear decommission status and invoke CMIS reinit so that normal CMIS initialization can begin
14461543 if self .is_decomm_pending (lport ):
1447- self .log_notice ("{}: DECOMMISSION: done for physical port { }" .format (lport , self .port_dict [lport ]['index ' ]))
1544+ self .log_notice ("{}: DECOMM: decommission done for host lanes {:#x }" .format (lport , self .port_dict [lport ]['host_lanes_mask ' ]))
14481545 self .clear_decomm_pending (lport )
14491546 self .force_cmis_reinit (lport )
14501547 continue
0 commit comments