@@ -500,21 +500,43 @@ public void populateRoutingRulesForLinkStatusChange(Link linkDown, Link linkUp,
500500 // comparing all routes of existing ECMP SPG to new ECMP SPG
501501 routeChanges = computeRouteChange (switchDown );
502502
503- // deal with linkUp of a seen-before link
504- if (linkUp != null && seenBefore ) {
505- // link previously seen before
506- // do hash-bucket changes instead of a re-route
507- processHashGroupChange (routeChanges , false , null );
508- // clear out routesChanges so a re-route is not attempted
509- routeChanges = ImmutableSet .of ();
510- hashGroupsChanged = true ;
503+ // deal with linkUp
504+ if (linkUp != null ) {
505+ // deal with linkUp of a seen-before link
506+ if (seenBefore ) {
507+ // link previously seen before
508+ // do hash-bucket changes instead of a re-route
509+ processHashGroupChangeForLinkUp (routeChanges );
510+ // clear out routesChanges so a re-route is not attempted
511+ routeChanges = ImmutableSet .of ();
512+ hashGroupsChanged = true ;
513+ } else {
514+ // do hash-bucket changes first, method will return changed routes;
515+ // for each route not changed it will perform a reroute
516+ Set <ArrayList <DeviceId >> changedRoutes = processHashGroupChangeForLinkUp (routeChanges );
517+ Set <ArrayList <DeviceId >> routeChangesTemp = getExpandedRoutes (routeChanges );
518+ changedRoutes .forEach (routeChangesTemp ::remove );
519+ // if routesChanges is empty a re-route is not attempted
520+ routeChanges = routeChangesTemp ;
521+ for (ArrayList <DeviceId > route : routeChanges ) {
522+ log .debug ("remaining routes Target -> Root" );
523+ if (route .size () == 1 ) {
524+ log .debug (" : all -> {}" , route .get (0 ));
525+ } else {
526+ log .debug (" : {} -> {}" , route .get (0 ), route .get (1 ));
527+ }
528+ }
529+ // Mark hash groups as changed
530+ if (!changedRoutes .isEmpty ()) {
531+ hashGroupsChanged = true ;
532+ }
533+ }
534+
511535 }
512- // for a linkUp of a never-seen-before link
513- // let it fall through to a reroute of the routeChanges
514536
515537 //deal with switchDown
516538 if (switchDown != null ) {
517- processHashGroupChange (routeChanges , true , switchDown );
539+ processHashGroupChangeForFailure (routeChanges , switchDown );
518540 // clear out routesChanges so a re-route is not attempted
519541 routeChanges = ImmutableSet .of ();
520542 hashGroupsChanged = true ;
@@ -523,7 +545,7 @@ public void populateRoutingRulesForLinkStatusChange(Link linkDown, Link linkUp,
523545 // link has gone down
524546 // Compare existing ECMP SPG only with the link that went down
525547 routeChanges = computeDamagedRoutes (linkDown );
526- processHashGroupChange (routeChanges , true , null );
548+ processHashGroupChangeForFailure (routeChanges , null );
527549 // clear out routesChanges so a re-route is not attempted
528550 routeChanges = ImmutableSet .of ();
529551 hashGroupsChanged = true ;
@@ -545,6 +567,10 @@ public void populateRoutingRulesForLinkStatusChange(Link linkDown, Link linkUp,
545567 return ;
546568 }
547569
570+ if (hashGroupsChanged ) {
571+ log .debug ("Hash-groups changed for link status change" );
572+ }
573+
548574 // reroute of routeChanges
549575 if (redoRouting (routeChanges , edgePairs , null )) {
550576 log .debug ("populateRoutingRulesForLinkStatusChange: populationStatus is SUCCEEDED" );
@@ -587,25 +613,10 @@ public void populateRoutingRulesForLinkStatusChange(Link linkDown, Link linkUp,
587613 private boolean redoRouting (Set <ArrayList <DeviceId >> routeChanges ,
588614 Set <EdgePair > edgePairs , Set <IpPrefix > subnets ) {
589615 // first make every entry two-elements
590- Set <ArrayList <DeviceId >> changedRoutes = new HashSet <>();
591- for (ArrayList <DeviceId > route : routeChanges ) {
592- if (route .size () == 1 ) {
593- DeviceId dstSw = route .get (0 );
594- EcmpShortestPathGraph ec = updatedEcmpSpgMap .get (dstSw );
595- if (ec == null ) {
596- log .warn ("No graph found for {} .. aborting redoRouting" , dstSw );
597- return false ;
598- }
599- ec .getAllLearnedSwitchesAndVia ().keySet ().forEach (key -> {
600- ec .getAllLearnedSwitchesAndVia ().get (key ).keySet ().forEach (target -> {
601- changedRoutes .add (Lists .newArrayList (target , dstSw ));
602- });
603- });
604- } else {
605- DeviceId targetSw = route .get (0 );
606- DeviceId dstSw = route .get (1 );
607- changedRoutes .add (Lists .newArrayList (targetSw , dstSw ));
608- }
616+ Set <ArrayList <DeviceId >> changedRoutes = getExpandedRoutes (routeChanges );
617+ // no valid routes - fail fast
618+ if (changedRoutes .isEmpty ()) {
619+ return false ;
609620 }
610621
611622 // now process changedRoutes according to edgePairs
@@ -1051,77 +1062,47 @@ private boolean populateEcmpRoutingRulePartial(DeviceId targetSw, DeviceId destS
10511062 }
10521063
10531064 /**
1054- * Processes a set a route-path changes by editing hash groups.
1065+ * Processes a set a route-path changes due to a switch/link failure by editing hash groups.
10551066 *
10561067 * @param routeChanges a set of route-path changes, where each route-path is
10571068 * a list with its first element the src-switch of the path
10581069 * and the second element the dst-switch of the path.
1059- * @param linkOrSwitchFailed true if the route changes are for a failed
1060- * switch or linkDown event
10611070 * @param failedSwitch the switchId if the route changes are for a failed switch,
10621071 * otherwise null
10631072 */
1064- private void processHashGroupChange (Set <ArrayList <DeviceId >> routeChanges ,
1065- boolean linkOrSwitchFailed ,
1066- DeviceId failedSwitch ) {
1067- Set <ArrayList <DeviceId >> changedRoutes = new HashSet <>();
1073+ private void processHashGroupChangeForFailure (Set <ArrayList <DeviceId >> routeChanges ,
1074+ DeviceId failedSwitch ) {
10681075 // first, ensure each routeChanges entry has two elements
1069- for (ArrayList <DeviceId > route : routeChanges ) {
1070- if (route .size () == 1 ) {
1071- // route-path changes are from everyone else to this switch
1072- DeviceId dstSw = route .get (0 );
1073- srManager .deviceService .getAvailableDevices ().forEach (sw -> {
1074- if (!sw .id ().equals (dstSw )) {
1075- changedRoutes .add (Lists .newArrayList (sw .id (), dstSw ));
1076- }
1077- });
1078- } else {
1079- changedRoutes .add (route );
1080- }
1081- }
1076+ Set <ArrayList <DeviceId >> changedRoutes = getAllExpandedRoutes (routeChanges );
10821077 boolean someFailed = false ;
1078+ boolean success ;
10831079 Set <DeviceId > updatedDevices = Sets .newHashSet ();
10841080 for (ArrayList <DeviceId > route : changedRoutes ) {
10851081 DeviceId targetSw = route .get (0 );
10861082 DeviceId dstSw = route .get (1 );
1087- if (linkOrSwitchFailed ) {
1088- boolean success = fixHashGroupsForRoute (route , true );
1089- // it's possible that we cannot fix hash groups for a route
1090- // if the target switch has failed. Nevertheless the ecmp graph
1091- // for the impacted switch must still be updated.
1092- if (!success && failedSwitch != null && targetSw .equals (failedSwitch )) {
1093- currentEcmpSpgMap .put (dstSw , updatedEcmpSpgMap .get (dstSw ));
1094- currentEcmpSpgMap .remove (targetSw );
1095- log .debug ("Updating ECMPspg for dst:{} removing failed switch "
1096- + "target:{}" , dstSw , targetSw );
1097- updatedDevices .add (targetSw );
1098- updatedDevices .add (dstSw );
1099- continue ;
1100- }
1101- //linkfailed - update both sides
1102- if (success ) {
1103- currentEcmpSpgMap .put (targetSw , updatedEcmpSpgMap .get (targetSw ));
1104- currentEcmpSpgMap .put (dstSw , updatedEcmpSpgMap .get (dstSw ));
1105- log .debug ("Updating ECMPspg for dst:{} and target:{} for linkdown"
1106- + " or switchdown" , dstSw , targetSw );
1107- updatedDevices .add (targetSw );
1108- updatedDevices .add (dstSw );
1109- } else {
1110- someFailed = true ;
1111- }
1083+ success = fixHashGroupsForRoute (route , true );
1084+ // it's possible that we cannot fix hash groups for a route
1085+ // if the target switch has failed. Nevertheless the ecmp graph
1086+ // for the impacted switch must still be updated.
1087+ if (!success && failedSwitch != null && targetSw .equals (failedSwitch )) {
1088+ currentEcmpSpgMap .put (dstSw , updatedEcmpSpgMap .get (dstSw ));
1089+ currentEcmpSpgMap .remove (targetSw );
1090+ log .debug ("Updating ECMPspg for dst:{} removing failed switch "
1091+ + "target:{}" , dstSw , targetSw );
1092+ updatedDevices .add (targetSw );
1093+ updatedDevices .add (dstSw );
1094+ continue ;
1095+ }
1096+ //linkfailed - update both sides
1097+ if (success ) {
1098+ currentEcmpSpgMap .put (targetSw , updatedEcmpSpgMap .get (targetSw ));
1099+ currentEcmpSpgMap .put (dstSw , updatedEcmpSpgMap .get (dstSw ));
1100+ log .debug ("Updating ECMPspg for dst:{} and target:{} for linkdown"
1101+ + " or switchdown" , dstSw , targetSw );
1102+ updatedDevices .add (targetSw );
1103+ updatedDevices .add (dstSw );
11121104 } else {
1113- //linkup of seen before link
1114- boolean success = fixHashGroupsForRoute (route , false );
1115- if (success ) {
1116- currentEcmpSpgMap .put (targetSw , updatedEcmpSpgMap .get (targetSw ));
1117- currentEcmpSpgMap .put (dstSw , updatedEcmpSpgMap .get (dstSw ));
1118- log .debug ("Updating ECMPspg for target:{} and dst:{} for linkup" ,
1119- targetSw , dstSw );
1120- updatedDevices .add (targetSw );
1121- updatedDevices .add (dstSw );
1122- } else {
1123- someFailed = true ;
1124- }
1105+ someFailed = true ;
11251106 }
11261107 }
11271108 if (!someFailed ) {
@@ -1135,6 +1116,52 @@ private void processHashGroupChange(Set<ArrayList<DeviceId>> routeChanges,
11351116 }
11361117 }
11371118
1119+ /**
1120+ * Processes a set a route-path changes due to link up by editing hash groups.
1121+ *
1122+ * @param routeChanges a set of route-path changes, where each route-path is
1123+ * a list with its first element the src-switch of the path
1124+ * and the second element the dst-switch of the path.
1125+ * @return set of changed routes
1126+ */
1127+ private Set <ArrayList <DeviceId >> processHashGroupChangeForLinkUp (Set <ArrayList <DeviceId >> routeChanges ) {
1128+ // Stores changed routes
1129+ Set <ArrayList <DeviceId >> doneRoutes = new HashSet <>();
1130+ // first, ensure each routeChanges entry has two elements
1131+ Set <ArrayList <DeviceId >> changedRoutes = getAllExpandedRoutes (routeChanges );
1132+ boolean someFailed = false ;
1133+ boolean success ;
1134+ Set <DeviceId > updatedDevices = Sets .newHashSet ();
1135+ for (ArrayList <DeviceId > route : changedRoutes ) {
1136+ DeviceId targetSw = route .get (0 );
1137+ DeviceId dstSw = route .get (1 );
1138+ // linkup - fix (if possible)
1139+ success = fixHashGroupsForRoute (route , false );
1140+ if (success ) {
1141+ currentEcmpSpgMap .put (targetSw , updatedEcmpSpgMap .get (targetSw ));
1142+ currentEcmpSpgMap .put (dstSw , updatedEcmpSpgMap .get (dstSw ));
1143+ log .debug ("Updating ECMPspg for target:{} and dst:{} for linkup" ,
1144+ targetSw , dstSw );
1145+ updatedDevices .add (targetSw );
1146+ updatedDevices .add (dstSw );
1147+ doneRoutes .add (route );
1148+ } else {
1149+ someFailed = true ;
1150+ }
1151+
1152+ }
1153+ if (!someFailed ) {
1154+ // here is where we update all devices not touched by this instance
1155+ updatedEcmpSpgMap .keySet ().stream ()
1156+ .filter (devId -> !updatedDevices .contains (devId ))
1157+ .forEach (devId -> {
1158+ currentEcmpSpgMap .put (devId , updatedEcmpSpgMap .get (devId ));
1159+ log .debug ("Updating ECMPspg for remaining dev:{}" , devId );
1160+ });
1161+ }
1162+ return doneRoutes ;
1163+ }
1164+
11381165 /**
11391166 * Edits hash groups in the src-switch (targetSw) of a route-path by
11401167 * calling the groupHandler to either add or remove buckets in an existing
@@ -1700,6 +1727,56 @@ private Set<ArrayList<DeviceId>> computeRouteChange(DeviceId failedSwitch) {
17001727 return changedRoutes ;
17011728 }
17021729
1730+ // Utility method to expands the route changes in two elements array using
1731+ // the ECMP graph. Caller represents all to dst switch routes with an
1732+ // array containing only the dst switch.
1733+ private Set <ArrayList <DeviceId >> getExpandedRoutes (Set <ArrayList <DeviceId >> routeChanges ) {
1734+ Set <ArrayList <DeviceId >> changedRoutes = new HashSet <>();
1735+ // Ensure each routeChanges entry has two elements
1736+ for (ArrayList <DeviceId > route : routeChanges ) {
1737+ if (route .size () == 1 ) {
1738+ DeviceId dstSw = route .get (0 );
1739+ EcmpShortestPathGraph ec = updatedEcmpSpgMap .get (dstSw );
1740+ if (ec == null ) {
1741+ log .warn ("No graph found for {} .. aborting redoRouting" , dstSw );
1742+ return Collections .emptySet ();
1743+ }
1744+ ec .getAllLearnedSwitchesAndVia ().keySet ().forEach (key -> {
1745+ ec .getAllLearnedSwitchesAndVia ().get (key ).keySet ().forEach (target -> {
1746+ changedRoutes .add (Lists .newArrayList (target , dstSw ));
1747+ });
1748+ });
1749+ } else {
1750+ DeviceId targetSw = route .get (0 );
1751+ DeviceId dstSw = route .get (1 );
1752+ changedRoutes .add (Lists .newArrayList (targetSw , dstSw ));
1753+ }
1754+ }
1755+ return changedRoutes ;
1756+ }
1757+
1758+ // Utility method to expands the route changes in two elements array using
1759+ // the available devices. Caller represents all to dst switch routes with an
1760+ // array containing only the dst switch.
1761+ private Set <ArrayList <DeviceId >> getAllExpandedRoutes (Set <ArrayList <DeviceId >> routeChanges ) {
1762+ Set <ArrayList <DeviceId >> changedRoutes = new HashSet <>();
1763+ // Ensure each routeChanges entry has two elements
1764+ for (ArrayList <DeviceId > route : routeChanges ) {
1765+ if (route .size () == 1 ) {
1766+ // route-path changes are from everyone else to this switch
1767+ DeviceId dstSw = route .get (0 );
1768+ srManager .deviceService .getAvailableDevices ().forEach (sw -> {
1769+ if (!sw .id ().equals (dstSw )) {
1770+ changedRoutes .add (Lists .newArrayList (sw .id (), dstSw ));
1771+ }
1772+ });
1773+ } else {
1774+ changedRoutes .add (route );
1775+ }
1776+ }
1777+ return changedRoutes ;
1778+ }
1779+
17031780 /**
17041781 * For the root switch, searches all the target nodes reachable in the base
17051782 * graph, and compares paths to the ones in the comp graph.
0 commit comments