     MAX_REPEATER_FAILURES_BEFORE_LOGIN,
     REPEATER_BACKOFF_BASE,
     REPEATER_BACKOFF_MAX_MULTIPLIER,
+    MAX_FAILURES_BEFORE_PATH_RESET,
+    MAX_RETRY_ATTEMPTS,
     CONF_CONTACT_REFRESH_INTERVAL,
     DEFAULT_CONTACT_REFRESH_INTERVAL,
     CONF_REPEATER_TELEMETRY_ENABLED,
@@ -128,6 +130,34 @@ def __init__(
 
         if not hasattr(self, "last_update_success_time"):
             self.last_update_success_time = self._current_time()
+
+        # Initialize reliability stats tracking
+        self._reliability_stats = {}
+
+    def _increment_success(self, pubkey_prefix: str) -> None:
+        """Increment success counter for a node."""
+        stats_key = f"{pubkey_prefix}_request_successes"
+        self._reliability_stats[stats_key] = self._reliability_stats.get(stats_key, 0) + 1
+
+    def _increment_failure(self, pubkey_prefix: str) -> None:
+        """Increment failure counter for a node."""
+        stats_key = f"{pubkey_prefix}_request_failures"
+        self._reliability_stats[stats_key] = self._reliability_stats.get(stats_key, 0) + 1
+
+    async def _reset_node_path(self, contact, node_name: str) -> bool:
+        """Reset routing path for a node and return success status."""
+        try:
+            result = await self.api.mesh_core.commands.reset_path(contact)
+            if result and result.type != EventType.ERROR:
+                self.logger.info(f"Successfully reset path for {node_name}")
+                return True
+            else:
+                error_msg = result.payload if result and result.type == EventType.ERROR else "no response or unexpected result"
+                self.logger.warning(f"Failed to reset path for {node_name}: {error_msg}")
+                return False
+        except Exception as ex:
+            self.logger.warning(f"Exception resetting path for {node_name}: {ex}")
+            return False
 
     def update_telemetry_settings(self, config_entry: ConfigEntry) -> None:
         """Update telemetry settings from config entry."""
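
The new `_reliability_stats` dict simply keys per-node counters by pubkey prefix. As a rough illustration of how those counters could be consumed later (for example by a diagnostics sensor), here is a minimal sketch; the `success_ratio` helper and the `coordinator` variable are hypothetical and not part of this change:

```python
# Illustrative only: reading the counters maintained by _increment_success/_increment_failure.
# "coordinator" stands in for the coordinator instance; success_ratio is a hypothetical helper.
def success_ratio(coordinator, pubkey_prefix: str) -> float | None:
    stats = coordinator._reliability_stats
    successes = stats.get(f"{pubkey_prefix}_request_successes", 0)
    failures = stats.get(f"{pubkey_prefix}_request_failures", 0)
    total = successes + failures
    return successes / total if total else None
```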
@@ -190,15 +220,19 @@ async def _update_repeater(self, repeater_config):
                     repeater_config.get(CONF_REPEATER_PASSWORD, "")
                 )
 
-                if login_result.type == EventType.ERROR:
-                    self.logger.error(f"Login to repeater {repeater_name} failed: {login_result.payload}")
-                else:
+                if login_result and login_result.type is not None and login_result.type != EventType.ERROR:
                     self.logger.info(f"Successfully logged in to repeater {repeater_name}")
+                    self._increment_success(pubkey_prefix)
                     # Track login time for telemetry refresh
                     self._repeater_login_times[pubkey_prefix] = self._current_time()
+                else:
+                    error_msg = login_result.payload if login_result and login_result.type == EventType.ERROR else "timeout or no response"
+                    self.logger.error(f"Login to repeater {repeater_name} failed: {error_msg}")
+                    self._increment_failure(pubkey_prefix)
 
             except Exception as ex:
                 self.logger.error(f"Exception during login to repeater {repeater_name}: {ex}")
+                self._increment_failure(pubkey_prefix)
 
             # Reset failures after login attempt regardless of outcome
             # This prevents repeated login attempts if they keep failing
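
The reworked login check now distinguishes three outcomes: a usable result, an explicit `EventType.ERROR`, and a timeout or missing response (a falsy result or a `None` type). A small self-contained sketch of that classification, using a mock `EventType` and `SimpleNamespace` stand-ins rather than the real meshcore objects:

```python
# Mock classification mirroring the login-result handling above; the real
# EventType and result objects come from the meshcore library.
from types import SimpleNamespace

class EventType:  # stand-in for the library enum
    OK = "ok"
    ERROR = "error"

def classify(login_result):
    if login_result and login_result.type is not None and login_result.type != EventType.ERROR:
        return "success"
    if login_result and login_result.type == EventType.ERROR:
        return f"failed: {login_result.payload}"
    return "failed: timeout or no response"

print(classify(SimpleNamespace(type=EventType.OK, payload={})))          # success
print(classify(SimpleNamespace(type=EventType.ERROR, payload="auth")))   # failed: auth
print(classify(None))                                                    # failed: timeout or no response
```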
@@ -219,27 +253,26 @@ async def _update_repeater(self, repeater_config):
                 # Increment failure count and apply backoff
                 new_failure_count = failure_count + 1
                 self._repeater_consecutive_failures[pubkey_prefix] = new_failure_count
+                self._increment_failure(pubkey_prefix)
 
-                # Reset path after 5 failures if there's an established path
-                if new_failure_count == 5 and contact and contact.get("out_path_len", 0) != -1:
-                    try:
-                        await self.api.mesh_core.commands.reset_path(pubkey_prefix)
-                        self.logger.info(f"Reset path for repeater {repeater_name} after 5 failures")
-                    except Exception as ex:
-                        self.logger.warning(f"Failed to reset path for repeater {repeater_name}: {ex}")
+                # Reset path after configured failures if there's an established path
+                if new_failure_count == MAX_FAILURES_BEFORE_PATH_RESET and contact and contact.get("out_path_len", -1) >= 0:
+                    await self._reset_node_path(contact, repeater_name)
 
                 update_interval = repeater_config.get(CONF_REPEATER_UPDATE_INTERVAL, DEFAULT_REPEATER_UPDATE_INTERVAL)
                 self._apply_repeater_backoff(pubkey_prefix, new_failure_count, update_interval)
             elif result.payload.get('uptime', 0) == 0:
                 self.logger.warn(f"Malformed status response from repeater {repeater_name}: {result.payload}")
                 new_failure_count = failure_count + 1
                 self._repeater_consecutive_failures[pubkey_prefix] = new_failure_count
+                self._increment_failure(pubkey_prefix)
                 update_interval = repeater_config.get(CONF_REPEATER_UPDATE_INTERVAL, DEFAULT_REPEATER_UPDATE_INTERVAL)
                 self._apply_repeater_backoff(pubkey_prefix, new_failure_count, update_interval)
             else:
                 self.logger.debug(f"Successfully updated repeater {repeater_name}")
                 # Reset failure count on success
                 self._repeater_consecutive_failures[pubkey_prefix] = 0
+                self._increment_success(pubkey_prefix)
 
             # Trigger state updates for any entities listening for this repeater
             self.async_set_updated_data(self.data)
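
The path-reset gate above fires only once, exactly when the failure count reaches `MAX_FAILURES_BEFORE_PATH_RESET`, and only when the contact reports an established outbound path (`out_path_len >= 0`; the default of `-1` now means "no path", whereas the old default of `0` treated a missing key as an established path). A standalone sketch of that predicate, assuming the constant is 5 as in the hard-coded value it replaces:

```python
# Sketch of the reset gate used above. MAX_FAILURES_BEFORE_PATH_RESET = 5 is an
# assumption (the real value lives in the integration's constants module).
MAX_FAILURES_BEFORE_PATH_RESET = 5

def should_reset_path(new_failure_count: int, contact: dict | None) -> bool:
    # Fire only once, at the exact threshold, and only if a route exists to reset.
    return (
        new_failure_count == MAX_FAILURES_BEFORE_PATH_RESET
        and contact is not None
        and contact.get("out_path_len", -1) >= 0
    )

assert should_reset_path(5, {"out_path_len": 3}) is True
assert should_reset_path(5, {"out_path_len": -1}) is False   # no established path
assert should_reset_path(6, {"out_path_len": 3}) is False    # already past the threshold
```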
@@ -254,6 +287,7 @@ async def _update_repeater(self, repeater_config):
             # Increment failure count and apply backoff
             new_failure_count = self._repeater_consecutive_failures.get(pubkey_prefix, 0) + 1
             self._repeater_consecutive_failures[pubkey_prefix] = new_failure_count
+            self._increment_failure(pubkey_prefix)
             update_interval = repeater_config.get(CONF_REPEATER_UPDATE_INTERVAL, DEFAULT_REPEATER_UPDATE_INTERVAL)
             self._apply_repeater_backoff(pubkey_prefix, new_failure_count, update_interval)
         finally:
@@ -264,13 +298,21 @@ async def _update_repeater(self, repeater_config):
     def _apply_backoff(self, pubkey_prefix: str, failure_count: int, update_interval: int, update_type: str = "repeater") -> None:
         """Apply exponential backoff delay for failed updates.
 
+        Uses a dynamic base interval so that at most five retries fit within the refresh window.
+        Resets the failure count after MAX_RETRY_ATTEMPTS to start fresh.
+
         Args:
             pubkey_prefix: The node's public key prefix
             failure_count: Number of consecutive failures
             update_interval: The configured update interval to cap the backoff at
             update_type: Type of update ("repeater" or "telemetry")
         """
-        backoff_delay = min(REPEATER_BACKOFF_BASE ** failure_count, update_interval)
+        # Calculate a base interval so that five retries fit within the refresh window.
+        # Sum of the geometric series: base * (2^5 - 1) / (2 - 1) = base * 31;
+        # dividing by 31 * 2 keeps the total at roughly half the refresh interval for safety.
+        base_interval = max(1, update_interval // (31 * 2))
+
+        backoff_delay = min(base_interval * (REPEATER_BACKOFF_BASE ** failure_count), update_interval)
         next_update_time = self._current_time() + backoff_delay
 
         if update_type == "telemetry":
@@ -280,6 +322,7 @@ def _apply_backoff(self, pubkey_prefix: str, failure_count: int, update_interval
 
         self.logger.debug(f"Applied backoff for {update_type} {pubkey_prefix}: "
                           f"failure_count={failure_count}, "
+                          f"base_interval={base_interval}s, "
                           f"delay={backoff_delay}s, "
                           f"interval_cap={update_interval}s")
 
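
To make the backoff arithmetic concrete, here is a worked example assuming `REPEATER_BACKOFF_BASE = 2` and a 900-second update interval (both values are assumptions for illustration; the real ones come from the constants module and the config entry):

```python
# Worked example of the capped exponential backoff computed above (assumed values).
REPEATER_BACKOFF_BASE = 2      # assumed backoff base
update_interval = 900          # assumed refresh window in seconds

base_interval = max(1, update_interval // (31 * 2))   # 900 // 62 -> 14
for failure_count in range(1, 6):
    delay = min(base_interval * (REPEATER_BACKOFF_BASE ** failure_count), update_interval)
    print(f"failure {failure_count}: wait {delay}s")
# failure 1: 28s, 2: 56s, 3: 112s, 4: 224s, 5: 448s -- the five retries together
# stay under the 900s window, and any later delay is capped at update_interval.
```

Under the old formula (`REPEATER_BACKOFF_BASE ** failure_count` with no base interval), the first few delays would be only a handful of seconds regardless of how long the refresh window is, again assuming a base of 2.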
@@ -313,6 +356,7 @@ async def _update_node_telemetry(self, contact, pubkey_prefix: str, node_name: s
                 self.logger.debug(f"Telemetry response received from {node_name}: {telemetry_result}")
                 # Reset failure count on success
                 self._telemetry_consecutive_failures[pubkey_prefix] = 0
+                self._increment_success(pubkey_prefix)
                 # Schedule next telemetry update
                 next_telemetry_time = self._current_time() + update_interval
                 self._next_telemetry_update_times[pubkey_prefix] = next_telemetry_time
@@ -321,14 +365,11 @@ async def _update_node_telemetry(self, contact, pubkey_prefix: str, node_name: s
                 # Increment failure count and apply backoff
                 new_failure_count = failure_count + 1
                 self._telemetry_consecutive_failures[pubkey_prefix] = new_failure_count
+                self._increment_failure(pubkey_prefix)
 
-                # Reset path after 5 failures if there's an established path
-                if new_failure_count == 5 and contact and contact.get("out_path_len", 0) != -1:
-                    try:
-                        await self.api.mesh_core.commands.reset_path(pubkey_prefix)
-                        self.logger.info(f"Reset path for node {node_name} after 5 telemetry failures")
-                    except Exception as ex:
-                        self.logger.warning(f"Failed to reset path for node {node_name}: {ex}")
+                # Reset path after configured failures if there's an established path
+                if new_failure_count == MAX_FAILURES_BEFORE_PATH_RESET and contact and contact.get("out_path_len", -1) >= 0:
+                    await self._reset_node_path(contact, node_name)
 
                 self._apply_backoff(pubkey_prefix, new_failure_count, update_interval, "telemetry")
 
@@ -337,12 +378,13 @@ async def _update_node_telemetry(self, contact, pubkey_prefix: str, node_name: s
             # Increment failure count and apply backoff
             new_failure_count = failure_count + 1
             self._telemetry_consecutive_failures[pubkey_prefix] = new_failure_count
+            self._increment_failure(pubkey_prefix)
 
-            # Reset path after 5 failures if there's an established path
-            if new_failure_count == 5 and contact and contact.get("out_path_len", 0) != -1:
+            # Reset path after configured failures if there's an established path
+            if new_failure_count == MAX_FAILURES_BEFORE_PATH_RESET and contact and contact.get("out_path_len", -1) != -1:
                 try:
                     await self.api.mesh_core.commands.reset_path(pubkey_prefix)
-                    self.logger.info(f"Reset path for node {node_name} after 5 telemetry failures")
+                    self.logger.info(f"Reset path for node {node_name} after {MAX_FAILURES_BEFORE_PATH_RESET} telemetry failures")
                 except Exception as reset_ex:
                     self.logger.warning(f"Failed to reset path for node {node_name}: {reset_ex}")
 
@@ -479,7 +521,7 @@ async def _async_update_data(self) -> None:
             pubkey_prefix = repeater_config.get("pubkey_prefix")
             repeater_name = repeater_config.get("name")
 
-            # Clean up completed or failed tasks
+            # Clean up completed or failed tasks
             if pubkey_prefix in self._active_repeater_tasks:
                 task = self._active_repeater_tasks[pubkey_prefix]
                 if task.done():