Skip to content

Commit c50bc74

Browse files
committed
speed up entry in SYNCHRONIZATION state
1 parent 6df98c4 commit c50bc74

22 files changed

+161
-69
lines changed

CHANGES.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,13 @@
33
## 0.19.1 (2026-xx-xx)
44

55
* Fix [Issue #134](https://github.com/julien6387/supvisors/issues/134).
6-
Reload the Supvisors process configuration after Supervisor is updated.
6+
Reload the **Supvisors** process configuration after Supervisor is updated.
77

8-
* Strengthen robustness against `psutil` issues realted to certain operating systems.
8+
* Set the lower limit of the option `synchro_timeout` to 0 instead of 15 (the default value remains unchanged).
9+
10+
* Speed up the entry into the SYNCHRONIZATION state.
11+
12+
* Strengthen robustness against `psutil` issues related to certain operating systems.
913

1014
* Review HTTP error codes returned by the REST API in `supvisorsflask`.
1115

docs/configuration.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ behavior may happen. The present section details where it is applicable.
278278
``synchro_timeout``
279279

280280
The time in seconds that |Supvisors| waits for all expected |Supvisors| instances to publish their TICK.
281-
Value in [``15`` ; ``1200``]. |br|
281+
Value in [``0`` ; ``1200``]. |br|
282282
This option is taken into account only if ``TIMEOUT`` is selected in the ``synchro_options``. |br|
283283
The use of this option is more detailed in :ref:`synchronizing`.
284284

supvisors/context.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,7 @@ def on_authorization(self, status: SupvisorsInstanceStatus, event: Payload) -> N
489489
f' from Supvisors={status.usage_identifier}')
490490
# go back to STOPPED to give it a chance at next TICK
491491
status.state = SupvisorsInstanceStates.STOPPED
492+
self.export_status(status)
492493
elif authorization == AuthorizationTypes.NOT_AUTHORIZED:
493494
self.logger.warn('Context.on_authorization: the local Supvisors instance is isolated'
494495
f' by Supvisors={status.usage_identifier}')
@@ -501,6 +502,12 @@ def on_authorization(self, status: SupvisorsInstanceStatus, event: Payload) -> N
501502
self.logger.info(f'Context.on_authorization: the local Supvisors instance is authorized to work with'
502503
f' Supvisors={status.usage_identifier}')
503504
status.state = SupvisorsInstanceStates.CHECKED
505+
self.export_status(status)
506+
# if status is the local instance, go straight to RUNNING to speed up the entry in SYNCHRONIZATION state
507+
if status.identifier == self.local_identifier:
508+
status.state = SupvisorsInstanceStates.RUNNING
509+
self.export_status(status)
510+
return None
504511

505512
def on_local_tick_event(self, event: Payload) -> None:
506513
""" Method called upon reception of a tick event from the local Supvisors instance.
@@ -710,3 +717,4 @@ def on_process_state_event(self, status: SupvisorsInstanceStatus, event: Payload
710717
self.external_publisher.send_process_status(process.serial())
711718
self.external_publisher.send_application_status(application.serial())
712719
return process
720+
return None

supvisors/internal_com/mapper.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -480,14 +480,14 @@ def configure(self, supvisors_list: NameList, stereotypes: NameSet, core_list: N
480480
# get Supervisor identification from each element
481481
for item in supvisors_list:
482482
self.add_instance(item)
483-
# keep information about the initial Supvisors identifiers added to the configuration
484-
self.initial_identifiers = list(self._instances.keys())
485483
else:
486484
# if supvisors_list is empty, use self identification from supervisor internal data
487485
supervisor = self.supvisors.supervisor_data
488486
item = f'<{supervisor.identifier}>{socket.gethostname()}:{supervisor.server_port}'
489487
self.logger.info(f'SupvisorsMapper.configure: define local Supvisors as {item}')
490488
self.add_instance(item)
489+
# keep information about the initial Supvisors identifiers added to the configuration
490+
self.initial_identifiers = list(self._instances.keys())
491491
self.logger.info(f'SupvisorsMapper.configure: identifiers={self._nick_identifiers}')
492492
self.logger.info(f'SupvisorsMapper.configure: nodes={self.nodes}')
493493
# get local Supervisor identification from list

supvisors/listener.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,8 @@ def on_running(self, _):
162162
self.supvisors.external_publisher = create_external_publisher(self.supvisors)
163163
# Trigger the FSM
164164
self.fsm.next()
165+
# Force first Tick to speed up the entry in SYNCHRONIZATION state
166+
self.on_tick(events.TickEvent(time.time(), None))
165167
except Exception:
166168
# Supvisors shall never endanger the Supervisor thread
167169
self.logger.critical(f'SupervisorListener.on_running: {traceback.format_exc()}')

supvisors/options.py

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@
3131
Ipv4Address, NameList, Payload, StatisticsTypes,
3232
GroupConfigInfo, ProgramConfig, SupvisorsProcessConfig)
3333

34+
# Constants
35+
SYNCHRO_TIMEOUT_MIN = 0
36+
SYNCHRO_LIST_TIMEOUT_MIN = 15
37+
SYNCHRO_TIMEOUT_MAX = 1200
38+
3439

3540
# Options of main section
3641
def get_logger_configuration(**config) -> Payload:
@@ -65,7 +70,7 @@ class SupvisorsOptions:
6570
- multicast_interface: UDP Multicast Group interface ;
6671
- multicast_ttl: UDP Multicast time-to-live ;
6772
- rules_files: list of absolute or relative paths to the XML rules files ;
68-
- css_files: list of css files used to override the Supvisors default CSS ;
73+
- css_files: list of CSS files used to override the Supvisors default CSS ;
6974
- event_link: type of the event link used to publish all Supvisors events ;
7075
- event_port: port number used to publish all Supvisors events ;
7176
- auto_fence: when True, Supvisors won't try to reconnect to a Supvisors instance that has been inactive ;
@@ -88,9 +93,6 @@ class SupvisorsOptions:
8893
- tailf_limit: the number of bytes used to display the log tail of the file in the Web UI (tail -f mode).
8994
"""
9095

91-
SYNCHRO_TIMEOUT_MIN = 15
92-
SYNCHRO_TIMEOUT_MAX = 1200
93-
9496
# default SynchronizationOptions list that is equivalent to previous Supvisors versions
9597
SYNCHRO_DEFAULT_OPTIONS = [SynchronizationOptions.STRICT,
9698
SynchronizationOptions.TIMEOUT,
@@ -132,7 +134,7 @@ def __init__(self, supervisord, logger: Logger, **config):
132134
self.auto_fence = self._get_value(config, 'auto_fence', False, boolean)
133135
self.synchro_options = self._get_value(config, 'synchro_options', self.SYNCHRO_DEFAULT_OPTIONS,
134136
self.to_synchro_options)
135-
self.synchro_timeout = self._get_value(config, 'synchro_timeout', self.SYNCHRO_TIMEOUT_MIN, self.to_timeout)
137+
self.synchro_timeout = self._get_value(config, 'synchro_timeout', SYNCHRO_LIST_TIMEOUT_MIN, self.to_timeout)
136138
self.inactivity_ticks = self._get_value(config, 'inactivity_ticks', self.INACTIVITY_TICKS_MIN, self.to_ticks)
137139
# get the minimum list of identifiers to end the synchronization phase
138140
self.core_identifiers = self._get_value(config, 'core_identifiers', set(),
@@ -212,10 +214,10 @@ def check_options(self):
212214
' with no core_identifiers')
213215
self.synchro_options.remove(SynchronizationOptions.CORE)
214216
# when using STRICT in synchro_options, supvisors_list cannot be empty
215-
if not self.supvisors_list and SynchronizationOptions.STRICT in self.synchro_options:
216-
self.logger.warn('SupvisorsOptions:check_options: cancellation of synchro_options STRICT'
217-
' with no supvisors_list')
218-
self.synchro_options.remove(SynchronizationOptions.STRICT)
217+
#if not self.supvisors_list and SynchronizationOptions.STRICT in self.synchro_options:
218+
# self.logger.warn('SupvisorsOptions:check_options: cancellation of synchro_options STRICT'
219+
# ' with no supvisors_list')
220+
# self.synchro_options.remove(SynchronizationOptions.STRICT)
219221
# synchro_options must not be empty
220222
if not self.synchro_options:
221223
raise ValueError('synchro_options shall not be empty')
@@ -226,6 +228,13 @@ def check_options(self):
226228
self.logger.warn('SupvisorsOptions:check_options: force supvisors_failure_strategy=CONTINUE'
227229
' because it is incompatible with synchro_options=TIMEOUT')
228230
self.supvisors_failure_strategy = SupvisorsFailureStrategies.CONTINUE
231+
# use a minimum timeout of 15 seconds when LIST is in synchro_options to give a chance to discovered instances
232+
#if (SynchronizationOptions.LIST in self.synchro_options
233+
# and SynchronizationOptions.TIMEOUT in self.synchro_options
234+
# and self.synchro_timeout < SYNCHRO_LIST_TIMEOUT_MIN):
235+
# self.logger.warn(f'SupvisorsOptions:check_options: force synchro_timeout={SYNCHRO_LIST_TIMEOUT_MIN}'
236+
# ' to give a chance with synchro_options=TIMEOUT')
237+
# self.synchro_timeout = SYNCHRO_LIST_TIMEOUT_MIN
229238

230239
def check_dirpath(self, file_path: str) -> str:
231240
""" Check if the path provided exists and create the folder tree if necessary.
@@ -418,20 +427,20 @@ def to_synchro_options(value: str) -> List[SynchronizationOptions]:
418427

419428
@staticmethod
420429
def to_timeout(value: str) -> int:
421-
""" Convert a string into a timeout value, in [15;1200].
430+
""" Convert a string into a timeout value, in [0;1200].
422431
423-
:param value: the timeout as a string
424-
:return: the timeout as an integer
432+
:param value: the timeout as a string.
433+
:return: the timeout as an integer.
425434
"""
426435
try:
427436
timeout = integer(value)
428-
if SupvisorsOptions.SYNCHRO_TIMEOUT_MIN > timeout or timeout > SupvisorsOptions.SYNCHRO_TIMEOUT_MAX:
437+
if SYNCHRO_TIMEOUT_MIN > timeout or timeout > SYNCHRO_TIMEOUT_MAX:
429438
raise ValueError
430439
return timeout
431440
except ValueError:
432441
raise ValueError(f'invalid value for synchro_timeout: "{value}".'
433-
f' integer expected in [{SupvisorsOptions.SYNCHRO_TIMEOUT_MIN};'
434-
f'{SupvisorsOptions.SYNCHRO_TIMEOUT_MAX}] (seconds)')
442+
f' integer expected in [{SYNCHRO_TIMEOUT_MIN};'
443+
f'{SYNCHRO_TIMEOUT_MAX}] (seconds)')
435444

436445
@staticmethod
437446
def to_ticks(value: str) -> int:

supvisors/statemachine.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,16 @@
2121

2222
from .context import Context
2323
from .instancestatus import SupvisorsInstanceStatus
24-
from .options import SupvisorsOptions
2524
from .process import ProcessStatus
2625
from .statemodes import SupvisorsStateModes
2726
from .strategy import conciliate_conflicts
2827
from .ttypes import (SupvisorsInstanceStates, SupvisorsStates, SynchronizationOptions,
2928
RunningFailureStrategies, SupvisorsFailureStrategies,
3029
NameList, Payload, PayloadList)
3130

31+
# Constants
32+
OFF_UPTIME = 10 # seconds
33+
3234

3335
# FSM base states
3436
class _SupvisorsBaseState:
@@ -178,7 +180,7 @@ def _check_consistence(self) -> Optional[SupvisorsStates]:
178180
# get duration from start date
179181
uptime: float = self.context.uptime
180182
# log current status
181-
if uptime >= SupvisorsOptions.SYNCHRO_TIMEOUT_MIN:
183+
if uptime >= OFF_UPTIME:
182184
self.logger.critical(f'OffState.next: local Supvisors={self.local_identifier} still'
183185
f' not RUNNING after {int(uptime)} seconds')
184186
else:
@@ -346,7 +348,7 @@ def _check_end_sync_timeout(self, uptime: float) -> Optional[bool]:
346348
return False
347349
return None
348350

349-
def _check_end_sync_core(self, uptime: float) -> Optional[bool]:
351+
def _check_end_sync_core(self) -> Optional[bool]:
350352
""" End of sync phase if the CORE option is set, and all core Supvisors instances are RUNNING.
351353
352354
NOTE: If the condition is reached, the ELECTION state will eventually be reached with non-core Supvisors
@@ -360,13 +362,7 @@ def _check_end_sync_core(self, uptime: float) -> Optional[bool]:
360362
failure = self._check_core_failure()
361363
if failure is False:
362364
# all core Supvisors instances are running
363-
# in case of late start, a security limit of SYNCHRO_TIMEOUT_MIN is kept to give a chance
364-
# to other Supvisors instances and limit the number of re-distributions
365-
if uptime >= SupvisorsOptions.SYNCHRO_TIMEOUT_MIN:
366-
return True
367-
self.logger.info('SynchronizationState.check_end_sync_core: all core Supvisors instances are RUNNING,'
368-
f' waiting ({uptime} < {SupvisorsOptions.SYNCHRO_TIMEOUT_MIN})')
369-
return False
365+
return True
370366
return False if failure else None
371367

372368
def _check_end_sync_user(self) -> Optional[bool]:
@@ -404,7 +400,7 @@ def next(self) -> Optional[SupvisorsStates]:
404400
strict_sync = self._check_end_sync_strict()
405401
list_sync = self._check_end_sync_list()
406402
timeout_sync = self._check_end_sync_timeout(uptime)
407-
core_sync = self._check_end_sync_core(uptime)
403+
core_sync = self._check_end_sync_core()
408404
user_sync = self._check_end_sync_user()
409405
self.logger.debug(f'SynchronizationState.next: strict_sync={strict_sync} list_sync={list_sync}'
410406
f' timeout_sync={timeout_sync} core_sync={core_sync} user_sync={user_sync}')
@@ -1028,6 +1024,8 @@ def on_authorization(self, status: SupvisorsInstanceStatus, event: Payload) -> N
10281024
self.logger.debug(f'FiniteStateMachine.on_authorization: identifier={status.usage_identifier}'
10291025
f' event={event}')
10301026
self.context.on_authorization(status, event)
1027+
# evaluate next immediately to speed up the start sequence
1028+
self.next()
10311029

10321030
def on_process_state_event(self, status: SupvisorsInstanceStatus, event: Payload) -> None:
10331031
""" This event is used to refresh the process data related to the event sent from the Supvisors instance.

supvisors/test/etc/supervisord.conf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,13 @@ files = common/*/*.ini
2626
supervisor.rpcinterface_factory = supvisors.plugin:make_supvisors_rpcinterface
2727
software_name = Supvisors Tests
2828
software_icon = ../ui/img/icon.png
29-
supvisors_list = <supv-01>17.0.1.11,supv02,<supv-03>192.168.1.70:30000
29+
supvisors_list = <supv-01>17.0.1.11,supv02,<supv-03>192.168.1.15:30000
3030
rules_files = etc/my_movies*.xml
3131
css_files = ui/test.css
3232
auto_fence = false
3333
event_link = ZMQ
3434
event_port = 60002
35-
synchro_options = LIST,CORE,TIMEOUT,USER
35+
synchro_options = STRICT,TIMEOUT,CORE
3636
synchro_timeout = 30
3737
inactivity_ticks = 3
3838
core_identifiers = supv-01,supv-03

supvisors/test/etc/supervisord_alt.conf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@ files = common/*/*.ini
2020

2121
[rpcinterface:supvisors]
2222
supervisor.rpcinterface_factory = supvisors.plugin:make_supvisors_rpcinterface
23-
supvisors_list = <supv-01>17.0.1.11:60000,supv02:60000,<supv-03>192.168.1.70
23+
supvisors_list = <supv-01>17.0.1.11:60000,supv02:60000,<supv-03>192.168.1.15
2424
stereotypes = third
2525
rules_files = etc/my_movies*.xml
2626
auto_fence = false
27-
synchro_options = LIST,CORE,TIMEOUT,USER
27+
synchro_options = STRICT,TIMEOUT,CORE
2828
synchro_timeout = 30
2929
inactivity_ticks = 3
3030
core_identifiers = supv-01,supv-03

supvisors/test/etc/supervisord_alt_mc.conf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ multicast_group = 239.0.0.1:60001
2323
multicast_ttl = 1
2424
rules_files = etc/my_movies*.xml
2525
auto_fence = false
26-
synchro_options = LIST,CORE,TIMEOUT,USER
27-
synchro_timeout = 20
26+
synchro_options = LIST,TIMEOUT,USER
27+
synchro_timeout = 30
2828
inactivity_ticks = 3
2929
starting_strategy = CONFIG
3030
conciliation_strategy = USER

0 commit comments

Comments
 (0)