
Commit 728cc14

Log manager counts on change, not every poll period (#4011)
The poll period is 10 ms by default, meaning that before this PR the manager count was logged around 100 times per second. When htex.worker_debug is turned on, that is a lot of disk logging activity.

For example, in one run of the test suite, which takes around 18 seconds, the interchange log size for

pytest parsl/tests/ --config parsl/tests/configs/htex_local.py

Before this PR: 6544 lines, 928 kB, with around 1800 of those lines being the affected manager count log line.

After this PR: 4703 lines, 678 kB.

With an interchange that is mostly idle, for example when tasks perform real work rather than the trivial test tasks here, I expect the savings to be much greater.

History: this line has always been logged like this, but previously the interchange was verbose in other ways on every poll iteration. Work since then has reduced the per-poll log cost substantially.

# Changed Behaviour

Much reduced debug logging.

## Type of change

- Update to human readable text: Documentation/error messages/comments
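As a distilled illustration of the approach (the real change is in the diff to interchange.py further down), the idea is to remember a token describing what was last logged and emit the debug line only when that token changes. The class and method names below (CountLogger, log_counts) are illustrative only, not part of Parsl:

```python
import logging

logger = logging.getLogger(__name__)


class CountLogger:
    """Sketch only: emit a debug line when the counts change, not on every poll."""

    def __init__(self) -> None:
        # None can never equal a real (int, int) token, so the first call always logs.
        self._logged_count_token: object = None

    def log_counts(self, count_interesting: int, count_ready: int) -> None:
        new_token = (count_interesting, count_ready)
        if self._logged_count_token != new_token:
            logger.debug("Managers count (interesting/total): %d/%d",
                         count_interesting, count_ready)
            self._logged_count_token = new_token
```

Called once per poll iteration, this produces at most one log line per change in the counts rather than one line per 10 ms poll.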
1 parent c7b2d8a commit 728cc14

File tree

1 file changed: +17, -6 lines


parsl/executors/high_throughput/interchange.py

Lines changed: 17 additions & 6 deletions
@@ -160,6 +160,7 @@ def __init__(self,
         logger.info(f"Bound to port {worker_port} for incoming worker connections")
 
         self._ready_managers: Dict[bytes, ManagerRecord] = {}
+        self._logged_manager_count_token: object = None
         self.connected_block_history: List[str] = []
 
         self.heartbeat_threshold = heartbeat_threshold
@@ -313,6 +314,7 @@ def start(self) -> None:
             self.process_manager_socket_message(interesting_managers, monitoring_radio, kill_event)
             self.expire_bad_managers(interesting_managers, monitoring_radio)
             self.expire_drained_managers(interesting_managers, monitoring_radio)
+            self.log_manager_counts(interesting_managers)
             self.process_tasks_to_send(interesting_managers, monitoring_radio)
 
         self.zmq_context.destroy()
@@ -523,15 +525,24 @@ def expire_drained_managers(self, interesting_managers: Set[bytes], monitoring_r
                 m['active'] = False
                 self._send_monitoring_info(monitoring_radio, m)
 
+    def log_manager_counts(self, interesting_managers: Set[bytes]) -> None:
+        count_interesting = len(interesting_managers)
+        count_ready = len(self._ready_managers)
+
+        new_logged_manager_count_token = (count_interesting, count_ready)
+
+        if self._logged_manager_count_token != new_logged_manager_count_token:
+
+            logger.debug(
+                "Managers count (interesting/total): %d/%d",
+                count_interesting,
+                count_ready
+            )
+            self._logged_manager_count_token = new_logged_manager_count_token
+
     def process_tasks_to_send(self, interesting_managers: Set[bytes], monitoring_radio: Optional[MonitoringRadioSender]) -> None:
         # Check if there are tasks that could be sent to managers
 
-        logger.debug(
-            "Managers count (interesting/total): %d/%d",
-            len(interesting_managers),
-            len(self._ready_managers)
-        )
-
         if interesting_managers and self.pending_task_queue:
             shuffled_managers = self.manager_selector.sort_managers(self._ready_managers, interesting_managers)
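For anyone reproducing the before/after measurement, the htex.worker_debug switch mentioned in the commit message is a constructor argument of HighThroughputExecutor. A minimal local configuration sketch with it enabled (the label is illustrative, and provider/worker tuning is omitted):

```python
import parsl
from parsl.config import Config
from parsl.executors import HighThroughputExecutor

# Sketch of a local config with worker_debug turned on, which (as described
# in the commit message) makes the interchange log much more verbose.
config = Config(
    executors=[
        HighThroughputExecutor(
            label="htex_local",
            worker_debug=True,
        )
    ]
)

parsl.load(config)
```

With worker_debug on, the interchange writes debug-level output, typically under the run's runinfo directory; the line and size counts quoted in the commit message refer to that interchange log file.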