Fix candidate for messages processing with rate limits

imbeacon · imbeacon · commit e377d1e81348 · 2025-03-17T10:04:39.000+02:00
diff --git a/tb_device_mqtt.py b/tb_device_mqtt.py
@@ -60,7 +60,7 @@
     2: "invalid client identifier",
     3: "server unavailable",
     4: "bad username or password",
-    5: "not authorised",
+    5: "not authorized",
 }
 
 
@@ -235,12 +235,17 @@ def has_limit(self):
 
     def set_limit(self, rate_limit, percentage=80):
         with self.__lock:
+            self._minimal_timeout = DEFAULT_TIMEOUT
+            self._minimal_limit = 1000000000
             old_rate_limit_dict = deepcopy(self._rate_limit_dict)
             self._rate_limit_dict = {}
-            self.percentage = percentage if percentage != 0 else self.percentage
+            self.percentage = percentage if percentage > 0 else self.percentage
             rate_configs = rate_limit.split(";")
             if "," in rate_limit:
                 rate_configs = rate_limit.split(",")
+            if len(rate_configs) == 2 and rate_configs[0] == "0:0":
+                self._no_limit = True
+                return
             for rate in rate_configs:
                 if rate == "":
                     continue
@@ -718,18 +723,23 @@ def on_service_configuration(self, _, response, *args, **kwargs):
         if service_config.get('maxInflightMessages'):
             use_messages_rate_limit_factor = self._messages_rate_limit.has_limit()
             use_telemetry_rate_limit_factor = self._telemetry_rate_limit.has_limit()
+            service_config_inflight_messages = int(service_config.get('maxInflightMessages', 100))
             if use_messages_rate_limit_factor and use_telemetry_rate_limit_factor:
                 max_inflight_messages = int(min(self._messages_rate_limit.get_minimal_limit(),
                                                 self._telemetry_rate_limit.get_minimal_limit(),
-                                                service_config.get('maxInflightMessages', 100)) * 80 / 100)
+                                                service_config_inflight_messages) * 80 / 100)
             elif use_messages_rate_limit_factor:
                 max_inflight_messages = int(min(self._messages_rate_limit.get_minimal_limit(),
-                                                service_config.get('maxInflightMessages', 100)) * 80 / 100)
+                                                service_config_inflight_messages) * 80 / 100)
             elif use_telemetry_rate_limit_factor:
                 max_inflight_messages = int(min(self._telemetry_rate_limit.get_minimal_limit(),
-                                                service_config.get('maxInflightMessages', 100)) * 80 / 100)
+                                                service_config_inflight_messages) * 80 / 100)
             else:
                 max_inflight_messages = int(service_config.get('maxInflightMessages', 100) * 80 / 100)
+                if max_inflight_messages == 0:
+                    max_inflight_messages = 10_000  # No limitation on device queue on transport level
+            if max_inflight_messages < 1:
+                max_inflight_messages = 1
             self.max_inflight_messages_set(max_inflight_messages)
             self.max_queued_messages_set(max_inflight_messages)
         if service_config.get('maxPayloadSize'):
@@ -773,8 +783,8 @@ def _wait_for_rate_limit_released(self, timeout, message_rate_limit, dp_rate_lim
                 return TBPublishInfo(paho.MQTTMessageInfo(None))
             if not log_posted and limit_reached_check:
                 if isinstance(limit_reached_check, int):
-                    log.warning("Rate limit reached for %i seconds, waiting for rate limit to be released...",
-                                limit_reached_check)
+                    log.debug("Rate limit reached for %i seconds, waiting for rate limit to be released...",
+                              limit_reached_check)
                     waited = True
                 else:
                     log.debug("Waiting for rate limit to be released...")
@@ -787,28 +797,42 @@ def _wait_for_rate_limit_released(self, timeout, message_rate_limit, dp_rate_lim
     def _wait_until_current_queued_messages_processed(self):
         previous_notification_time = 0
         current_out_messages = len(self._client._out_messages) * 2
-        inflight_messages = self._client._max_inflight_messages or 5
+        max_inflight_messages = self._client._max_inflight_messages if self._client._max_inflight_messages > 0 else 5
         logger = None
         waiting_started = int(monotonic())
         connection_was_lost = False
-        timeout_for_break = 600
+        timeout_for_break = 300
 
         if current_out_messages > 0:
-            while current_out_messages >= inflight_messages and not self.stopped:
+            while current_out_messages >= max_inflight_messages and not self.stopped:
                 current_out_messages = len(self._client._out_messages)
-                if int(monotonic()) - previous_notification_time > 5 and current_out_messages > inflight_messages:
+                elapsed = monotonic() - waiting_started
+                remaining = timeout_for_break - elapsed
+
+                if int(monotonic()) - previous_notification_time > 5 and current_out_messages > max_inflight_messages:
                     if logger is None:
                         logger = logging.getLogger('tb_connection')
-                    logger.debug("Waiting for messages to be processed by paho client, current queue size - %r, max inflight messages: %r", # noqa
-                                 current_out_messages, inflight_messages)
+                    logger.debug(
+                        "Waiting for messages to be processed: current queue size: %r, max inflight: %r. "
+                        "Elapsed time: %.2f seconds, remaining timeout: %.2f seconds",
+                        current_out_messages, max_inflight_messages, elapsed, remaining
+                    )
                     previous_notification_time = int(monotonic())
+
                 if not self.is_connected():
+                    with self._client._out_message_mutex:
+                        self._client._out_messages.clear()
                     connection_was_lost = True
-                if current_out_messages >= inflight_messages:
-                    sleep(.001)
-                if int(monotonic()) - waiting_started > timeout_for_break and not connection_was_lost or self.stopped:
+
+                if current_out_messages >= max_inflight_messages:
+                    sleep(.01)
+
+                if (elapsed > timeout_for_break and not connection_was_lost) or self.stopped:
+                    logger.debug("Breaking wait loop after %.2f seconds due to timeout or stop signal.", elapsed)
                     break
 
+                sleep(.001)
+
     def _send_request(self, _type, kwargs, timeout=DEFAULT_TIMEOUT, device=None,
                       msg_rate_limit=None, dp_rate_limit=None):
         if msg_rate_limit is None:
@@ -901,7 +925,8 @@ def __send_split_message(self, results, part, kwargs, timeout, device, msg_rate_
         if msg_rate_limit.has_limit() or dp_rate_limit.has_limit():
             msg_rate_limit.increase_rate_limit_counter()
         kwargs["payload"] = dumps(part['message'])
-        self._wait_until_current_queued_messages_processed()
+        if msg_rate_limit.has_limit() or dp_rate_limit.has_limit():
+            self._wait_until_current_queued_messages_processed()
         if not self.stopped:
             if device is not None:
                 log.debug("Device: %s, Sending message to topic: %s ", device, topic)
@@ -919,11 +944,20 @@ def __send_split_message(self, results, part, kwargs, timeout, device, msg_rate_
                     log.debug("Data points rate limits after sending message: %r", dp_rate_limit.__dict__)
         result = self._client.publish(**kwargs)
         if result.rc == MQTT_ERR_QUEUE_SIZE:
+            error_appear_counter = 1
+            sleep_time = 0.1  # 100 ms, in case of change - change max tries in while loop
             while not self.stopped and result.rc == MQTT_ERR_QUEUE_SIZE:
+                error_appear_counter += 1
+                if error_appear_counter > 78:  # 78 tries ~ totally 300 seconds for sleep 0.1
+                    # Clearing the queue and trying to send the message again
+                    log.warning("!!! Queue size exceeded, clearing the paho out queue and trying to send message again !!!")  # Possible data loss, due to issue with paho queue clearing! # noqa
+                    with self._client._out_message_mutex:
+                        self._client._out_packet.clear()
+                        self._client._out_messages.clear()
                 if int(monotonic()) - self.__error_logged > 10:
-                    log.warning("Queue size exceeded, waiting for messages to be processed by paho client.")
+                    log.debug("Queue size exceeded, waiting for messages to be processed by paho client.")
                     self.__error_logged = int(monotonic())
-                sleep(.01)  # Give some time for paho to process messages
+                sleep(sleep_time)  # Give some time for paho to process messages
                 result = self._client.publish(**kwargs)
         results.append(result)