 
 import logging
 from datetime import datetime, timedelta
+from time import sleep
 from types import MethodType
 from typing import (
     Any,
@@ -330,6 +331,7 @@ def __init__(
             prepare_for_work=prepare_for_work,
             serializer=JSONSerializer,
         )
+        self.is_shutting_down = False
 
     def _set_connection(
         self,
@@ -411,6 +413,91 @@ def handle_job_success(
 
             pipeline.execute()
 
+    def handle_warm_shutdown_request(self):
+        self.is_shutting_down = True
+        super().handle_warm_shutdown_request()
+
+    # We override the work method to run our own loop around it.
+    # This allows us to catch exceptions that the default work method will
+    # not handle and restart our worker process if we hit them.
+    def work(
+        self,
+        burst: bool = False,
+        logging_level: str = "INFO",
+        date_format: str = rq.defaults.DEFAULT_LOGGING_DATE_FORMAT,
+        log_format: str = rq.defaults.DEFAULT_LOGGING_FORMAT,
+        max_jobs: Optional[int] = None,
+        with_scheduler: bool = False,
+    ) -> bool:
+        while True:
+            # super().work() returns a value that we want to return on a
+            # normal exit.
+            return_value = None
+            try:
+                return_value = super().work(
+                    burst,
+                    logging_level,
+                    date_format,
+                    log_format,
+                    max_jobs,
+                    with_scheduler,
+                )
+            except (
+                redis.exceptions.TimeoutError,
+                redis.exceptions.ClusterDownError,
+                redis.exceptions.ConnectionError,
+            ) as e:
+                # If we got one of these exceptions but are not on a Cluster,
+                # go ahead and raise it normally.
+                if not isinstance(self.connection, DABRedisCluster):
+                    raise
+
+                # A lot of different exceptions inherit from
+                # ConnectionError, so make sure what we caught is an actual
+                # ConnectionError. If not, go ahead and raise it.
+                # Note: ClusterDownError and TimeoutError are not subclasses
+                # of ConnectionError.
+                if (
+                    isinstance(e, redis.exceptions.ConnectionError)
+                    and type(e) is not redis.exceptions.ConnectionError
+                ):
+                    raise
+
+                # If we got a cluster issue we will loop here until we can
+                # ping the server again.
+                max_backoff = 60
+                current_backoff = 1
+                while True:
+                    backoff = min(current_backoff, max_backoff)
+                    logger.error(
+                        "Connection to redis cluster failed. Attempting to "
+                        f"reconnect in {backoff} seconds"
+                    )
+                    sleep(backoff)
+                    current_backoff = 2 * current_backoff
+                    try:
+                        self.connection.ping()
+                        break
+                    # We could tighten this exception up.
+                    except Exception:
+                        pass
+                # At this point return_value is None, so we fall through to
+                # the outer loop and restart the worker.
+
+            # We are past super().work() with either:
+            #   a normal exit, or
+            #   an exit that did not raise an unhandled exception.
+            if return_value:
+                logger.debug(f"Work exited normally with {return_value}")
+                return return_value
+            elif self.is_shutting_down:
+                # We got a warm shutdown request, let's respect it.
+                return return_value
+            else:
+                logger.error(
+                    "Work exited with no return value, going to restart the worker"
+                )
+
 
 class DefaultWorker(Worker):
     """Custom default worker class used for non-activation tasks.
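For reference, the reconnect logic added in `work()` above is a capped exponential backoff: the delay starts at 1 second, doubles after every failed `ping()`, and never exceeds 60 seconds, so retries happen roughly 1, 2, 4, 8, 16, 32, 60, 60, ... seconds apart. Below is a minimal standalone sketch of that pattern; the `wait_for_connection` name is hypothetical and not part of this commit.

```python
import logging
from time import sleep

logger = logging.getLogger(__name__)


def wait_for_connection(connection, max_backoff: int = 60) -> None:
    """Block until connection.ping() succeeds, doubling the delay each try.

    Hypothetical helper mirroring the inner loop of work(); it assumes
    `connection` exposes a ping() method, as redis-py clients do.
    """
    current_backoff = 1
    while True:
        backoff = min(current_backoff, max_backoff)
        logger.error(
            "Connection to redis cluster failed. Attempting to "
            f"reconnect in {backoff} seconds"
        )
        sleep(backoff)
        current_backoff = 2 * current_backoff
        try:
            connection.ping()  # any exception here means "still unreachable"
            return
        except Exception:
            pass
```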
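The exception filter in the `except` block is easy to misread: on a `DABRedisCluster` connection the loop swallows `TimeoutError`, `ClusterDownError`, and the exact `redis.exceptions.ConnectionError` class, but re-raises any `ConnectionError` subclass. A small sketch of just that filter follows; the `should_retry` helper is hypothetical and the cluster-type check is omitted.

```python
import redis.exceptions


def should_retry(exc: Exception) -> bool:
    """Return True only for errors the work() loop treats as transient."""
    # TimeoutError and ClusterDownError are not ConnectionError subclasses,
    # so they are checked explicitly.
    if isinstance(
        exc,
        (redis.exceptions.TimeoutError, redis.exceptions.ClusterDownError),
    ):
        return True
    # Only the exact ConnectionError class is retried; subclasses such as
    # AuthenticationError are re-raised by the worker.
    return type(exc) is redis.exceptions.ConnectionError


# The exact class is retried, a subclass is not.
assert should_retry(redis.exceptions.ConnectionError("cluster node down"))
assert not should_retry(redis.exceptions.AuthenticationError("bad password"))
```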