Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions readthedocs/projects/tasks/builds.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,12 @@ class UpdateDocsTask(SyncRepositoryMixin, Task):
name = __name__ + ".update_docs_task"
autoretry_for = (BuildMaxConcurrencyError,)
max_retries = settings.RTD_BUILDS_MAX_RETRIES
# Default retry delay is 5 minutes (RTD_BUILDS_RETRY_DELAY = 300 seconds).
# NOTE: Large gaps between retries (30m-5h instead of 5m) have been observed.
# See https://github.com/readthedocs/readthedocs.org/issues/12472
# The task_executed_at field tracks when tasks are actually picked up
# to help diagnose timing issues. A warning is logged if the gap exceeds
# 10 minutes (see before_start method).
default_retry_delay = settings.RTD_BUILDS_RETRY_DELAY
retry_backoff = False

Expand Down
30 changes: 26 additions & 4 deletions readthedocs/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,14 @@ def RTD_RESTRICTED_DOMAINS(self):
RTD_STABLE = "stable"
RTD_STABLE_VERBOSE_NAME = "stable"
RTD_CLEAN_AFTER_BUILD = False
RTD_BUILD_HEALTHCHECK_TIMEOUT = 60 # seconds
RTD_BUILD_HEALTHCHECK_DELAY = 15 # seconds
RTD_BUILD_HEALTHCHECK_TIMEOUT = 60 # seconds
RTD_BUILD_HEALTHCHECK_DELAY = 15 # seconds
RTD_MAX_CONCURRENT_BUILDS = 4
RTD_BUILDS_MAX_RETRIES = 25
RTD_BUILDS_RETRY_DELAY = 5 * 60 # seconds
# Delay before retrying a build task that hit concurrency limits.
# NOTE: Observed retry gaps can be much larger (30m-5h) due to queue contention
# or Celery/Redis scheduling behavior. See issue #12472 for investigation.
RTD_BUILDS_RETRY_DELAY = 5 * 60 # seconds (5 minutes)
RTD_BUILD_STATUS_API_NAME = "docs/readthedocs"
RTD_ANALYTICS_DEFAULT_RETENTION_DAYS = 30 * 3
RTD_AUDITLOGS_DEFAULT_RETENTION_DAYS = 30 * 3
Expand Down Expand Up @@ -669,8 +672,27 @@ def BUILD_MEMORY_LIMIT(self):

# https://github.com/readthedocs/readthedocs.org/issues/12317#issuecomment-3070950434
# https://docs.celeryq.dev/en/stable/getting-started/backends-and-brokers/redis.html#visibility-timeout
#
# NOTE: Investigation of large retry timing gaps (issue #12472)
# The visibility_timeout setting affects how long a task can remain
# unacknowledged before Redis considers it lost and redelivers it.
# With acks_late=True, tasks are only acknowledged after completion.
#
# Potential causes for retry timing gaps:
# 1. Redis sorted set polling: ETA/countdown tasks are stored in a sorted set
# that is polled periodically. High queue contention may delay pickup.
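# 2. visibility_timeout interaction: If set too low, tasks running longer
# than visibility_timeout get redelivered (causing duplicates).
# If set too high, ETA task pickup is not affected while workers stay
# healthy, but tasks held by a forcefully terminated worker (including
# countdown/ETA tasks waiting in its memory) are only redelivered after
# visibility_timeout, so stale tasks may accumulate.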
# 3. Worker availability: If all workers are busy when the countdown expires,
# the task waits until a worker becomes available.
# 4. Queue backlog: Tasks with countdown/ETA compete with regular tasks.
#
# The Build.task_executed_at field records when a task is actually picked up,
# so it can be compared against the scheduled time when debugging retry
# timing issues.
BROKER_TRANSPORT_OPTIONS = {
'visibility_timeout': 18000, # 5 hours
'visibility_timeout': 18000, # 5 hours
}

CELERY_DEFAULT_QUEUE = "celery"
Expand Down