Skip to content

Commit 5449971

Browse files
committed
better kill tree
1 parent 3da523f commit 5449971

File tree

1 file changed

+47
-22
lines changed

1 file changed

+47
-22
lines changed

archivebox/machine/models.py

Lines changed: 47 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1548,7 +1548,10 @@ def terminate(self, graceful_timeout: float = 5.0) -> bool:
15481548

15491549
def kill_tree(self, graceful_timeout: float = 2.0) -> int:
15501550
"""
1551-
Kill this process and all its children (OS children, not DB children).
1551+
Kill this process and all its children (OS children, not DB children) in parallel.
1552+
1553+
Uses parallel polling approach - sends SIGTERM to all processes at once,
1554+
then polls all of them together against a single shared deadline (graceful_timeout).
15521555
15531556
This consolidates the scattered child-killing logic from:
15541557
- crawls/models.py Crawl.cleanup() os.killpg()
@@ -1561,6 +1564,8 @@ def kill_tree(self, graceful_timeout: float = 2.0) -> int:
15611564
Number of processes killed (including self)
15621565
"""
15631566
import signal
1567+
import time
1568+
import os
15641569

15651570
killed_count = 0
15661571
proc = self.proc
@@ -1573,33 +1578,53 @@ def kill_tree(self, graceful_timeout: float = 2.0) -> int:
15731578
return 0
15741579

15751580
try:
1576-
# Get all children before killing parent
1581+
# Phase 1: Get all children and send SIGTERM to entire tree in parallel
15771582
children = proc.children(recursive=True)
1583+
deadline = time.time() + graceful_timeout
15781584

1579-
# Kill children first (reverse order - deepest first)
1580-
for child in reversed(children):
1585+
# Send SIGTERM to all children first (non-blocking)
1586+
for child in children:
15811587
try:
1582-
child.terminate()
1583-
except (psutil.NoSuchProcess, psutil.AccessDenied):
1584-
# Child already dead or we don't have permission - continue
1588+
os.kill(child.pid, signal.SIGTERM)
1589+
except (OSError, ProcessLookupError):
15851590
pass
15861591

1587-
# Wait briefly for children to exit
1588-
gone, alive = psutil.wait_procs(children, timeout=graceful_timeout)
1589-
killed_count += len(gone)
1590-
1591-
# Force kill remaining children
1592-
for child in alive:
1593-
try:
1594-
child.kill()
1595-
killed_count += 1
1596-
except (psutil.NoSuchProcess, psutil.AccessDenied):
1597-
# Child exited or we don't have permission - continue
1598-
pass
1592+
# Send SIGTERM to parent
1593+
try:
1594+
os.kill(proc.pid, signal.SIGTERM)
1595+
except (OSError, ProcessLookupError):
1596+
pass
15991597

1600-
# Now kill self
1601-
if self.terminate(graceful_timeout=graceful_timeout):
1602-
killed_count += 1
1598+
# Phase 2: Poll all processes in parallel
1599+
all_procs = children + [proc]
1600+
still_running = set(p.pid for p in all_procs)
1601+
1602+
while still_running and time.time() < deadline:
1603+
time.sleep(0.1)
1604+
1605+
for pid in list(still_running):
1606+
try:
1607+
# Check if process exited
1608+
os.kill(pid, 0) # Signal 0 checks if process exists
1609+
except (OSError, ProcessLookupError):
1610+
# Treated as exited (NOTE: PermissionError is also an OSError, so a live process we cannot signal lands here too)
1611+
still_running.remove(pid)
1612+
killed_count += 1
1613+
1614+
# Phase 3: SIGKILL any stragglers that exceeded timeout
1615+
if still_running:
1616+
for pid in still_running:
1617+
try:
1618+
os.kill(pid, signal.SIGKILL)
1619+
killed_count += 1
1620+
except (OSError, ProcessLookupError):
1621+
pass
1622+
1623+
# Update self status
1624+
self.exit_code = 128 + signal.SIGTERM if killed_count > 0 else 0
1625+
self.status = self.StatusChoices.EXITED
1626+
self.ended_at = timezone.now()
1627+
self.save()
16031628

16041629
return killed_count
16051630

0 commit comments

Comments
 (0)