-
-
Notifications
You must be signed in to change notification settings - Fork 33.2k
gh-126434: Use multiprocessing.Value for multiprocessing.Event to avoid deadlock when there is reentrant usage of `set` from `is_set`, e.g. when handling system signals
#126437
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 8 commits
e5302e6
cbecc76
2f90fb2
0ca4a85
f9307f6
25236fe
b1103b2
bec9070
c15a45a
c308344
5a43697
574e91c
25f4c7e
88b45b9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
import multiprocessing | ||
import os | ||
import signal | ||
import concurrent.futures | ||
import time | ||
|
||
|
||
def send_sigint(pid):
    """Sleep for one second, then send SIGINT to the process with id *pid*."""
    delay_seconds = 1
    time.sleep(delay_seconds)
    os.kill(pid, signal.SIGINT)
|
||
|
||
def run_signal_handler_set_is_set_test():
    """Poll ``Event.is_set()`` until a SIGINT handler calls ``Event.set()``.

    A SIGINT handler that sets a ``multiprocessing.Event`` is installed, a
    pool worker is asked to send SIGINT to this process, and the event is
    polled until the handler has set it.
    """
    stop_requested = multiprocessing.Event()

    def handle_sigint(_signum, _frame):
        stop_requested.set()

    signal.signal(signal.SIGINT, handle_sigint)

    with concurrent.futures.ProcessPoolExecutor() as pool:
        pending_kill = pool.submit(send_sigint, os.getpid())
        # Spin on is_set() on purpose: the signal may arrive while is_set()
        # is executing, which is the reentrant set()-from-is_set() case.
        while True:
            if stop_requested.is_set():
                break
        pending_kill.result()


if __name__ == '__main__':
    run_signal_handler_set_is_set_test()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import multiprocessing | ||
import sys | ||
|
||
|
||
# Reproduction code copied and modified from https://github.com/python/cpython/issues/95826 | ||
# Fixes the issue above | ||
|
||
class SimpleRepro:
    """Reproducer that toggles an event in a child while the parent waits on it."""

    def __init__(self):
        """Create both events and start the daemon child process."""
        self.heartbeat_event = multiprocessing.Event()
        self.shutdown_event = multiprocessing.Event()
        self.child_proc = multiprocessing.Process(
            target=self.child_process,
            daemon=True,
        )
        self.child_proc.start()

    def child_process(self):
        """Set and clear the heartbeat event until shutdown is requested."""
        while not self.shutdown_event.is_set():
            self.heartbeat_event.set()
            self.heartbeat_event.clear()

    def test_heartbeat(self):
        """Wait on the heartbeat up to 2000 times, then exit with 0 or 1.

        The process exits with status 1 as soon as one ``wait(100)`` call
        returns a false value, and with status 0 if every wait succeeds.
        The child is shut down and joined before exiting in both cases.
        """
        # all() stops at the first failed wait, like an explicit break would.
        every_wait_succeeded = all(
            self.heartbeat_event.wait(100) for _ in range(2000)
        )
        exit_code = 0 if every_wait_succeeded else 1
        self.shutdown_event.set()
        self.child_proc.join()
        sys.exit(exit_code)


if __name__ == '__main__':
    repro = SimpleRepro()
    repro.test_heartbeat()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import multiprocessing | ||
import signal | ||
import concurrent.futures | ||
import time | ||
import os | ||
|
||
|
||
# Shows that https://github.com/python/cpython/issues/85772 is fixed | ||
|
||
def send_sigint(pid):
    """Sleep for one second, then send SIGINT to the process with id *pid*."""
    # The delay makes sure shutdown_event.wait() has been called first.
    delay_seconds = 1
    time.sleep(delay_seconds)
    os.kill(pid, signal.SIGINT)
|
||
|
||
def run_signal_handler_wait_set_test():
    """Block in ``Event.wait()`` until a SIGINT handler calls ``Event.set()``.

    A SIGINT handler that sets a ``multiprocessing.Event`` is installed, a
    pool worker is asked to send SIGINT to this process, and this process
    waits on the event without a timeout.
    """
    stop_requested = multiprocessing.Event()

    def handle_sigint(_signum, _frame):
        stop_requested.set()

    signal.signal(signal.SIGINT, handle_sigint)

    with concurrent.futures.ProcessPoolExecutor() as pool:
        pending_kill = pool.submit(send_sigint, os.getpid())
        stop_requested.wait()
        pending_kill.result()


if __name__ == '__main__':
    run_signal_handler_wait_set_test()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
import os.path | ||
from test import support | ||
|
||
def load_tests(*args):
    """Load the tests of the package that contains this file."""
    package_dir = os.path.dirname(__file__)
    return support.load_package_tests(package_dir, *args)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
import unittest | ||
from test import support | ||
import sys | ||
import signal | ||
import os | ||
|
||
|
||
try: | ||
import multiprocessing | ||
from concurrent.futures import ProcessPoolExecutor | ||
_have_multiprocessing = True | ||
except (NotImplementedError, ModuleNotFoundError): | ||
_have_multiprocessing = False | ||
|
||
|
||
@unittest.skipUnless(_have_multiprocessing,
                     "requires multiprocessing")
@unittest.skipUnless(hasattr(signal, 'signal'),
                     "Requires signal.signal")
@unittest.skipUnless(hasattr(signal, 'SIGINT'),
                     "Requires signal.SIGINT")
@unittest.skipUnless(hasattr(os, 'kill'),
                     "Requires os.kill")
@unittest.skipUnless(hasattr(os, 'getppid'),
                     "Requires os.getppid")
@support.requires_subprocess()
class TestEventSignalHandling(unittest.TestCase):
    """Tests for ``multiprocessing.Event`` used together with signal handlers.

    Most tests run a helper script from the ``multiprocessingdata`` directory
    in a fresh interpreter and require it to exit with status 0.
    """

    def _run_script(self, name, timeout):
        """Run helper script *name* and fail unless it exits with status 0.

        *timeout* is passed to ``subprocess.call()``. A script that does not
        finish in time is reported as a test failure instead of hanging the
        test run.
        """
        import subprocess

        script = support.findfile(name, subdir="multiprocessingdata")
        try:
            exit_code = subprocess.call([sys.executable, script],
                                        timeout=timeout)
        except subprocess.TimeoutExpired:
            self.fail(f"subprocess.TimeoutExpired for {name}")
        self.assertEqual(exit_code, 0,
                         f"{name} exited with status {exit_code}")

    def test_no_race_for_is_set_set(self):
        # Repeat the run: a single pass may not hit the signal at the
        # problematic moment.
        for _ in range(10):
            self._run_script("is_set_set.py", timeout=60)

    def test_no_race_set_clear(self):
        self._run_script("set_clear_race.py", timeout=support.LONG_TIMEOUT)

    def test_wait_set_no_deadlock(self):
        self._run_script("wait_set_no_deadlock.py",
                         timeout=support.LONG_TIMEOUT)

    def test_wait_timeout(self):
        # multiprocessing.Event is documented as a clone of threading.Event:
        # https://docs.python.org/3/library/multiprocessing.html#multiprocessing.Event
        # https://docs.python.org/3/library/threading.html#threading.Event.wait
        #
        # wait() blocks while the internal flag is false and the timeout, if
        # given, has not expired. It returns False when the flag did not
        # become true within the timeout. The timeout is a number of seconds
        # and may be an integer or a float.
        event = multiprocessing.Event()
        for timeout in (1, 0.1):
            with self.subTest(timeout=timeout):
                self.assertFalse(event.wait(timeout))


if __name__ == '__main__':
    unittest.main()
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This needs rewriting to describe the current approach. Also, please update the PR title and description. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks. I've updated the description now. What do you think, @gpshead? |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
All methods of :class:`multiprocessing.Event` are now reentrant and thread-safe, so an event can be used from signal handlers. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Iff (let's not assume we should go this route - see my comment on the issue) we're going to abandon use of the native OS platform APIs, which properly implement timeouts on inter-process semaphores without a busy loop via `_multiprocessing.SemLock` in `Modules/_multiprocessing/semaphore.c`, sleeping in the loop should be done in an exponential back-off fashion, as was the case even in `threading` itself before we started using proper OS APIs there. See https://github.com/python/cpython/blob/v3.1.3/Lib/threading.py#L227 for the old exponential back-off delay example.
Doing a busy loop with sleeps as a low-level primitive in 2024 feels very wrong to me. Such loops have always been really unfriendly to both latency, due to unnecessary delays, and power usage, from frequent unnecessary wakes.
I suggested an alternate idea in #126434.
History
Prior to CPython 3.2, `threading.Condition` was implemented with a back-off in a similar manner. 3.2 improved on that old hack by using the OS APIs for lock timeouts in 7c3e577.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks @gpshead and @ZeroIntensity
I will read your comments and suggestions thoroughly tomorrow, and come back to you.
Thanks and kind regards.