Skip to content
7 changes: 7 additions & 0 deletions Doc/library/pdb.rst
Original file line number Diff line number Diff line change
Expand Up @@ -699,6 +699,13 @@ can be overridden by the local file.
:pdbcmd:`interact` directs its output to the debugger's
output channel rather than :data:`sys.stderr`.

.. pdbcommand:: attach process

Attach to a running process. The *process* argument can be a
:class:`subprocess.Popen`, a :class:`multiprocessing.Process` or a process ID.

.. versionadded:: 3.15

.. _debugger-aliases:

.. pdbcommand:: alias [name [command]]
Expand Down
6 changes: 6 additions & 0 deletions Doc/whatsnew/3.15.rst
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,12 @@ New modules
Improved modules
================

pdb
---

* The ``attach`` command is added to :mod:`pdb` to attach to a running process.
(Contributed by Tian Gao in :gh:`133954`.)

ssl
---

Expand Down
53 changes: 53 additions & 0 deletions Lib/pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -709,6 +709,27 @@ def _get_asyncio_task(self):
task = None
return task

def _get_pid_from_process(self, process):
"""process could be a subprocess.Popen, multiprocessing.Process or a pid
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, this seems misleading - process is really a string that evaluates to a Popen or a Process or a pid.

"""
# They are not used elsewhere so do a lazy import
from multiprocessing import Process
from subprocess import Popen

try:
process = self._getval(process)
except:
# Error message is already displayed
return None

if isinstance(process, (Process, Popen)):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be worth considering accepting any object with a pid attribute... that'd save you the imports of multiprocessing and subprocess and would work on asyncio.subprocess.Process, too. And potentially processes from 3rd party libraries, too

return process.pid
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be worth ensuring that process.pid is an int (it might not be due to monkeypatching, for instance).

elif isinstance(process, int):
return process
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd suggest considering:

Suggested change
if isinstance(process, (Process, Popen)):
return process.pid
elif isinstance(process, int):
return process
if isinstance(process, int):
return process
pid = getattr(process, "pid", None)
if isinstance(pid, int):
return pid


self.error(f"Invalid process {process}")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably should use the repr here, instead:

Suggested change
self.error(f"Invalid process {process}")
self.error(f"Invalid process {process!r}")

return None

def interaction(self, frame, tb_or_exc):
# Restore the previous signal handler at the Pdb prompt.
if Pdb._previous_sigint_handler:
Expand Down Expand Up @@ -1961,6 +1982,23 @@ def do_debug(self, arg):

complete_debug = _complete_expression

def do_attach(self, process):
"""attach process

Attach to process, which can be a subprocess.Popen,
multiprocessing.Process or a pid.
"""
pid = self._get_pid_from_process(process)

if pid is not None:
self.message(f"Attaching to process {pid}")
try:
attach(pid)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are some assumptions in attach that the _PdbClient is being run from the main thread - the entire signal handling approach relies upon it, at the very least - I think it'll just wind up failing with an exception if run from a non-main thread, at the point where it tries to install the signal handler.

I think we should check if we're in the main thread explicitly and self.error() if not.

Or, thinking outside of the box, we could spawn a new process to do the attaching from its main thread, and return control back to the parent process after it finishes.

Copy link
Member Author

@gaogaotiantian gaogaotiantian May 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The great thing about attaching directly from the process is that there's the automatic parent-child relation. So even with the tracing restriction, you can still attach to your child processes - spawning a new process won't let you do that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's true, and that's a nice advantage - but certainly not one big enough to live with Ctrl+C not working! And you only get that advantage when starting off in non-remote PDB - if you started off in remote PDB, the child process won't be a child of the client (which means it also won't work when you attach to a child process and then attach again to a grandchild process).

The most reasonable choice might just be to make it so that the pdb.attach module level function raises an exception if it's called from any thread but the main thread. Then the attach PDB command will work from any thread when running remote PDB, and from the main thread when running normal PDB, but will fail when called on a non-main thread in non-remote PDB.

Copy link
Member Author

@gaogaotiantian gaogaotiantian May 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(which means it also won't work when you attach to a child process and then attach again to a grandchild process).

Is that true? I thought the trace_scope applies to descendants, not only direct children. So grandchildren should work? (I have a test that does that, if our linux buildbot has the restriction, it should prove the theory)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The most reasonable choice might just be to make it so that the pdb.attach module level function raises an exception if it's called from any thread but the main thread.

That seems a reasonable option. It won't work when _PdbClient is not in the main thread. We can explore other options in the future, but for now, making what works work seems to be the way to go.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(which means it also won't work when you attach to a child process and then attach again to a grandchild process).

Is that true? I thought the trace_scope applies to descendants, not only direct children. So grandchildren should work?

Ah, yes, you're right.

except Exception as e:
self._error_exc()
return
self.message(f"Detached from process {pid}")

def do_quit(self, arg):
"""q(uit) | exit

Expand Down Expand Up @@ -2741,6 +2779,8 @@ def _ensure_valid_message(self, msg):
# Due to aliases this list is not static, but the client
# needs to know it for multi-line editing.
pass
case {"attach": int()}:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably worth adding a comment explaining what this message type is used for, if only for consistency with the other message types.

Suggested change
case {"attach": int()}:
case {"attach": int()}:
# Have the client attach to a new process.

pass
case _:
raise AssertionError(
f"PDB message doesn't follow the schema! {msg}"
Expand Down Expand Up @@ -2925,6 +2965,11 @@ def detach(self):
# close() can fail if the connection was broken unexpectedly.
pass

def do_attach(self, process):
    # Resolve *process* to a pid and forward it to the client as an
    # {"attach": pid} message.  When no pid can be resolved (None comes
    # back from the lookup helper) nothing is sent.
    target_pid = self._get_pid_from_process(process)
    if target_pid is None:
        return
    self._send(attach=target_pid)

def do_debug(self, arg):
# Clear our cached list of valid commands; the recursive debugger might
# send its own differing list, and so ours needs to be re-sent.
Expand Down Expand Up @@ -3277,6 +3322,14 @@ def process_payload(self, payload):
state = "dumb"
self.state = state
self.prompt_for_reply(prompt)
case {"attach": int(pid)}:
print(f"Attaching to process {pid}")
try:
attach(pid)
print(f"Detached from process {pid}")
except Exception as exc:
msg = traceback.format_exception_only(exc)[-1].strip()
print("***", msg, flush=True)
case _:
raise RuntimeError(f"Unrecognized payload {payload}")

Expand Down
182 changes: 182 additions & 0 deletions Lib/test/test_remote_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -819,6 +819,18 @@ def test_reading_empty_json_during_completion(self):
expected_state={"state": "interact"},
)

def test_client_attach(self):
    # An {"attach": pid} payload from the server should make the client
    # report the attach on stdout and call pdb.attach() with that pid.
    # pdb.attach is patched so no real process is touched.
    server_messages = [("server", {"attach": 1234})]
    with unittest.mock.patch("pdb.attach") as attach_mock:
        self.do_test(
            incoming=server_messages,
            expected_outgoing=[],
            expected_stdout_substring="Attaching to process 1234",
        )
        attach_mock.assert_called_once_with(1234)


class RemotePdbTestCase(unittest.TestCase):
"""Tests for the _PdbServer class."""
Expand Down Expand Up @@ -957,6 +969,15 @@ def test_registering_commands(self):
["_pdbcmd_silence_frame_status", "print('hi')"],
)

def test_server_attach(self):
    # Queue an "attach 1234" command followed by EOF, run the command
    # loop, then check that the third message written by the server is
    # the attach request for pid 1234.
    for message in ({"reply": "attach 1234"}, {"signal": "EOF"}):
        self.sockfile.add_input(message)

    self.pdb.cmdloop()

    sent = self.sockfile.get_output()
    self.assertEqual(sent[2], {"attach": 1234})

def test_detach(self):
"""Test the detach method."""
with unittest.mock.patch.object(self.sockfile, 'close') as mock_close:
Expand Down Expand Up @@ -1579,5 +1600,166 @@ def test_attach_to_process_with_colors(self):
self.assertNotIn("while x == 1", output["client"]["stdout"])
self.assertIn("while x == 1", re.sub("\x1b[^m]*m", "", output["client"]["stdout"]))


@unittest.skipIf(not sys.is_remote_debug_enabled(), "Remote debugging is not enabled")
@unittest.skipIf(sys.platform not in ("darwin", "linux", "win32"),
                 "Test only runs on Linux, Windows and MacOS")
@cpython_only
@requires_subprocess()
class PdbAttachCommand(unittest.TestCase):
    """End-to-end tests for the pdb ``attach`` command.

    Each test runs a driver script in a subprocess.  The driver spawns a
    target script as ``process`` and then hits ``breakpoint()``; pdb
    commands are fed to the driver through its stdin.
    """

    @classmethod
    def setUpClass(cls):
        """Skip the whole class if remote execution is not permitted."""
        # We need to do a quick test to see if we have the permission to remote
        # execute the code. If not, just skip the whole test.
        script_path = TESTFN + "script.py"
        remote_path = TESTFN + "remote.py"
        script = textwrap.dedent("""
            import time
            print("ready", flush=True)
            while True:
                print('hello')
                time.sleep(0.1)
        """)

        with open(script_path, "w") as f:
            f.write(script)

        with open(remote_path, "w") as f:
            f.write("pass\n")

        with subprocess.Popen(
            [sys.executable, script_path],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        ) as proc:
            try:
                # Wait for the first line of output so the child is known
                # to be running before trying to execute code in it.
                proc.stdout.readline()
                sys.remote_exec(proc.pid, remote_path)
            except PermissionError:
                # Skip the test if we don't have permission to execute remote code
                raise unittest.SkipTest("We don't have permission to execute remote code")
            finally:
                os.unlink(script_path)
                os.unlink(remote_path)
                proc.terminate()

    def do_test(self, target, commands):
        """Run *commands* in pdb inside a driver that spawned *target*.

        *target* is the source of the script to attach to and *commands*
        is the pdb input; both are dedented before use.  Returns the
        driver's ``(stdout, stderr)`` as text.
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            target = textwrap.dedent(target)
            target_path = os.path.join(tmpdir, "target.py")
            with open(target_path, "wt") as f:
                f.write(target)

            # The driver starts the target as ``process`` and then stops
            # in pdb, so the commands can refer to ``process`` by name.
            script = textwrap.dedent(
                f"""
                import subprocess
                import sys
                process = subprocess.Popen([sys.executable, {target_path!r}],
                                           stdout=subprocess.PIPE,
                                           stderr=subprocess.PIPE,
                                           text=True
                                           )
                breakpoint()
                """)
            script_path = os.path.join(tmpdir, "script.py")

            with open(script_path, "wt") as f:
                f.write(script)

            process = subprocess.Popen(
                [sys.executable, script_path],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                stdin=subprocess.PIPE,
                text=True
            )

            self.addCleanup(process.terminate)

            self.addCleanup(process.stdout.close)
            self.addCleanup(process.stderr.close)

            stdout, stderr = process.communicate(textwrap.dedent(commands),
                                                 timeout=SHORT_TIMEOUT)

            return stdout, stderr

    def test_attach_simple(self):
        """Test basic attach command"""
        # The target spins until ``block`` is cleared from the attached
        # debugger, then calls test_function().
        target = """
            block = True
            import time
            while block:
                time.sleep(0.2)
            def test_function():
                x = 42
                return x
            test_function()
        """

        commands = """
            attach process
            block = False
            b test_function
            c
            n
            p x + 42
            quit
            continue
        """
        stdout, _ = self.do_test(target, commands)
        self.assertIn("84", stdout)

    def test_attach_multiprocessing(self):
        """Spawn a process with multiprocessing and attach to it."""
        target = """
            block = True
            import time
            import multiprocessing

            def worker(queue):
                block = True
                queue.put(0)
                while block:
                    time.sleep(0.2)
                queue.put(42)

            def test_function(queue):
                data = queue.get()
                return data

            if __name__ == '__main__':
                while block:
                    time.sleep(0.2)

                queue = multiprocessing.Queue()
                p = multiprocessing.Process(target=worker, args=(queue,))
                p.start()
                queue.get()
                test_function(queue)
                p.join()
        """

        # Attach to the target first, then from inside it attach again to
        # the multiprocessing worker ``p``.
        commands = """
            attach process
            block = False
            b test_function
            c
            attach p
            block = False
            q
            n
            p data + 42
            quit
            continue
        """
        stdout, _ = self.do_test(target, commands)
        self.assertIn("84", stdout)



if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
The ``attach`` command is added to :mod:`pdb` to attach to a running process.
Loading