Skip to content
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ jobs:
shell: bash
run: |
COVERAGE_PROCESS_START=$GITHUB_WORKSPACE/.coveragerc \
PYTHONPATH='.:tests' python -m pytest -r s
PYTHONPATH='.:tests' python -m pytest -r s -vs
coverage combine --append
coverage xml -i
- name: Publish coverage results
Expand Down
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
In development
==============

- Make pickling of functions depending on globals in notebook more
deterministic. ([PR#560](https://github.com/cloudpipe/cloudpickle/pull/560))

3.1.2
=====

Expand Down
13 changes: 9 additions & 4 deletions cloudpickle/cloudpickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -835,6 +835,11 @@ def _code_reduce(obj):
# See the inline comment in _class_setstate for details.
co_name = "".join(obj.co_name)

# co_filename is not used in the constructor of code objects, so we can
# safely set it to indicate that this is dynamic code. This also makes
# the payload deterministic, independent of where the function is defined.
co_filename = "".join("<dynamic-code>")

# Create shallow copies of these tuple to make cloudpickle payload deterministic.
# When creating a code object during load, copies of these four tuples are
# created, while in the main process, these tuples can be shared.
Expand All @@ -857,7 +862,7 @@ def _code_reduce(obj):
obj.co_consts,
co_names,
co_varnames,
obj.co_filename,
co_filename,
co_name,
obj.co_qualname,
obj.co_firstlineno,
Expand All @@ -880,7 +885,7 @@ def _code_reduce(obj):
obj.co_consts,
co_names,
co_varnames,
obj.co_filename,
co_filename,
co_name,
obj.co_firstlineno,
obj.co_linetable,
Expand All @@ -901,7 +906,7 @@ def _code_reduce(obj):
obj.co_code,
obj.co_consts,
co_varnames,
obj.co_filename,
co_filename,
co_name,
obj.co_firstlineno,
obj.co_lnotab,
Expand All @@ -925,7 +930,7 @@ def _code_reduce(obj):
obj.co_consts,
co_names,
co_varnames,
obj.co_filename,
co_filename,
co_name,
obj.co_firstlineno,
obj.co_lnotab,
Expand Down
2 changes: 2 additions & 0 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ pytest-cov
psutil
# To be able to test tornado coroutines
tornado
# To be able to test behavior in jupyter-notebooks
ipykernel
# To be able to test numpy specific things
# but do not build numpy from source on Python nightly
numpy >=1.18.5; python_version <= '3.12'
Expand Down
96 changes: 96 additions & 0 deletions tests/cloudpickle_ipykernel_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import sys
import time
import pytest
import textwrap
from queue import Empty

from .testutils import check_deterministic_pickle

if sys.platform == "win32":
if sys.version_info < (3, 11):
pytest.skip(
"ipykernel requires Python 3.11 or later",
allow_module_level=True
)
ipykernel = pytest.importorskip("ipykernel")


def run_in_notebook(code, timeout=10):


km = ipykernel.connect.jupyter_client.KernelManager()
km.start_kernel()
kc = km.client()
kc.start_channels()
status, output, err = "kernel_started", None, None
try:
assert km.is_alive() and kc.is_alive()
kc.wait_for_ready()
idx = kc.execute(code)
running = True
while running:
try:
res = kc.iopub_channel.get_msg(timeout=timeout)
except Empty:
status = "timeout"
break
if res['parent_header'].get('msg_id') != idx:
continue
content = res['content']
if content.get("name", "state") == "stdout":
output = content['text']
if "traceback" in content:
err = "\n".join(content['traceback'])
status = "error"
running = res['content'].get('execution_state', None) != "idle"
finally:
kc.shutdown()
kc.stop_channels()
km.shutdown_kernel(now=True, restart=False)
assert not km.is_alive()
if status not in ["error", "timeout"]:
status = "ok" if not running else "exec_error"
return status, output, err


@pytest.mark.parametrize("code, expected", [
("1 + 1", "ok"),
("raise ValueError('This is a test error')", "error"),
("import time; time.sleep(100)", "timeout")
])
def test_run_in_notebook(code, expected):
code = textwrap.dedent(code)

t_start = time.time()
status, output, err = run_in_notebook(code, timeout=1)
duration = time.time() - t_start
assert status == expected, (
f"Unexpected status: {status}, output: {output}, err: {err}, duration: {duration}"
)
assert duration < 10, "Timeout not enforced properly"
if expected == "error":
assert "This is a test error" in err


def test_deterministic_payload_for_dynamic_func_in_notebook():
code = textwrap.dedent("""
import cloudpickle
MY_PI = 3.1415
def get_pi():
return MY_PI
print(cloudpickle.dumps(get_pi))
""")

status, output, err = run_in_notebook(code)
assert status == "ok"
payload = eval(output.strip(), {})

status, output, err = run_in_notebook(code)
assert status == "ok"
payload2 = eval(output.strip(), {})

check_deterministic_pickle(payload, payload2)
Loading