Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 70 additions & 13 deletions jupyter_ydoc/ynotebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@

import copy
from functools import partial
from typing import Any, Callable, Dict, Optional
from typing import Any, Callable, Dict, List, Optional
from uuid import uuid4
from warnings import warn

from pycrdt import Array, Awareness, Doc, Map, Text

Expand Down Expand Up @@ -102,8 +103,11 @@ def get_cell(self, index: int) -> Dict[str, Any]:
:return: A cell.
:rtype: Dict[str, Any]
"""
return self._cell_to_py(self._ycells[index])

def _cell_to_py(self, ycell: Map) -> Dict[str, Any]:
meta = self._ymeta.to_py()
cell = self._ycells[index].to_py()
cell = ycell.to_py()
cell.pop("execution_state", None)
cast_all(cell, float, int) # cells coming from Yjs have e.g. execution_count as float
if "id" in cell and meta["nbformat"] == 4 and meta["nbformat_minor"] <= 4:
Expand Down Expand Up @@ -234,7 +238,7 @@ def set(self, value: Dict) -> None:
nb_without_cells = {key: value[key] for key in value.keys() if key != "cells"}
nb = copy.deepcopy(nb_without_cells)
cast_all(nb, int, float) # Yjs expects numbers to be floating numbers
cells = value["cells"] or [
new_cells = value["cells"] or [
{
"cell_type": "code",
"execution_count": None,
Expand All @@ -245,26 +249,79 @@ def set(self, value: Dict) -> None:
"id": str(uuid4()),
}
]
old_ycells_by_id = {ycell["id"]: ycell for ycell in self._ycells}

with self._ydoc.transaction():
# clear document
self._ymeta.clear()
self._ycells.clear()
try:
new_cell_list: List[tuple[Map, dict]] = []
retained_cells = set()

# Determine cells to be retained
for new_cell in new_cells:
cell_id = new_cell.get("id")
if cell_id and (old_ycell := old_ycells_by_id.get(cell_id)):
old_cell = self._cell_to_py(old_ycell)
if old_cell == new_cell:
new_cell_list.append((old_ycell, old_cell))
retained_cells.add(cell_id)
continue
# New or changed cell
new_cell_list.append((self.create_ycell(new_cell), new_cell))

# First delete all non-retained cells
if not retained_cells:
# fast path if no cells were retained
self._ycells.clear()
else:
index = 0
for old_ycell in list(self._ycells):
if old_ycell["id"] not in retained_cells:
self._ycells.pop(index)
else:
index += 1

# Now add new cells
index = 0
for new_ycell, new_cell in new_cell_list:
if len(self._ycells) > index:
# we need to compare against a python cell to avoid
# an extra transaction on new cells which are not yet
# integrated into the ydoc document.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could use new_ycell.prelim.get("id") and avoid storing the python cells, or only store python cells and create ycells when needed?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good suggestion, I went for only storing python cells as to use prelim we would need to check if a cell is integrated or not and would get more overhead.

if self._ycells[index]["id"] == new_cell.get("id"):
# retained cell
index += 1
continue
self._ycells.insert(index, new_ycell)
index += 1

except Exception as e:
# Fallback to total overwrite, warn to allow debugging
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What kind of error are you expecting?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not expecting any, unless there is something around concurrency that could go wrong. This is just out of abundance of caution when making a change deep in the stack; do you think we should remove exception handling and the fallback?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Concurrency like in async? Because this function is not async.
I would rather let the exception propagate if the logic here is wrong, but no strong opinion.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would rather let the exception propagate

Done in b97f4de

warn(f"All cells were reloaded due to an error in granular reload logic: {e}")
self._ycells.clear()
self._ycells.extend([new_ycell for (new_ycell, _new_cell) in new_cell_list])

for key in [
k for k in self._ystate.keys() if k not in ("dirty", "path", "document_id")
]:
del self._ystate[key]

# initialize document
self._ycells.extend([self.create_ycell(cell) for cell in cells])
self._ymeta["nbformat"] = nb.get("nbformat", NBFORMAT_MAJOR_VERSION)
self._ymeta["nbformat_minor"] = nb.get("nbformat_minor", NBFORMAT_MINOR_VERSION)
nbformat_major = nb.get("nbformat", NBFORMAT_MAJOR_VERSION)
nbformat_minor = nb.get("nbformat_minor", NBFORMAT_MINOR_VERSION)

if self._ymeta.get("nbformat") != nbformat_major:
self._ymeta["nbformat"] = nbformat_major

if self._ymeta.get("nbformat_minor") != nbformat_minor:
self._ymeta["nbformat_minor"] = nbformat_minor

old_y_metadata = self._ymeta.get("metadata")
old_metadata = old_y_metadata.to_py() if old_y_metadata else {}
metadata = nb.get("metadata", {})
metadata.setdefault("language_info", {"name": ""})
metadata.setdefault("kernelspec", {"name": "", "display_name": ""})

self._ymeta["metadata"] = Map(metadata)
if metadata != old_metadata:
metadata.setdefault("language_info", {"name": ""})
metadata.setdefault("kernelspec", {"name": "", "display_name": ""})
self._ymeta["metadata"] = Map(metadata)

def observe(self, callback: Callable[[str, Any], None]) -> None:
"""
Expand Down
4 changes: 4 additions & 0 deletions jupyter_ydoc/yunicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ def set(self, value: str) -> None:
:param value: The content of the document.
:type value: str
"""
if self.get() == value:
# no-op if the values are already the same,
# to avoid side-effects such as cursor jumping to the top
return
with self._ydoc.transaction():
# clear document
self._ysource.clear()
Expand Down
104 changes: 104 additions & 0 deletions tests/test_ynotebook.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

from pycrdt import Map

from jupyter_ydoc import YNotebook


def make_code_cell(source: str):
return {
"cell_type": "code",
"source": source,
"metadata": {},
"outputs": [],
"execution_count": None,
}


class AnyInstanceOf:
def __init__(self, cls):
self.cls = cls

def __eq__(self, other):
return isinstance(other, self.cls)


def test_set_preserves_cells_when_unchanged():
nb = YNotebook()
nb.set({"cells": [make_code_cell("print('a')\n"), make_code_cell("print('b')\n")]})

changes = []

def record_changes(topic, event):
changes.append((topic, event))

nb.observe(record_changes)

model = nb.get()

# Call set with identical structure
nb.set(model)

# No changes should be observed at all
assert changes == []


def test_set_preserves_cells_with_insert_and_remove():
nb = YNotebook()
nb.set(
{
"cells": [
make_code_cell("print('a')\n"), # original 0
make_code_cell("print('b')\n"), # original 1 (will remove)
make_code_cell("print('c')\n"), # original 2
]
}
)

# Capture textual content for sanity check
cell0_source_text = str(nb.ycells[0]["source"])
cell2_source_text = str(nb.ycells[2]["source"])

# Get the model as Python object
model = nb.get()

# Remove the middle cell and insert a new one between the retained cells
cells = model["cells"]
assert len(cells) == 3

# The cell ids are needed for retention logic; keep first and last
first = cells[0]
last = cells[2]

# New inserted cell
inserted = make_code_cell("print('x')\n")
model["cells"] = [first, inserted, last]

changes = []

def record_changes(topic, event):
changes.append((topic, event))

nb.observe(record_changes)
nb.set(model)

assert nb.cell_number == 3

# Content of the first and last cells should remain the same
assert str(nb.ycells[0]["source"]) == cell0_source_text
assert str(nb.ycells[2]["source"]) == cell2_source_text

# The middle cell should have a different source now
assert str(nb.ycells[1]["source"]) == "print('x')\n"

# We should have one cell event
cell_events = [e for t, e in changes if t == "cells"]
assert len(cell_events) == 1
event_transactions = cell_events[0]
assert len(event_transactions) == 1
assert event_transactions[0].delta == [
{"retain": 1},
{"delete": 1},
{"insert": [AnyInstanceOf(Map)]},
]
24 changes: 24 additions & 0 deletions tests/test_yunicode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.

from jupyter_ydoc import YUnicode


def test_set_no_op_if_unchaged():
text = YUnicode()
text.set("test content")

changes = []

def record_changes(topic, event):
changes.append((topic, event))

text.observe(record_changes)

model = text.get()

# Call set with identical text
text.set(model)

# No changes should be observed at all
assert changes == []
Loading