Skip to content

Commit df80078

Browse files
committed
implement contents lock to prevent overlapping reads/writes
1 parent c967836 commit df80078

File tree

2 files changed

+51
-20
lines changed

2 files changed

+51
-20
lines changed

jupyter_server_documents/rooms/yroom.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ async def _process_message_queue(self) -> None:
308308
# queue is empty in `self.stop()`.
309309
self._message_queue.task_done()
310310

311-
self.log.info(
311+
self.log.debug(
312312
"Stopped `self._process_message_queue()` background task "
313313
f"for YRoom '{self.room_id}'."
314314
)
@@ -709,13 +709,13 @@ def stop(self, close_code: int = 1001, immediately: bool = False):
709709
if immediately:
710710
self._clear_ydoc()
711711
else:
712-
# TODO: how to handle restarts after this safely?
713712
self._loop.create_task(
714713
self._save_then_clear_ydoc()
715714
)
716715

717716
self._stopped = True
718717

718+
719719
def _clear_ydoc(self):
720720
"""
721721
Clears the YDoc, awareness, and JupyterYDoc, freeing their memory to the
@@ -727,18 +727,27 @@ def _clear_ydoc(self):
727727
ydoc=self._ydoc,
728728
awareness=self._awareness
729729
)
730+
730731

731732
async def _save_then_clear_ydoc(self):
733+
"""
734+
Saves the JupyterYDoc, then calls `self._clear_ydoc()`.
735+
736+
This can be run safely in the background because the FileAPI uses an
737+
lock to prevent overlapping reads & writes to a single file.
738+
"""
732739
await self.file_api.save(self._jupyter_ydoc)
733740
self._clear_ydoc()
734-
741+
742+
735743
@property
736744
def stopped(self) -> bool:
737745
"""
738746
Returns whether the room is stopped.
739747
"""
740748
return self._stopped
741749

750+
742751
def restart(self, close_code: int = 1001, immediately: bool = False):
743752
"""
744753
Restarts the YRoom. This method re-initializes & reloads the YDoc,

jupyter_server_documents/rooms/yroom_file_api.py

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,18 @@ class YRoomFileAPI:
3636
log: logging.Logger
3737

3838
_fileid_manager: BaseFileIdManager
39+
"""
40+
Stores a reference to the Jupyter Server's File ID Manager.
41+
"""
42+
3943
_contents_manager: AsyncContentsManager | ContentsManager
44+
"""
45+
Stores a reference to the Jupyter Server's ContentsManager.
46+
47+
NOTE: any calls made on this attribute should acquire & release the
48+
`_content_lock`. See `_content_lock` for more info.
49+
"""
50+
4051
_loop: asyncio.AbstractEventLoop
4152
_save_scheduled: bool
4253
_content_loading: bool
@@ -81,6 +92,13 @@ class YRoomFileAPI:
8192
not running.
8293
"""
8394

95+
_content_lock: asyncio.Lock
96+
"""
97+
An `asyncio.Lock` that ensures `ContentsManager` calls reading/writing for a
98+
single file do not overlap. This prevents file corruption scenarios like
99+
dual-writes or dirty-reads.
100+
"""
101+
84102
def __init__(
85103
self,
86104
*,
@@ -108,9 +126,10 @@ def __init__(
108126
self._last_modified = None
109127
self._stopped = False
110128

111-
# Initialize loading & loaded states
129+
# Initialize content-related primitives
112130
self._content_loading = False
113131
self._content_load_event = asyncio.Event()
132+
self._content_lock = asyncio.Lock()
114133

115134

116135
def get_path(self) -> str | None:
@@ -169,11 +188,12 @@ async def _load_content(self, jupyter_ydoc: YBaseDoc) -> None:
169188

170189
# Load the content of the file from the path
171190
self.log.info(f"Loading content for room ID '{self.room_id}', found at path: '{path}'.")
172-
file_data = await ensure_async(self._contents_manager.get(
173-
path,
174-
type=self.file_type,
175-
format=self.file_format
176-
))
191+
async with self._content_lock:
192+
file_data = await ensure_async(self._contents_manager.get(
193+
path,
194+
type=self.file_type,
195+
format=self.file_format
196+
))
177197

178198
# Set JupyterYDoc content and set `dirty = False` to hide the "unsaved
179199
# changes" icon in the UI
@@ -289,9 +309,10 @@ async def _check_file(self):
289309
# If this raises `HTTPError(404)`, that indicates the file was
290310
# moved/deleted out-of-band.
291311
try:
292-
file_data = await ensure_async(self._contents_manager.get(
293-
path=path, format=file_format, type=file_type, content=False
294-
))
312+
async with self._content_lock:
313+
file_data = await ensure_async(self._contents_manager.get(
314+
path=path, format=file_format, type=file_type, content=False
315+
))
295316
except HTTPError as e:
296317
# If not 404, re-raise the exception as it is unknown
297318
if (e.status_code != 404):
@@ -344,14 +365,15 @@ async def save(self, jupyter_ydoc: YBaseDoc):
344365
self._save_scheduled = False
345366

346367
# Save the YDoc via the ContentsManager
347-
file_data = await ensure_async(self._contents_manager.save(
348-
{
349-
"format": file_format,
350-
"type": file_type,
351-
"content": content,
352-
},
353-
path
354-
))
368+
async with self._content_lock:
369+
file_data = await ensure_async(self._contents_manager.save(
370+
{
371+
"format": file_format,
372+
"type": file_type,
373+
"content": content,
374+
},
375+
path
376+
))
355377

356378
# Set most recent `last_modified` timestamp
357379
if file_data['last_modified']:

0 commit comments

Comments
 (0)