Loosen cell ID regex to match nbformat spec (#136)

ellisonbg · web-flow · commit 389f2af87a10 · 2025-07-15T09:30:44.000-07:00
diff --git a/jupyter_server_documents/outputs/handlers.py b/jupyter_server_documents/outputs/handlers.py
@@ -67,11 +67,9 @@ async def get(self, file_id=None, cell_id=None):
 # URL to handler mappings
 # -----------------------------------------------------------------------------
 
-# Strict UUID regex (matches standard 8-4-4-4-12 UUIDs)
-_uuid_regex = r"[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}"
-
-_file_id_regex = rf"(?P<file_id>{_uuid_regex})"
-_cell_id_regex = rf"(?P<cell_id>{_uuid_regex})"
+_file_id_regex = r"(?P<file_id>[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})"
+# nbformat cell ids are non-empty strings of letters, digits, "_" and "-"; uuid4 strings also match
+_cell_id_regex = rf"(?P<cell_id>[a-zA-Z0-9_-]+)"
 
 # non-negative integers
 _output_index_regex = r"(?P<output_index>0|[1-9]\d*)"
@@ -80,4 +78,3 @@ async def get(self, file_id=None, cell_id=None):
     (rf"/api/outputs/{_file_id_regex}/{_cell_id_regex}(?:/{_output_index_regex}.output)?", OutputsAPIHandler),
     (rf"/api/outputs/{_file_id_regex}/{_cell_id_regex}/stream", StreamAPIHandler),
 ]
-
diff --git a/jupyter_server_documents/outputs/manager.py b/jupyter_server_documents/outputs/manager.py
@@ -207,6 +207,9 @@ def process_loaded_notebook(self, file_id: str, file_data: dict) -> dict:
         # Notebook content is a tree of nbformat.NotebookNode objects,
         # which are a subclass of dict.
         nb = file_data['content']
+        # Cell ids only exist in nbformat >= 4.5, so upgrade every loaded
+        # notebook to 4.5 or later before processing it.
+        nb = nbformat.v4.upgrade(nb, from_version=nb.nbformat, from_minor=nb.nbformat_minor)
         
         # Check if the notebook metadata has placeholder_outputs set to True
         if nb.get('metadata', {}).get('placeholder_outputs') is True:
@@ -233,8 +236,12 @@ def _process_loaded_placeholders(self, file_id: str, nb: dict) -> dict:
             dict: The notebook with placeholder outputs loaded from disk
         """
         for cell in nb.get('cells', []):
+            # Ensure all cells have IDs regardless of type
+            if not cell.get('id'):
+                cell['id'] = str(uuid.uuid4())
+            
             if cell.get('cell_type') == 'code':
-                cell_id = cell.get('id', str(uuid.uuid4()))
+                cell_id = cell['id']
                 try:
                     # Try to get outputs from disk
                     output_strings = self.get_outputs(file_id=file_id, cell_id=cell_id)
@@ -268,10 +275,14 @@ def _process_loaded_no_placeholders(self, file_id: str, nb: dict) -> dict:
             dict: The notebook with outputs saved to disk and replaced with placeholders
         """
         for cell in nb.get('cells', []):
+            # Ensure all cells have IDs regardless of type
+            if not cell.get('id'):
+                cell['id'] = str(uuid.uuid4())
+                
             if cell.get('cell_type') != 'code' or 'outputs' not in cell:
                 continue
 
-            cell_id = cell.get('id', str(uuid.uuid4()))
+            cell_id = cell['id']
             processed_outputs = []
             for output in cell.get('outputs', []):
                 display_id = output.get('metadata', {}).get('display_id')