Add endpoint returning all outputs for a cell (#91)

3coins · web-flow · commit 401675f9406f · 2025-06-06T12:00:09.000-07:00
* Removed unused code

* Added a method to get all outputs for a cell.

* Updated handler to accommodate all outputs

* Updated to sort outputs by last modified time

* Updated handler

* lint

* Updated to return jsonl response

* Removed duplicate import

* Sorting outputs by output index, simplified length check

* Moved placeholder to a separate function, ruff format.

* Updated method name

* Cleaned up for better naming

* Fixed a regression
diff --git a/jupyter_server_documents/handlers.py b/jupyter_server_documents/handlers.py
@@ -4,7 +4,6 @@
 import tornado
 
 from jupyter_server.auth.decorator import authorized
-from jupyter_server.base.handlers import APIHandler
 from tornado import web
 from tornado.escape import json_encode
 
diff --git a/jupyter_server_documents/outputs/handlers.py b/jupyter_server_documents/outputs/handlers.py
@@ -20,14 +20,20 @@ def outputs(self):
     @authorized
     async def get(self, file_id=None, cell_id=None, output_index=None):
         try:
-            output = self.outputs.get_output(file_id, cell_id, output_index)
+            if output_index:
+                output = self.outputs.get_output(file_id, cell_id, output_index)
+                content_type = "application/json"
+            else:
+                outputs = self.outputs.get_outputs(file_id, cell_id)
+                output = "\n".join(outputs)
+                content_type = "application/x-ndjson"
         except (FileNotFoundError, KeyError):
             self.set_status(404)
             self.finish({"error": "Output not found."})
         else:
             self.set_status(200)
-            self.set_header("Content-Type", "application/json")
             self.write(output)
+            self.finish(set_content_type=content_type)
 
 
 class StreamAPIHandler(APIHandler):
@@ -71,24 +77,7 @@ async def get(self, file_id=None, cell_id=None):
 _output_index_regex = r"(?P<output_index>0|[1-9]\d*)"
 
 outputs_handlers = [
-    (rf"/api/outputs/{_file_id_regex}/{_cell_id_regex}/{_output_index_regex}.output", OutputsAPIHandler),
+    (rf"/api/outputs/{_file_id_regex}/{_cell_id_regex}(?:/{_output_index_regex}.output)?", OutputsAPIHandler),
     (rf"/api/outputs/{_file_id_regex}/{_cell_id_regex}/stream", StreamAPIHandler),
 ]
 
-# def setup_handlers(web_app):
-#     """Setup the handlers for the outputs service."""
-
-#     handlers = [
-#         (rf"/api/outputs/{_file_id_regex}/{_cell_id_regex}/{_output_index_regex}.output", OutputsAPIHandler),
-#         (rf"/api/outputs/{_file_id_regex}/{_cell_id_regex}/stream", StreamAPIHandler),
-#     ]
-
-#     base_url = web_app.settings["base_url"]
-#     new_handlers = []
-#     for handler in handlers:
-#         pattern = url_path_join(base_url, handler[0])
-#         new_handler = (pattern, *handler[1:])
-#         new_handlers.append(new_handler)
-
-#     # Add the handler for all hosts
-#     web_app.add_handlers(".*$", new_handlers)
diff --git a/jupyter_server_documents/outputs/manager.py b/jupyter_server_documents/outputs/manager.py
@@ -6,17 +6,12 @@
 from pycrdt import Map
 
 from traitlets.config import LoggingConfigurable
-from traitlets import (
-    Dict,
-    Instance,
-    Int,
-    default
-)
+from traitlets import Dict, Instance, Int, default
 
 from jupyter_core.paths import jupyter_runtime_dir
 
-class OutputsManager(LoggingConfigurable):
 
+class OutputsManager(LoggingConfigurable):
     _last_output_index = Dict(default_value={})
     _stream_count = Dict(default_value={})
 
@@ -26,7 +21,7 @@ class OutputsManager(LoggingConfigurable):
     @default("outputs_path")
     def _default_outputs_path(self):
         return Path(jupyter_runtime_dir()) / "outputs"
-    
+
     def _ensure_path(self, file_id, cell_id):
         nested_dir = self.outputs_path / file_id / cell_id
         nested_dir.mkdir(parents=True, exist_ok=True)
@@ -38,16 +33,42 @@ def _build_path(self, file_id, cell_id=None, output_index=None):
         if output_index is not None:
             path = path / f"{output_index}.output"
         return path
-    
+
     def get_output(self, file_id, cell_id, output_index):
-        """Get an outputs by file_id, cell_id, and output_index."""
+        """Get an output by file_id, cell_id, and output_index."""
         path = self._build_path(file_id, cell_id, output_index)
         if not os.path.isfile(path):
             raise FileNotFoundError(f"The output file doesn't exist: {path}")
         with open(path, "r", encoding="utf-8") as f:
             output = json.loads(f.read())
         return output
 
+    def get_outputs(self, file_id, cell_id):
+        """Get all outputs by file_id, cell_id."""
+        path = self._build_path(file_id, cell_id)
+        if not os.path.isdir(path):
+            raise FileNotFoundError(f"The output dir doesn't exist: {path}")
+
+        outputs = []
+
+        output_files = [(f, int(f.stem)) for f in path.glob("*.output")]
+        output_files.sort(key=lambda x: x[1])
+        output_files = output_files[: self.stream_limit]
+        has_more_files = len(output_files) >= self.stream_limit
+
+        outputs = []
+        for file_path, _ in output_files:
+            with open(file_path, "r", encoding="utf-8") as f:
+                output = f.read()
+                outputs.append(output)
+
+        if has_more_files:
+            url = create_output_url(file_id, cell_id)
+            placeholder = create_placeholder_dict("display_data", url, full=True)
+            outputs.append(json.dumps(placeholder))
+
+        return outputs
+
     def get_stream(self, file_id, cell_id):
         "Get the stream output for a cell by file_id and cell_id."
         path = self._build_path(file_id, cell_id) / "stream"
@@ -59,7 +80,7 @@ def get_stream(self, file_id, cell_id):
 
     def write(self, file_id, cell_id, output):
         """Write a new output for file_id and cell_id.
-        
+
         Returns a placeholder output (pycrdt.Map) or None if no placeholder
         output should be written to the ydoc.
         """
@@ -77,10 +98,10 @@ def write_output(self, file_id, cell_id, output):
         data = json.dumps(output, ensure_ascii=False)
         with open(path, "w", encoding="utf-8") as f:
             f.write(data)
-        url = f"/api/outputs/{file_id}/{cell_id}/{index}.output"
+        url = create_output_url(file_id, cell_id, index)
         self.log.info(f"Wrote output: {url}")
         return create_placeholder_output(output["output_type"], url)
-
+    
     def write_stream(self, file_id, cell_id, output, placeholder) -> Map:
         # How many stream outputs have been written for this cell previously
         count = self._stream_count.get(cell_id, 0)
@@ -89,12 +110,10 @@ def write_stream(self, file_id, cell_id, output, placeholder) -> Map:
         self._ensure_path(file_id, cell_id)
         path = self._build_path(file_id, cell_id) / "stream"
         text = output["text"]
-        mode = 'a' if os.path.isfile(path) else 'w'
         with open(path, "a", encoding="utf-8") as f:
             f.write(text)
-        url = f"/api/outputs/{file_id}/{cell_id}/stream"
+        url = create_output_url(file_id, cell_id)
         self.log.info(f"Wrote stream: {url}")
-
         # Increment the count
         count = count + 1
         self._stream_count[cell_id] = count
@@ -105,12 +124,7 @@ def write_stream(self, file_id, cell_id, output, placeholder) -> Map:
             placeholder = placeholder
         elif count == self.stream_limit:
             # Return a link to the full stream output
-            placeholder = Map({
-                "output_type": "display_data",
-                "data": {
-                    'text/html': f'<a href="{url}">Click this link to see the full stream output</a>'
-                }
-            })
+            placeholder = create_placeholder_output("display_data", url, full=True)
         elif count > self.stream_limit:
             # Return None to indicate that no placeholder should be written to the ydoc
             placeholder = None
@@ -133,27 +147,71 @@ def clear(self, file_id, cell_id=None):
             pass
 
 
-def create_placeholder_output(output_type: str, url: str):
+def create_output_url(file_id: str, cell_id: str, output_index: int = None) -> str:
+        """
+        Create the URL for an output or stream.
+
+        Parameters:
+        - file_id (str): The ID of the file.
+        - cell_id (str): The ID of the cell.
+        - output_index (int, optional): The index of the output. If None, returns the stream URL.
+
+        Returns:
+        - str: The URL string for the output or stream.
+        """
+        if output_index is None:
+            return f"/api/outputs/{file_id}/{cell_id}/stream"
+        else:
+            return f"/api/outputs/{file_id}/{cell_id}/{output_index}.output"
+
+def create_placeholder_dict(output_type: str, url: str, full: bool = False):
+    """
+    Build a placeholder output dict for the given output_type and url.
+    If full is True and output_type is "display_data", returns a display_data output
+    with an HTML link to the full stream output.
+
+    Parameters:
+    - output_type (str): The type of the output.
+    - url (str): The URL associated with the output.
+    - full (bool): Whether to create a full output placeholder with a link.
+
+    Returns:
+    - dict: The placeholder output dictionary.
+
+    Raises:
+    - ValueError: If the output_type is unknown.
+    """
     metadata = dict(url=url)
+    if full and output_type == "display_data":
+        return {
+            "output_type": "display_data",
+            "data": {
+                "text/html": f'<a href="{url}">Click this link to see the full stream output</a>'
+            },
+        }
     if output_type == "stream":
-        output = Map({
-            "output_type": "stream",
-            "text": "",
-            "metadata": metadata
-        })
+        return {"output_type": "stream", "text": "", "metadata": metadata}
     elif output_type == "display_data":
-        output = Map({
-            "output_type": "display_data",
-            "metadata": metadata
-        })
+        return {"output_type": "display_data", "metadata": metadata}
     elif output_type == "execute_result":
-        output = Map({
-            "output_type": "execute_result",
-            "metadata": metadata
-        })
+        return {"output_type": "execute_result", "metadata": metadata}
     elif output_type == "error":
-        output = Map({
-            "output_type": "error",
-            "metadata": metadata
-        })
-    return output
+        return {"output_type": "error", "metadata": metadata}
+    else:
+        raise ValueError(f"Unknown output_type: {output_type}")
+
+def create_placeholder_output(output_type: str, url: str, full: bool = False):
+    """
+    Creates a placeholder output Map for the given output_type and url.
+    If full is True and output_type is "display_data", creates a display_data output with an HTML link.
+
+    Parameters:
+    - output_type (str): The type of the output.
+    - url (str): The URL associated with the output.
+    - full (bool): Whether to create a full output placeholder with a link.
+
+    Returns:
+    - Map: The placeholder output `pycrdt.Map`.
+    """
+    output_dict = create_placeholder_dict(output_type, url, full=full)
+    return Map(output_dict)
diff --git a/src/handler.ts b/src/handler.ts
@@ -28,11 +28,20 @@ export async function requestAPI<T>(
     throw new ServerConnection.NetworkError(error as any);
   }
 
-  let data: any = await response.text();
+  const contentType = response.headers.get('Content-Type') || '';
+  let data: any;
 
-  if (data.length > 0) {
+  // Read response text
+  const responseText = await response.text();
+
+  if (contentType.includes('application/x-ndjson')) {
+    data = responseText
+      .trim()
+      .split('\n')
+      .map(line => JSON.parse(line));
+  } else if (responseText.length > 0) {
     try {
-      data = JSON.parse(data);
+      data = JSON.parse(responseText);
     } catch (error) {
       console.log('Not a JSON response body.', response);
     }
diff --git a/src/notebook-factory/notebook-factory.ts b/src/notebook-factory/notebook-factory.ts
@@ -110,38 +110,34 @@ const DIRTY_CLASS = 'jp-mod-dirty';
 class RtcOutputAreaModel extends OutputAreaModel implements IOutputAreaModel {
   constructor(options: IOutputAreaModel.IOptions = {}) {
     super({ ...options, values: [] }); // Don't pass values to OutputAreaModel
-    if (options.values) {
-      // Create an array to store promises for each value
-      const valuePromises = options.values.map(value => {
-        console.debug('output #${index}, value: ${value}');
-        if ((value as any).metadata?.url) {
-          return requestAPI((value as any).metadata.url)
-            .then(data => {
-              return data;
-            })
-            .catch(error => {
-              console.error('Error fetching output:', error);
-              return null;
+    if (options.values?.length) {
+      const firstValue = options.values[0];
+      if ((firstValue as any).metadata?.url) {
+        let outputsUrl = (firstValue as any).metadata.url;
+        // Drop the trailing "<index>.output" segment to get the cell-level outputs URL
+        outputsUrl = outputsUrl.substring(0, outputsUrl.lastIndexOf('/'));
+        requestAPI(outputsUrl)
+          .then(outputs => {
+            (outputs as any).forEach((output: any) => {
+              if (!(this as any).isDisposed) {
+                const index = (this as any)._add(output) - 1;
+                const item = (this as any).list.get(index);
+                item.changed.connect((this as any)._onGenericChange, this);
+              }
             });
-        } else {
-          // For values without url, return immediately with original value
-          return Promise.resolve(value);
-        }
-      });
-
-      // Wait for all promises to resolve and add values in original order
-      Promise.all(valuePromises).then(results => {
-        console.log('After fetching from outputs service:');
-        // Add each value in order
-        results.forEach((data, index) => {
-          console.debug('output #${index}, data: ${data}');
-          if (data && !(this as any).isDisposed) {
-            const index = (this as any)._add(data) - 1;
+          })
+          .catch(error => {
+            console.error('Error fetching output:', error);
+          });
+      } else {
+        options.values.forEach((output: any) => {
+          if (!(this as any).isDisposed) {
+            const index = (this as any)._add(output) - 1;
             const item = (this as any).list.get(index);
             item.changed.connect((this as any)._onGenericChange, this);
           }
         });
-      });
+      }
     }
   }
 }