diff --git a/jupyter_ai_tools/toolkits/file_system.py b/jupyter_ai_tools/toolkits/file_system.py index 4dae671..1d0a431 100644 --- a/jupyter_ai_tools/toolkits/file_system.py +++ b/jupyter_ai_tools/toolkits/file_system.py @@ -64,8 +64,10 @@ def write(file_path: str, content: str) -> str: """Writes content to a file on the local filesystem Args: - file_path: The absolute path to the file to write - content: The content to write to the file + file_path: + The absolute path to the file to write + content: + The content to write to the file Returns: A success message or error message @@ -92,10 +94,14 @@ def edit(file_path: str, old_string: str, new_string: str, replace_all: bool = F """Performs string replacement in a file Args: - file_path: The absolute path to the file to modify - old_string: The text to replace - new_string: The text to replace it with - replace_all: Replace all occurrences of old_string (default False) + file_path: + The absolute path to the file to modify + old_string: + The text to replace + new_string: + The text to replace it with + replace_all: + Replace all occurrences of old_string (default False) Returns: A success message or error message @@ -140,10 +146,14 @@ async def search_and_replace( """Performs pattern search and replace in a file. 
Args: - file_path: The absolute path to the file to modify - pattern: The pattern to search for (supports sed syntax) - replacement: The replacement text - replace_all: Replace all occurrences of pattern (default False) + file_path: + The absolute path to the file to modify + pattern: + The pattern to search for (supports sed syntax) + replacement: + The replacement text + replace_all: + Replace all occurrences of pattern (default False) Returns: A success message or error message @@ -193,8 +203,10 @@ async def glob(pattern: str, path: Optional[str] = None) -> str: """Searches for files that matches the glob pattern Args: - pattern: The glob pattern to match files against - path: The directory to search in (optional, defaults to current directory) + pattern: + The glob pattern to match files against + path: + The directory to search in (optional, defaults to current directory) Returns: A list of matching file paths sorted by modification time @@ -237,9 +249,12 @@ async def grep( """Fast content search using regular expressions Args: - pattern: The regular expression pattern to search for in file contents - include: File pattern to include in the search (e.g. "*.js", "*.{ts,tsx}") (optional) - path: The directory to search in (optional, defaults to current directory) + pattern: + The regular expression pattern to search for in file contents + include: + File pattern to include in the search (e.g. 
"*.js", "*.{ts,tsx}") (optional) + path: + The directory to search in (optional, defaults to current directory) Returns: A list of file paths with at least one match @@ -288,8 +303,10 @@ async def ls(path: str, ignore: Optional[List[str]] = None) -> str: """Lists files and directories in a given path Args: - path: The absolute path to the directory to list - ignore: List of glob patterns to ignore (optional) + path: + The absolute path to the directory to list + ignore: + List of glob patterns to ignore (optional) Returns: A list of files and directories in the given path diff --git a/jupyter_ai_tools/toolkits/notebook.py b/jupyter_ai_tools/toolkits/notebook.py index eeeb504..d30d2cb 100644 --- a/jupyter_ai_tools/toolkits/notebook.py +++ b/jupyter_ai_tools/toolkits/notebook.py @@ -7,37 +7,128 @@ from ..utils import cell_to_md, get_file_id, get_jupyter_ydoc, notebook_json_to_md -def read_notebook(file_path: str, include_outputs=False) -> str: - """Returns the complete notebook content as markdown string""" - notebook_dict = read_notebook_json(file_path) +async def read_notebook(file_path: str, include_outputs=False) -> str: + """Returns the complete notebook content as markdown string. + + This function reads a Jupyter notebook file and converts its content to a markdown string. + It uses the read_notebook_json function to read the notebook file and then converts + the resulting JSON to markdown. + + Args: + file_path: + The absolute path to the notebook file on the filesystem. + include_outputs: + If True, cell outputs will be included in the markdown. Default is False. + + Returns: + The notebook content as a markdown string. 
async def read_notebook_json(file_path: str) -> Dict[str, Any]:
    """Return the complete notebook content as a JSON dictionary.

    Args:
        file_path:
            The absolute path to the notebook file on the filesystem.

    Returns:
        The dictionary obtained by parsing the notebook file as JSON.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)


async def read_cell(file_path: str, cell_id: str, include_outputs: bool = True) -> str:
    """Return one notebook cell rendered as a markdown string.

    The cell is looked up with ``read_cell_json`` and rendered with
    ``cell_to_md``.

    Args:
        file_path:
            The absolute path to the notebook file on the filesystem.
        cell_id:
            The UUID of the cell to read.
        include_outputs:
            Accepted for interface compatibility. It is currently not
            forwarded to the markdown conversion.

    Returns:
        The cell content as a markdown string.

    Raises:
        LookupError: If no cell with the given ID is found.
    """
    # NOTE(review): include_outputs is never passed on to cell_to_md --
    # confirm whether cell_to_md supports such a flag before wiring it in.
    cell, cell_index = await read_cell_json(file_path, cell_id)
    return cell_to_md(cell, cell_index)


async def read_cell_json(file_path: str, cell_id: str) -> Tuple[Dict[str, Any], int]:
    """Return a notebook cell as a JSON dictionary together with its index.

    Args:
        file_path:
            The absolute path to the notebook file on the filesystem.
        cell_id:
            The UUID of the cell to read.

    Returns:
        A ``(cell, cell_index)`` tuple: the cell dictionary and its position
        in the notebook's ``cells`` list.

    Raises:
        LookupError: If no cell with the given ID is found.
    """
    notebook_json = await read_notebook_json(file_path)
    cell_index = _get_cell_index_from_id_json(notebook_json, cell_id)
    # Compare against None explicitly: index 0 is a valid position but falsy,
    # so a plain truthiness test would reject the first cell of the notebook.
    if cell_index is None:
        raise LookupError(f"No cell found with {cell_id=}")
    # Return both values, as the annotation promises and read_cell unpacks.
    return notebook_json["cells"][cell_index], cell_index


async def get_cell_id_from_index(file_path: str, cell_index: int) -> str:
    """Find the ID of the cell at a specific index in the notebook.

    Args:
        file_path:
            The absolute path to the notebook file on the filesystem.
        cell_index:
            The zero-based index of the cell to find the ID for.

    Returns:
        The ID stored under the cell's ``"id"`` key.

    Raises:
        ValueError: If the index is out of range (negative indexes included)
            or the cell at that index has no ID.
    """
    notebook_json = await read_notebook_json(file_path)
    cells = notebook_json["cells"]

    cell_id = None
    if 0 <= cell_index < len(cells):
        # Cell identifiers live under "id", the same key that
        # _get_cell_index_from_id_json matches against.
        cell_id = cells[cell_index].get("id")

    if cell_id is None:
        raise ValueError("No cell_id found, use `insert_cell` based on cell index")

    return cell_id
async def insert_cell(
    file_path: str,
    content: str | None = None,
    insert_index: int | None = None,
    cell_type: Literal["code", "markdown", "raw"] = "code",
):
    """Insert a new cell into the Jupyter notebook at the given cell index.

    The in-memory YDoc is used when ``get_jupyter_ydoc`` returns one for the
    file. Otherwise the notebook file is read, modified, and written back
    with nbformat.

    Args:
        file_path:
            The absolute path to the notebook file on the filesystem.
        content:
            The content of the new cell. If None, an empty cell is created.
        insert_index:
            The index to insert the cell at. If None, or at/after the current
            number of cells, the cell is added at the end of the notebook.
        cell_type:
            The type of cell to add ("code", "markdown", "raw").

    Returns:
        None
    """
    source = content or ""

    file_id = await get_file_id(file_path)
    ydoc = await get_jupyter_ydoc(file_id)

    if ydoc:
        cells_count = ydoc.cell_number
        cell = {
            "cell_type": cell_type,
            "source": source,
        }
        # The default insert_index of None must mean "append"; comparing
        # None with an int would raise TypeError.
        if insert_index is None or insert_index >= cells_count:
            ydoc.append_cell(cell)
        else:
            # create_ycell is the YNotebook factory that wraps a cell dict in
            # a shared type suitable for insertion into ycells.
            ydoc.ycells.insert(insert_index, ydoc.create_ycell(cell))
        return

    with open(file_path, "r", encoding="utf-8") as f:
        notebook = nbformat.read(f, as_version=nbformat.NO_CONVERT)

    if cell_type == "code":
        new_cell = nbformat.v4.new_code_cell(source=source)
    elif cell_type == "markdown":
        new_cell = nbformat.v4.new_markdown_cell(source=source)
    else:
        new_cell = nbformat.v4.new_raw_cell(source=source)

    # list.insert rejects None, so resolve the "append" default explicitly.
    position = len(notebook.cells) if insert_index is None else insert_index
    notebook.cells.insert(position, new_cell)

    with open(file_path, "w", encoding="utf-8") as f:
        nbformat.write(notebook, f)
async def edit_cell(file_path: str, cell_id: str, content: str) -> None:
    """Replace the source of the notebook cell with the specified ID.

    The in-memory YDoc is used when ``get_jupyter_ydoc`` returns one for the
    file. Otherwise the notebook file is read, modified, and written back
    with nbformat.

    Args:
        file_path:
            The absolute path to the notebook file on the filesystem.
        cell_id:
            The UUID of the cell to edit.
        content:
            The new content for the cell.

    Returns:
        None

    Raises:
        ValueError: If the cell_id is not found in the notebook.
    """
    file_id = await get_file_id(file_path)
    ydoc = await get_jupyter_ydoc(file_id)

    if ydoc:
        cell_index = _get_cell_index_from_id_ydoc(ydoc, cell_id)
        if cell_index is not None:
            ydoc.ycells[cell_index]["source"] = content
            # Return on success so the not-found error below is only reached
            # when the lookup failed.
            return
    else:
        with open(file_path, "r", encoding="utf-8") as f:
            notebook = nbformat.read(f, as_version=nbformat.NO_CONVERT)

        cell_index = _get_cell_index_from_id_nbformat(notebook, cell_id)
        if cell_index is not None:
            notebook.cells[cell_index].source = content

            with open(file_path, "w", encoding="utf-8") as f:
                nbformat.write(notebook, f)
            return

    raise ValueError(f"Cell with {cell_id=} not found in notebook at {file_path=}")
+ """ with open(file_path, "r", encoding="utf-8") as f: notebook = nbformat.read(f, as_version=nbformat.NO_CONVERT) @@ -191,13 +366,18 @@ def read_cell_nbformat(file_path: str, cell_id: str) -> Dict[str, Any]: raise ValueError(f"Cell with {cell_id=} not found in notebook at {file_path=}") -def summarize_notebook(file_id: str, max_length: int = 500) -> str: - """Generates a summary of the notebook content""" - raise NotImplementedError("Implementation todo") +def _get_cell_index_from_id_json(notebook_json, cell_id: str) -> int | None: + """Get cell index from cell_id by notebook json dict. + Args: + notebook_json: + The notebook as a JSON dictionary. + cell_id: + The UUID of the cell to find. -def _get_cell_index_from_id_json(notebook_json, cell_id: str) -> int | None: - """Get cell index from cell_id by notebook json dict.""" + Returns: + The index of the cell in the notebook, or None if not found. + """ for i, cell in enumerate(notebook_json["cells"]): if "id" in cell and cell["id"] == cell_id: return i @@ -205,7 +385,17 @@ def _get_cell_index_from_id_json(notebook_json, cell_id: str) -> int | None: def _get_cell_index_from_id_ydoc(ydoc, cell_id: str) -> int | None: - """Get cell index from cell_id using YDoc interface.""" + """Get cell index from cell_id using YDoc interface. + + Args: + ydoc: + The YDoc object representing the notebook. + cell_id: + The UUID of the cell to find. + + Returns: + The index of the cell in the notebook, or None if not found. + """ try: cell_index, _ = ydoc.find_cell(cell_id) return cell_index @@ -214,7 +404,17 @@ def _get_cell_index_from_id_ydoc(ydoc, cell_id: str) -> int | None: def _get_cell_index_from_id_nbformat(notebook, cell_id: str) -> int | None: - """Get cell index from cell_id using nbformat interface.""" + """Get cell index from cell_id using nbformat interface. + + Args: + notebook: + The nbformat notebook object. + cell_id: + The UUID of the cell to find. 
async def get_serverapp():
    """Return the Jupyter server application.

    Returns:
        The object returned by ``ServerApp.instance()``.
    """
    return ServerApp.instance()
async def get_jupyter_ydoc(file_id: str):
    """Return the notebook ydoc for a file ID, if a room exists for it.

    Args:
        file_id:
            The file ID used to build the room ID ``json:notebook:<file_id>``.

    Returns:
        The `YNotebook` ydoc obtained from the matching room, or None when the
        room manager has no room with that ID.
    """
    app = await get_serverapp()
    rooms = app.web_app.settings["yroom_manager"]
    room_key = f"json:notebook:{file_id}"

    # Guard clause: without a room there is no ydoc to hand back.
    if not rooms.has_room(room_key):
        return None

    room = rooms.get_room(room_key)
    return await room.get_jupyter_ydoc()