diff --git a/jupyter_ai_tools/__init__.py b/jupyter_ai_tools/__init__.py index e719d5f..16454e0 100644 --- a/jupyter_ai_tools/__init__.py +++ b/jupyter_ai_tools/__init__.py @@ -1,7 +1,3 @@ -from .extension import jupyter_server_extension_tools - -__all__ = ["jupyter_server_extension_tools"] - __version__ = "0.1.2" diff --git a/jupyter_ai_tools/extension.py b/jupyter_ai_tools/extension.py deleted file mode 100644 index f0b4483..0000000 --- a/jupyter_ai_tools/extension.py +++ /dev/null @@ -1,241 +0,0 @@ -from jupyter_server_ai_tools.models import ToolDefinition - -from . import git_tools, ynotebook_tools - - -def jupyter_server_extension_tools(): - return [ - ToolDefinition( - callable=ynotebook_tools.delete_cell, - metadata={ - "name": "delete_cell", - "description": "Remove the cell at the specified index and return its contents.", - "inputSchema": { - "type": "object", - "properties": { - "index": { - "type": "integer", - "description": "The index of the cell to delete", - } - }, - "required": ["index"], - }, - }, - ), - ToolDefinition( - callable=ynotebook_tools.add_cell, - metadata={ - "name": "add_cell", - "description": "Insert a blank cell at the specified index.", - "inputSchema": { - "type": "object", - "properties": { - "index": {"type": "integer", "description": "The index to insert at"}, - "cell_type": { - "type": "string", - "description": "The type of cell: 'code' or 'markdown' ", - "default": "code", - }, - }, - "required": ["index"], - }, - }, - ), - ToolDefinition( - callable=ynotebook_tools.write_to_cell, - metadata={ - "name": "write_to_cell", - "description": "Overwrite the source of a cell with content at the given index " - "in the notebook.", - "inputSchema": { - "type": "object", - "properties": { - "index": {"type": "integer", "description": "The index to write at"}, - "content": { - "type": "string", - "description": "The content to write into the cell, either python " - "code or markdown", - }, - }, - "required": ["index", "content"], - }, 
- }, - ), - ToolDefinition( - callable=ynotebook_tools.get_max_cell_index, - metadata={ - "name": "get_max_cell_index", - "description": "Return the highest valid cell index in the current notebook.", - "inputSchema": {"type": "object", "properties": {}}, - }, - ), - ToolDefinition( - callable=ynotebook_tools.read_cell, - metadata={ - "name": "read_cell", - "description": "Read the full content of a specific cell, including outputs, " - "source, and metadata.", - "inputSchema": { - "type": "object", - "properties": { - "index": {"type": "integer", "description": "The index of the cell to read"} - }, - "required": ["index"], - }, - }, - ), - ToolDefinition( - callable=ynotebook_tools.read_notebook, - metadata={ - "name": "read_notebook", - "description": "Return all cells in the notebook as a JSON-formatted list.", - "inputSchema": {"type": "object", "properties": {}}, - }, - ), - ToolDefinition( - callable=git_tools.git_clone, - metadata={ - "name": "git_clone", - "description": "Clone a Git repo into the specified path.", - "inputSchema": { - "type": "object", - "properties": { - "path": {"type": "string", "description": "Target path"}, - "url": {"type": "string", "description": "Repository URL"}, - }, - "required": ["path", "url"], - }, - }, - ), - ToolDefinition( - callable=git_tools.git_status, - metadata={ - "name": "git_status", - "description": "Get the current Git status in the specified path.", - "inputSchema": { - "type": "object", - "properties": { - "path": { - "type": "string", - "description": "Path to the Git repository root directory", - } - }, - "required": ["path"], - }, - }, - ), - ToolDefinition( - callable=git_tools.git_log, - metadata={ - "name": "git_log", - "description": "Get the last N Git commits.", - "inputSchema": { - "type": "object", - "properties": { - "path": { - "type": "string", - "description": "Path to the Git repository root directory", - }, - "history_count": { - "type": "integer", - "description": "Number of commits", - 
"default": 10, - }, - }, - "required": ["path"], - }, - }, - ), - ToolDefinition( - callable=git_tools.git_pull, - metadata={ - "name": "git_pull", - "description": "Pull the latest changes from the remote.", - "inputSchema": { - "type": "object", - "properties": { - "path": { - "type": "string", - "description": "Path to the Git repository root directory", - } - }, - "required": ["path"], - }, - }, - ), - ToolDefinition( - callable=git_tools.git_push, - metadata={ - "name": "git_push", - "description": "Push local changes to the remote.", - "inputSchema": { - "type": "object", - "properties": { - "path": { - "type": "string", - "description": "Path to the Git repository root directory", - }, - "branch": {"type": "string", "description": "Repo branch"}, - }, - "required": ["path"], - }, - }, - ), - ToolDefinition( - callable=git_tools.git_commit, - metadata={ - "name": "git_commit", - "description": "Commit staged changes with a message.", - "inputSchema": { - "type": "object", - "properties": { - "path": { - "type": "string", - "description": "Path to the Git repository root directory", - }, - "message": {"type": "string", "description": "Commit message"}, - }, - "required": ["path", "message"], - }, - }, - ), - ToolDefinition( - callable=git_tools.git_add, - metadata={ - "name": "git_add", - "description": "Stage files for commit. 
async def bash(command: str, timeout: Optional[int] = None) -> str:
    """Executes a command and returns the result.

    The command is tokenized with shlex and executed directly (no shell),
    so shell features such as pipes and redirection are NOT interpreted.

    Args:
        command: The command line to execute
        timeout: Optional timeout in seconds

    Returns:
        The command stdout on success, otherwise an error description
    """
    argv = shlex.split(command)
    if not argv:
        # Guard: create_subprocess_exec() with no program raises a confusing
        # TypeError; fail with a readable message instead.
        return "Error: Empty command"

    proc = await asyncio.create_subprocess_exec(
        *argv,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )

    try:
        stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout)
    except asyncio.TimeoutError:
        # BUG FIX: the original killed the process but never awaited it,
        # leaving a zombie child and an un-closed transport.
        proc.kill()
        await proc.wait()
        return f"Command timed out after {timeout} seconds"

    output = stdout.decode("utf-8")
    error = stderr.decode("utf-8")

    if proc.returncode != 0:
        if error:
            return f"Error: {error}"
        return f"Command failed with exit code {proc.returncode}"

    return output if output else "Command executed successfully with no output."


def read(file_path: str, offset: Optional[int] = None, limit: Optional[int] = None) -> str:
    """Reads a file from the local filesystem.

    Args:
        file_path: The absolute path to the file to read
        offset: The line number to start reading from (optional)
        limit: The number of lines to read (optional)

    Returns:
        The file contents with each line prefixed by its 1-based line number,
        or an "Error: ..." string on failure
    """
    try:
        if not os.path.exists(file_path):
            return f"Error: File not found: {file_path}"
        if not os.path.isfile(file_path):
            return f"Error: Not a file: {file_path}"
        return _read_file_content(file_path, offset, limit)
    except Exception as e:
        return f"Error: Failed to read file: {str(e)}"


def _read_file_content(
    file_path: str, offset: Optional[int] = None, limit: Optional[int] = None
) -> str:
    """Reads file content synchronously, returning "N→line"-numbered lines.

    (Doc fix: the original comments claimed this ran in a separate thread and
    filtered out None values — it runs inline, and readline() returns "" at
    EOF, never None.)
    """
    with open(file_path, "r", encoding="utf-8") as f:
        if offset is not None:
            # Skip lines until we reach the offset; stop early at EOF.
            for _ in range(offset):
                if not f.readline():
                    break

        if limit is not None:
            lines = [f.readline() for _ in range(limit)]
            lines = [line for line in lines if line]  # drop "" entries past EOF
        else:
            lines = f.readlines()

    # Number lines starting from offset+1 when an offset was given.
    start_line = (offset or 0) + 1
    return "".join(f"{i}→{line}" for i, line in enumerate(lines, start=start_line))


def write(file_path: str, content: str) -> str:
    """Writes content to a file on the local filesystem.

    Args:
        file_path: The absolute path to the file to write
        content: The content to write to the file

    Returns:
        A success message or an "Error: ..." string
    """
    try:
        directory = os.path.dirname(file_path)
        if directory:
            # exist_ok avoids the TOCTOU race in exists()-then-makedirs().
            os.makedirs(directory, exist_ok=True)
        _write_file_content(file_path, content)
        return f"File written successfully at: {file_path}"
    except Exception as e:
        return f"Error: Failed to write file: {str(e)}"


def _write_file_content(file_path: str, content: str) -> None:
    """Writes file content synchronously (UTF-8, truncating)."""
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(content)


def edit(file_path: str, old_string: str, new_string: str, replace_all: bool = False) -> str:
    """Performs string replacement in a file.

    Args:
        file_path: The absolute path to the file to modify
        old_string: The text to replace
        new_string: The text to replace it with
        replace_all: Replace all occurrences of old_string (default False)

    Returns:
        A success message or an "Error: ..." string
    """
    try:
        if not os.path.exists(file_path):
            return f"Error: File not found: {file_path}"
        if not os.path.isfile(file_path):
            return f"Error: Not a file: {file_path}"

        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()

        if old_string not in content:
            return "Error: String to replace not found in file"

        # count=-1 replaces every occurrence; count=1 replaces the first only.
        count = -1 if replace_all else 1
        new_content = content.replace(old_string, new_string, count)

        if new_content == content:
            # Reached only when old_string == new_string.
            return "Error: No changes made. Old string and new string might be identical"

        _write_file_content(file_path, new_content)
        return f"File {file_path} has been updated successfully"
    except Exception as e:
        return f"Error: Failed to edit file: {str(e)}"
async def glob(pattern: str, path: Optional[str] = None) -> str:
    """Searches for files matching a glob pattern.

    Args:
        pattern: The glob pattern to match files against
        path: The directory to search in (optional, defaults to the
            current working directory)

    Returns:
        Matching file paths, newest-first by modification time, joined with
        newlines; a notice when nothing matches; or an "Error: ..." string.
    """
    try:
        base = path or os.getcwd()
        if not os.path.exists(base):
            return f"Error: Path not found: {base}"

        # Run the (potentially slow) filesystem walk off the event loop.
        matches = await asyncio.to_thread(_glob_search, base, pattern)
        if not matches:
            return "No matching files found"

        # Most recently modified files first.
        ordered = sorted(matches, key=os.path.getmtime, reverse=True)
        return "\n".join(str(m) for m in ordered)
    except Exception as exc:
        return f"Error: Failed to perform glob search: {str(exc)}"


def _glob_search(search_path: str, pattern: str) -> List[str]:
    """Runs the actual glob match for ``pattern`` under ``search_path``."""
    if not search_path.endswith(os.sep) and not pattern.startswith(os.sep):
        full_pattern = os.path.join(search_path, pattern)
    else:
        full_pattern = search_path + pattern
    # recursive=True lets "**" patterns descend into subdirectories.
    return glob_module.glob(full_pattern, recursive=True)
async def ls(path: str, ignore: Optional[List[str]] = None) -> str:
    """Lists files and directories in a given path.

    Args:
        path: The absolute path to the directory to list
        ignore: List of glob patterns to ignore (optional); each pattern is
            matched against both the bare entry name and its full path

    Returns:
        Full paths of the entries, directories first then files, each group
        sorted case-insensitively; or an "Error: ..." string.
    """
    try:
        if not os.path.exists(path):
            return f"Error: Path not found: {path}"
        if not os.path.isdir(path):
            return f"Error: Not a directory: {path}"

        # Read the directory off the event loop.
        entries = await asyncio.to_thread(os.listdir, path)

        if ignore:
            def _ignored(name: str) -> bool:
                full = os.path.join(path, name)
                return any(
                    fnmatch.fnmatch(name, pat) or fnmatch.fnmatch(full, pat)
                    for pat in ignore
                )

            entries = [name for name in entries if not _ignored(name)]

        full_paths = [os.path.join(path, name) for name in entries]
        # Directories before files, then case-insensitive lexicographic order.
        full_paths.sort(key=lambda p: (0 if os.path.isdir(p) else 1, p.lower()))
        return "\n".join(full_paths)
    except Exception as exc:
        return f"Error: Failed to list directory: {str(exc)}"
def read_notebook(file_path: str, include_outputs=False) -> str:
    """Returns the complete notebook content as a markdown string."""
    notebook_dict = read_notebook_json(file_path)
    return notebook_json_to_md(notebook_dict, include_outputs=include_outputs)


def read_notebook_json(file_path: str) -> Dict[str, Any]:
    """Returns the complete notebook content as a json dict.

    BUG FIX: the original called open(file_path, "r:UTF-8"); "r:UTF-8" is
    not a valid mode string and raised ValueError on every call.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)


def read_cell(file_path: str, cell_id: str, include_outputs: bool = True) -> str:
    """Returns the notebook cell as a markdown string.

    BUG FIX: include_outputs was previously accepted but never forwarded to
    cell_to_md; it is now honored.
    """
    cell, cell_index = read_cell_json(file_path, cell_id)
    return cell_to_md(cell, cell_index, include_outputs=include_outputs)


def read_cell_json(file_path: str, cell_id: str) -> Tuple[Dict[str, Any], int]:
    """Returns the notebook cell as a (cell dict, cell index) tuple.

    Args:
        file_path: The absolute path to the notebook file.
        cell_id: The UUID of the cell to look up.

    Raises:
        LookupError: If no cell with ``cell_id`` exists in the notebook.

    BUG FIXES vs the original:
      * it returned only the cell dict although annotated Tuple[...] and
        unpacked as two values by read_cell;
      * it used a truthiness check (``if cell_index and ...``) on the index,
        so the first cell (index 0) was always reported as missing.
    """
    notebook_json = read_notebook_json(file_path)
    for index, cell in enumerate(notebook_json["cells"]):
        if cell.get("id") == cell_id:
            return cell, index
    raise LookupError(f"No cell found with {cell_id=}")
def delete_cell(file_path: str, cell_id: str):
    """Removes a notebook cell with the specified cell ID.

    Prefers the in-memory YDoc representation when the notebook is currently
    open collaboratively; otherwise falls back to reading, modifying and
    rewriting the file on disk with nbformat.

    Args:
        file_path: The absolute path to the notebook file on the filesystem.
        cell_id: The UUID of the cell to delete.

    Returns:
        None
    """
    ydoc = get_jupyter_ydoc(get_file_id(file_path))

    if ydoc:
        # Live collaborative session: mutate the shared document in place.
        idx = _get_cell_index_from_id_ydoc(ydoc, cell_id)
        if idx is not None and 0 <= idx < len(ydoc.cells):
            del ydoc.cells[idx]
        return

    # No live session: edit the notebook file directly.
    with open(file_path, "r", encoding="utf-8") as fp:
        notebook = nbformat.read(fp, as_version=nbformat.NO_CONVERT)

    idx = _get_cell_index_from_id_nbformat(notebook, cell_id)
    if idx is not None and 0 <= idx < len(notebook.cells):
        notebook.cells.pop(idx)

    # NOTE(review): like the original, the file is rewritten even when the
    # cell was not found (a harmless no-op rewrite).
    with open(file_path, "w", encoding="utf-8") as fp:
        nbformat.write(notebook, fp)
+ """ + + file_id = get_file_id(file_path) + ydoc = get_jupyter_ydoc(file_id) + + if ydoc: + cell_index = _get_cell_index_from_id_ydoc(ydoc, cell_id) + if cell_index is not None: + if content is not None: + ydoc.cells[cell_index]["source"] = content + else: + raise ValueError(f"Cell with {cell_id=} not found in notebook at {file_path=}") + else: + with open(file_path, "r", encoding="utf-8") as f: + notebook = nbformat.read(f, as_version=nbformat.NO_CONVERT) + + cell_index = _get_cell_index_from_id_nbformat(notebook, cell_id) + if cell_index is not None: + if content is not None: + notebook.cells[cell_index].source = content + + with open(file_path, "w", encoding="utf-8") as f: + nbformat.write(notebook, f) + else: + raise ValueError(f"Cell with {cell_id=} not found in notebook at {file_path=}") + + +# Note: This is currently failing with server outputs, use `read_cell` instead +def read_cell_nbformat(file_path: str, cell_id: str) -> Dict[str, Any]: + """Returns the content and metadata of a cell with the specified ID""" + + with open(file_path, "r", encoding="utf-8") as f: + notebook = nbformat.read(f, as_version=nbformat.NO_CONVERT) + + cell_index = _get_cell_index_from_id_nbformat(notebook, cell_id) + if cell_index is not None: + cell = notebook.cells[cell_index] + return cell + else: + raise ValueError(f"Cell with {cell_id=} not found in notebook at {file_path=}") + + +def summarize_notebook(file_id: str, max_length: int = 500) -> str: + """Generates a summary of the notebook content""" + raise NotImplementedError("Implementation todo") + + +def _get_cell_index_from_id_json(notebook_json, cell_id: str) -> int | None: + """Get cell index from cell_id by notebook json dict.""" + for i, cell in enumerate(notebook_json["cells"]): + if "id" in cell and cell["id"] == cell_id: + return i + return None + + +def _get_cell_index_from_id_ydoc(ydoc, cell_id: str) -> int | None: + """Get cell index from cell_id using YDoc interface.""" + try: + cell_index, _ = 
ydoc.find_cell(cell_id) + return cell_index + except (AttributeError, KeyError): + return None + + +def _get_cell_index_from_id_nbformat(notebook, cell_id: str) -> int | None: + """Get cell index from cell_id using nbformat interface.""" + for i, cell in enumerate(notebook.cells): + if hasattr(cell, "id") and cell.id == cell_id: + return i + elif hasattr(cell, "metadata") and cell.metadata.get("id") == cell_id: + return i + return None + + +def _determine_insert_index(cells_count: int, cell_index: Optional[int], add_above: bool) -> int: + if cell_index is None: + insert_index = cells_count + else: + if not (0 <= cell_index < cells_count): + cell_index = max(0, min(cell_index, cells_count)) + insert_index = cell_index if add_above else cell_index + 1 + return insert_index + + +toolkit = Toolkit( + name="notebook_toolkit", + description="Tools for reading and manipulating Jupyter notebooks.", +) +toolkit.add_tool(Tool(callable=read_notebook, read=True)) +toolkit.add_tool(Tool(callable=read_cell, read=True)) +toolkit.add_tool(Tool(callable=add_cell, write=True)) +toolkit.add_tool(Tool(callable=delete_cell, delete=True)) +toolkit.add_tool(Tool(callable=edit_cell, read=True, write=True)) diff --git a/jupyter_ai_tools/utils.py b/jupyter_ai_tools/utils.py new file mode 100644 index 0000000..fbd058d --- /dev/null +++ b/jupyter_ai_tools/utils.py @@ -0,0 +1,201 @@ +from jupyter_server.base.call_context import CallContext + + +def get_serverapp(): + """Returns the server app from the request context""" + handler = CallContext.get(CallContext.JUPYTER_HANDLER) + serverapp = handler.serverapp + return serverapp + + +def get_jupyter_ydoc(file_id: str): + """Returns the notebook ydoc + + Args: + file_id: The file ID for the document + + Returns: + `YNotebook` ydoc for the notebook + """ + serverapp = get_serverapp() + yroom_manager = serverapp.web_app.settings["yroom_manager"] + room_id = f"json:notebook:{file_id}" + if yroom_manager.has_room(room_id): + yroom = 
def get_file_id(file_path: str) -> str:
    """Returns the file_id for the document at ``file_path``.

    Looks the path up in the server's file_id_manager.
    """
    serverapp = get_serverapp()
    manager = serverapp.web_app.settings["file_id_manager"]
    return manager.get_id(file_path)


def notebook_json_to_md(notebook_json: dict, include_outputs: bool = True) -> str:
    """Converts a notebook json dict to a markdown string.

    Layout: the notebook metadata as a fenced YAML block, followed by one
    "### Cell i" section per cell (see cell_to_md), all separated by blank
    lines.

    Args:
        notebook_json: The notebook JSON dictionary
        include_outputs: Whether to include cell outputs. Default is True.

    Returns:
        Markdown string representation of the notebook
    """
    sections = [metadata_to_md(notebook_json.get("metadata", {}))]
    sections.extend(
        cell_to_md(cell, index=i, include_outputs=include_outputs)
        for i, cell in enumerate(notebook_json.get("cells", []))
    )
    return "\n\n".join(sections)


def metadata_to_md(metadata_json: dict) -> str:
    """Converts notebook or cell metadata to a fenced YAML markdown block."""
    import yaml  # type: ignore[import-untyped]

    return "```yaml\n" + yaml.dump(metadata_json, default_flow_style=False) + "```"


def cell_to_md(cell_json: dict, index: int = 0, include_outputs: bool = True) -> str:
    """Converts a single notebook cell to a markdown section.

    Args:
        cell_json: The cell JSON dictionary
        index: Cell index number used in the heading
        include_outputs: Whether to include cell outputs

    Returns:
        Markdown string representation of the cell
    """
    # Synthesized metadata first (None values dropped), then the cell's own
    # metadata entries, which may overwrite the synthesized keys.
    meta = {
        key: value
        for key, value in (
            ("type", cell_json.get("cell_type")),
            ("execution_count", cell_json.get("execution_count")),
        )
        if value is not None
    }
    meta.update(cell_json.get("metadata", {}))

    source = "".join(cell_json.get("source", []))
    fence_lang = "python" if cell_json.get("cell_type") == "code" else ""

    parts = [
        f"### Cell {index}",
        "#### Metadata",
        metadata_to_md(meta),
        "#### Source",
        f"```{fence_lang}\n{source}```",
    ]

    if include_outputs and cell_json.get("cell_type") == "code" and cell_json.get("outputs"):
        parts.append("#### Output")
        parts.append(format_outputs(cell_json["outputs"]))

    return "\n\n".join(parts)
+ + Args: + outputs: List of cell output dictionaries + + Returns: + Formatted markdown string of the outputs + """ + result = [] + + for output in outputs: + output_type = output.get("output_type") + + if output_type == "stream": + text = "".join(output.get("text", [])) + result.append(f"```\n{text}```") + + elif output_type == "execute_result" or output_type == "display_data": + data = output.get("data", {}) + + # Handle text/plain output + if "text/plain" in data: + text = "".join(data["text/plain"]) + result.append(f"```\n{text}```") + + # TODO: Add other mime types + + elif output_type == "error": + traceback = "\n".join(output.get("traceback", [])) + result.append(f"```\n{traceback}```") + + return "\n\n".join(result) diff --git a/jupyter_ai_tools/ynotebook_tools.py b/jupyter_ai_tools/ynotebook_tools.py deleted file mode 100644 index 4eb52d9..0000000 --- a/jupyter_ai_tools/ynotebook_tools.py +++ /dev/null @@ -1,176 +0,0 @@ -import asyncio -import difflib -import json -from typing import Any, Dict - -from jupyter_ydoc import YNotebook - - -# Delete a cell -async def delete_cell(ynotebook: YNotebook, index: int) -> str: - """ - Delete the cell at the specified index and return its source. - - Parameters: - ynotebook (YNotebook): The notebook to modify. - index (int): The index of the cell to delete. - - Returns: - str: The source of the deleted cell, or an error message. - """ - try: - cell = ynotebook.get_cell(index) - ynotebook._ycells.pop(index) - return f"✅ Cut cell {index} :\n{cell['source']}" - except Exception as e: - return f"❌ Error cutting cell {index}: {str(e)}" - - -# Overwrite cell contents -async def write_to_cell(ynotebook: YNotebook, index: int, content: str, stream: bool = True) -> str: - """ - Overwrite the source of a notebook cell at the given index. - - Parameters: - ynotebook (YNotebook): The notebook to modify. - index (int): The index of the cell to overwrite. - content (str): The new content to write. 
async def write_to_cell(ynotebook: "YNotebook", index: int, content: str, stream: bool = True) -> str:
    """Overwrite the source of a notebook cell at the given index.

    Parameters:
        ynotebook (YNotebook): The notebook to modify.
        index (int): The index of the cell to overwrite.
        content (str): The new content to write.
        stream (bool): Whether to simulate gradual updates (default: True).

    Returns:
        str: Success or error message.
    """
    try:
        ycell = ynotebook.get_cell(index)
        old, new = ycell["source"], content

        if not stream:
            ycell["source"] = new
            ynotebook.set_cell(index, ycell)
            return f"✅ Overwrote cell {index}."

        matcher = difflib.SequenceMatcher(None, old, new)
        chars = list(old)
        pos = 0

        def _sync() -> None:
            # Push the current intermediate text into the shared document.
            ycell["source"] = "".join(chars)
            ynotebook.set_cell(index, ycell)

        # Replay the diff opcode-by-opcode, syncing after every single-char
        # mutation so collaborators see a "typing" effect.
        for tag, i1, i2, j1, j2 in matcher.get_opcodes():
            if tag == "equal":
                pos += i2 - i1
            elif tag == "delete":
                for offset in reversed(range(i2 - i1)):
                    del chars[pos + offset]
                    _sync()
                    await asyncio.sleep(0.03)
            elif tag == "insert":
                for ch in new[j1:j2]:
                    chars.insert(pos, ch)
                    pos += 1
                    _sync()
                    await asyncio.sleep(0.03)
            elif tag == "replace":
                for _ in range(i2 - i1):
                    chars.pop(pos)
                    _sync()
                    await asyncio.sleep(0.03)
                for ch in new[j1:j2]:
                    chars.insert(pos, ch)
                    pos += 1
                    _sync()
                    await asyncio.sleep(0.03)

        return f"✅ Updated cell {index}."
    except Exception as e:
        return f"❌ Error editing cell {index}: {str(e)}"


async def add_cell(ynotebook: "YNotebook", index: int, cell_type: str = "code") -> str:
    """Insert a new blank cell at the specified index.

    Parameters:
        ynotebook (YNotebook): The notebook to modify.
        index (int): The index at which to insert the cell.
        cell_type (str): The type of cell to insert (default: "code").

    Returns:
        str: Success or error message.
    """
    try:
        blank: Dict[str, Any] = {"cell_type": cell_type, "source": "", "metadata": {}}
        if cell_type == "code":
            # Code cells additionally carry outputs and an execution count.
            blank["outputs"] = []
            blank["execution_count"] = None

        ynotebook._ycells.insert(index, ynotebook.create_ycell(blank))
        return f"✅ Added {cell_type} cell at index {index}."
    except Exception as e:
        return f"❌ Error adding cell at index {index}: {str(e)}"


async def get_max_cell_index(ynotebook: "YNotebook") -> int:
    """Return the index of the last cell in the notebook.

    Parameters:
        ynotebook (YNotebook): The notebook to query.

    Returns:
        int: The highest valid cell index (-1 for an empty notebook).

    Raises:
        RuntimeError: If the cell list cannot be read.
    """
    try:
        return len(ynotebook._ycells) - 1
    except Exception as e:
        raise RuntimeError(f"❌ Error getting max cell index: {str(e)}")


async def read_cell(ynotebook: "YNotebook", index: int) -> str:
    """Return the full content of a specific notebook cell.

    Parameters:
        ynotebook (YNotebook): The notebook to read from.
        index (int): The index of the cell to read.

    Returns:
        str: JSON-formatted cell data or error message.
    """
    try:
        if not 0 <= index < len(ynotebook._ycells):
            return f"❌ Invalid cell index: {index}"
        return json.dumps(ynotebook.get_cell(index), indent=2)
    except Exception as e:
        return f"❌ Error reading cell {index}: {str(e)}"


async def read_notebook(ynotebook: "YNotebook") -> str:
    """Return the full notebook content as a JSON-formatted list of cells.

    Parameters:
        ynotebook (YNotebook): The notebook to read.

    Returns:
        str: JSON-formatted list of cells or an error message.
    """
    try:
        cells = [ynotebook.get_cell(i) for i in range(len(ynotebook._ycells))]
        return json.dumps(cells, indent=2)
    except Exception as e:
        return f"❌ Error reading notebook: {str(e)}"