From 5c8e441c445c0476673614a6f55793487de2a0c2 Mon Sep 17 00:00:00 2001 From: Piyush Jain Date: Thu, 19 Jun 2025 09:17:24 -0700 Subject: [PATCH 1/9] WIP: Updates to match toolkit changes --- jupyter_ai_tools/__init__.py | 22 ++- jupyter_ai_tools/extension.py | 259 +++------------------------------- 2 files changed, 41 insertions(+), 240 deletions(-) diff --git a/jupyter_ai_tools/__init__.py b/jupyter_ai_tools/__init__.py index e719d5f..63db365 100644 --- a/jupyter_ai_tools/__init__.py +++ b/jupyter_ai_tools/__init__.py @@ -1,6 +1,5 @@ -from .extension import jupyter_server_extension_tools - -__all__ = ["jupyter_server_extension_tools"] +from .extension import get_git_tools, get_notebook_tools +from jupyter_server_ai_tools.models import Tool, Toolkit __version__ = "0.1.2" @@ -11,3 +10,20 @@ def _jupyter_server_extension_points(): def _load_jupyter_server_extension(serverapp): serverapp.log.info("✅ jupyter_ai_tools extension loaded.") + +async def _start_jupyter_server_extension(serverapp): + registry = serverapp.extension_manager.extensions.get( + "jupyter_server_ai_tools" + ) + if registry: + registry.register_toolkit( + Toolkit( + name="notebook_toolkit", tools=get_notebook_tools() + ) + ) + + registry.register_toolkit( + Toolkit( + name="git_toolkit", tools=get_git_tools() + ) + ) \ No newline at end of file diff --git a/jupyter_ai_tools/extension.py b/jupyter_ai_tools/extension.py index f0b4483..9e795db 100644 --- a/jupyter_ai_tools/extension.py +++ b/jupyter_ai_tools/extension.py @@ -1,241 +1,26 @@ -from jupyter_server_ai_tools.models import ToolDefinition +from jupyter_server_ai_tools.models import Tool from . import git_tools, ynotebook_tools -def jupyter_server_extension_tools(): - return [ - ToolDefinition( - callable=ynotebook_tools.delete_cell, - metadata={ - "name": "delete_cell", - "description": "Remove the cell at the specified index and return its contents.", - "inputSchema": { - "type": "object", - "properties": { - "index": { - "type": "integer", - "description": "The index of the cell to delete", - } - }, - "required": ["index"], - }, - }, - ), - ToolDefinition( - callable=ynotebook_tools.add_cell, - metadata={ - "name": "add_cell", - "description": "Insert a blank cell at the specified index.", - "inputSchema": { - "type": "object", - "properties": { - "index": {"type": "integer", "description": "The index to insert at"}, - "cell_type": { - "type": "string", - "description": "The type of cell: 'code' or 'markdown' ", - "default": "code", - }, - }, - "required": ["index"], - }, - }, - ), - ToolDefinition( - callable=ynotebook_tools.write_to_cell, - metadata={ - "name": "write_to_cell", - "description": "Overwrite the source of a cell with content at the given index " - "in the notebook.", - "inputSchema": { - "type": "object", - "properties": { - "index": {"type": "integer", "description": "The index to write at"}, - "content": { - "type": "string", - "description": "The content to write into the cell, either python " - "code or markdown", - }, - }, - "required": ["index", "content"], - }, - }, - ), - ToolDefinition( - callable=ynotebook_tools.get_max_cell_index, - metadata={ - "name": "get_max_cell_index", - "description": "Return the highest valid cell index in the current notebook.", - "inputSchema": {"type": "object", "properties": {}}, - }, - ), - ToolDefinition( - callable=ynotebook_tools.read_cell, - metadata={ - "name": "read_cell", - "description": "Read the full content of a specific cell, including outputs, " - "source, and metadata.", - "inputSchema": { - 
"type": "object", - "properties": { - "index": {"type": "integer", "description": "The index of the cell to read"} - }, - "required": ["index"], - }, - }, - ), - ToolDefinition( - callable=ynotebook_tools.read_notebook, - metadata={ - "name": "read_notebook", - "description": "Return all cells in the notebook as a JSON-formatted list.", - "inputSchema": {"type": "object", "properties": {}}, - }, - ), - ToolDefinition( - callable=git_tools.git_clone, - metadata={ - "name": "git_clone", - "description": "Clone a Git repo into the specified path.", - "inputSchema": { - "type": "object", - "properties": { - "path": {"type": "string", "description": "Target path"}, - "url": {"type": "string", "description": "Repository URL"}, - }, - "required": ["path", "url"], - }, - }, - ), - ToolDefinition( - callable=git_tools.git_status, - metadata={ - "name": "git_status", - "description": "Get the current Git status in the specified path.", - "inputSchema": { - "type": "object", - "properties": { - "path": { - "type": "string", - "description": "Path to the Git repository root directory", - } - }, - "required": ["path"], - }, - }, - ), - ToolDefinition( - callable=git_tools.git_log, - metadata={ - "name": "git_log", - "description": "Get the last N Git commits.", - "inputSchema": { - "type": "object", - "properties": { - "path": { - "type": "string", - "description": "Path to the Git repository root directory", - }, - "history_count": { - "type": "integer", - "description": "Number of commits", - "default": 10, - }, - }, - "required": ["path"], - }, - }, - ), - ToolDefinition( - callable=git_tools.git_pull, - metadata={ - "name": "git_pull", - "description": "Pull the latest changes from the remote.", - "inputSchema": { - "type": "object", - "properties": { - "path": { - "type": "string", - "description": "Path to the Git repository root directory", - } - }, - "required": ["path"], - }, - }, - ), - ToolDefinition( - callable=git_tools.git_push, - metadata={ - "name": "git_push", - "description": "Push local changes to the remote.", - "inputSchema": { - "type": "object", - "properties": { - "path": { - "type": "string", - "description": "Path to the Git repository root directory", - }, - "branch": {"type": "string", "description": "Repo branch"}, - }, - "required": ["path"], - }, - }, - ), - ToolDefinition( - callable=git_tools.git_commit, - metadata={ - "name": "git_commit", - "description": "Commit staged changes with a message.", - "inputSchema": { - "type": "object", - "properties": { - "path": { - "type": "string", - "description": "Path to the Git repository root directory", - }, - "message": {"type": "string", "description": "Commit message"}, - }, - "required": ["path", "message"], - }, - }, - ), - ToolDefinition( - callable=git_tools.git_add, - metadata={ - "name": "git_add", - "description": "Stage files for commit. 
Optionally add all files.", - "inputSchema": { - "type": "object", - "properties": { - "path": { - "type": "string", - "description": "Path to the Git repository root directory", - }, - "add_all": { - "type": "boolean", - "default": True, - "description": "Stage all files", - }, - "filename": { - "type": "string", - "description": "File to add (used if add_all is false)", - }, - }, - "required": ["path"], - }, - }, - ), - ToolDefinition( - callable=git_tools.git_get_repo_root, - metadata={ - "name": "git_get_repo_root_from_notebookpath", - "description": "Given the path of a file, return the path to the Repo root" - " if any.", - "inputSchema": { - "type": "object", - "properties": {"path": {"type": "string", "description": "the path to a file"}}, - "required": ["path"], - }, - }, - ), - ] +def get_notebook_tools(): + return { + Tool(callable=ynotebook_tools.delete_cell, delete=True), + Tool(callable=ynotebook_tools.add_cell, write=True), + Tool(callable=ynotebook_tools.write_to_cell, read=True, write=True), + Tool(callable=ynotebook_tools.get_max_cell_index, read=True), + Tool(callable=ynotebook_tools.read_cell, read=True), + Tool(callable=ynotebook_tools.read_notebook, read=True) + } + +def get_git_tools(): + return { + Tool(callable=git_tools.git_clone, write=True), + Tool(callable=git_tools.git_status, read=True), + Tool(callable=git_tools.git_log, read=True), + Tool(callable=git_tools.git_pull, read=True, write=True), + Tool(callable=git_tools.git_push, read=True, write=True), + Tool(callable=git_tools.git_commit, write=True), + Tool(callable=git_tools.git_add, write=True), + Tool(callable=git_tools.git_get_repo_root, read=True) + } From 89ca3af287aadf1ce291d8f283bd13fc164a554e Mon Sep 17 00:00:00 2001 From: Piyush Jain Date: Thu, 19 Jun 2025 20:44:09 -0700 Subject: [PATCH 2/9] Fixed register tool --- jupyter_ai_tools/__init__.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/jupyter_ai_tools/__init__.py b/jupyter_ai_tools/__init__.py index 63db365..6009f68 100644 --- a/jupyter_ai_tools/__init__.py +++ b/jupyter_ai_tools/__init__.py @@ -1,5 +1,5 @@ from .extension import get_git_tools, get_notebook_tools -from jupyter_server_ai_tools.models import Tool, Toolkit +from jupyter_server_ai_tools.models import ToolSet, Toolkit __version__ = "0.1.2" @@ -12,18 +12,18 @@ def _load_jupyter_server_extension(serverapp): serverapp.log.info("✅ jupyter_ai_tools extension loaded.") async def _start_jupyter_server_extension(serverapp): - registry = serverapp.extension_manager.extensions.get( - "jupyter_server_ai_tools" - ) + registry = serverapp.web_app.settings["toolkit_registry"] if registry: + notebook_tools = ToolSet(get_notebook_tools()) registry.register_toolkit( Toolkit( - name="notebook_toolkit", tools=get_notebook_tools() + name="notebook_toolkit", tools=notebook_tools ) ) + git_tools = ToolSet(get_git_tools()) registry.register_toolkit( Toolkit( - name="git_toolkit", tools=get_git_tools() + name="git_toolkit", tools=git_tools ) ) \ No newline at end of file From a0208c35c526340dd9b03e986000df851b9d5dde Mon Sep 17 00:00:00 2001 From: Piyush Jain Date: Wed, 25 Jun 2025 15:35:29 -0700 Subject: [PATCH 3/9] WIP: Toolkits for Jupyter --- jupyter_ai_tools/__init__.py | 22 +- jupyter_ai_tools/extension.py | 26 -- jupyter_ai_tools/toolkits/__init__.py | 1 + jupyter_ai_tools/toolkits/code_execution.py | 49 +++ jupyter_ai_tools/toolkits/file_system.py | 346 ++++++++++++++++++ .../{git_tools.py => toolkits/git.py} | 16 + jupyter_ai_tools/toolkits/notebook.py | 244 
++++++++++++ jupyter_ai_tools/utils.py | 230 ++++++++++++ jupyter_ai_tools/ynotebook_tools.py | 176 --------- pyproject.toml | 6 +- 10 files changed, 891 insertions(+), 225 deletions(-) delete mode 100644 jupyter_ai_tools/extension.py create mode 100644 jupyter_ai_tools/toolkits/__init__.py create mode 100644 jupyter_ai_tools/toolkits/code_execution.py create mode 100644 jupyter_ai_tools/toolkits/file_system.py rename jupyter_ai_tools/{git_tools.py => toolkits/git.py} (89%) create mode 100644 jupyter_ai_tools/toolkits/notebook.py create mode 100644 jupyter_ai_tools/utils.py delete mode 100644 jupyter_ai_tools/ynotebook_tools.py diff --git a/jupyter_ai_tools/__init__.py b/jupyter_ai_tools/__init__.py index 6009f68..f16dffe 100644 --- a/jupyter_ai_tools/__init__.py +++ b/jupyter_ai_tools/__init__.py @@ -1,4 +1,4 @@ -from .extension import get_git_tools, get_notebook_tools +from .tools import get_git_tools, get_notebook_tools from jupyter_server_ai_tools.models import ToolSet, Toolkit __version__ = "0.1.2" @@ -7,23 +7,5 @@ def _jupyter_server_extension_points(): return [{"module": "jupyter_ai_tools"}] - def _load_jupyter_server_extension(serverapp): - serverapp.log.info("✅ jupyter_ai_tools extension loaded.") - -async def _start_jupyter_server_extension(serverapp): - registry = serverapp.web_app.settings["toolkit_registry"] - if registry: - notebook_tools = ToolSet(get_notebook_tools()) - registry.register_toolkit( - Toolkit( - name="notebook_toolkit", tools=notebook_tools - ) - ) - - git_tools = ToolSet(get_git_tools()) - registry.register_toolkit( - Toolkit( - name="git_toolkit", tools=git_tools - ) - ) \ No newline at end of file + serverapp.log.info("✅ jupyter_ai_tools extension loaded.") \ No newline at end of file diff --git a/jupyter_ai_tools/extension.py b/jupyter_ai_tools/extension.py deleted file mode 100644 index 9e795db..0000000 --- a/jupyter_ai_tools/extension.py +++ /dev/null @@ -1,26 +0,0 @@ -from jupyter_server_ai_tools.models import Tool - -from . 
import git_tools, ynotebook_tools - - -def get_notebook_tools(): - return { - Tool(callable=ynotebook_tools.delete_cell, delete=True), - Tool(callable=ynotebook_tools.add_cell, write=True), - Tool(callable=ynotebook_tools.write_to_cell, read=True, write=True), - Tool(callable=ynotebook_tools.get_max_cell_index, read=True), - Tool(callable=ynotebook_tools.read_cell, read=True), - Tool(callable=ynotebook_tools.read_notebook, read=True) - } - -def get_git_tools(): - return { - Tool(callable=git_tools.git_clone, write=True), - Tool(callable=git_tools.git_status, read=True), - Tool(callable=git_tools.git_log, read=True), - Tool(callable=git_tools.git_pull, read=True, write=True), - Tool(callable=git_tools.git_push, read=True, write=True), - Tool(callable=git_tools.git_commit, write=True), - Tool(callable=git_tools.git_add, write=True), - Tool(callable=git_tools.git_get_repo_root, read=True) - } diff --git a/jupyter_ai_tools/toolkits/__init__.py b/jupyter_ai_tools/toolkits/__init__.py new file mode 100644 index 0000000..761bfa6 --- /dev/null +++ b/jupyter_ai_tools/toolkits/__init__.py @@ -0,0 +1 @@ +"""Toolkits for Jupyter""" \ No newline at end of file diff --git a/jupyter_ai_tools/toolkits/code_execution.py b/jupyter_ai_tools/toolkits/code_execution.py new file mode 100644 index 0000000..c1381bb --- /dev/null +++ b/jupyter_ai_tools/toolkits/code_execution.py @@ -0,0 +1,49 @@ +"""Tools that provide code execution features""" + + +import asyncio +import shlex +from typing import Optional + +from jupyter_ai.tools.models import Tool, Toolkit + + +async def bash(command: str, timeout: Optional[int] = None) -> str: + """Executes a bash command and returns the result + + Args: + command: The bash command to execute + timeout: Optional timeout in seconds + + Returns: + The command output (stdout and stderr combined) + """ + + proc = await asyncio.create_subprocess_exec( + *shlex.split(command), + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + try: + stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout) + output = stdout.decode("utf-8") + error = stderr.decode("utf-8") + + if proc.returncode != 0: + if error: + return f"Error: {error}" + return f"Command failed with exit code {proc.returncode}" + + return output if output else "Command executed successfully with no output." 
+ except asyncio.TimeoutError: + proc.kill() + return f"Command timed out after {timeout} seconds" + + +toolkit = Toolkit( + name="code_execution_toolkit", + description="Tools to execute code in different environments.", +) +toolkit.add(Tool(callable=bash, execute=True)) + diff --git a/jupyter_ai_tools/toolkits/file_system.py b/jupyter_ai_tools/toolkits/file_system.py new file mode 100644 index 0000000..1e2ffc0 --- /dev/null +++ b/jupyter_ai_tools/toolkits/file_system.py @@ -0,0 +1,346 @@ +"""Tools that provide file system functionality""" + +import asyncio +import fnmatch +import glob as glob_module +import os +from typing import List, Optional + +from jupyter_ai.tools.models import Tool, Toolkit + + +async def read(file_path: str, offset: Optional[int] = None, limit: Optional[int] = None) -> str: + """Reads a file from the local filesystem + + Args: + file_path: The absolute path to the file to read + offset: The line number to start reading from (optional) + limit: The number of lines to read (optional) + + Returns: + The contents of the file, potentially with line numbers + """ + try: + if not os.path.exists(file_path): + return f"Error: File not found: {file_path}" + + if not os.path.isfile(file_path): + return f"Error: Not a file: {file_path}" + + content = await _read_file_content(file_path, offset, limit) + return content + except Exception as e: + return f"Error: Failed to read file: {str(e)}" + + +async def _read_file_content( + file_path: str, offset: Optional[int] = None, limit: Optional[int] = None +) -> str: + """Helper function to read file content in a separate thread""" + with open(file_path, "r", encoding="utf-8") as f: + if offset is not None: + # Skip lines until we reach the offset + for _ in range(offset): + line = await f.readline() + if not line: + break + + # Read the specified number of lines or all lines if limit is None + if limit is not None: + lines = [await f.readline() for _ in range(limit)] + # Filter out None values in case we hit EOF + lines = [line for line in lines if line] + else: + lines = await f.readlines() + + # Add line numbers (starting from offset+1 if offset is provided) + start_line = (offset or 0) + 1 + numbered_lines = [f"{i}→{line}" for i, line in enumerate(lines, start=start_line)] + + return "".join(numbered_lines) + + +# Question: Should this be async? 
+def write(file_path: str, content: str) -> str: + """Writes content to a file on the local filesystem + + Args: + file_path: The absolute path to the file to write + content: The content to write to the file + + Returns: + A success message or error message + """ + try: + # Ensure the directory exists + directory = os.path.dirname(file_path) + if directory and not os.path.exists(directory): + os.makedirs(directory) + + _write_file_content(file_path, content) + return f"File written successfully at: {file_path}" + except Exception as e: + return f"Error: Failed to write file: {str(e)}" + + +def _write_file_content(file_path: str, content: str) -> None: + """Helper function to write file content in a separate thread""" + with open(file_path, "w", encoding="utf-8") as f: + f.write(content) + + +async def edit(file_path: str, old_string: str, new_string: str, replace_all: bool = False) -> str: + """Performs string replacement in a file + + Args: + file_path: The absolute path to the file to modify + old_string: The text to replace + new_string: The text to replace it with + replace_all: Replace all occurrences of old_string (default False) + + Returns: + A success message or error message + """ + try: + if not os.path.exists(file_path): + return f"Error: File not found: {file_path}" + + if not os.path.isfile(file_path): + return f"Error: Not a file: {file_path}" + + # Read the file content + with open(file_path, "r", encoding="utf-8") as f: + content = f.read() + + # Check if old_string exists in the file + if old_string not in content: + return f"Error: String to replace not found in file" + + # Perform the replacement + if replace_all: + new_content = content.replace(old_string, new_string) + else: + # Replace only the first occurrence + new_content = content.replace(old_string, new_string, 1) + + # If nothing changed, old and new strings might be identical + if new_content == content: + return "Error: No changes made. Old string and new string might be identical" + + # Write the updated content back to the file + _write_file_content(file_path, new_content) + + return f"File {file_path} has been updated successfully" + except Exception as e: + return f"Error: Failed to edit file: {str(e)}" + + +async def search_and_replace( + file_path: str, pattern: str, replacement: str, replace_all: bool = False +) -> str: + """Performs pattern search and replace in a file. 
+ + Args: + file_path: The absolute path to the file to modify + pattern: The pattern to search for (supports sed syntax) + replacement: The replacement text + replace_all: Replace all occurrences of pattern (default False) + + Returns: + A success message or error message + """ + try: + if not os.path.exists(file_path): + return f"Error: File not found: {file_path}" + + if not os.path.isfile(file_path): + return f"Error: Not a file: {file_path}" + + # Build the sed command + sed_cmd = ["sed"] + + # -i option for in-place editing (macOS requires an extension) + if os.name == "posix" and "darwin" in os.uname().sysname.lower(): + sed_cmd.extend(["-i", ""]) + else: + sed_cmd.append("-i") + + # Add the search and replace expression + expression = f"s/{pattern}/{replacement}/" + if replace_all: + expression += "g" + + sed_cmd.extend([expression, file_path]) + + # Run sed command + proc = await asyncio.create_subprocess_exec( + *sed_cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) + + _, stderr = await proc.communicate() + + if proc.returncode != 0: + if stderr: + error = stderr.decode("utf-8") + return f"Error: sed command failed: {error}" + return f"Error: sed command failed with return code {proc.returncode}" + + return f"File {file_path} has been updated successfully" + except Exception as e: + return f"Error: Failed to search and edit file: {str(e)}" + + +async def glob(pattern: str, path: Optional[str] = None) -> List[str]: + """Searches for files that matches the glob pattern + + Args: + pattern: The glob pattern to match files against + path: The directory to search in (optional, defaults to current directory) + + Returns: + A list of matching file paths sorted by modification time + """ + try: + search_path = path or os.getcwd() + if not os.path.exists(search_path): + return [f"Error: Path not found: {search_path}"] + + # Use asyncio.to_thread to run glob in a separate thread + matching_files = await asyncio.to_thread(_glob_search, search_path, pattern) + + if not matching_files: + return [] + + # Sort files by modification time (most recent first) + matching_files.sort(key=lambda f: os.path.getmtime(f), reverse=True) + + return matching_files + except Exception as e: + return [f"Error: Failed to perform glob search: {str(e)}"] + + +def _glob_search(search_path: str, pattern: str) -> List[str]: + """Helper function to perform glob search in a separate thread""" + # Construct the full pattern + if not search_path.endswith(os.sep) and not pattern.startswith(os.sep): + full_pattern = os.path.join(search_path, pattern) + else: + full_pattern = search_path + pattern + + # Use glob.glob for the actual search + return glob_module.glob(full_pattern, recursive=True) + + +async def grep( + pattern: str, include: Optional[str] = None, path: Optional[str] = None +) -> List[str]: + """Fast content search using regular expressions + + Args: + pattern: The regular expression pattern to search for in file contents + include: File pattern to include in the search (e.g. 
"*.js", "*.{ts,tsx}") (optional) + path: The directory to search in (optional, defaults to current directory) + + Returns: + A list of file paths with at least one match + """ + try: + search_path = path or os.getcwd() + if not os.path.exists(search_path): + return [f"Error: Path not found: {search_path}"] + + # Prepare the command arguments for running grep + command_args = ["grep", "-l", "--include", include or "*", "-r", pattern, search_path] + + # Run grep command asynchronously + proc = await asyncio.create_subprocess_exec( + *command_args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) + + stdout, stderr = await proc.communicate() + + if ( + proc.returncode != 0 and proc.returncode != 1 + ): # 1 means no matches, which is not an error + if stderr: + error = stderr.decode("utf-8") + return [f"Error: Grep command failed: {error}"] + return [f"Error: Grep command failed with return code {proc.returncode}"] + + # Parse the output and get the list of files + matching_files = stdout.decode("utf-8").strip().split("\n") + + # Filter out empty entries + matching_files = [f for f in matching_files if f] + + if not matching_files: + return [] + + # Sort files by modification time (most recent first) + matching_files.sort(key=lambda f: os.path.getmtime(f), reverse=True) + + return matching_files + except Exception as e: + return [f"Error: Failed to perform grep search: {str(e)}"] + + +async def ls(path: str, ignore: Optional[List[str]] = None) -> List[str]: + """Lists files and directories in a given path + + Args: + path: The absolute path to the directory to list + ignore: List of glob patterns to ignore (optional) + + Returns: + A list of files and directories in the given path + """ + try: + if not os.path.exists(path): + return [f"Error: Path not found: {path}"] + + if not os.path.isdir(path): + return [f"Error: Not a directory: {path}"] + + # Get all files and directories in the given path + items = await asyncio.to_thread(os.listdir, path) + + # Apply ignore patterns if provided + if ignore: + filtered_items = [] + for item in items: + item_path = os.path.join(path, item) + should_ignore = False + + for pattern in ignore: + if fnmatch.fnmatch(item, pattern) or fnmatch.fnmatch(item_path, pattern): + should_ignore = True + break + + if not should_ignore: + filtered_items.append(item) + + items = filtered_items + + # Construct full paths + full_paths = [os.path.join(path, item) for item in items] + + # Sort by type (directories first) and then by name + full_paths.sort(key=lambda p: (0 if os.path.isdir(p) else 1, p.lower())) + + return full_paths + except Exception as e: + return [f"Error: Failed to list directory: {str(e)}"] + + +toolkit = Toolkit( + name="file_system_toolkit", + description="Tools to do search, list, read, write and edit operations on files.", +) +toolkit.add(Tool(callable=read, read=True)) +toolkit.add(Tool(callable=write, write=True)) +toolkit.add(Tool(callable=edit, read=True, write=True)) +toolkit.add(Tool(callable=search_and_replace, read=True, write=True)) +toolkit.add(Tool(callable=glob, read=True)) +toolkit.add(Tool(callable=grep, read=True)) +toolkit.add(Tool(callable=ls, read=True)) + diff --git a/jupyter_ai_tools/git_tools.py b/jupyter_ai_tools/toolkits/git.py similarity index 89% rename from jupyter_ai_tools/git_tools.py rename to jupyter_ai_tools/toolkits/git.py index a640d5e..a77ad1d 100644 --- a/jupyter_ai_tools/git_tools.py +++ b/jupyter_ai_tools/toolkits/git.py @@ -2,6 +2,7 @@ import os from jupyterlab_git.git import Git +from 
jupyter_ai.tools.models import Tool, Toolkit git = Git() @@ -162,3 +163,18 @@ async def git_get_repo_root(path: str) -> str: if res["code"] == 0 and res.get("path"): return f"📁 Repo root: {res['path']}" return f"❌ Not inside a Git repo. {res.get('message', '')}" + + +toolkit = Toolkit( + name="git_toolkit", + description="Tools for working with Git repositories.", +) +toolkit.add(Tool(callable=git_clone, execute=True)) +toolkit.add(Tool(callable=git_status, read=True)) +toolkit.add(Tool(callable=git_log, read=True)) +toolkit.add(Tool(callable=git_pull, execute=True)) +toolkit.add(Tool(callable=git_push, execute=True)) +toolkit.add(Tool(callable=git_commit, execute=True)) +toolkit.add(Tool(callable=git_add, execute=True)) +toolkit.add(Tool(callable=git_get_repo_root, read=True)) + diff --git a/jupyter_ai_tools/toolkits/notebook.py b/jupyter_ai_tools/toolkits/notebook.py new file mode 100644 index 0000000..12e6896 --- /dev/null +++ b/jupyter_ai_tools/toolkits/notebook.py @@ -0,0 +1,244 @@ +import json +from typing import Any, Dict, Literal, Tuple + +import nbformat + +from jupyter_ai.tools.models import Tool, Toolkit +from ..utils import cell_to_md, get_file_id, get_jupyter_ydoc, notebook_json_to_md + + +def read_notebook(file_path: str, include_outputs=False) -> str: + """Returns the complete notebook content as markdown string""" + notebook_dict = read_notebook_json(file_path) + notebook_md = notebook_json_to_md(notebook_dict, include_outputs=include_outputs) + return notebook_md + + +def read_notebook_json(file_path: str) -> Dict[str, Any]: + """Returns the complete notebook content and returns as json dict""" + with open(file_path, "r:UTF-8") as f: + notebook_dict = json.load(f) + return notebook_dict + + +def read_cell(file_path: str, cell_id: str, include_outputs: bool = True) -> str: + """Returns the notebook cell as markdown string""" + cell, cell_index = read_cell_json(file_path, cell_id) + cell_md = cell_to_md(cell, cell_index) + return cell_md + + +def read_cell_json(file_path: str, cell_id: str) -> Tuple[Dict[str, Any], int]: + """Returns the notebook cell as json dict and cell index""" + notebook_json = read_notebook_json(file_path) + cell_index = _get_cell_index_from_id_json(notebook_json, cell_id) + if cell_index and 0 <= cell_index < len(notebook_json["cells"]): + return notebook_json["cells"][cell_index] + raise LookupError(f"No cell found with {cell_id=}") + + +def add_cell( + file_path: str, + content: str | None = None, + cell_id: str | None = None, + add_above: bool = False, + cell_type: Literal["code", "markdown", "raw"] = "code", +): + """Adds a new cell to the Jupyter notebook above or below a specified cell. + + This function adds a new cell to a Jupyter notebook. It first attempts to use + the in-memory YDoc representation if the notebook is currently active. If the + notebook is not active, it falls back to using the filesystem to read, modify, + and write the notebook file directly. + + Args: + file_path: The absolute path to the notebook file on the filesystem. + content: The content of the new cell. If None, an empty cell is created. + cell_id: The UUID of the cell to add relative to. If None, + the cell is added at the end of the notebook. + add_above: If True, the cell is added above the specified cell. If False, + it's added below the specified cell. + cell_type: The type of cell to add ("code", "markdown", "raw"). 
+ + Returns: + None + """ + + file_id = get_file_id(file_path) + ydoc = get_jupyter_ydoc(file_id) + + if ydoc: + cells_count = ydoc.cell_number() + cell_index = _get_cell_index_from_id_ydoc(ydoc, cell_id) if cell_id else None + insert_index = _determine_insert_index(cells_count, cell_index, add_above) + ycell = ydoc.create_ycell( + { + "cell_type": cell_type, + "source": content or "", + } + ) + ydoc.cells.insert(insert_index, ycell) + else: + with open(file_path, "r", encoding="utf-8") as f: + notebook = nbformat.read(f, as_version=nbformat.NO_CONVERT) + + cells_count = len(notebook.cells) + cell_index = _get_cell_index_from_id_nbformat(notebook, cell_id) if cell_id else None + insert_index = _determine_insert_index(cells_count, cell_index, add_above) + + if cell_type == "code": + notebook.cells.insert(insert_index, nbformat.v4.new_code_cell(source=content or "")) + elif cell_type == "markdown": + notebook.cells.insert(insert_index, nbformat.v4.new_markdown_cell(source=content or "")) + else: + notebook.cells.insert(insert_index, nbformat.v4.new_raw_cell(source=content or "")) + + with open(file_path, "w", encoding="utf-8") as f: + nbformat.write(notebook, f) + + +def delete_cell(file_path: str, cell_id: str): + """Removes a notebook cell with the specified cell ID. + + This function deletes a cell from a Jupyter notebook. It first attempts to use + the in-memory YDoc representation if the notebook is currently active. If the + notebook is not active, it falls back to using the filesystem to read, modify, + and write the notebook file directly using nbformat. + + Args: + file_path: The absolute path to the notebook file on the filesystem. + cell_id: The UUID of the cell to delete. + + Returns: + None + """ + + file_id = get_file_id(file_path) + ydoc = get_jupyter_ydoc(file_id) + if ydoc: + cell_index = _get_cell_index_from_id_ydoc(ydoc, cell_id) + if cell_index is not None and 0 <= cell_index < len(ydoc.cells): + del ydoc.cells[cell_index] + else: + with open(file_path, "r", encoding="utf-8") as f: + notebook = nbformat.read(f, as_version=nbformat.NO_CONVERT) + + cell_index = _get_cell_index_from_id_nbformat(notebook, cell_id) + if cell_index is not None and 0 <= cell_index < len(notebook.cells): + notebook.cells.pop(cell_index) + + with open(file_path, "w", encoding="utf-8") as f: + nbformat.write(notebook, f) + + +def edit_cell(file_path: str, cell_id: str, content: str | None = None) -> None: + """Edits the content of a notebook cell with the specified ID + + This function modifies the content of a cell in a Jupyter notebook. It first attempts to use + the in-memory YDoc representation if the notebook is currently active. If the + notebook is not active, it falls back to using the filesystem to read, modify, + and write the notebook file directly using nbformat. + + Args: + file_path: The absolute path to the notebook file on the filesystem. + cell_id: The UUID of the cell to edit. + content: The new content for the cell. If None, the cell content remains unchanged. + + Returns: + None + + Raises: + ValueError: If the cell_id is not found in the notebook. 
+ """ + + file_id = get_file_id(file_path) + ydoc = get_jupyter_ydoc(file_id) + + if ydoc: + cell_index = _get_cell_index_from_id_ydoc(ydoc, cell_id) + if cell_index is not None: + if content is not None: + ydoc.cells[cell_index]["source"] = content + else: + raise ValueError(f"Cell with {cell_id=} not found in notebook at {file_path=}") + else: + with open(file_path, "r", encoding="utf-8") as f: + notebook = nbformat.read(f, as_version=nbformat.NO_CONVERT) + + cell_index = _get_cell_index_from_id_nbformat(notebook, cell_id) + if cell_index is not None: + if content is not None: + notebook.cells[cell_index].source = content + + with open(file_path, "w", encoding="utf-8") as f: + nbformat.write(notebook, f) + else: + raise ValueError(f"Cell with {cell_id=} not found in notebook at {file_path=}") + + +# Note: This is currently failing with server outputs, use `read_cell` instead +def read_cell_nbformat(file_path: str, cell_id: str) -> Dict[str, Any]: + """Returns the content and metadata of a cell with the specified ID""" + + with open(file_path, "r", encoding="utf-8") as f: + notebook = nbformat.read(f, as_version=nbformat.NO_CONVERT) + + cell_index = _get_cell_index_from_id_nbformat(notebook, cell_id) + if cell_index is not None: + cell = notebook.cells[cell_index] + return cell + else: + raise ValueError(f"Cell with {cell_id=} not found in notebook at {file_path=}") + + +def summarize_notebook(file_id: str, max_length: int = 500) -> str: + """Generates a summary of the notebook content""" + pass + + +def _get_cell_index_from_id_json(notebook_json, cell_id: str) -> int | None: + """Get cell index from cell_id by notebook json dict.""" + for i, cell in enumerate(notebook_json["cells"]): + if "id" in cell and cell["id"] == cell_id: + return i + return None + + +def _get_cell_index_from_id_ydoc(ydoc, cell_id: str) -> int | None: + """Get cell index from cell_id using YDoc interface.""" + try: + cell_index, _ = ydoc.find_cell(cell_id) + return cell_index + except (AttributeError, KeyError): + return None + + +def _get_cell_index_from_id_nbformat(notebook, cell_id: str) -> int | None: + """Get cell index from cell_id using nbformat interface.""" + for i, cell in enumerate(notebook.cells): + if hasattr(cell, "id") and cell.id == cell_id: + return i + elif hasattr(cell, "metadata") and cell.metadata.get("id") == cell_id: + return i + return None + + +def _determine_insert_index(cells_count: int, cell_index: int, add_above: bool) -> int: + if cell_index is None: + insert_index = cells_count + else: + if not (0 <= cell_index < cells_count): + cell_index = max(0, min(cell_index, cells_count)) + insert_index = cell_index if add_above else cell_index + 1 + return insert_index + + +toolkit = Toolkit( + name="notebook_toolkit", + description="Tools for reading and manipulating Jupyter notebooks.", +) +toolkit.add(Tool(callable=read_notebook, read=True)) +toolkit.add(Tool(callable=read_cell, read=True)) +toolkit.add(Tool(callable=add_cell, write=True)) +toolkit.add(Tool(callable=delete_cell, delete=True)) +toolkit.add(Tool(callable=edit_cell, read=True, write=True)) diff --git a/jupyter_ai_tools/utils.py b/jupyter_ai_tools/utils.py new file mode 100644 index 0000000..f4af664 --- /dev/null +++ b/jupyter_ai_tools/utils.py @@ -0,0 +1,230 @@ +from jupyter_server.base.call_context import CallContext +import nbformat + + +def get_serverapp(): + """Returns the server app from the request context""" + handler = CallContext.get(CallContext.JUPYTER_HANDLER) + serverapp = handler.serverapp + return serverapp + + 
+def get_jupyter_ydoc(file_id: str): + """Returns the notebook ydoc + + Args: + file_id: The file ID for the document + + Returns: + `YNotebook` ydoc for the notebook + """ + serverapp = get_serverapp() + yroom_manager = serverapp.web_app.settings["yroom_manager"] + room_id = f"json:notebook:{file_id}" + if yroom_manager.has_room(room_id): + yroom = yroom_manager.get_room(room_id) + notebook = yroom.get_jupyter_ydoc() + return notebook + + +def get_file_id(file_path: str) -> str: + """Returns the file_id for the document + + Args: + file_path: + absolute path to the document file + + Returns: + The file ID of the document + """ + + serverapp = get_serverapp() + file_id_manager = serverapp.web_app.settings["file_id_manager"] + file_id = file_id_manager.get_id(file_path) + + return file_id + + +def notebook_json_to_md(notebook_json: dict, include_outputs: bool = True) -> str: + """Converts a notebook json dict to markdown string using a custom format. + + Args: + notebook_json: The notebook JSON dictionary + include_outputs: Whether to include cell outputs in the markdown. Default is True. + + Returns: + Markdown string representation of the notebook + + Example: + ```markdown + ```yaml + kernelspec: + display_name: Python 3 + language: python + name: python3 + ``` + + ### Cell 0 + + #### Metadata + ```yaml + type: code + execution_count: 1 + ``` + + #### Source + ```python + print("Hello world") + ``` + + #### Output + ``` + Hello world + ``` + ``` + """ + # Extract notebook metadata + md_parts = [] + + # Add notebook metadata at the top + md_parts.append( + metadata_to_md(notebook_json.get("metadata", {})) + ) + + # Process all cells + for i, cell in enumerate(notebook_json.get("cells", [])): + md_parts.append(cell_to_md(cell, index=i, include_outputs=outputs)) + + # Join all parts with double newlines + return "\n\n".join(md_parts) + + +def metadata_to_md(metadata_json: dict) -> str: + """Converts notebook or cell metadata to markdown string in YAML format. + + Args: + metadata_json: The metadata JSON dictionary + + Returns: + Markdown string with YAML formatted metadata + """ + import yaml + + yaml_str = yaml.dump(metadata_json, default_flow_style=False) + return f"```yaml\n{yaml_str}```" + + +def cell_to_md(cell_json: dict, index: int = 0, include_outputs: bool = True) -> str: + """Converts notebook cell to markdown string. 
+ + Args: + cell_json: The cell JSON dictionary + index: Cell index number for the heading + include_outputs: Whether to include cell outputs in the markdown + + Returns: + Markdown string representation of the cell + """ + md_parts = [] + + # Add cell heading with index + md_parts.append(f"### Cell {index}") + + # Add metadata section + md_parts.append("#### Metadata") + metadata = { + "type": cell_json.get("cell_type"), + "execution_count": cell_json.get("execution_count"), + } + # Filter out None values + metadata = {k: v for k, v in metadata.items() if v is not None} + # Add any additional metadata from the cell + if "metadata" in cell_json: + for key, value in cell_json["metadata"].items(): + metadata[key] = value + + md_parts.append(metadata_to_md(metadata)) + + # Add source section + md_parts.append("#### Source") + source = "".join(cell_json.get("source", [])) + + if cell_json.get("cell_type") == "code": + # For code cells, use python code block + md_parts.append(f"```python\n{source}```") + else: + # For markdown cells, use regular code block + md_parts.append(f"```\n{source}```") + + # Add output section if available and requested + if ( + include_outputs + and cell_json.get("cell_type") == "code" + and "outputs" in cell_json + and cell_json["outputs"] + ): + md_parts.append("#### Output") + md_parts.append(format_outputs(cell_json["outputs"])) + + return "\n\n".join(md_parts) + + +def format_outputs(outputs: list) -> str: + """Formats cell outputs into markdown. + + Args: + outputs: List of cell output dictionaries + + Returns: + Formatted markdown string of the outputs + """ + result = [] + + for output in outputs: + output_type = output.get("output_type") + + if output_type == "stream": + text = "".join(output.get("text", [])) + result.append(f"```\n{text}```") + + elif output_type == "execute_result" or output_type == "display_data": + data = output.get("data", {}) + + # Handle text/plain output + if "text/plain" in data: + text = "".join(data["text/plain"]) + result.append(f"```\n{text}```") + + # TODO: Add other mime types + + elif output_type == "error": + traceback = "\n".join(output.get("traceback", [])) + result.append(f"```\n{traceback}```") + + return "\n\n".join(result) + +# Note: We might end up removing this, as nbconvert doesn't emit metadata +# and currently failing with server outputs +def nbformat_to_md(notebook: nbformat.NotebookNode, outputs: bool = False) -> str: + """Converts a notebook in nbformat to markdown string + + Args: + notebook: The notebook to convert to markdown + outputs: Whether to include cell outputs in the markdown. Default is False. + + Returns: + Markdown string representation of the notebook + """ + from nbconvert.exporters import MarkdownExporter + + # Create the markdown exporter instance + exporter = MarkdownExporter() + + # Configure exporter based on outputs flag + if not outputs: + exporter.exclude_output = True + + # Convert notebook to markdown + markdown, _ = exporter.from_notebook_node(notebook) + + return markdown diff --git a/jupyter_ai_tools/ynotebook_tools.py b/jupyter_ai_tools/ynotebook_tools.py deleted file mode 100644 index 4eb52d9..0000000 --- a/jupyter_ai_tools/ynotebook_tools.py +++ /dev/null @@ -1,176 +0,0 @@ -import asyncio -import difflib -import json -from typing import Any, Dict - -from jupyter_ydoc import YNotebook - - -# Delete a cell -async def delete_cell(ynotebook: YNotebook, index: int) -> str: - """ - Delete the cell at the specified index and return its source. 
- - Parameters: - ynotebook (YNotebook): The notebook to modify. - index (int): The index of the cell to delete. - - Returns: - str: The source of the deleted cell, or an error message. - """ - try: - cell = ynotebook.get_cell(index) - ynotebook._ycells.pop(index) - return f"✅ Cut cell {index} :\n{cell['source']}" - except Exception as e: - return f"❌ Error cutting cell {index}: {str(e)}" - - -# Overwrite cell contents -async def write_to_cell(ynotebook: YNotebook, index: int, content: str, stream: bool = True) -> str: - """ - Overwrite the source of a notebook cell at the given index. - - Parameters: - ynotebook (YNotebook): The notebook to modify. - index (int): The index of the cell to overwrite. - content (str): The new content to write. - stream (bool): Whether to simulate gradual updates (default: True). - - Returns: - str: Success or error message. - """ - try: - ycell = ynotebook.get_cell(index) - old = ycell["source"] - new = content - - if not stream: - ycell["source"] = new - ynotebook.set_cell(index, ycell) - return f"✅ Overwrote cell {index}." - - sm = difflib.SequenceMatcher(None, old, new) - result = list(old) - cursor = 0 - - for tag, i1, i2, j1, j2 in sm.get_opcodes(): - if tag == "equal": - cursor += i2 - i1 - elif tag == "delete": - for offset in reversed(range(i2 - i1)): - del result[cursor + offset] - ycell["source"] = ''.join(result) - ynotebook.set_cell(index, ycell) - await asyncio.sleep(0.03) - elif tag == "insert": - for c in new[j1:j2]: - result.insert(cursor, c) - cursor += 1 - ycell["source"] = ''.join(result) - ynotebook.set_cell(index, ycell) - await asyncio.sleep(0.03) - elif tag == "replace": - for _ in range(i2 - i1): - result.pop(cursor) - ycell["source"] = ''.join(result) - ynotebook.set_cell(index, ycell) - await asyncio.sleep(0.03) - for c in new[j1:j2]: - result.insert(cursor, c) - cursor += 1 - ycell["source"] = ''.join(result) - ynotebook.set_cell(index, ycell) - await asyncio.sleep(0.03) - - return f"✅ Updated cell {index}." - except Exception as e: - return f"❌ Error editing cell {index}: {str(e)}" - - -# Add a new cell -async def add_cell(ynotebook: YNotebook, index: int, cell_type: str = "code") -> str: - """ - Insert a new blank cell at the specified index. - - Parameters: - ynotebook (YNotebook): The notebook to modify. - index (int): The index at which to insert the cell. - cell_type (str): The type of cell to insert (default: "code"). - - Returns: - str: Success or error message. - """ - try: - new_cell: Dict[str, Any] = { - "cell_type": cell_type, - "source": "", - "metadata": {}, - } - if cell_type == "code": - new_cell["outputs"] = [] - new_cell["execution_count"] = None - - ycell = ynotebook.create_ycell(new_cell) - ynotebook._ycells.insert(index, ycell) - - return f"✅ Added {cell_type} cell at index {index}." - except Exception as e: - return f"❌ Error adding cell at index {index}: {str(e)}" - - -# Get the index of the last cell -async def get_max_cell_index(ynotebook: YNotebook) -> int: - """ - Return the index of the last cell in the notebook. - - Parameters: - ynotebook (YNotebook): The notebook to query. - - Returns: - int: The highest valid cell index. - """ - try: - return len(ynotebook._ycells) - 1 - except Exception as e: - raise RuntimeError(f"❌ Error getting max cell index: {str(e)}") - - -# Read a specific cell -async def read_cell(ynotebook: YNotebook, index: int) -> str: - """ - Return the full content of a specific notebook cell. - - Parameters: - ynotebook (YNotebook): The notebook to read from. 
- index (int): The index of the cell to read. - - Returns: - str: JSON-formatted cell data or error message. - """ - try: - if 0 <= index < len(ynotebook._ycells): - cell_data = ynotebook.get_cell(index) - return json.dumps(cell_data, indent=2) - else: - return f"❌ Invalid cell index: {index}" - except Exception as e: - return f"❌ Error reading cell {index}: {str(e)}" - - -# Read the entire notebook -async def read_notebook(ynotebook: YNotebook) -> str: - """ - Return the full notebook content as a JSON-formatted list of cells. - - Parameters: - ynotebook (YNotebook): The notebook to read. - - Returns: - str: JSON-formatted list of cells or an error message. - """ - try: - cells = [ynotebook.get_cell(i) for i in range(len(ynotebook._ycells))] - return json.dumps(cells, indent=2) - except Exception as e: - return f"❌ Error reading notebook: {str(e)}" diff --git a/pyproject.toml b/pyproject.toml index 7175ee9..273274d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name = "jupyter_ai_tools" authors = [{name = "Abigayle Mercer", email = "abigaylemercer@gmail.com"}] dynamic = ["version"] readme = "README.md" -requires-python = ">=3.8" +requires-python = ">=3.9" keywords = ["Jupyter", "Extension"] classifiers = [ "License :: OSI Approved :: BSD License", @@ -23,8 +23,8 @@ classifiers = [ dependencies = [ "jupyter_server>=1.6,<3", "jupyterlab_git", - "jupyter_ydoc", - "jupyter_server_ai_tools>=0.1.0", + "jupyter_ai>=3.0.0-beta.0", # Needs update after Jupyter AI released with tool models + "nbconvert" ] From 288c291098ed7a51c38241401ba2cf6dcc1f9007 Mon Sep 17 00:00:00 2001 From: Piyush Jain Date: Wed, 25 Jun 2025 17:23:30 -0700 Subject: [PATCH 4/9] Removed nbconvert --- jupyter_ai_tools/utils.py | 43 +++++++-------------------------------- pyproject.toml | 3 +-- 2 files changed, 8 insertions(+), 38 deletions(-) diff --git a/jupyter_ai_tools/utils.py b/jupyter_ai_tools/utils.py index f4af664..a8656cc 100644 --- a/jupyter_ai_tools/utils.py +++ b/jupyter_ai_tools/utils.py @@ -1,5 +1,4 @@ from jupyter_server.base.call_context import CallContext -import nbformat def get_serverapp(): @@ -54,7 +53,7 @@ def notebook_json_to_md(notebook_json: dict, include_outputs: bool = True) -> st Returns: Markdown string representation of the notebook - + Example: ```markdown ```yaml @@ -63,20 +62,20 @@ def notebook_json_to_md(notebook_json: dict, include_outputs: bool = True) -> st language: python name: python3 ``` - + ### Cell 0 - + #### Metadata ```yaml type: code execution_count: 1 ``` - + #### Source ```python print("Hello world") ``` - + #### Output ``` Hello world @@ -87,13 +86,11 @@ def notebook_json_to_md(notebook_json: dict, include_outputs: bool = True) -> st md_parts = [] # Add notebook metadata at the top - md_parts.append( - metadata_to_md(notebook_json.get("metadata", {})) - ) + md_parts.append(metadata_to_md(notebook_json.get("metadata", {}))) # Process all cells for i, cell in enumerate(notebook_json.get("cells", [])): - md_parts.append(cell_to_md(cell, index=i, include_outputs=outputs)) + md_parts.append(cell_to_md(cell, index=i, include_outputs=include_outputs)) # Join all parts with double newlines return "\n\n".join(md_parts) @@ -202,29 +199,3 @@ def format_outputs(outputs: list) -> str: result.append(f"```\n{traceback}```") return "\n\n".join(result) - -# Note: We might end up removing this, as nbconvert doesn't emit metadata -# and currently failing with server outputs -def nbformat_to_md(notebook: nbformat.NotebookNode, outputs: bool = False) -> str: - """Converts a notebook 
in nbformat to markdown string - - Args: - notebook: The notebook to convert to markdown - outputs: Whether to include cell outputs in the markdown. Default is False. - - Returns: - Markdown string representation of the notebook - """ - from nbconvert.exporters import MarkdownExporter - - # Create the markdown exporter instance - exporter = MarkdownExporter() - - # Configure exporter based on outputs flag - if not outputs: - exporter.exclude_output = True - - # Convert notebook to markdown - markdown, _ = exporter.from_notebook_node(notebook) - - return markdown diff --git a/pyproject.toml b/pyproject.toml index 273274d..92e95c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,8 +23,7 @@ classifiers = [ dependencies = [ "jupyter_server>=1.6,<3", "jupyterlab_git", - "jupyter_ai>=3.0.0-beta.0", # Needs update after Jupyter AI released with tool models - "nbconvert" + "jupyter_ai>=3.0.0-beta.0" # Needs update after Jupyter AI released with tool models ] From 950ed4b61f39a76697adc1d779973d3ae2a4fcc2 Mon Sep 17 00:00:00 2001 From: Piyush Jain Date: Thu, 26 Jun 2025 00:06:19 -0700 Subject: [PATCH 5/9] Working version for file_system toolkit --- jupyter_ai_tools/__init__.py | 3 -- jupyter_ai_tools/toolkits/code_execution.py | 2 +- jupyter_ai_tools/toolkits/file_system.py | 42 ++++++++++----------- jupyter_ai_tools/toolkits/git.py | 19 +++++----- jupyter_ai_tools/toolkits/notebook.py | 12 +++--- 5 files changed, 37 insertions(+), 41 deletions(-) diff --git a/jupyter_ai_tools/__init__.py b/jupyter_ai_tools/__init__.py index f16dffe..de7a26b 100644 --- a/jupyter_ai_tools/__init__.py +++ b/jupyter_ai_tools/__init__.py @@ -1,6 +1,3 @@ -from .tools import get_git_tools, get_notebook_tools -from jupyter_server_ai_tools.models import ToolSet, Toolkit - __version__ = "0.1.2" diff --git a/jupyter_ai_tools/toolkits/code_execution.py b/jupyter_ai_tools/toolkits/code_execution.py index c1381bb..ec06709 100644 --- a/jupyter_ai_tools/toolkits/code_execution.py +++ b/jupyter_ai_tools/toolkits/code_execution.py @@ -45,5 +45,5 @@ async def bash(command: str, timeout: Optional[int] = None) -> str: name="code_execution_toolkit", description="Tools to execute code in different environments.", ) -toolkit.add(Tool(callable=bash, execute=True)) +toolkit.add_tool(Tool(callable=bash, execute=True)) diff --git a/jupyter_ai_tools/toolkits/file_system.py b/jupyter_ai_tools/toolkits/file_system.py index 1e2ffc0..10718a3 100644 --- a/jupyter_ai_tools/toolkits/file_system.py +++ b/jupyter_ai_tools/toolkits/file_system.py @@ -9,7 +9,7 @@ from jupyter_ai.tools.models import Tool, Toolkit -async def read(file_path: str, offset: Optional[int] = None, limit: Optional[int] = None) -> str: +def read(file_path: str, offset: Optional[int] = None, limit: Optional[int] = None) -> str: """Reads a file from the local filesystem Args: @@ -27,13 +27,13 @@ async def read(file_path: str, offset: Optional[int] = None, limit: Optional[int if not os.path.isfile(file_path): return f"Error: Not a file: {file_path}" - content = await _read_file_content(file_path, offset, limit) + content = _read_file_content(file_path, offset, limit) return content except Exception as e: return f"Error: Failed to read file: {str(e)}" -async def _read_file_content( +def _read_file_content( file_path: str, offset: Optional[int] = None, limit: Optional[int] = None ) -> str: """Helper function to read file content in a separate thread""" @@ -41,17 +41,17 @@ async def _read_file_content( if offset is not None: # Skip lines until we reach the offset for _ in 
range(offset): - line = await f.readline() + line = f.readline() if not line: break # Read the specified number of lines or all lines if limit is None if limit is not None: - lines = [await f.readline() for _ in range(limit)] + lines = [f.readline() for _ in range(limit)] # Filter out None values in case we hit EOF lines = [line for line in lines if line] else: - lines = await f.readlines() + lines = f.readlines() # Add line numbers (starting from offset+1 if offset is provided) start_line = (offset or 0) + 1 @@ -60,7 +60,6 @@ async def _read_file_content( return "".join(numbered_lines) -# Question: Should this be async? def write(file_path: str, content: str) -> str: """Writes content to a file on the local filesystem @@ -89,7 +88,7 @@ def _write_file_content(file_path: str, content: str) -> None: f.write(content) -async def edit(file_path: str, old_string: str, new_string: str, replace_all: bool = False) -> str: +def edit(file_path: str, old_string: str, new_string: str, replace_all: bool = False) -> str: """Performs string replacement in a file Args: @@ -114,7 +113,7 @@ async def edit(file_path: str, old_string: str, new_string: str, replace_all: bo # Check if old_string exists in the file if old_string not in content: - return f"Error: String to replace not found in file" + return "Error: String to replace not found in file" # Perform the replacement if replace_all: @@ -209,12 +208,13 @@ async def glob(pattern: str, path: Optional[str] = None) -> List[str]: matching_files = await asyncio.to_thread(_glob_search, search_path, pattern) if not matching_files: - return [] + return "No matching files found" # Sort files by modification time (most recent first) matching_files.sort(key=lambda f: os.path.getmtime(f), reverse=True) - - return matching_files + matching_files = [str(f) for f in matching_files] + + return "\n".join(matching_files) except Exception as e: return [f"Error: Failed to perform glob search: {str(e)}"] @@ -284,7 +284,7 @@ async def grep( return [f"Error: Failed to perform grep search: {str(e)}"] -async def ls(path: str, ignore: Optional[List[str]] = None) -> List[str]: +async def ls(path: str, ignore: Optional[List[str]] = None) -> str: """Lists files and directories in a given path Args: @@ -327,7 +327,7 @@ async def ls(path: str, ignore: Optional[List[str]] = None) -> List[str]: # Sort by type (directories first) and then by name full_paths.sort(key=lambda p: (0 if os.path.isdir(p) else 1, p.lower())) - return full_paths + return "\n".join(full_paths) except Exception as e: return [f"Error: Failed to list directory: {str(e)}"] @@ -336,11 +336,11 @@ async def ls(path: str, ignore: Optional[List[str]] = None) -> List[str]: name="file_system_toolkit", description="Tools to do search, list, read, write and edit operations on files.", ) -toolkit.add(Tool(callable=read, read=True)) -toolkit.add(Tool(callable=write, write=True)) -toolkit.add(Tool(callable=edit, read=True, write=True)) -toolkit.add(Tool(callable=search_and_replace, read=True, write=True)) -toolkit.add(Tool(callable=glob, read=True)) -toolkit.add(Tool(callable=grep, read=True)) -toolkit.add(Tool(callable=ls, read=True)) +toolkit.add_tool(Tool(callable=read, read=True)) +toolkit.add_tool(Tool(callable=edit, read=True, write=True)) +toolkit.add_tool(Tool(callable=write, write=True)) +toolkit.add_tool(Tool(callable=search_and_replace, read=True, write=True)) +toolkit.add_tool(Tool(callable=glob, read=True)) +toolkit.add_tool(Tool(callable=grep, read=True)) +toolkit.add_tool(Tool(callable=ls, read=True)) diff --git 
a/jupyter_ai_tools/toolkits/git.py b/jupyter_ai_tools/toolkits/git.py index a77ad1d..6698203 100644 --- a/jupyter_ai_tools/toolkits/git.py +++ b/jupyter_ai_tools/toolkits/git.py @@ -1,8 +1,8 @@ import json import os -from jupyterlab_git.git import Git from jupyter_ai.tools.models import Tool, Toolkit +from jupyterlab_git.git import Git git = Git() @@ -169,12 +169,11 @@ async def git_get_repo_root(path: str) -> str: name="git_toolkit", description="Tools for working with Git repositories.", ) -toolkit.add(Tool(callable=git_clone, execute=True)) -toolkit.add(Tool(callable=git_status, read=True)) -toolkit.add(Tool(callable=git_log, read=True)) -toolkit.add(Tool(callable=git_pull, execute=True)) -toolkit.add(Tool(callable=git_push, execute=True)) -toolkit.add(Tool(callable=git_commit, execute=True)) -toolkit.add(Tool(callable=git_add, execute=True)) -toolkit.add(Tool(callable=git_get_repo_root, read=True)) - +toolkit.add_tool(Tool(callable=git_clone, execute=True)) +toolkit.add_tool(Tool(callable=git_status, read=True)) +toolkit.add_tool(Tool(callable=git_log, read=True)) +toolkit.add_tool(Tool(callable=git_pull, execute=True)) +toolkit.add_tool(Tool(callable=git_push, execute=True)) +toolkit.add_tool(Tool(callable=git_commit, execute=True)) +toolkit.add_tool(Tool(callable=git_add, execute=True)) +toolkit.add_tool(Tool(callable=git_get_repo_root, read=True)) diff --git a/jupyter_ai_tools/toolkits/notebook.py b/jupyter_ai_tools/toolkits/notebook.py index 12e6896..513afb6 100644 --- a/jupyter_ai_tools/toolkits/notebook.py +++ b/jupyter_ai_tools/toolkits/notebook.py @@ -2,8 +2,8 @@ from typing import Any, Dict, Literal, Tuple import nbformat - from jupyter_ai.tools.models import Tool, Toolkit + from ..utils import cell_to_md, get_file_id, get_jupyter_ydoc, notebook_json_to_md @@ -237,8 +237,8 @@ def _determine_insert_index(cells_count: int, cell_index: int, add_above: bool) name="notebook_toolkit", description="Tools for reading and manipulating Jupyter notebooks.", ) -toolkit.add(Tool(callable=read_notebook, read=True)) -toolkit.add(Tool(callable=read_cell, read=True)) -toolkit.add(Tool(callable=add_cell, write=True)) -toolkit.add(Tool(callable=delete_cell, delete=True)) -toolkit.add(Tool(callable=edit_cell, read=True, write=True)) +toolkit.add_tool(Tool(callable=read_notebook, read=True)) +toolkit.add_tool(Tool(callable=read_cell, read=True)) +toolkit.add_tool(Tool(callable=add_cell, write=True)) +toolkit.add_tool(Tool(callable=delete_cell, delete=True)) +toolkit.add_tool(Tool(callable=edit_cell, read=True, write=True)) From a4659b4b415e7617d637a25161a76ec53d95251c Mon Sep 17 00:00:00 2001 From: Piyush Jain Date: Thu, 26 Jun 2025 11:31:58 -0700 Subject: [PATCH 6/9] Updated jupyter ai version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 92e95c4..8d32bc0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ classifiers = [ dependencies = [ "jupyter_server>=1.6,<3", "jupyterlab_git", - "jupyter_ai>=3.0.0-beta.0" # Needs update after Jupyter AI released with tool models + "jupyter_ai>=3.0.0-beta.1" ] From c8dd607127df23c77ae52c379c518ea161bd2510 Mon Sep 17 00:00:00 2001 From: Piyush Jain Date: Thu, 26 Jun 2025 12:45:29 -0700 Subject: [PATCH 7/9] Fixed lint errors --- jupyter_ai_tools/toolkits/file_system.py | 8 ++++---- jupyter_ai_tools/toolkits/notebook.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/jupyter_ai_tools/toolkits/file_system.py b/jupyter_ai_tools/toolkits/file_system.py 
diff --git a/jupyter_ai_tools/toolkits/file_system.py b/jupyter_ai_tools/toolkits/file_system.py
index 10718a3..aaf573c 100644
--- a/jupyter_ai_tools/toolkits/file_system.py
+++ b/jupyter_ai_tools/toolkits/file_system.py
@@ -189,7 +189,7 @@ async def search_and_replace(
         return f"Error: Failed to search and edit file: {str(e)}"
 
 
-async def glob(pattern: str, path: Optional[str] = None) -> List[str]:
+async def glob(pattern: str, path: Optional[str] = None) -> str:
     """Searches for files that matches the glob pattern
 
     Args:
@@ -296,10 +296,10 @@ async def ls(path: str, ignore: Optional[List[str]] = None) -> str:
     """
     try:
         if not os.path.exists(path):
-            return [f"Error: Path not found: {path}"]
+            return f"Error: Path not found: {path}"
 
         if not os.path.isdir(path):
-            return [f"Error: Not a directory: {path}"]
+            return f"Error: Not a directory: {path}"
 
         # Get all files and directories in the given path
         items = await asyncio.to_thread(os.listdir, path)
@@ -329,7 +329,7 @@ async def ls(path: str, ignore: Optional[List[str]] = None) -> str:
         return "\n".join(full_paths)
 
     except Exception as e:
-        return [f"Error: Failed to list directory: {str(e)}"]
+        return f"Error: Failed to list directory: {str(e)}"
 
 
 toolkit = Toolkit(
diff --git a/jupyter_ai_tools/toolkits/notebook.py b/jupyter_ai_tools/toolkits/notebook.py
index 513afb6..9c11846 100644
--- a/jupyter_ai_tools/toolkits/notebook.py
+++ b/jupyter_ai_tools/toolkits/notebook.py
@@ -223,7 +223,7 @@ def _get_cell_index_from_id_nbformat(notebook, cell_id: str) -> int | None:
     return None
 
 
-def _determine_insert_index(cells_count: int, cell_index: int, add_above: bool) -> int:
+def _determine_insert_index(cells_count: int, cell_index: str, add_above: bool) -> int:
     if cell_index is None:
         insert_index = cells_count
     else:

From 54c096f5036248d9857d8b485a97a75483e16af3 Mon Sep 17 00:00:00 2001
From: Piyush Jain
Date: Thu, 26 Jun 2025 12:57:13 -0700
Subject: [PATCH 8/9] lint and fmt

---
 jupyter_ai_tools/toolkits/file_system.py | 4 ++--
 jupyter_ai_tools/toolkits/notebook.py    | 6 +++---
 jupyter_ai_tools/utils.py                | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/jupyter_ai_tools/toolkits/file_system.py b/jupyter_ai_tools/toolkits/file_system.py
index aaf573c..1a00bf3 100644
--- a/jupyter_ai_tools/toolkits/file_system.py
+++ b/jupyter_ai_tools/toolkits/file_system.py
@@ -202,7 +202,7 @@ async def glob(pattern: str, path: Optional[str] = None) -> str:
     try:
         search_path = path or os.getcwd()
         if not os.path.exists(search_path):
-            return [f"Error: Path not found: {search_path}"]
+            return f"Error: Path not found: {search_path}"
 
         # Use asyncio.to_thread to run glob in a separate thread
         matching_files = await asyncio.to_thread(_glob_search, search_path, pattern)
@@ -216,7 +216,7 @@ async def glob(pattern: str, path: Optional[str] = None) -> str:
         return "\n".join(matching_files)
 
     except Exception as e:
-        return [f"Error: Failed to perform glob search: {str(e)}"]
+        return f"Error: Failed to perform glob search: {str(e)}"
 
 
 def _glob_search(search_path: str, pattern: str) -> List[str]:
diff --git a/jupyter_ai_tools/toolkits/notebook.py b/jupyter_ai_tools/toolkits/notebook.py
index 9c11846..eeeb504 100644
--- a/jupyter_ai_tools/toolkits/notebook.py
+++ b/jupyter_ai_tools/toolkits/notebook.py
@@ -1,5 +1,5 @@
 import json
-from typing import Any, Dict, Literal, Tuple
+from typing import Any, Dict, Literal, Optional, Tuple
 
 import nbformat
 from jupyter_ai.tools.models import Tool, Toolkit
@@ -193,7 +193,7 @@ def read_cell_nbformat(file_path: str, cell_id: str) -> Dict[str, Any]:
 
 def summarize_notebook(file_id: str, max_length: int = 500) -> str:
     """Generates a summary of the notebook content"""
-    pass
+    raise NotImplementedError("Implementation todo")
 
 
 def _get_cell_index_from_id_json(notebook_json, cell_id: str) -> int | None:
@@ -223,7 +223,7 @@ def _get_cell_index_from_id_nbformat(notebook, cell_id: str) -> int | None:
     return None
 
 
-def _determine_insert_index(cells_count: int, cell_index: str, add_above: bool) -> int:
+def _determine_insert_index(cells_count: int, cell_index: Optional[int], add_above: bool) -> int:
     if cell_index is None:
         insert_index = cells_count
     else:
diff --git a/jupyter_ai_tools/utils.py b/jupyter_ai_tools/utils.py
index a8656cc..fbd058d 100644
--- a/jupyter_ai_tools/utils.py
+++ b/jupyter_ai_tools/utils.py
@@ -105,7 +105,7 @@ def metadata_to_md(metadata_json: dict) -> str:
     Returns:
         Markdown string with YAML formatted metadata
     """
-    import yaml
+    import yaml  # type: ignore[import-untyped]
 
     yaml_str = yaml.dump(metadata_json, default_flow_style=False)
     return f"```yaml\n{yaml_str}```"

From db1ab46311bba19c59d7e9198d291c233eaac89f Mon Sep 17 00:00:00 2001
From: Piyush Jain
Date: Thu, 26 Jun 2025 13:01:40 -0700
Subject: [PATCH 9/9] fmt

---
 jupyter_ai_tools/__init__.py                |  3 ++-
 jupyter_ai_tools/toolkits/__init__.py       |  2 +-
 jupyter_ai_tools/toolkits/code_execution.py | 16 +++++++---------
 jupyter_ai_tools/toolkits/file_system.py    |  3 +--
 4 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/jupyter_ai_tools/__init__.py b/jupyter_ai_tools/__init__.py
index de7a26b..16454e0 100644
--- a/jupyter_ai_tools/__init__.py
+++ b/jupyter_ai_tools/__init__.py
@@ -4,5 +4,6 @@ def _jupyter_server_extension_points():
     return [{"module": "jupyter_ai_tools"}]
 
+
 def _load_jupyter_server_extension(serverapp):
-    serverapp.log.info("✅ jupyter_ai_tools extension loaded.")
\ No newline at end of file
+    serverapp.log.info("✅ jupyter_ai_tools extension loaded.")
diff --git a/jupyter_ai_tools/toolkits/__init__.py b/jupyter_ai_tools/toolkits/__init__.py
index 761bfa6..5a8b40e 100644
--- a/jupyter_ai_tools/toolkits/__init__.py
+++ b/jupyter_ai_tools/toolkits/__init__.py
@@ -1 +1 @@
-"""Toolkits for Jupyter"""
\ No newline at end of file
+"""Toolkits for Jupyter"""
diff --git a/jupyter_ai_tools/toolkits/code_execution.py b/jupyter_ai_tools/toolkits/code_execution.py
index ec06709..36023f6 100644
--- a/jupyter_ai_tools/toolkits/code_execution.py
+++ b/jupyter_ai_tools/toolkits/code_execution.py
@@ -1,6 +1,5 @@
 """Tools that provide code execution features"""
 
-
 import asyncio
 import shlex
 from typing import Optional
@@ -10,40 +9,39 @@
 
 async def bash(command: str, timeout: Optional[int] = None) -> str:
     """Executes a bash command and returns the result
-    
+
     Args:
         command: The bash command to execute
         timeout: Optional timeout in seconds
-    
+
     Returns:
         The command output (stdout and stderr combined)
     """
-    
+
     proc = await asyncio.create_subprocess_exec(
         *shlex.split(command),
         stdout=asyncio.subprocess.PIPE,
         stderr=asyncio.subprocess.PIPE,
     )
-    
+
     try:
         stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout)
         output = stdout.decode("utf-8")
         error = stderr.decode("utf-8")
-    
+
         if proc.returncode != 0:
             if error:
                 return f"Error: {error}"
             return f"Command failed with exit code {proc.returncode}"
-    
+
         return output if output else "Command executed successfully with no output."
     except asyncio.TimeoutError:
         proc.kill()
         return f"Command timed out after {timeout} seconds"
 
+
 toolkit = Toolkit(
     name="code_execution_toolkit",
     description="Tools to execute code in different environments.",
 )
 toolkit.add_tool(Tool(callable=bash, execute=True))
-
diff --git a/jupyter_ai_tools/toolkits/file_system.py b/jupyter_ai_tools/toolkits/file_system.py
index 1a00bf3..4dae671 100644
--- a/jupyter_ai_tools/toolkits/file_system.py
+++ b/jupyter_ai_tools/toolkits/file_system.py
@@ -213,7 +213,7 @@ async def glob(pattern: str, path: Optional[str] = None) -> str:
         # Sort files by modification time (most recent first)
         matching_files.sort(key=lambda f: os.path.getmtime(f), reverse=True)
         matching_files = [str(f) for f in matching_files]
-        
+
         return "\n".join(matching_files)
 
     except Exception as e:
         return f"Error: Failed to perform glob search: {str(e)}"
@@ -343,4 +343,3 @@ async def ls(path: str, ignore: Optional[List[str]] = None) -> str:
 toolkit.add_tool(Tool(callable=glob, read=True))
 toolkit.add_tool(Tool(callable=grep, read=True))
 toolkit.add_tool(Tool(callable=ls, read=True))
-
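For reference, a usage sketch of the bash tool registered by the code_execution toolkit in the last patch; the import path follows the file location in the diff, and the example assumes echo and sleep are available on the host:

import asyncio

from jupyter_ai_tools.toolkits.code_execution import bash


async def main():
    # Successful command: stdout comes back as a string.
    print(await bash("echo hello"))
    # Slow command with a short timeout: the tool kills the process and
    # returns a timeout message instead of raising.
    print(await bash("sleep 5", timeout=1))


asyncio.run(main())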