add temporary default toolkit

dlqqq · dlqqq · commit b2413a2b8af4 · 2025-09-08T10:48:03.000-07:00
diff --git a/packages/jupyter-ai/jupyter_ai/personas/base_persona.py b/packages/jupyter-ai/jupyter_ai/personas/base_persona.py
@@ -20,21 +20,13 @@
 from ..litellm_utils import ToolCallList, StreamResult, ResolvedToolCall
 
 # Import toolkits
-from jupyter_ai_tools.toolkits.file_system import toolkit as fs_toolkit
-from jupyter_ai_tools.toolkits.code_execution import toolkit as codeexec_toolkit
-from jupyter_ai_tools.toolkits.git import toolkit as git_toolkit
+from ..tools.default_toolkit import DEFAULT_TOOLKIT
 
 if TYPE_CHECKING:
     from collections.abc import AsyncIterator
     from .persona_manager import PersonaManager
     from ..tools import Toolkit
 
-DEFAULT_TOOLKITS: dict[str, Toolkit] = {
-    "fs": fs_toolkit,
-    "codeexec": codeexec_toolkit,
-    "git": git_toolkit,
-}
-
 class PersonaDefaults(BaseModel):
     """
     Data structure that represents the default settings of a persona. Each persona
@@ -512,27 +504,19 @@ def get_tools(self, model_id: str) -> list[dict]:
 
         tool_descriptions = []
 
-        # Get all tools from `jupyter_ai_tools` and store their object descriptions
-        for toolkit_name, toolkit in DEFAULT_TOOLKITS.items():
-            # TODO: make these tool permissions configurable.
-            for tool in toolkit.get_tools():
-                # Here, we are using a util function from LiteLLM to coerce
-                # each `Tool` struct into a tool description dictionary expected
-                # by LiteLLM.
-                desc = {
-                    "type": "function",
-                    "function": function_to_dict(tool.callable),
-                }
-
-                # Prepend the toolkit name to each function name, hopefully
-                # ensuring every tool function has a unique name.
-                # e.g. 'git_add' => 'git__git_add'
-                #
-                # TODO: Actually ensure this instead of hoping.
-                desc['function']['name'] = f"{toolkit_name}__{desc['function']['name']}"
-                tool_descriptions.append(desc)
+        # Get all tools from the default toolkit and store their object descriptions
+        for tool in DEFAULT_TOOLKIT.get_tools():
+            # Here, we are using a util function from LiteLLM to coerce
+            # each `Tool` struct into a tool description dictionary expected
+            # by LiteLLM.
+            desc = {
+                "type": "function",
+                "function": function_to_dict(tool.callable),
+            }
+            tool_descriptions.append(desc)
         
         # Finally, return the tool descriptions
+        self.log.info(tool_descriptions)
         return tool_descriptions
     
 
@@ -549,9 +533,9 @@ async def run_tools(self, tools: list[ResolvedToolCall]) -> list[dict]:
         tool_outputs: list[dict] = []
         for tool_call in tools:
             # Get tool definition from the correct toolkit
-            toolkit_name, tool_name = tool_call.function.name.split("__")
-            assert toolkit_name in DEFAULT_TOOLKITS
-            tool_defn = DEFAULT_TOOLKITS[toolkit_name].get_tool_unsafe(tool_name)
+            # TODO: validation?
+            tool_name = tool_call.function.name
+            tool_defn = DEFAULT_TOOLKIT.get_tool_unsafe(tool_name)
 
             # Run tool and store its output
             output = tool_defn.callable(**tool_call.function.arguments)
diff --git a/packages/jupyter-ai/jupyter_ai/tools/__init__.py b/packages/jupyter-ai/jupyter_ai/tools/__init__.py
@@ -1,5 +1,6 @@
 """Tools package for Jupyter AI."""
 
 from .models import Tool, Toolkit
+from .default_toolkit import DEFAULT_TOOLKIT
 
-__all__ = ["Tool", "Toolkit"]
+__all__ = ["Tool", "Toolkit", "DEFAULT_TOOLKIT"]
diff --git a/packages/jupyter-ai/jupyter_ai/tools/default_toolkit.py b/packages/jupyter-ai/jupyter_ai/tools/default_toolkit.py
@@ -0,0 +1,255 @@
+from .models import Tool, Toolkit
+from jupyter_ai_tools.toolkits.code_execution import bash
+
+import pathlib
+
+
+def read(file_path: str, offset: int, limit: int) -> str:
+    """
+    Read a subset of lines from a text file.
+
+    Parameters
+    ----------
+    file_path : str
+        Absolute path of the file to read; FileNotFoundError is raised if it is not a file.
+    offset : int
+        1-based line number at which to start reading; values below 1 become 1.
+    limit : int
+        Maximum number of lines to read starting at *offset*; negatives become 0.
+        If fewer than *limit* lines remain in the file,
+        all available lines from *offset* onward are returned.
+
+    Returns
+    -------
+    str
+        The lines read, joined into one string; empty if *offset* is past the end.
+
+    Examples
+    --------
+    >>> # Suppose ``/tmp/example.txt`` contains 10 lines
+    >>> read('/tmp/example.txt', offset=3, limit=4)
+    'third line\\nfourth line\\nfifth line\\nsixth line\\n'
+    """
+    path = pathlib.Path(file_path)
+    if not path.is_file():
+        raise FileNotFoundError(f"File not found: {file_path}")
+
+    # Normalize arguments
+    offset = max(1, int(offset))
+    limit = max(0, int(limit))
+    lines: list[str] = []
+
+    with path.open(encoding='utf-8', errors='replace') as f:
+        # Skip to offset
+        line_no = 0
+        # Loop invariant: line_no is the number of lines consumed so far.
+        # After the loop exits, line_no == offset - 1, meaning the
+        # next line starts at `offset`
+        while line_no < offset - 1:
+            line = f.readline()
+            # Return early if offset exceeds number of lines in file
+            if line == "":
+                return ""
+            line_no += 1
+        
+        # Append lines until limit is reached
+        while len(lines) < limit:
+            line = f.readline()
+            if line == "":
+                break
+            lines.append(line)
+
+    return "".join(lines)
+
+
+def edit(
+    file_path: str,
+    old_string: str,
+    new_string: str,
+    replace_all: bool = False,
+) -> None:
+    """
+    Replace occurrences of a substring in a file.
+
+    Parameters
+    ----------
+    file_path : str
+        Absolute path to the file that should be edited.
+    old_string : str
+        Text that should be replaced.
+    new_string : str
+        Text that will replace *old_string*.
+    replace_all : bool, optional
+        If ``True`` all occurrences of *old_string* are replaced.
+        If ``False`` (default), only the first occurrence in the file is replaced.
+
+    Returns
+    -------
+    None
+
+    Raises
+    ------
+    FileNotFoundError
+        If *file_path* does not exist or is not a regular file.
+    ValueError
+        If *old_string* is empty (replacing an empty string is ambiguous).
+
+    Notes
+    -----
+    The write is NOT atomic: the file is read fully into memory and then
+    overwritten in place. Bytes that are not valid UTF-8 are replaced on read,
+    and the file is rewritten even when *old_string* does not occur in it.
+
+    Examples
+    --------
+    >>> # Replace only the first occurrence
+    >>> edit('/tmp/test.txt', 'foo', 'bar', replace_all=False)
+    >>> # Replace all occurrences
+    >>> edit('/tmp/test.txt', 'foo', 'bar', replace_all=True)
+    """
+    path = pathlib.Path(file_path)
+    if not path.is_file():
+        raise FileNotFoundError(f"File not found: {file_path}")
+
+    if old_string == "":
+        raise ValueError("old_string must not be empty")
+
+    # Read the entire file (undecodable bytes become replacement characters)
+    content = path.read_text(encoding="utf-8", errors="replace")
+
+    # Perform replacement; a count of 1 limits it to the first occurrence
+    if replace_all:
+        new_content = content.replace(old_string, new_string)
+    else:
+        new_content = content.replace(old_string, new_string, 1)
+
+    # Write back in place, always as UTF-8
+    path.write_text(new_content, encoding="utf-8")
+
+
+def write(file_path: str, content: str) -> None:
+    """
+    Write content to a file, creating it if it doesn't exist.
+
+    Parameters
+    ----------
+    file_path : str
+        Absolute path to the file that should be written.
+    content : str
+        Text to write to the file; it is encoded as UTF-8.
+
+    Returns
+    -------
+    None
+
+    Raises
+    ------
+    OSError
+        If the file cannot be written (e.g., permission denied, invalid path).
+
+    Notes
+    -----
+    This function will overwrite the file if it already exists.
+    The parent directory must exist; this function does not create directories.
+
+    Examples
+    --------
+    >>> write('/tmp/example.txt', 'Hello, world!')
+    >>> write('/tmp/data.json', '{"key": "value"}')
+    """
+    path = pathlib.Path(file_path)
+    
+    # write_text replaces any existing contents with `content`
+    path.write_text(content, encoding="utf-8")
+
+
+async def search_grep(pattern: str, include: str = "*") -> str:
+    r"""
+    Search for text patterns in files using ripgrep.
+
+    This function uses ripgrep (rg) to perform fast regex-based text searching
+    across files, with optional file filtering based on glob patterns.
+
+    Parameters
+    ----------
+    pattern : str
+        A regular expression pattern to search for. Ripgrep uses Rust regex
+        syntax which supports:
+        - Basic regex features: ., *, +, ?, ^, $, [], (), |
+        - Character classes: \w, \d, \s, \W, \D, \S
+        - Unicode categories: \p{L}, \p{N}, \p{P}, etc.
+        - Word boundaries: \b, \B
+        - Anchors: ^, $, \A, \z
+        - Quantifiers: {n}, {n,}, {n,m}
+        - Groups: (pattern), (?:pattern), (?P<name>pattern)
+        - Flags: (?i), (?m), (?s), (?x), (?U)
+
+        Note: Ripgrep uses Rust's regex engine, which does NOT support:
+        - Backreferences (this tool does not enable ripgrep's --pcre2 mode)
+        - Lookahead/lookbehind: (?=pattern), (?!pattern), (?<=pattern), (?<!pattern)
+        - Some other advanced PCRE features
+    include : str, optional
+        A glob pattern to filter which files to search. Defaults to "*" (all files).
+        Glob patterns follow gitignore syntax:
+        - * matches any sequence of characters except /
+        - ? matches any single character except /
+        - ** matches any sequence of characters including /
+        - [abc] matches any character in the set
+        - {a,b} matches either "a" or "b"
+        - ! at start negates the pattern
+        Examples: "*.py", "**/*.js", "src/**/*.{ts,tsx}", "!*.test.*"
+
+    Returns
+    -------
+    str
+        The raw output from ripgrep, including file paths, line numbers,
+        and matching lines, as returned by the bash tool.
+
+    Raises
+    ------
+    RuntimeError
+        If running the ripgrep command through the bash tool raises any exception.
+        Depending on how the bash tool reports failures, this may include:
+        - Pattern syntax is invalid
+        - Include glob pattern is malformed
+        - Ripgrep binary is not available
+        - File system errors occur
+
+    Examples
+    --------
+    >>> await search_grep(r"def\s+\w+", "*.py")
+    'file.py:10:def my_function():'
+
+    >>> await search_grep(r"TODO|FIXME", "**/*.{py,js}")
+    'app.py:25:# TODO: implement this
+    script.js:15:// FIXME: handle edge case'
+
+    >>> await search_grep(r"class\s+(\w+)", "src/**/*.py")
+    'src/models.py:1:class User:'
+    """
+    # Local import: shlex is only needed here to build the shell command
+    import shlex
+
+    cmd_parts = ["rg", "--color=never", "--line-number", "--with-filename"]
+    # Only pass -g when a non-default glob was requested
+    if include != "*":
+        cmd_parts.extend(["-g", include])
+
+    # "--" ends option parsing so a pattern starting with "-" is not read as a flag
+    cmd_parts.extend(["--", pattern])
+    # shlex.quote keeps backslashes intact and stops $(), backticks and quotes expanding
+    command = " ".join(shlex.quote(part) for part in cmd_parts)
+
+    try:
+        result = await bash(command)
+        return result
+    except Exception as e:
+        raise RuntimeError(f"Ripgrep search failed: {str(e)}") from e
+
+
+DEFAULT_TOOLKIT = Toolkit(name="jupyter-ai-default-toolkit")  # module-level instance shared by importers
+DEFAULT_TOOLKIT.add_tool(Tool(callable=bash))  # shell execution, imported from jupyter_ai_tools
+DEFAULT_TOOLKIT.add_tool(Tool(callable=read))  # read a range of lines from a file
+DEFAULT_TOOLKIT.add_tool(Tool(callable=edit))  # substring replacement inside a file
+DEFAULT_TOOLKIT.add_tool(Tool(callable=write))  # create or overwrite a file
+DEFAULT_TOOLKIT.add_tool(Tool(callable=search_grep))  # coroutine function (async def), unlike read/edit/write
diff --git a/packages/jupyter-ai/jupyter_ai/tools/test_default_toolkit.py b/packages/jupyter-ai/jupyter_ai/tools/test_default_toolkit.py