snap-stanford · lxasqjc · Oct 6, 2025 · Oct 6, 2025 · Oct 6, 2025
diff --git a/.gitignore b/.gitignore
@@ -227,3 +227,9 @@ data/ddinter_raw/
 # Sphinx build
 /docs/build/
 /docs/source/api/
+*.csv
+*.txt
+*.json
+local_outputs/
+logs/
+sandbox/
diff --git a/biomni/agent/a1.py b/biomni/agent/a1.py
@@ -64,6 +64,8 @@ def __init__(
         api_key: str | None = None,
         commercial_mode: bool | None = None,
         expected_data_lake_files: list | None = None,
+        sandbox_mode: bool = False,
+        sandbox_path: str | None = None,
     ):
         """Initialize the biomni agent.
 
@@ -76,6 +78,8 @@ def __init__(
             base_url: Base URL for custom model serving (e.g., "http://localhost:8000/v1")
             api_key: API key for the custom LLM
             commercial_mode: If True, excludes datasets that require commercial licenses or are non-commercial only
+            sandbox_mode: If True, enables data sandbox mode for file operations
+            sandbox_path: Custom path for sandbox directory. If None and sandbox_mode=True, creates auto-generated session folder
 
         """
         # Use default_config values for unspecified parameters
@@ -111,6 +115,23 @@ def __init__(
         self.library_content_dict = library_content_dict
         self.commercial_mode = commercial_mode
 
+        # Setup sandbox mode
+        self.sandbox_mode = sandbox_mode
+        if sandbox_mode:
+            if sandbox_path is None:
+                # Auto-generate session folder name
+                session_id = datetime.now().strftime("session_%Y%m%d_%H%M%S")
+                sandbox_path = os.path.join("sandbox", session_id)
+
+            self.sandbox_path = os.path.abspath(sandbox_path)
+            self.original_cwd = os.getcwd()  # Store original working directory
+            os.makedirs(self.sandbox_path, exist_ok=True)
+
+            print(f"📁 Sandbox mode enabled: {self.sandbox_path}")
+        else:
+            self.sandbox_path = None
+            self.original_cwd = None
+
         # Display configuration in a nice, readable format
         print("\n" + "=" * 50)
         print("🔧 BIOMNI CONFIGURATION")
@@ -143,6 +164,17 @@ def __init__(
             if api_key is not None and api_key != "EMPTY":
                 print(f"  API Key: {'*' * 8 + api_key[-4:] if len(api_key) > 8 else '***'}")
 
+        # Show sandbox configuration
+        if self.sandbox_mode:
+            print("\n📁 SANDBOX MODE:")
+            print("  Enabled: True")
+            print(f"  Sandbox Path: {self.sandbox_path}")
+            print(f"  Files will be created in: {self.sandbox_path}")
+            print(f"  Original Project Path: {self.original_cwd}")
+            print("  Data Access:")
+            print("    - Relative paths: ./data/... (via symlinks)")
+            print("    - Helper function: get_project_path('data/...') for absolute paths")
+
         print("=" * 50 + "\n")
 
         self.path = path
@@ -151,6 +183,10 @@ def __init__(
             os.makedirs(path)
             print(f"Created directory: {path}")
 
+        # Setup sandbox data access after path is assigned
+        if self.sandbox_mode:
+            self._setup_sandbox_data_access()
+
         # --- Begin custom folder/file checks ---
         benchmark_dir = os.path.join(path, "biomni_data", "benchmark")
         data_lake_dir = os.path.join(path, "biomni_data", "data_lake")
@@ -1389,7 +1425,14 @@ def execute(state: AgentState) -> AgentState:
 
                     # Inject custom functions into the Python execution environment
                     self._inject_custom_functions_to_repl()
-                    result = run_with_timeout(run_python_repl, [code], timeout=timeout)
+
+                    # Pass sandbox path and original directory if sandbox mode is enabled
+                    if self.sandbox_mode and self.sandbox_path:
+                        result = run_with_timeout(
+                            run_python_repl, [code, self.sandbox_path, self.original_cwd], timeout=timeout
+                        )
+                    else:
+                        result = run_with_timeout(run_python_repl, [code], timeout=timeout)
 
                     # Plots are now captured directly in the execution entry above
 
@@ -1891,6 +1934,54 @@ def create_mcp_server(self, tool_modules=None):
         print(f"Created MCP server with {registered_tools} tools")
         return mcp
 
+    def get_sandbox_path(self) -> str | None:
+        """Get the current sandbox path if sandbox mode is enabled.
+
+        Returns:
+            str: The absolute path to the sandbox directory if sandbox mode is enabled, None otherwise
+        """
+        return self.sandbox_path if self.sandbox_mode else None
+
+    def _setup_sandbox_data_access(self) -> None:
+        """Setup data access for sandbox mode by creating symbolic links to important directories."""
+        if not self.sandbox_mode or not self.sandbox_path:
+            return
+
+        # List of important directories/files to link into sandbox
+        important_paths = [
+            ("data", os.path.join(self.original_cwd, "data")),
+            ("biomni_data", os.path.join(self.original_cwd, "data", "biomni_data")),
+        ]
+
+        # Create symbolic links for data access
+        for link_name, target_path in important_paths:
+            if os.path.exists(target_path):
+                sandbox_link = os.path.join(self.sandbox_path, link_name)
+
+                # Remove existing link if it exists
+                if os.path.islink(sandbox_link):
+                    os.unlink(sandbox_link)
+                elif os.path.exists(sandbox_link):
+                    # Don't overwrite real directories/files
+                    continue
+
+                try:
+                    os.symlink(target_path, sandbox_link)
+                    print(f"🔗 Linked {link_name} -> {target_path}")
+                except OSError as e:
+                    print(f"⚠️  Could not create symlink {link_name}: {e}")
+
+        # Also try to link the main data directory directly if path is provided
+        main_data_path = os.path.join(self.original_cwd, self.path)
+        if os.path.exists(main_data_path) and main_data_path != os.path.join(self.original_cwd, "data"):
+            sandbox_data_link = os.path.join(self.sandbox_path, "main_data")
+            if not os.path.exists(sandbox_data_link):
+                try:
+                    os.symlink(main_data_path, sandbox_data_link)
+                    print(f"🔗 Linked main_data -> {main_data_path}")
+                except OSError as e:
+                    print(f"⚠️  Could not create main_data symlink: {e}")
+
     def save_conversation_history(self, filepath: str, include_images: bool = True, save_pdf: bool = True) -> None:
         """Save the complete conversation history as PDF only.
 

diff --git a/biomni/tool/support_tools.py b/biomni/tool/support_tools.py
@@ -1,5 +1,6 @@
 import base64
 import io
+import os
 import sys
 from io import StringIO
 
@@ -10,20 +11,46 @@
 _captured_plots = []
 
 
-def run_python_repl(command: str) -> str:
+def run_python_repl(command: str, working_dir: str | None = None, original_cwd: str | None = None) -> str:
     """Executes the provided Python command in a persistent environment and returns the output.
     Variables defined in one execution will be available in subsequent executions.
+
+    Args:
+        command: Python command to execute
+        working_dir: Optional working directory to change to before execution
+        original_cwd: Original working directory (for sandbox mode data access)
     """
 
     def execute_in_repl(command: str) -> str:
         """Helper function to execute the command in the persistent environment."""
         old_stdout = sys.stdout
+        old_cwd = None
         sys.stdout = mystdout = StringIO()
 
         # Use the persistent namespace
         global _persistent_namespace
 
         try:
+            # Change working directory if specified
+            if working_dir is not None:
+                old_cwd = os.getcwd()
+                os.chdir(working_dir)
+
+            # Inject sandbox-aware helper variables for data access
+            if working_dir is not None and original_cwd is not None:
+                _persistent_namespace["__sandbox_mode__"] = True
+                _persistent_namespace["__original_cwd__"] = original_cwd
+                _persistent_namespace["__sandbox_path__"] = working_dir
+
+                # Helper function for accessing original project data
+                def _get_project_path(relative_path):
+                    """Helper function to get absolute path to project data from sandbox."""
+                    return os.path.join(original_cwd, relative_path)
+
+                _persistent_namespace["get_project_path"] = _get_project_path
+            else:
+                _persistent_namespace["__sandbox_mode__"] = False
+
             # Apply matplotlib monkey patches before execution
             _apply_matplotlib_patches()
 
@@ -37,6 +64,9 @@ def execute_in_repl(command: str) -> str:
         except Exception as e:
             output = f"Error: {str(e)}"
         finally:
+            # Restore original working directory
+            if old_cwd is not None:
+                os.chdir(old_cwd)
             sys.stdout = old_stdout
         return output
 

diff --git a/docs/SANDBOX_EXAMPLE.md b/docs/SANDBOX_EXAMPLE.md
@@ -0,0 +1,106 @@
+# Biomni Sandbox Mode Example
+
+This example demonstrates how to use the new sandbox mode feature in Biomni.
+
+## Basic Usage
+
+````python
+from biomni.agent import A1
+
+# Enable sandbox mode with auto-generated session folder
+agent = A1(
+    path='./data',
+    sandbox_mode=True,  # Enable sandbox mode
+    commercial_mode=True
+)
+
+# The agent will automatically create a sandbox directory like:
+# sandbox/session_20251006_143022/
+
+# All file operations in Python code will happen in the sandbox
+result = agent.go("""
+Create a simple analysis and save the results to a CSV file.
+
+```python
+import pandas as pd
+import matplotlib.pyplot as plt
+
+# Create sample data
+data = {
+    'name': ['Alice', 'Bob', 'Charlie', 'Diana'],
+    'age': [25, 30, 35, 28],
+    'score': [95, 87, 92, 88]
+}
+df = pd.DataFrame(data)
+
+# Save to CSV
+df.to_csv('analysis_results.csv', index=False)
+print("Data saved to analysis_results.csv")
+
+# Create a simple plot
+plt.figure(figsize=(8, 6))
+plt.scatter(df['age'], df['score'])
+plt.xlabel('Age')
+plt.ylabel('Score')
+plt.title('Age vs Score Analysis')
+plt.savefig('analysis_plot.png')
+print("Plot saved to analysis_plot.png")
+
+# List files in current directory
+import os
+print(f"Files created: {os.listdir('.')}")
+```
+""")
+
+# Check where files were created
+sandbox_path = agent.get_sandbox_path()
+print(f"All files were created in: {sandbox_path}")
+````
+
+## Custom Sandbox Path
+
+```python
+# Use a custom sandbox directory
+agent = A1(
+    path='./data',
+    sandbox_mode=True,
+    sandbox_path='/tmp/my_analysis_workspace',  # Custom path
+    commercial_mode=True
+)
+
+# Files written with relative paths will be created in /tmp/my_analysis_workspace/
+result = agent.go("Create some analysis files...")
+```
+
+## Regular Mode (No Sandbox)
+
+```python
+# Disable sandbox mode (default behavior)
+agent = A1(
+    path='./data',
+    sandbox_mode=False,  # Or omit this parameter (default is False)
+    commercial_mode=True
+)
+
+# Files will be created in the current working directory (existing behavior)
+result = agent.go("Create some files...")
+```
+
+## Benefits of Sandbox Mode
+
+1. **Clean Workspace**: Each session gets its own isolated directory
+2. **No Clutter**: Generated files don't mix with your project files
+3. **Easy Cleanup**: Simply delete the sandbox folder when done
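+4. **Reproducible**: Each auto-generated session starts in a fresh, empty directory (a custom `sandbox_path` is reused as-is)
+5. **Safe Exploration**: Files written with relative paths stay inside the sandbox; absolute paths and the symlinked data directories still point at your real files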
+
+## API Reference
+
+### New Parameters
+
+- `sandbox_mode: bool = False` - Enable/disable sandbox mode
+- `sandbox_path: str | None = None` - Custom sandbox directory (optional)
+
+### New Methods
+
+- `agent.get_sandbox_path() -> str | None` - Get current sandbox path