
Commit d81eba4: MCP and WebUI (#218)
Parent commit: b21142b

40 files changed (+4048, -344 lines)

.gitignore

Lines changed: 29 additions & 5 deletions
@@ -2,6 +2,7 @@
 maab/datasets/
 runs/
 maab/runs/
+/maab/

 # OS specific
 *.DS_Store
@@ -21,6 +22,18 @@ cdk.out/
 output_*.csv
 runs/

+# MCP specific
+.flask.pid
+.mcp.pid
+mcp_output/
+mcp/__pycache__/
+mcp/examples/credentials_template.txt
+mcp/examples/example_config.yaml
+mcp/*.pyc
+
+# IDE specific
+.vscode/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
@@ -190,10 +203,21 @@ aga-output-*.csv
 # ignore the MLAgent output folder
 output/

-# MLZero Specific
-maab/datasets/
-runs/
-maab/runs/
-
 # OS specific
 *.DS_Store
+
+src/autogluon/mcp/server/.flask.pid
+src/autogluon/mcp/server/.mcp.pid
+src/autogluon/mcp/server/test_output/
+src/autogluon/mcp/server/test_data/
+src/autogluon/mcp/server/nohup.out
+src/autogluon/mcp/**/uploads/
+src/autogluon/mcp/**/mlzero-*/
+
+# Credentials
+*.credentials
+*.creds
+credentials.txt
+aws_credentials.txt
+
+.claude

README.md

Lines changed: 31 additions & 6 deletions
@@ -34,7 +34,7 @@ uv pip install git+https://github.com/autogluon/autogluon-assistant.git

 For detailed usage instructions, Anthropic/Azure/OpenAI setup, and advanced configuration options, see our [Getting Started Tutorial](docs/tutorials/getting_started.md).

-## API Setup
+### 1. API Setup
 MLZero uses AWS Bedrock by default. Configure your AWS credentials:

 ```bash
@@ -45,17 +45,15 @@ export AWS_SECRET_ACCESS_KEY="<your-secret-key>"

 We also support Anthropic, Azure, and OpenAI. Support for more LLM providers (e.g. DeepSeek, etc.) will be added soon.

-## Basic Usage
-
-### CLI UI
+### 2.1 CLI

 ![Demo](https://github.com/autogluon/autogluon-assistant/blob/main/docs/assets/cli_demo.gif)

 ```bash
-mlzero -i <input_data_folder> [-u <optional_user_instructions>]
+mlzero -i <input_data_folder> [-t <optional_user_instructions>]
 ```

-## WEB UI
+### 2.2 Web UI

 ![Demo](https://github.com/autogluon/autogluon-assistant/blob/main/docs/assets/web_demo.gif)

@@ -67,6 +65,33 @@ mlzero-frontend # command to start frontend on 8509(default)
 1. **Configure**: Set your model provider and credentials in settings
 2. **Upload & Describe**: Drag your data folder into the chat input box, then type what you want to accomplish and press Enter

+### 2.3 MCP (Model Context Protocol)
+
+Note: The system can run on a single machine or distributed across multiple machines (e.g., server on EC2, client on local).
+1. **Start the server**
+```bash
+cd autogluon-assistant
+mlzero-backend # command to start backend
+mlzero-mcp-server # This will start the service; run it in a new terminal.
+```
+2. **Start the client**
+```bash
+cd autogluon-assistant
+mlzero-mcp-client
+```
+Note: You may need to set up port tunneling to expose your local MCP Client Server (port 8005) if you want to use it with remote LLM services (e.g., Claude API, OpenAI API).
+
+### 2.4 Python API
+
+```python
+from autogluon.assistant.coding_agent import run_agent
+run_agent(
+    input_data_folder=<your-input-folder>,
+    output_folder=<your-output-folder>,
+    # more args ...
+)
+```
+
 ## Citation
 If you use Autogluon Assistant (MLZero) in your research, please cite our paper:

docs/tutorials/getting_started.md

Lines changed: 4 additions & 4 deletions
@@ -70,7 +70,7 @@ You can select the LLM provider, model, and credentials to use. If using Bedrock
 ### CLI

 ```bash
-mlzero -i INPUT_DATA_FOLDER [-o OUTPUT_DIR] [-c CONFIG_PATH] [-n MAX_ITERATIONS] [--need-user-input] [-u INITIAL_USER_INPUT] [-e EXTRACT_TO] [-v VERBOSITY_LEVEL]
+mlzero -i INPUT_DATA_FOLDER [-o OUTPUT_DIR] [-c CONFIG_PATH] [-n MAX_ITERATIONS] [--enable-per-iteration-instruction] [-t INITIAL_INSTRUCTION] [-e EXTRACT_TO] [-v VERBOSITY_LEVEL]
 ```

 #### Required Arguments
@@ -89,10 +89,10 @@ mlzero -i INPUT_DATA_FOLDER [-o OUTPUT_DIR] [-c CONFIG_PATH] [-n MAX_ITERATIONS]
 - `-n, --max-iterations`:
   Maximum number of iterations. Default is `5`.

-- `--need-user-input`:
-  Whether to prompt user input at each iteration. Defaults to `False`.
+- `--enable-per-iteration-instruction`:
+  If enabled, provide an instruction at the start of each iteration (except the first, which uses the initial instruction). The process pauses until you provide it.

-- `-u, --user-input`:
+- `-t, --initial-instruction`:
   Initial user input to use in the first iteration. Optional.

 - `-e, --extract-to`:

maab/agents/mlzero_default/mlzero_default.sh

File mode changed: 100644 → 100755
Lines changed: 1 addition & 1 deletion
@@ -59,7 +59,7 @@ mlzero \
     -o "$OUTPUT_DIR" \
     -n 10 \
     -v 1 \
-    -u "complete the task in 10 minutes"
+    --initial-instruction "complete the task in 10 minutes"

 # Check if the process was successful
 if [ $? -ne 0 ]; then

pyproject.toml

Lines changed: 9 additions & 2 deletions
@@ -42,20 +42,27 @@ dependencies = [
     "streamlit-extras>=0.4",
     "psutil>=5.9.8",
     "peft>=0.15.2",
+    "fastmcp>=2.0.0",
+    "aiohttp>=3.8.0",
+    "requests>=2.28.0",
+    "boto3>=1.28.0",
+    "mcp>=0.1.0",
 ]

 [project.scripts]
 aga = "autogluon.assistant.cli.app:app"
 mlzero = "autogluon.assistant.cli.app:app"
-mlzero-webui = "autogluon.assistant.webui.Home:main"
 mlzero-backend = "autogluon.assistant.webui.backend.app:main"
 mlzero-frontend = "autogluon.assistant.webui.runner:run_frontend"
+mlzero-mcp-server = "autogluon.mcp.server.runner:main"
+mlzero-mcp-client = "autogluon.mcp.client.server:main"

 [project.optional-dependencies]
 dev = [
     "black>=24",
     "GitRepo>=1",
     "pytest>=7",
+    "pytest-asyncio>=0.21.0",
     "isort",
     "ruff==0.4",
 ]
@@ -65,7 +72,7 @@ Homepage = "https://github.com/autogluon/autogluon-assistant"

 [tool.setuptools.packages.find]
 where = ["src"]
-include = ["autogluon.assistant*"]
+include = ["autogluon.assistant*", "autogluon.mcp*"]
 namespaces = true

 [tool.setuptools.package-data]
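The new `[project.scripts]` rows are console-script entry points: each maps a command name to a `module:function` target that pip wraps in an executable at install time. A small illustration of how such a mapping is parsed, using only the `mlzero-mcp-server` string from the diff (nothing here requires the package to be installed):

```python
from importlib.metadata import EntryPoint

# The mlzero-mcp-server row from [project.scripts], expressed as an
# entry-point object; .module and .attr split the "module:function" value.
ep = EntryPoint(
    name="mlzero-mcp-server",
    value="autogluon.mcp.server.runner:main",
    group="console_scripts",
)
print(ep.module)  # autogluon.mcp.server.runner
print(ep.attr)    # main
```

At install time, pip generates a `mlzero-mcp-server` executable that imports `autogluon.mcp.server.runner` and calls `main()`.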

src/autogluon/assistant/cli/app.py

Lines changed: 40 additions & 7 deletions
@@ -1,15 +1,23 @@
 #!/usr/bin/env python3
 from __future__ import annotations

+import multiprocessing.resource_tracker
 from pathlib import Path

 import typer

 from autogluon.assistant.coding_agent import run_agent

-from .. import __file__ as assistant_file

-PACKAGE_ROOT = Path(assistant_file).parent
+def _noop(*args, **kwargs):
+    pass
+
+
+multiprocessing.resource_tracker.register = _noop
+multiprocessing.resource_tracker.unregister = _noop
+multiprocessing.resource_tracker.ensure_running = _noop
+
+PACKAGE_ROOT = Path(__file__).parent.parent
 DEFAULT_CONFIG_PATH = PACKAGE_ROOT / "configs" / "default.yaml"

 app = typer.Typer(add_completion=False)
@@ -31,14 +39,39 @@ def main(
         "--config",
         help=f"YAML config file (default: {DEFAULT_CONFIG_PATH})",
     ),
-    max_iterations: int = typer.Option(5, "-n", "--max-iterations", help="Max iteration count"),
-    need_user_input: bool = typer.Option(False, "--need-user-input", help="Whether to prompt user each iteration"),
-    initial_user_input: str | None = typer.Option(None, "-u", "--user-input", help="Initial user input"),
+    max_iterations: int = typer.Option(
+        5,
+        "-n",
+        "--max-iterations",
+        help="Max iteration count. If the task hasn't succeeded after this many iterations, it will terminate.",
+    ),
+    need_user_input: bool = typer.Option(
+        False,
+        "--enable-per-iteration-instruction",
+        help="If enabled, provide an instruction at the start of each iteration (except the first, which uses the initial instruction). The process pauses until you provide it.",
+    ),
+    initial_user_input: str | None = typer.Option(
+        None, "-t", "--initial-instruction", help="You can provide the initial instruction here."
+    ),
     extract_archives_to: str | None = typer.Option(
-        None, "-e", "--extract-to", help="Directory in which to unpack any archives"
+        None,
+        "-e",
+        "--extract-to",
+        help="Copy input data to specified directory and automatically extract all .zip archives.",
     ),
     # === Logging parameters ===
-    verbosity: int = typer.Option(1, "-v", "--verbosity", help="Verbosity level (0–4)"),
+    verbosity: int = typer.Option(
+        1,
+        "-v",
+        "--verbosity",
+        help=(
+            "-v 0: Only includes error messages\n"
+            "-v 1: Contains key essential information\n"
+            "-v 2: Includes brief information plus detailed information such as file save locations\n"
+            "-v 3: Includes info-level information plus all model training related information\n"
+            "-v 4: Includes full debug information"
+        ),
+    ),
 ):
     """
     mlzero: a CLI for running the AutoMLAgent pipeline.

src/autogluon/assistant/coding_agent.py

Lines changed: 1 addition & 1 deletion
@@ -6,7 +6,6 @@

 from omegaconf import OmegaConf

-from .managers import Manager
 from .rich_logging import configure_logging
 from .utils import extract_archives

@@ -45,6 +44,7 @@ def run_agent(
     output_dir.mkdir(parents=False, exist_ok=True)

     configure_logging(verbosity=verbosity, output_dir=output_dir)
+    from .managers import Manager

     if extract_archives_to is not None:
         if extract_archives_to and extract_archives_to != input_data_folder:
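Moving `from .managers import Manager` below `configure_logging(...)` is a deferred-import pattern: handlers are installed before `managers` (and anything it pulls in) runs its import-time code, so early log records are not lost to an unconfigured root logger. A self-contained sketch of why the ordering matters (module and function names here are illustrative, not from the codebase):

```python
import logging

captured = []

class ListHandler(logging.Handler):
    """Collect log messages in a list so we can inspect them."""
    def emit(self, record):
        captured.append(record.getMessage())

def configure_logging():
    root = logging.getLogger()
    root.setLevel(logging.INFO)
    root.addHandler(ListHandler())

def load_managers():
    # Stands in for import-time logging inside a heavy submodule.
    logging.getLogger("managers").info("Manager ready")

configure_logging()  # handlers installed first...
load_managers()      # ...so this import-time record is captured
print(captured)      # ['Manager ready']
```

Had `load_managers()` run before `configure_logging()`, the record would have gone to an unhandled root logger and been dropped.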

src/autogluon/assistant/rich_logging.py

Lines changed: 1 addition & 0 deletions
@@ -47,6 +47,7 @@ def _configure_logging(console_level: int, output_dir: Path = None) -> None:
         console = Console(file=sys.stderr)
         console_handler = RichHandler(console=console, markup=True, rich_tracebacks=True)
         console_handler.setLevel(console_level)
+        console_handler.name = CONSOLE_HANDLER
         handlers = [console_handler]
     else:
         stdout_handler = logging.StreamHandler(sys.stdout)
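Assigning `console_handler.name = CONSOLE_HANDLER` lets other code locate this specific handler later, for example to retune its level, without holding a reference to it. A sketch of that lookup pattern (the constant's value here is an assumption for illustration):

```python
import logging

CONSOLE_HANDLER = "console"  # assumed value of the module's constant

logger = logging.getLogger("demo")
handler = logging.StreamHandler()
handler.name = CONSOLE_HANDLER  # equivalent to handler.set_name(...)
handler.setLevel(logging.WARNING)
logger.addHandler(handler)

# Elsewhere, find the console handler by name and adjust it:
console = next(h for h in logger.handlers if h.name == CONSOLE_HANDLER)
console.setLevel(logging.DEBUG)
print(console is handler)  # True
```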

src/autogluon/assistant/tools_registry/indexing.py

Lines changed: 13 additions & 2 deletions
@@ -1,4 +1,7 @@
+import contextlib
+import io
 import logging
+import os
 import pickle
 from pathlib import Path
 from typing import Dict, List, Optional, Tuple
@@ -11,6 +14,8 @@

 logger = logging.getLogger(__name__)

+os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "true"
+

 class TutorialIndexer:
     """
@@ -32,6 +37,10 @@ def __del__(self):
         """Cleanup method to properly close the embedding model."""
         self.cleanup()

+    def __silent_encode(self, input):
+        with contextlib.redirect_stderr(io.StringIO()):
+            return self.model.encode(input)
+
     def cleanup(self):
         """Cleanup the embedding model to avoid multiprocessing issues."""
         if self.model is not None:
@@ -41,6 +50,8 @@ def cleanup(self):
                     self.model.close()
                 elif hasattr(self.model, "stop_multi_process_pool"):
                     self.model.stop_multi_process_pool()
+                else:
+                    del self.model
             except Exception as e:
                 logger.debug(f"Error during model cleanup: {e}")
             finally:
@@ -141,7 +152,7 @@ def _build_tool_index(self, tool_name: str, tutorial_type: str) -> Tuple[faiss.I

         for i in range(0, len(summaries), batch_size):
             batch_summaries = summaries[i : i + batch_size]
-            batch_embeddings = self.model.encode(batch_summaries)
+            batch_embeddings = self.__silent_encode(batch_summaries)

             # Ensure proper format
             if not isinstance(batch_embeddings, np.ndarray):
@@ -316,7 +327,7 @@ def search(self, query: str, tool_name: str, condensed: bool = False, top_k: int
             return []

         # Generate query embedding
-        query_embedding = self.model.encode([query])
+        query_embedding = self.__silent_encode([query])

         # Ensure proper data type and memory layout
         if not isinstance(query_embedding, np.ndarray):
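The new `__silent_encode` wrapper routes `model.encode` through `contextlib.redirect_stderr`, discarding the progress output some embedding backends write to stderr. The same pattern in isolation (the noisy function below is a stand-in, not the real encoder):

```python
import contextlib
import io
import sys

def noisy_encode(texts):
    # Stand-in for model.encode(), which may print progress to stderr.
    print("Batches: 100%", file=sys.stderr)
    return [len(t) for t in texts]

def silent_encode(texts):
    # Capture and discard anything written to stderr during the call.
    with contextlib.redirect_stderr(io.StringIO()):
        return noisy_encode(texts)

embeddings = silent_encode(["ab", "cdef"])
print(embeddings)  # [2, 4]
```

Note that the double underscore in `__silent_encode` triggers Python's name mangling, so the wrapper is callable only from inside the class, as the two call-site changes in the diff show.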

src/autogluon/assistant/webui/Home.py

Lines changed: 0 additions & 56 deletions
This file was deleted.
