Skip to content

Commit 9991128

Browse files
authored
update deploy to retry if deployment failed (#477)
### TL;DR

Added retry functionality to the MCP Agent deployment process to improve reliability.

### What changed?

- Added a new `--retry-count` option to the deploy command with a default of 3 retries
- Implemented exponential backoff retry logic for deployment operations
- Split the deployment process into bundling and deployment phases for better error handling
- Added progress indicators showing the current attempt number
- Enhanced error messages to indicate whether errors are retriable or not
- Added visual feedback during retry attempts with warning messages

### How to test?

1. Deploy an MCP agent with the default retry settings:

   ```
   mcp-agent deploy config ./my-config
   ```

2. Test with a custom retry count:

   ```
   mcp-agent deploy config ./my-config --retry-count 5
   ```

3. Force a deployment failure to verify retry behavior (e.g., by temporarily disrupting network connectivity)
1 parent fe2567c commit 9991128

File tree

3 files changed

+102
-33
lines changed

3 files changed

+102
-33
lines changed

src/mcp_agent/cli/cloud/commands/deploy/main.py

Lines changed: 98 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
from mcp_agent.cli.secrets.processor import (
2828
process_config_secrets,
2929
)
30+
from mcp_agent.cli.utils.retry import retry_async_with_exponential_backoff, RetryError
3031
from mcp_agent.cli.utils.ux import (
3132
console,
3233
print_deployment_header,
@@ -84,6 +85,13 @@ def deploy_config(
8485
help="API key for authentication. Defaults to MCP_API_KEY environment variable.",
8586
envvar=ENV_API_KEY,
8687
),
88+
retry_count: int = typer.Option(
89+
3,
90+
"--retry-count",
91+
help="Number of retries on deployment failure.",
92+
min=1,
93+
max=10,
94+
),
8795
) -> str:
8896
"""Deploy an MCP agent using the specified configuration.
8997
@@ -101,6 +109,7 @@ def deploy_config(
101109
non_interactive: Never prompt for reusing or updating secrets or existing apps; reuse existing where possible
102110
api_url: API base URL
103111
api_key: API key for authentication
112+
retry_count: Number of retries on deployment failure
104113
105114
Returns:
106115
Newly-deployed MCP App ID
@@ -119,11 +128,13 @@ def deploy_config(
119128

120129
if not effective_api_url:
121130
raise CLIError(
122-
"MCP_API_BASE_URL environment variable or --api-url option must be set."
131+
"MCP_API_BASE_URL environment variable or --api-url option must be set.",
132+
retriable=False
123133
)
124134
if not effective_api_key:
125135
raise CLIError(
126-
"Must be logged in to deploy. Run 'mcp-agent login', set MCP_API_KEY environment variable or specify --api-key option."
136+
"Must be logged in to deploy. Run 'mcp-agent login', set MCP_API_KEY environment variable or specify --api-key option.",
137+
retriable=False
127138
)
128139
print_info(f"Using API at {effective_api_url}")
129140

@@ -166,7 +177,8 @@ def deploy_config(
166177
)
167178
except UnauthenticatedError as e:
168179
raise CLIError(
169-
"Invalid API key for deployment. Run 'mcp-agent login' or set MCP_API_KEY environment variable with new API key."
180+
"Invalid API key for deployment. Run 'mcp-agent login' or set MCP_API_KEY environment variable with new API key.",
181+
retriable=False
170182
) from e
171183
except Exception as e:
172184
raise CLIError(f"Error checking or creating app: {str(e)}") from e
@@ -237,47 +249,99 @@ def deploy_config(
237249
)
238250
)
239251

240-
wrangler_deploy(
252+
app = run_async(_deploy_with_retry(
241253
app_id=app_id,
242254
api_key=effective_api_key,
243255
project_dir=config_dir,
256+
mcp_app_client=mcp_app_client,
257+
retry_count=retry_count,
258+
))
259+
260+
print_info(f"App ID: {app_id}")
261+
if app.appServerInfo:
262+
status = (
263+
"ONLINE"
264+
if app.appServerInfo.status == "APP_SERVER_STATUS_ONLINE"
265+
else "OFFLINE"
266+
)
267+
print_info(f"App URL: {app.appServerInfo.serverUrl}")
268+
print_info(f"App Status: {status}")
269+
return app_id
270+
271+
except Exception as e:
272+
if settings.VERBOSE:
273+
import traceback
274+
275+
typer.echo(traceback.format_exc())
276+
raise CLIError(f"Deployment failed: {str(e)}") from e
277+
278+
279+
async def _deploy_with_retry(
280+
app_id: str,
281+
api_key: str,
282+
project_dir: Path,
283+
mcp_app_client: MCPAppClient,
284+
retry_count: int,
285+
):
286+
"""Execute the deployment operations with retry logic.
287+
288+
Args:
289+
app_id: The application ID
290+
api_key: API key for authentication
291+
project_dir: Directory containing the project files
292+
mcp_app_client: MCP App client for API calls
293+
retry_count: Number of retry attempts for deployment
294+
295+
Returns:
296+
Deployed app information
297+
"""
298+
# Step 1: Bundle once (no retry - if this fails, fail immediately)
299+
try:
300+
wrangler_deploy(
301+
app_id=app_id,
302+
api_key=api_key,
303+
project_dir=project_dir,
244304
)
305+
except Exception as e:
306+
raise CLIError(f"Bundling failed: {str(e)}") from e
307+
308+
# Step 2: Deployment API call with retries if needed
309+
attempt = 0
310+
311+
async def _perform_api_deployment():
312+
nonlocal attempt
313+
attempt += 1
314+
315+
attempt_suffix = f" (attempt {attempt}/{retry_count})" if attempt > 1 else ""
245316

246317
with Progress(
247318
SpinnerColumn(spinner_name="arrow3"),
248319
TextColumn("[progress.description]{task.description}"),
249320
) as progress:
250-
task = progress.add_task("Deploying MCP App bundle...", total=None)
251-
321+
deploy_task = progress.add_task(f"Deploying MCP App bundle{attempt_suffix}...", total=None)
252322
try:
253-
app = run_async(
254-
mcp_app_client.deploy_app(
255-
app_id=app_id,
256-
)
257-
)
258-
progress.update(task, description="✅ MCP App deployed successfully!")
259-
print_info(f"App ID: {app_id}")
260-
261-
if app.appServerInfo:
262-
status = (
263-
"ONLINE"
264-
if app.appServerInfo.status == "APP_SERVER_STATUS_ONLINE"
265-
else "OFFLINE"
266-
)
267-
print_info(f"App URL: {app.appServerInfo.serverUrl}")
268-
print_info(f"App Status: {status}")
269-
return app_id
270-
271-
except Exception as e:
272-
progress.update(task, description="❌ Deployment failed")
273-
raise e
323+
app = await mcp_app_client.deploy_app(app_id=app_id)
324+
progress.update(deploy_task, description=f"✅ MCP App deployed successfully{attempt_suffix}!")
325+
return app
326+
except Exception:
327+
progress.update(deploy_task, description=f"❌ Deployment failed{attempt_suffix}")
328+
raise
274329

275-
except Exception as e:
276-
if settings.VERBOSE:
277-
import traceback
330+
if retry_count > 1:
331+
print_info(f"Deployment API configured with up to {retry_count} attempts")
278332

279-
typer.echo(traceback.format_exc())
280-
raise CLIError(f"Deployment failed: {str(e)}") from e
333+
try:
334+
return await retry_async_with_exponential_backoff(
335+
_perform_api_deployment,
336+
max_attempts=retry_count,
337+
initial_delay=1.0,
338+
backoff_multiplier=2.0,
339+
max_delay=30.0,
340+
)
341+
except RetryError as e:
342+
attempts_text = "attempts" if retry_count > 1 else "attempt"
343+
print_error(f"Deployment failed after {retry_count} {attempts_text}")
344+
raise CLIError(f"Deployment failed after {retry_count} {attempts_text}. Last error: {e.original_error}") from e.original_error
281345

282346

283347
def get_config_files(config_dir: Path) -> tuple[Path, Optional[Path], Optional[Path]]:
@@ -293,7 +357,8 @@ def get_config_files(config_dir: Path) -> tuple[Path, Optional[Path], Optional[P
293357
config_file = config_dir / MCP_CONFIG_FILENAME
294358
if not config_file.exists():
295359
raise CLIError(
296-
f"Configuration file '{MCP_CONFIG_FILENAME}' not found in {config_dir}"
360+
f"Configuration file '{MCP_CONFIG_FILENAME}' not found in {config_dir}",
361+
retriable=False
297362
)
298363

299364
secrets_file: Optional[Path] = None

src/mcp_agent/cli/utils/retry.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,9 @@ async def retry_async_with_exponential_backoff(
135135
except Exception as e:
136136
last_exception = e
137137

138+
if isinstance(e, asyncio.CancelledError):
139+
raise
140+
138141
if attempt == max_attempts or not retryable_check(e):
139142
break
140143

tests/cli/commands/test_deploy_command.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@ def test_deploy_with_secrets_file():
196196
api_url="http://test.api/",
197197
api_key="test-token",
198198
non_interactive=True, # Set to True to avoid prompting
199+
retry_count=3, # Add the missing retry_count parameter
199200
)
200201

201202
# Verify deploy was successful

0 commit comments

Comments
 (0)