2424
2525 python deploy.py start
2626
27+ If you have not yet accepted the AWS Marketplace terms for the AMI, you may see the following error:
28+
29+ botocore.exceptions.ClientError: An error occurred (OptInRequired) when
30+ calling the RunInstances operation: In order to use this AWS
31+ Marketplace product you need to accept terms and subscribe. To do so
32+ please visit https://aws.amazon.com/marketplace/pp?sku=64g24n0wem7a8nuhfum3097vb
33+
34+ Open the specified URL in the browser and accept the terms, then try again.
35+
2736 4. Wait for the build to succeed in GitHub Actions (see console output for URL)
2837
2938 5. Open the Gradio interface (see console output for URL) and test it out.
@@ -125,8 +134,8 @@ class Config(BaseSettings):
125134 GITHUB_TOKEN : str
126135 PROJECT_NAME : str
127136
128- AWS_EC2_AMI : str = "" # fetches the latest compatible AMI dynamically if empty
129- AWS_EC2_DISK_SIZE : int = 100 # GB
137+ AWS_EC2_AMI : str = "ami-06835d15c4de57810"
138+ AWS_EC2_DISK_SIZE : int = 128 # GB
130139 #AWS_EC2_INSTANCE_TYPE: str = "p3.2xlarge" # (V100 16GB $3.06/hr x86_64)
131140 AWS_EC2_INSTANCE_TYPE : str = "g4dn.xlarge" # (T4 16GB $0.526/hr x86_64)
132141 AWS_EC2_USER : str = "ubuntu"
@@ -323,43 +332,6 @@ def get_or_create_security_group_id(ports: list[int] = [22, config.PORT]) -> str
323332 logger .error (f"Error describing security groups: { e } " )
324333 return None
325334
326- def get_latest_ami (
327- name_filter : str = "Deep Learning AMI GPU PyTorch *" ,
328- owner : str = "amazon" ,
329- region : str = config .AWS_REGION
330- ) -> str :
331- """
332- Retrieves the latest AMI ID matching the specified name filter and owner.
333-
334- Args:
335- name_filter (str): Filter for the AMI name. Defaults to "Deep Learning AMI GPU PyTorch *".
336- owner (str): Owner ID for the AMI. Defaults to "amazon".
337- region (str): AWS region. Defaults to config.AWS_REGION.
338-
339- Returns:
340- str: The latest AMI ID matching the criteria.
341- """
342- ec2_client = boto3 .client ('ec2' , region_name = region )
343- try :
344- response = ec2_client .describe_images (
345- Filters = [{'Name' : 'name' , 'Values' : [name_filter ]}],
346- Owners = [owner ]
347- )
348- # Sort AMIs by creation date in descending order
349- images = sorted (
350- response ['Images' ],
351- key = lambda img : img ['CreationDate' ],
352- reverse = True
353- )
354- if not images :
355- raise ValueError (f"No AMIs found matching filter: { name_filter } " )
356- latest_ami = images [0 ]['ImageId' ]
357- logger .info (f"Latest AMI found: { latest_ami } " )
358- return latest_ami
359- except ClientError as e :
360- logger .error (f"Error fetching AMI: { e } " )
361- raise
362-
363335def deploy_ec2_instance (
364336 ami : str = config .AWS_EC2_AMI ,
365337 instance_type : str = config .AWS_EC2_INSTANCE_TYPE ,
@@ -371,7 +343,7 @@ def deploy_ec2_instance(
371343 Deploys an EC2 instance with the specified parameters.
372344
373345 Args:
374- ami (str): The Amazon Machine Image ID to use for the instance. Defaults to the latest matching AMI .
346+ ami (str): The Amazon Machine Image ID to use for the instance. Defaults to config.AWS_EC2_AMI .
375347 instance_type (str): The type of instance to deploy. Defaults to config.AWS_EC2_INSTANCE_TYPE.
376348 project_name (str): The project name, used for tagging the instance. Defaults to config.PROJECT_NAME.
377349 key_name (str): The name of the key pair to use for the instance. Defaults to config.AWS_EC2_KEY_NAME.
@@ -383,8 +355,6 @@ def deploy_ec2_instance(
383355 ec2 = boto3 .resource ('ec2' )
384356 ec2_client = boto3 .client ('ec2' )
385357
386- ami = ami or get_latest_ami ()
387-
388358 # Check if key pair exists, if not create one
389359 try :
390360 ec2_client .describe_key_pairs (KeyNames = [key_name ])
@@ -461,9 +431,9 @@ def deploy_ec2_instance(
461431def configure_ec2_instance (
462432 instance_id : str | None = None ,
463433 instance_ip : str | None = None ,
464- max_ssh_retries : int = 10 ,
465- ssh_retry_delay : int = 10 ,
466- max_cmd_retries : int = 10 ,
434+ max_ssh_retries : int = 20 ,
435+ ssh_retry_delay : int = 20 ,
436+ max_cmd_retries : int = 20 ,
467437 cmd_retry_delay : int = 30 ,
468438) -> tuple [str | None , str | None ]:
469439 """
@@ -472,9 +442,9 @@ def configure_ec2_instance(
472442 Args:
473443 instance_id (str | None): The ID of the instance to configure. If None, a new instance will be deployed. Defaults to None.
474444 instance_ip (str | None): The IP address of the instance. Must be provided if instance_id is manually passed. Defaults to None.
475- max_ssh_retries (int): Maximum number of SSH connection retries. Defaults to 10 .
476- ssh_retry_delay (int): Delay between SSH connection retries in seconds. Defaults to 10 .
477- max_cmd_retries (int): Maximum number of command execution retries. Defaults to 10 .
445+ max_ssh_retries (int): Maximum number of SSH connection retries. Defaults to 20 .
446+ ssh_retry_delay (int): Delay between SSH connection retries in seconds. Defaults to 20 .
447+ max_cmd_retries (int): Maximum number of command execution retries. Defaults to 20 .
478448 cmd_retry_delay (int): Delay between command execution retries in seconds. Defaults to 30.
479449
480450 Returns:
0 commit comments