1+ #!/usr/bin/env python3
2+
import argparse
import json
import multiprocessing
import os
import re
import signal
import subprocess
import sys
import time
from typing import Any, Dict

import boto3
import requests
import yaml
from botocore.exceptions import ClientError, NoCredentialsError
17+
18+ sys .path .append (os .path .dirname (os .path .dirname (os .path .abspath (__file__ ))))
19+ from confidential_compute import ConfidentialCompute , ConfidentialComputeConfig , MissingInstanceProfile , ConfigNotFound , InvalidConfigValue , ConfidentialComputeStartupException
20+
class AWSConfidentialComputeConfig(ConfidentialComputeConfig):
    """Operator configuration extended with the AWS Nitro Enclave sizing fields."""

    # Passed to `nitro-cli run-enclave --memory`; checked against the
    # `memory_mib` value in /etc/nitro_enclaves/allocator.yaml.
    enclave_memory_mb: int
    # Passed to `nitro-cli run-enclave --cpu-count`; checked against the
    # `cpu_count` value in /etc/nitro_enclaves/allocator.yaml.
    enclave_cpu_count: int
24+
class AuxiliaryConfig:
    """Addresses and URL builders for the local auxiliary services and EC2 metadata.

    All members are class-level; the class is used as a namespace and is never
    instantiated by this file.
    """

    # Port the local Flask config server listens on (kept as a string because
    # it is passed straight onto the `flask run` command line).
    FLASK_PORT: str = "27015"
    # Port used to reach the local SOCKS5 proxy.
    SOCKS_PORT: str = "3306"
    LOCALHOST: str = "127.0.0.1"
    # Link-local address of the EC2 instance metadata service.
    AWS_METADATA: str = "169.254.169.254"

    @classmethod
    def get_socks_url(cls) -> str:
        """Return the URL of the local SOCKS5 proxy."""
        return f"socks5://{cls.LOCALHOST}:{cls.SOCKS_PORT}"

    @classmethod
    def get_config_url(cls) -> str:
        """Return the `/getConfig` URL of the local Flask config server."""
        return f"http://{cls.LOCALHOST}:{cls.FLASK_PORT}/getConfig"

    @classmethod
    def get_user_data_url(cls) -> str:
        """Return the metadata-service URL that serves the instance user data."""
        return f"http://{cls.AWS_METADATA}/latest/user-data"

    @classmethod
    def get_token_url(cls) -> str:
        """Return the metadata-service URL used to request a session token."""
        return f"http://{cls.AWS_METADATA}/latest/api/token"

    @classmethod
    def get_meta_url(cls) -> str:
        """Return the metadata-service URL of the instance identity document."""
        return f"http://{cls.AWS_METADATA}/latest/dynamic/instance-identity/document"
49+
50+
class EC2(ConfidentialCompute):
    """Starts and stops the UID2 operator Nitro Enclave on an EC2 instance.

    `run_compute` fetches the operator configuration from AWS Secrets Manager,
    starts the auxiliary host-side services (vsock proxy, Flask config server,
    SOCKS proxy), checks that they are reachable and then launches the enclave
    with `nitro-cli`. `cleanup` terminates the enclave and kills the
    auxiliary processes.
    """

    # Timeout (seconds) for requests to the EC2 instance metadata service.
    __METADATA_TIMEOUT_SECONDS = 2
    # Timeout (seconds) for requests to the local config server / SOCKS proxy.
    __LOCAL_TIMEOUT_SECONDS = 5
    # Number of one-second-spaced attempts to reach the config server.
    __CONFIG_SERVER_ATTEMPTS = 10

    def __init__(self):
        super().__init__()

    def __get_aws_token(self) -> str:
        """Fetch a session token (1 hour TTL) from the EC2 metadata service.

        Raises:
            RuntimeError: if the request fails or returns an error status.
        """
        try:
            response = requests.put(
                AuxiliaryConfig.get_token_url(),
                headers={"X-aws-ec2-metadata-token-ttl-seconds": "3600"},
                timeout=self.__METADATA_TIMEOUT_SECONDS,
            )
            # Without this, the body of an error response would be used as the token.
            response.raise_for_status()
            return response.text
        except requests.RequestException as e:
            raise RuntimeError(f"Failed to fetch AWS token: {e}") from e

    def __get_current_region(self) -> str:
        """Return the AWS region read from the instance identity document.

        Raises:
            RuntimeError: if the token or document cannot be fetched, or the
                document is not JSON containing a "region" key.
        """
        token = self.__get_aws_token()
        try:
            response = requests.get(
                AuxiliaryConfig.get_meta_url(),
                headers={"X-aws-ec2-metadata-token": token},
                timeout=self.__METADATA_TIMEOUT_SECONDS,
            )
            response.raise_for_status()
            return response.json()["region"]
        except (requests.RequestException, ValueError, KeyError) as e:
            raise RuntimeError(f"Failed to fetch region: {e}") from e

    def __validate_aws_specific_config(self, secret):
        """Check the enclave sizing values in `secret` against allowed bounds.

        A key that is absent from `secret` is not checked. The upper bound
        for each key comes from the Nitro allocator configuration; the lower
        bounds are fixed (11000 MB of memory, 2 CPUs).

        Raises:
            ValueError: if a value is above the maximum or below the minimum,
                or cannot be converted to `int`.
            RuntimeError: if the allocator configuration cannot be read.
        """
        sizing_keys = ("enclave_memory_mb", "enclave_cpu_count")
        if not any(key in secret for key in sizing_keys):
            return
        max_capacity = self.__get_max_capacity()
        min_capacity = {"enclave_memory_mb": 11000, "enclave_cpu_count": 2}
        for key in sizing_keys:
            if key not in secret:
                continue
            value = int(secret[key])
            if value > max_capacity[key]:
                raise ValueError(f"{key} value ({secret[key]}) exceeds the maximum allowed ({max_capacity[key]}).")
            if value < min_capacity[key]:
                raise ValueError(f"{key} value ({secret[key]}) needs to be higher than the minimum required ({min_capacity[key]}).")

    def _get_secret(self, secret_identifier: str) -> AWSConfidentialComputeConfig:
        """Fetch the operator config from AWS Secrets Manager and add defaults.

        Args:
            secret_identifier: id/name of the secret whose `SecretString` is
                a JSON object holding the configuration.

        Returns:
            The parsed configuration with `enclave_memory_mb`,
            `enclave_cpu_count` and `debug_mode` filled in when missing.

        Raises:
            MissingInstanceProfile: if boto3 finds no credentials.
            ConfigNotFound: if Secrets Manager rejects the request.
            ValueError: if the enclave sizing values are out of bounds.
            RuntimeError: if the region or allocator config cannot be read.
        """

        def add_defaults(configs: Dict[str, Any]) -> AWSConfidentialComputeConfig:
            """Fill in missing sizing values from the allocator config; debug off."""
            default_capacity = self.__get_max_capacity()
            configs.setdefault("enclave_memory_mb", default_capacity["enclave_memory_mb"])
            configs.setdefault("enclave_cpu_count", default_capacity["enclave_cpu_count"])
            configs.setdefault("debug_mode", False)
            return configs

        region = self.__get_current_region()
        print(f"Running in {region}")
        client = boto3.client("secretsmanager", region_name=region)
        # Only the Secrets Manager call is guarded: parsing and validation
        # errors must not be reported as credential or lookup failures.
        try:
            secret_string = client.get_secret_value(SecretId=secret_identifier)["SecretString"]
        except NoCredentialsError as e:
            raise MissingInstanceProfile(self.__class__.__name__) from e
        except ClientError as e:
            raise ConfigNotFound(self.__class__.__name__, f"Secret Manager {secret_identifier} in {region}") from e
        secret = add_defaults(json.loads(secret_string))
        self.__validate_aws_specific_config(secret)
        return secret

    @staticmethod
    def __get_max_capacity():
        """Read the memory and CPU allocation from the Nitro allocator config.

        Returns:
            A dict with `enclave_memory_mb` (from `memory_mib`) and
            `enclave_cpu_count` (from `cpu_count`).

        Raises:
            RuntimeError: if the file cannot be read or lacks either key.
        """
        try:
            with open("/etc/nitro_enclaves/allocator.yaml", "r") as file:
                nitro_config = yaml.safe_load(file)
            return {
                "enclave_memory_mb": nitro_config["memory_mib"],
                "enclave_cpu_count": nitro_config["cpu_count"],
            }
        except Exception as e:
            raise RuntimeError("/etc/nitro_enclaves/allocator.yaml does not have CPU, memory allocated") from e

    def __setup_vsockproxy(self, log_level: int) -> None:
        """
        Sets up the vsock proxy service.

        The worker count is half the host CPU count, rounded up.
        """
        thread_count = (multiprocessing.cpu_count() + 1) // 2
        command = [
            "/usr/bin/vsockpx", "-c", "/etc/uid2operator/proxy.yaml",
            "--workers", str(thread_count), "--log-level", str(log_level), "--daemon",
        ]
        self.run_command(command)

    def __run_config_server(self) -> None:
        """
        Starts the Flask configuration server.

        Writes `self.configs` as JSON to /etc/secret/secret-value/config and
        then launches Flask from the config-server directory.
        """
        os.makedirs("/etc/secret/secret-value", exist_ok=True)
        config_path = "/etc/secret/secret-value/config"
        with open(config_path, "w") as config_file:
            json.dump(self.configs, config_file)
        # NOTE: this changes the working directory of the whole process; the
        # Flask binary is addressed relative to it.
        os.chdir("/opt/uid2operator/config-server")
        command = ["./bin/flask", "run", "--host", AuxiliaryConfig.LOCALHOST, "--port", AuxiliaryConfig.FLASK_PORT]
        self.run_command(command, seperate_process=True)

    def __run_socks_proxy(self) -> None:
        """
        Starts the SOCKS proxy service.
        """
        command = ["sockd", "-D"]
        self.run_command(command)

    def __get_secret_name_from_userdata(self) -> str:
        """Return the secret name exported in EC2 user data, or the default.

        Looks for a line `export <SCOPE>_CONFIG_SECRET_KEY="<name>"` in the
        user data, where the scope is read from
        /opt/uid2operator/identity_scope.txt. When no such line exists the
        name `<scope>-operator-config-key` is returned.
        """
        token = self.__get_aws_token()
        # The status code is deliberately not checked: when the response does
        # not contain the export line (whatever its status) we fall back to
        # the default name below.
        response = requests.get(
            AuxiliaryConfig.get_user_data_url(),
            headers={"X-aws-ec2-metadata-token": token},
            timeout=self.__METADATA_TIMEOUT_SECONDS,
        )
        user_data = response.text

        with open("/opt/uid2operator/identity_scope.txt") as file:
            identity_scope = file.read().strip()

        default_name = f"{identity_scope.lower()}-operator-config-key"
        hardcoded_value = f"{identity_scope.upper()}_CONFIG_SECRET_KEY"
        match = re.search(rf'^export {re.escape(hardcoded_value)}="(.+?)"$', user_data, re.MULTILINE)
        return match.group(1) if match else default_name

    def _setup_auxiliaries(self) -> None:
        """Sets up the vsock tunnel, socks proxy and flask server"""
        # vsockpx log level: 1 in debug mode, 3 otherwise.
        log_level = 1 if self.configs.get("debug_mode", False) else 3
        self.__setup_vsockproxy(log_level)
        self.__run_config_server()
        self.__run_socks_proxy()
        print("Finished setting up all auxiliaries")

    def _validate_auxiliaries(self) -> None:
        """Validates connection to flask server direct and through socks proxy.

        The direct check is retried once per second while the connection is
        refused; every request carries a timeout so a hung server cannot
        block start-up forever.

        Raises:
            RuntimeError: if the config server is unreachable, answers with an
                error status, or cannot be reached through the SOCKS proxy.
        """
        print("Validating auxiliaries")
        config_url = AuxiliaryConfig.get_config_url()
        try:
            for attempt in range(self.__CONFIG_SERVER_ATTEMPTS):
                try:
                    response = requests.get(config_url, timeout=self.__LOCAL_TIMEOUT_SECONDS)
                    print("Config server is reachable")
                    break
                except requests.exceptions.ConnectionError as e:
                    print(f"Connecting to config server, attempt {attempt + 1} failed with ConnectionError: {e}")
                    time.sleep(1)
            else:
                # Loop finished without `break`: every attempt failed.
                raise RuntimeError("Config server unreachable")
            response.raise_for_status()
        except requests.RequestException as e:
            raise RuntimeError(f"Failed to get config from config server: {e}") from e

        socks_url = AuxiliaryConfig.get_socks_url()
        proxies = {"http": socks_url, "https": socks_url}
        try:
            response = requests.get(config_url, proxies=proxies, timeout=self.__LOCAL_TIMEOUT_SECONDS)
            response.raise_for_status()
        except requests.RequestException as e:
            raise RuntimeError(f"Cannot connect to config server via SOCKS proxy: {e}") from e
        print("Connectivity check to config server passes")

    def __run_nitro_enclave(self):
        """Launch the operator enclave with the configured memory and CPU count.

        In debug mode the enclave is started with `--debug-mode` and
        `--attach-console`.
        """
        command = [
            "nitro-cli", "run-enclave",
            "--eif-path", "/opt/uid2operator/uid2operator.eif",
            "--memory", str(self.configs["enclave_memory_mb"]),
            "--cpu-count", str(self.configs["enclave_cpu_count"]),
            "--enclave-cid", "42",
            "--enclave-name", "uid2operator",
        ]
        if self.configs.get("debug_mode", False):
            print("Running in debug_mode")
            command += ["--debug-mode", "--attach-console"]
        self.run_command(command, seperate_process=True)

    def run_compute(self) -> None:
        """Main execution flow for confidential compute."""
        secret_manager_key = self.__get_secret_name_from_userdata()
        self.configs = self._get_secret(secret_manager_key)
        print(f"Fetched configs from {secret_manager_key}")
        if not self.configs.get("skip_validations"):
            self.validate_configuration()
        self._setup_auxiliaries()
        self._validate_auxiliaries()
        self.__run_nitro_enclave()

    def cleanup(self) -> None:
        """Terminates the Nitro Enclave and auxiliary processes.

        Raises:
            RuntimeError: if a subprocess involved in the cleanup fails.
        """
        try:
            self.run_command(["nitro-cli", "terminate-enclave", "--all"])
            self.__kill_auxiliaries()
        except subprocess.SubprocessError as e:
            # Raising a bare string is itself a TypeError; wrap it properly.
            raise RuntimeError(f"Error during cleanup: {e}") from e

    def __kill_auxiliaries(self) -> None:
        """Kills all auxiliary processes spawned.

        Processes are located with `pgrep -f`, i.e. by matching the name
        against the full command line. Failures are reported and do not stop
        the remaining names from being processed.
        """
        own_pid = os.getpid()
        for process_name in ("vsockpx", "sockd", "flask"):
            try:
                result = subprocess.run(
                    ["pgrep", "-f", process_name], stdout=subprocess.PIPE, text=True, check=False
                )
                pids = [int(pid) for pid in result.stdout.split()]
                if not pids:
                    print(f"No process named '{process_name}' found.")
                    continue
                for pid in pids:
                    if pid == own_pid:
                        # Never signal ourselves if our command line happens to match.
                        continue
                    try:
                        os.kill(pid, signal.SIGKILL)
                    except ProcessLookupError:
                        # Already exited between pgrep and kill; keep going.
                        continue
                print(f"Killed process '{process_name}'.")
            except Exception as e:
                print(f"Error killing process '{process_name}': {e}")
242+
243+
if __name__ == "__main__":
    # Command-line entry point: `--operation start` (default) launches the
    # enclave and its auxiliaries, `--operation stop` tears them down.
    parser = argparse.ArgumentParser(description="Manage EC2-based confidential compute workflows.")
    parser.add_argument("-o", "--operation", choices=["stop", "start"], default="start", help="Operation to perform.")
    args = parser.parse_args()
    try:
        ec2 = EC2()
        if args.operation == "stop":
            ec2.cleanup()
        else:
            ec2.run_compute()
    except ConfidentialComputeStartupException as e:
        print("Failed starting up Confidential Compute. Please checks the logs for errors and retry \n", e)
        # Exit non-zero so a supervising process can tell the operation failed.
        sys.exit(1)
    except Exception as e:
        print("Unexpected failure while starting up Confidential Compute. Please contact UID support team with this log \n", e)
        sys.exit(1)
258+
0 commit comments