1+ #!/usr/bin/env python3
2+
3+ import boto3
4+ import json
5+ import os
6+ import subprocess
7+ import re
8+ import multiprocessing
9+ import requests
10+ import signal
11+ import argparse
12+ from botocore .exceptions import ClientError
13+ from typing import Dict
14+ import sys
15+ import time
16+ import yaml
17+
18+ sys .path .append (os .path .dirname (os .path .dirname (os .path .abspath (__file__ ))))
19+ from confidential_compute import ConfidentialCompute , ConfidentialComputeConfig , SecretNotFoundException , ConfidentialComputeStartupException
20+
class AWSConfidentialComputeConfig(ConfidentialComputeConfig):
    """Operator configuration extended with the AWS enclave sizing fields.

    Both fields are read when the enclave is launched and are passed to
    ``nitro-cli run-enclave`` as ``--memory`` and ``--cpu-count``.
    NOTE(review): the base class is defined in ``confidential_compute`` and is
    presumably a ``TypedDict``, since instances are used like plain dicts in
    this file -- confirm.
    """

    # Memory handed to the enclave (value of ``nitro-cli --memory``).
    enclave_memory_mb: int
    # Number of vCPUs handed to the enclave (value of ``nitro-cli --cpu-count``).
    enclave_cpu_count: int
24+
class AuxiliaryConfig:
    """Addresses and URLs of the auxiliary services and the EC2 metadata service.

    All values are class-level constants; the ``get_*_url`` helpers build the
    full URLs from them, so a subclass can override a host or port and every
    URL follows.
    """

    # Port the local Flask config server listens on.
    FLASK_PORT: str = "27015"
    # Port used in the SOCKS proxy URL.
    SOCKS_PORT: str = "3306"
    # Loopback address on which the config server and SOCKS proxy are reached.
    LOCALHOST: str = "127.0.0.1"
    # Link-local address of the EC2 instance metadata service.
    AWS_METADATA: str = "169.254.169.254"

    @classmethod
    def get_socks_url(cls) -> str:
        """Return the ``socks5://`` URL of the local SOCKS proxy."""
        return f"socks5://{cls.LOCALHOST}:{cls.SOCKS_PORT}"

    @classmethod
    def get_config_url(cls) -> str:
        """Return the ``/getConfig`` URL of the local Flask config server."""
        return f"http://{cls.LOCALHOST}:{cls.FLASK_PORT}/getConfig"

    @classmethod
    def get_user_data_url(cls) -> str:
        """Return the metadata-service URL that serves the instance user data."""
        return f"http://{cls.AWS_METADATA}/latest/user-data"

    @classmethod
    def get_token_url(cls) -> str:
        """Return the metadata-service URL that issues session tokens."""
        return f"http://{cls.AWS_METADATA}/latest/api/token"

    @classmethod
    def get_meta_url(cls) -> str:
        """Return the metadata-service URL of the instance identity document."""
        return f"http://{cls.AWS_METADATA}/latest/dynamic/instance-identity/document"
49+
50+
class EC2(ConfidentialCompute):
    """Confidential-compute driver that runs the operator in an AWS Nitro Enclave.

    ``run_compute`` loads the configuration from AWS Secrets Manager, starts
    the host-side auxiliaries (vsock proxy, Flask config server, SOCKS proxy),
    checks that the config server is reachable, and launches the enclave with
    ``nitro-cli``. ``cleanup`` terminates the enclave and kills the
    auxiliaries.

    ``run_command``, ``validate_configuration`` and ``configs`` are not defined
    in this class; they are presumably provided by ``ConfidentialCompute``.
    """

    def __init__(self):
        super().__init__()

    def __get_aws_token(self) -> str:
        """Fetches a temporary AWS EC2 metadata token.

        Raises:
            RuntimeError: if the request fails or returns an HTTP error status.
        """
        try:
            response = requests.put(
                AuxiliaryConfig.get_token_url(),
                headers={"X-aws-ec2-metadata-token-ttl-seconds": "3600"},
                timeout=2,
            )
            # Without this an error page would be returned and used as a token.
            response.raise_for_status()
            return response.text
        except requests.RequestException as e:
            raise RuntimeError(f"Failed to fetch aws token: {e}") from e

    def __get_current_region(self) -> str:
        """Fetches the current AWS region from EC2 instance metadata.

        Raises:
            RuntimeError: if the token or the identity document cannot be fetched.
        """
        token = self.__get_aws_token()
        headers = {"X-aws-ec2-metadata-token": token}
        try:
            response = requests.get(AuxiliaryConfig.get_meta_url(), headers=headers, timeout=2)
            response.raise_for_status()
            return response.json()["region"]
        except requests.RequestException as e:
            raise RuntimeError(f"Failed to fetch region: {e}") from e

    def __validate_aws_specific_config(self, secret):
        """Checks the enclave sizing values in *secret* against the allowed range.

        The upper bound is what the Nitro allocator has reserved on this host;
        the lower bound is a fixed minimum (11000 MB memory, 2 CPUs). Keys that
        are absent from *secret* are not checked.

        Raises:
            ValueError: if a present value is above the maximum or below the minimum.
        """
        keys = ("enclave_memory_mb", "enclave_cpu_count")
        if not any(key in secret for key in keys):
            return
        max_capacity = self.__get_max_capacity()
        min_capacity = {"enclave_memory_mb": 11000, "enclave_cpu_count": 2}
        for key in keys:
            if key not in secret:
                continue
            value = int(secret[key])
            if value > max_capacity[key]:
                raise ValueError(
                    f"{key} value ({secret[key]}) exceeds the maximum allowed ({max_capacity[key]})."
                )
            if value < min_capacity[key]:
                raise ValueError(
                    f"{key} value ({secret[key]}) needs to be higher than the minimum required ({min_capacity[key]})."
                )

    def _get_secret(self, secret_identifier: str) -> AWSConfidentialComputeConfig:
        """Fetches a secret value from AWS Secrets Manager and adds defaults.

        Args:
            secret_identifier: Name or ARN passed to ``get_secret_value`` as ``SecretId``.

        Returns:
            The parsed JSON secret with ``enclave_memory_mb``,
            ``enclave_cpu_count`` and ``debug_mode`` filled in when missing.

        Raises:
            RuntimeError: if the region lookup or the boto3 client creation fails.
            SecretNotFoundException: if Secrets Manager rejects the request.
            ValueError: if the enclave sizing values are out of range.
        """

        def add_defaults(configs: Dict[str, object]) -> AWSConfidentialComputeConfig:
            """Adds default values to configuration if missing."""
            default_capacity = self.__get_max_capacity()
            configs.setdefault("enclave_memory_mb", default_capacity["enclave_memory_mb"])
            configs.setdefault("enclave_cpu_count", default_capacity["enclave_cpu_count"])
            configs.setdefault("debug_mode", False)
            return configs

        region = self.__get_current_region()
        print(f"Running in {region}")
        try:
            client = boto3.client("secretsmanager", region_name=region)
        except Exception as e:
            raise RuntimeError(
                "Please use IAM instance profile for your instance and make sure that has permission to access Secret Manager",
                e,
            ) from e
        try:
            secret_string = client.get_secret_value(SecretId=secret_identifier)["SecretString"]
        except ClientError as e:
            raise SecretNotFoundException(f"{secret_identifier} in {region}") from e
        secret = add_defaults(json.loads(secret_string))
        self.__validate_aws_specific_config(secret)
        return secret

    @staticmethod
    def __get_max_capacity():
        """Reads the memory and CPU reserved for enclaves from the Nitro allocator config.

        Returns:
            A dict with ``enclave_memory_mb`` (from ``memory_mib``) and
            ``enclave_cpu_count`` (from ``cpu_count``).

        Raises:
            RuntimeError: if the file cannot be read or parsed, or a key is missing.
        """
        try:
            with open("/etc/nitro_enclaves/allocator.yaml", "r") as file:
                nitro_config = yaml.safe_load(file)
            return {
                "enclave_memory_mb": nitro_config["memory_mib"],
                "enclave_cpu_count": nitro_config["cpu_count"],
            }
        except Exception as e:
            raise RuntimeError("/etc/nitro_enclaves/allocator.yaml does not have CPU, memory allocated") from e

    def __setup_vsockproxy(self, log_level: int) -> None:
        """
        Sets up the vsock proxy service.

        Starts ``vsockpx`` as a daemon with one worker per two host CPUs
        (rounded up).
        """
        thread_count = (multiprocessing.cpu_count() + 1) // 2
        command = [
            "/usr/bin/vsockpx", "-c", "/etc/uid2operator/proxy.yaml",
            "--workers", str(thread_count), "--log-level", str(log_level), "--daemon",
        ]
        self.run_command(command)

    def __run_config_server(self) -> None:
        """
        Starts the Flask configuration server.

        Writes ``self.configs`` as JSON to ``/etc/secret/secret-value/config``
        and launches Flask from the config-server directory.
        """
        os.makedirs("/etc/secret/secret-value", exist_ok=True)
        config_path = "/etc/secret/secret-value/config"
        # NOTE(review): the file holds the fetched secret and is created with
        # the process's default permissions -- consider restricting them.
        with open(config_path, "w") as config_file:
            json.dump(self.configs, config_file)
        # The Flask binary is referenced relative to this directory, so the
        # working directory of this process is changed for the rest of the run.
        os.chdir("/opt/uid2operator/config-server")
        command = ["./bin/flask", "run", "--host", AuxiliaryConfig.LOCALHOST, "--port", AuxiliaryConfig.FLASK_PORT]
        # Keyword spelling kept as-is; presumably it matches run_command's parameter.
        self.run_command(command, seperate_process=True)

    def __run_socks_proxy(self) -> None:
        """
        Starts the SOCKS proxy service.
        """
        command = ["sockd", "-D"]
        self.run_command(command)

    def __get_secret_name_from_userdata(self) -> str:
        """Extracts the secret name from EC2 user data.

        Looks for a line ``export <SCOPE>_CONFIG_SECRET_KEY="<name>"`` in the
        instance user data, where ``<SCOPE>`` is the upper-cased content of
        ``/opt/uid2operator/identity_scope.txt``. When no such line exists the
        default ``<scope>-operator-config-key`` is returned.

        Raises:
            RuntimeError: if the metadata service cannot be reached.
        """
        token = self.__get_aws_token()
        try:
            # The status is deliberately not checked: an instance without user
            # data simply yields text that does not match, and the default
            # name is used.
            response = requests.get(
                AuxiliaryConfig.get_user_data_url(),
                headers={"X-aws-ec2-metadata-token": token},
                timeout=2,
            )
        except requests.RequestException as e:
            raise RuntimeError(f"Failed to fetch user data: {e}") from e
        user_data = response.text

        with open("/opt/uid2operator/identity_scope.txt") as file:
            identity_scope = file.read().strip()

        default_name = f"{identity_scope.lower()}-operator-config-key"
        hardcoded_value = f"{identity_scope.upper()}_CONFIG_SECRET_KEY"
        # Escape the interpolated name so it is always matched literally.
        match = re.search(rf'^export {re.escape(hardcoded_value)}="(.+?)"$', user_data, re.MULTILINE)
        return match.group(1) if match else default_name

    def _setup_auxiliaries(self) -> None:
        """Sets up the vsock tunnel, socks proxy and flask server"""
        log_level = 1 if self.configs["debug_mode"] else 3
        self.__setup_vsockproxy(log_level)
        self.__run_config_server()
        self.__run_socks_proxy()
        print("Finished setting up all auxiliaries")

    def _validate_auxiliaries(self) -> None:
        """Validates connection to flask server direct and through socks proxy.

        The direct connection is retried up to 10 times, one second apart,
        because the config server is started in a separate process and may not
        be listening yet.

        Raises:
            RuntimeError: if the config server is unreachable, returns an HTTP
                error, or cannot be reached through the SOCKS proxy.
        """
        print("Validating auxiliaries")
        config_url = AuxiliaryConfig.get_config_url()
        retryable = (requests.exceptions.ConnectionError, requests.exceptions.Timeout)
        try:
            for attempt in range(10):
                try:
                    response = requests.get(config_url, timeout=5)
                    print("Config server is reachable")
                    break
                except retryable as e:
                    print(f"Connecting to config server, attempt {attempt + 1} failed with {type(e).__name__}: {e}")
                    time.sleep(1)
            else:
                raise RuntimeError("Config server unreachable")
            response.raise_for_status()
        except requests.RequestException as e:
            raise RuntimeError(f"Failed to get config from config server: {e}") from e
        proxies = {"http": AuxiliaryConfig.get_socks_url(), "https": AuxiliaryConfig.get_socks_url()}
        try:
            response = requests.get(config_url, proxies=proxies, timeout=5)
            response.raise_for_status()
        except requests.RequestException as e:
            raise RuntimeError(f"Cannot connect to config server via SOCKS proxy: {e}") from e
        print("Connectivity check to config server passes")

    def __run_nitro_enclave(self):
        """Launches the operator enclave image with ``nitro-cli run-enclave``.

        Memory and CPU count come from ``self.configs``; in debug mode the
        enclave is started with ``--debug-mode --attach-console``.
        """
        command = [
            "nitro-cli", "run-enclave",
            "--eif-path", "/opt/uid2operator/uid2operator.eif",
            "--memory", str(self.configs["enclave_memory_mb"]),
            "--cpu-count", str(self.configs["enclave_cpu_count"]),
            "--enclave-cid", "42",
            "--enclave-name", "uid2operator",
        ]
        if self.configs.get("debug_mode", False):
            print("Running in debug_mode")
            command += ["--debug-mode", "--attach-console"]
        self.run_command(command, seperate_process=True)

    def run_compute(self) -> None:
        """Main execution flow for confidential compute."""
        secret_manager_key = self.__get_secret_name_from_userdata()
        self.configs = self._get_secret(secret_manager_key)
        print(f"Fetched configs from {secret_manager_key}")
        if not self.configs.get("skip_validations"):
            self.validate_configuration()
        self._setup_auxiliaries()
        self._validate_auxiliaries()
        self.__run_nitro_enclave()

    def cleanup(self) -> None:
        """Terminates the Nitro Enclave and auxiliary processes.

        Raises:
            RuntimeError: if a subprocess error occurs during cleanup.
        """
        try:
            self.run_command(["nitro-cli", "terminate-enclave", "--all"])
            self.__kill_auxiliaries()
        except subprocess.SubprocessError as e:
            # Raising a bare string is itself a TypeError; wrap it in an exception.
            raise RuntimeError(f"Error during cleanup: {e}") from e

    def __kill_auxiliaries(self) -> None:
        """Kills all auxiliary processes spawned.

        Best effort: a failure for one process name is printed and the
        remaining names are still handled.
        """
        own_pid = os.getpid()
        for process_name in ["vsockpx", "sockd", "flask"]:
            try:
                result = subprocess.run(
                    ["pgrep", "-f", process_name], stdout=subprocess.PIPE, text=True, check=False
                )
                # pgrep prints one PID per line; `-f` matches full command
                # lines, so make sure this process is never a target.
                pids = [int(pid) for pid in result.stdout.split()]
                pids = [pid for pid in pids if pid != own_pid]
                if not pids:
                    print(f"No process named '{process_name}' found.")
                    continue
                for pid in pids:
                    try:
                        os.kill(pid, signal.SIGKILL)
                    except ProcessLookupError:
                        # The process exited between pgrep and kill.
                        pass
                print(f"Killed process '{process_name}'.")
            except Exception as e:
                print(f"Error killing process '{process_name}': {e}")
243+
244+
def main() -> int:
    """Parses the command line and starts or stops the confidential compute.

    Returns:
        ``0`` on success and ``1`` when the requested operation failed, so that
        the caller (shell script, service manager) can detect the failure.
    """
    parser = argparse.ArgumentParser(description="Manage EC2-based confidential compute workflows.")
    parser.add_argument("-o", "--operation", choices=["stop", "start"], default="start", help="Operation to perform.")
    args = parser.parse_args()
    try:
        ec2 = EC2()
        if args.operation == "stop":
            ec2.cleanup()
        else:
            ec2.run_compute()
    except ConfidentialComputeStartupException as e:
        print("Failed starting up Confidential Compute. Please checks the logs for errors and retry \n", e)
        return 1
    except Exception as e:
        # Top-level boundary: report anything unexpected instead of a raw traceback.
        print("Unknown failure while starting up Confidential Compute. Please contact UID support team with this log \n", e)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())
259+
0 commit comments