diff --git a/sebs/__init__.py b/sebs/__init__.py index b92b9f25c..87070044e 100644 --- a/sebs/__init__.py +++ b/sebs/__init__.py @@ -1,5 +1,10 @@ """ - SeBS +SeBS: Serverless Benchmark Suite. + +This package provides the core functionalities for defining, deploying, running, +and analyzing serverless benchmarks across various FaaS platforms. +It includes modules for managing FaaS systems, benchmarks, experiments, +configurations, caching, and utility functions. """ from .version import __version__ # noqa diff --git a/sebs/aws/aws.py b/sebs/aws/aws.py index 243a6f0f9..61feb29f8 100644 --- a/sebs/aws/aws.py +++ b/sebs/aws/aws.py @@ -25,36 +25,40 @@ class AWS(System): + """ + AWS cloud provider implementation. + + This class manages functions and resources on Amazon Web Services. + """ logs_client = None cached = False _config: AWSConfig @staticmethod def name(): + """Return the name of the cloud provider.""" return "aws" @staticmethod def typename(): + """Return the type name of the cloud provider.""" return "AWS" @staticmethod def function_type() -> "Type[Function]": + """Return the type of the function implementation for this provider.""" return LambdaFunction @property def config(self) -> AWSConfig: + """Return the AWS-specific configuration.""" return self._config @property def system_resources(self) -> AWSSystemResources: + """Return the AWS-specific system resources.""" return cast(AWSSystemResources, self._system_resources) - """ - :param cache_client: Function cache instance - :param config: Experiments config - :param docker_client: Docker instance - """ - def __init__( self, sebs_config: SeBSConfig, @@ -63,6 +67,15 @@ def __init__( docker_client: docker.client, logger_handlers: LoggingHandlers, ): + """ + Initialize AWS provider. + + :param sebs_config: SeBS configuration. + :param config: AWS-specific configuration. + :param cache_client: Function cache instance. + :param docker_client: Docker instance. + :param logger_handlers: Logging handlers. + """ super().__init__( sebs_config, cache_client, @@ -75,6 +88,14 @@ def __init__( self.nosql_storage: Optional[DynamoDB] = None def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): + """ + Initialize AWS session and resources. + + Creates a boto3 session and initializes system resources. + + :param config: Additional configuration parameters (not used). + :param resource_prefix: Optional prefix for resource names. + """ # thread-safe self.session = boto3.session.Session( aws_access_key_id=self.config.credentials.access_key, @@ -89,6 +110,13 @@ def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] ) def get_lambda_client(self): + """ + Return a boto3 Lambda client. + + Initializes the client if it doesn't exist. + + :return: Boto3 Lambda client. + """ if not hasattr(self, "client"): self.client = self.session.client( service_name="lambda", @@ -96,24 +124,6 @@ def get_lambda_client(self): ) return self.client - """ - It would be sufficient to just pack the code and ship it as zip to AWS. - However, to have a compatible function implementation across providers, - we create a small module. - Issue: relative imports in Python when using storage wrapper. - Azure expects a relative import inside a module thus it's easier - to always create a module. 
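# Illustrative sketch (not from the patch): the session/client pattern documented above
# for AWS.initialize() and AWS.get_lambda_client(), reduced to plain boto3. Credential
# and region values are placeholders.
import boto3

def make_lambda_client(access_key: str, secret_key: str, region: str):
    # A session is created once up front (thread-safe), as in AWS.initialize()...
    session = boto3.session.Session(
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
    )
    # ...and the Lambda client is created lazily on first use, as in get_lambda_client().
    return session.client(service_name="lambda", region_name=region)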
- - Structure: - function - - function.py - - storage.py - - resources - handler.py - - benchmark: benchmark name - """ - def package_code( self, directory: str, @@ -124,6 +134,29 @@ def package_code( is_cached: bool, container_deployment: bool, ) -> Tuple[str, int, str]: + """ + Package benchmark code for AWS Lambda. + + Creates a zip archive of the function code. If container_deployment is True, + it also builds a Docker image and pushes it to ECR. + + The directory structure for packaging is: + function/ + - function.py (or actual benchmark code) + - storage.py (if applicable) + - resources/ (if applicable) + handler.py (AWS Lambda handler) + + :param directory: Directory containing the benchmark code. + :param language_name: Name of the programming language. + :param language_version: Version of the programming language. + :param architecture: CPU architecture for the function. + :param benchmark: Name of the benchmark. + :param is_cached: Flag indicating if the code is cached. + :param container_deployment: Flag indicating if deploying as a container image. + :return: Tuple containing the path to the zip archive, size of the archive in bytes, + and the ECR container URI (if applicable). + """ container_uri = "" @@ -163,13 +196,27 @@ def package_code( ) def _map_architecture(self, architecture: str) -> str: + """ + Map standard architecture names to AWS-specific names. + :param architecture: Standard architecture name (e.g., "x64"). + :return: AWS-specific architecture name (e.g., "x86_64"). + """ if architecture == "x64": return "x86_64" return architecture def _map_language_runtime(self, language: str, runtime: str): + """ + Map standard language runtime versions to AWS-specific names. + + AWS uses different naming scheme for Node.js versions + For example, it's 12.x instead of 12. + :param language: Name of the programming language. + :param runtime: Version of the programming language. + :return: AWS-specific language runtime name. + """ # AWS uses different naming scheme for Node.js versions # For example, it's 12.x instead of 12. if language == "nodejs": @@ -183,7 +230,18 @@ def create_function( container_deployment: bool, container_uri: str, ) -> "LambdaFunction": - + """ + Create or update an AWS Lambda function. + + If the function already exists, its configuration and code are updated. + Otherwise, a new function is created. + + :param code_package: Benchmark object containing code and configuration. + :param func_name: Name of the function. + :param container_deployment: Flag indicating if deploying as a container image. + :param container_uri: ECR container URI (if applicable). + :return: LambdaFunction object representing the created/updated function. + """ package = code_package.code_location benchmark = code_package.benchmark language = code_package.language_name @@ -287,7 +345,13 @@ def create_function( return lambda_function def cached_function(self, function: Function): + """ + Configure a cached function. + Sets up logging handlers for library and HTTP triggers. + + :param function: Function object. + """ from sebs.aws.triggers import LibraryTrigger for trigger in function.triggers(Trigger.TriggerType.LIBRARY): @@ -296,17 +360,6 @@ def cached_function(self, function: Function): for trigger in function.triggers(Trigger.TriggerType.HTTP): trigger.logging_handlers = self.logging_handlers - """ - Update function code and configuration on AWS. 
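# Minimal sketch of the two mappings described above for _map_architecture() and
# _map_language_runtime(), written as standalone helpers purely for illustration.
def map_architecture(architecture: str) -> str:
    # Lambda expects "x86_64" where SeBS uses "x64"; other values pass through unchanged.
    return "x86_64" if architecture == "x64" else architecture

def map_language_runtime(language: str, runtime: str) -> str:
    # Node.js runtimes are named "12.x" rather than "12" on AWS.
    return f"{runtime}.x" if language == "nodejs" else runtime

assert map_architecture("x64") == "x86_64"
assert map_language_runtime("nodejs", "12") == "12.x"
assert map_language_runtime("python", "3.8") == "3.8"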
- - :param benchmark: benchmark name - :param name: function name - :param code_package: path to code package - :param code_size: size of code package in bytes - :param timeout: function timeout in seconds - :param memory: memory limit for function - """ - def update_function( self, function: Function, @@ -314,6 +367,14 @@ def update_function( container_deployment: bool, container_uri: str, ): + """ + Update function code and configuration on AWS. + + :param function: LambdaFunction object to update. + :param code_package: Benchmark object with new code and configuration. + :param container_deployment: Flag indicating if deploying as a container image. + :param container_uri: ECR container URI (if applicable). + """ name = function.name function = cast(LambdaFunction, function) @@ -360,7 +421,15 @@ def update_function( def update_function_configuration( self, function: Function, code_package: Benchmark, env_variables: dict = {} ): + """ + Update the configuration of an AWS Lambda function. + This includes timeout, memory, and environment variables. + + :param function: LambdaFunction object to update. + :param code_package: Benchmark object with new configuration. + :param env_variables: Additional environment variables to set. + """ # We can only update storage configuration once it has been processed for this benchmark assert code_package.has_input_processed @@ -405,6 +474,13 @@ def update_function_configuration( def default_function_name( self, code_package: Benchmark, resources: Optional[Resources] = None ) -> str: + """ + Generate a default function name based on benchmark properties. + + :param code_package: Benchmark object. + :param resources: Optional Resources object. + :return: Default function name string. + """ # Create function name resource_id = resources.resources_id if resources else self.config.resources.resources_id func_name = "sebs-{}-{}-{}-{}-{}".format( @@ -420,47 +496,59 @@ def default_function_name( @staticmethod def format_function_name(func_name: str) -> str: + """ + Format the function name to comply with AWS Lambda naming rules. + + Replaces hyphens and dots with underscores. + + :param func_name: Original function name. + :return: Formatted function name. + """ # AWS Lambda does not allow hyphens in function names func_name = func_name.replace("-", "_") func_name = func_name.replace(".", "_") return func_name - """ - FIXME: does not clean the cache - """ - def delete_function(self, func_name: Optional[str]): + """ + Delete an AWS Lambda function. + + FIXME: does not clean the cache. + + :param func_name: Name of the function to delete. + """ self.logging.debug("Deleting function {}".format(func_name)) try: self.client.delete_function(FunctionName=func_name) except Exception: self.logging.debug("Function {} does not exist!".format(func_name)) - """ - Prepare AWS resources to store experiment results. - Allocate one bucket. - - :param benchmark: benchmark name - :return: name of bucket to store experiment results - """ - # def prepare_experiment(self, benchmark: str): + # """ + # Prepare AWS resources to store experiment results. + # Allocate one bucket. + # + # :param benchmark: benchmark name + # :return: name of bucket to store experiment results + # """ # logs_bucket = self.get_storage().add_output_bucket(benchmark, suffix="logs") # return logs_bucket - """ - Accepts AWS report after function invocation. - Returns a dictionary filled with values with various metrics such as - time, invocation time and memory consumed. 
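# Sketch of the naming rules documented for default_function_name() and
# format_function_name(): a name assembled from resource ID, benchmark, language,
# version and architecture, then stripped of hyphens and dots because Lambda rejects
# them. Field order, example values, and composing the two helpers are assumptions.
def format_function_name(func_name: str) -> str:
    return func_name.replace("-", "_").replace(".", "_")

def default_function_name(resource_id: str, benchmark: str, language: str,
                          version: str, architecture: str) -> str:
    raw = f"sebs-{resource_id}-{benchmark}-{language}-{version}-{architecture}"
    return format_function_name(raw)

print(default_function_name("abc123", "110.dynamic-html", "python", "3.8", "x64"))
# -> sebs_abc123_110_dynamic_html_python_3_8_x64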
- - :param log: decoded log from CloudWatch or from synchronuous invocation - :return: dictionary with parsed values - """ - @staticmethod def parse_aws_report( log: str, requests: Union[ExecutionResult, Dict[str, ExecutionResult]] ) -> str: + """ + Parse an AWS Lambda report log. + + Accepts AWS report after function invocation. + Returns a dictionary filled with values with various metrics such as + time, invocation time and memory consumed. + + :param log: Decoded log from CloudWatch or from synchronous invocation. + :param requests: ExecutionResult object or dictionary of request IDs to ExecutionResult objects. + :return: Request ID parsed from the log. + """ aws_vals = {} for line in log.split("\t"): if not line.isspace(): @@ -487,9 +575,19 @@ def parse_aws_report( return request_id def shutdown(self) -> None: + """Shutdown the AWS provider, performing any necessary cleanup.""" super().shutdown() def get_invocation_error(self, function_name: str, start_time: int, end_time: int): + """ + Retrieve and log invocation errors for a function. + + Queries CloudWatch Logs for error messages within a given time window. + + :param function_name: Name of the Lambda function. + :param start_time: Start timestamp for querying logs. + :param end_time: End timestamp for querying logs. + """ if not self.logs_client: self.logs_client = boto3.client( service_name="logs", @@ -533,6 +631,17 @@ def download_metrics( requests: Dict[str, ExecutionResult], metrics: dict, ): + """ + Download invocation metrics from CloudWatch Logs. + + Parses REPORT lines from logs to populate ExecutionResult objects. + + :param function_name: Name of the Lambda function. + :param start_time: Start timestamp for querying logs. + :param end_time: End timestamp for querying logs. + :param requests: Dictionary of request IDs to ExecutionResult objects. + :param metrics: Dictionary to store additional metrics (not used). + """ if not self.logs_client: self.logs_client = boto3.client( @@ -576,6 +685,16 @@ def download_metrics( ) def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: + """ + Create a trigger for an AWS Lambda function. + + Supports HTTP and Library triggers. + + :param func: Function object for which to create the trigger. + :param trigger_type: Type of trigger to create (HTTP or LIBRARY). + :return: Trigger object. + :raises RuntimeError: If the trigger type is not supported. + """ from sebs.aws.triggers import HTTPTrigger function = cast(LambdaFunction, func) @@ -611,12 +730,29 @@ def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> T return trigger def _enforce_cold_start(self, function: Function, code_package: Benchmark): + """ + Helper method to enforce a cold start for a single function. + + Updates function configuration with a unique environment variable. + + :param function: LambdaFunction object. + :param code_package: Benchmark object. + """ func = cast(LambdaFunction, function) self.update_function_configuration( func, code_package, {"ForceColdStart": str(self.cold_start_counter)} ) def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + """ + Enforce cold starts for a list of functions. + + Increments a counter and updates function configurations to ensure + the next invocation is a cold start. + + :param functions: List of Function objects. + :param code_package: Benchmark object. 
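# Sketch of the tab-separated REPORT format consumed by parse_aws_report() above.
# The sample line mimics a typical Lambda REPORT entry; all values are made up.
def parse_report_line(log: str) -> dict:
    vals = {}
    for field in log.split("\t"):
        if not field.isspace() and ":" in field:
            key, value = field.split(":", 1)
            vals[key.strip()] = value.strip()
    return vals

sample = (
    "REPORT RequestId: 11111111-2222-3333-4444-555555555555\t"
    "Duration: 102.25 ms\tBilled Duration: 103 ms\t"
    "Memory Size: 128 MB\tMax Memory Used: 48 MB\n"
)
report = parse_report_line(sample)
request_id = report["REPORT RequestId"]   # used to match the log to an ExecutionResult
billed_ms = report["Billed Duration"]     # "103 ms"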
+ """ self.cold_start_counter += 1 for func in functions: self._enforce_cold_start(func, code_package) @@ -627,18 +763,31 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) self.logging.info("Finished function updates enforcing cold starts.") def wait_function_active(self, func: LambdaFunction): + """ + Wait until a Lambda function becomes active. + + Uses a boto3 waiter. + :param func: LambdaFunction object. + """ self.logging.info("Waiting for Lambda function to be created...") waiter = self.client.get_waiter("function_active_v2") waiter.wait(FunctionName=func.name) self.logging.info("Lambda function has been created.") def wait_function_updated(self, func: LambdaFunction): + """ + Wait until a Lambda function update is complete. + + Uses a boto3 waiter. + :param func: LambdaFunction object. + """ self.logging.info("Waiting for Lambda function to be updated...") waiter = self.client.get_waiter("function_updated_v2") waiter.wait(FunctionName=func.name) self.logging.info("Lambda function has been updated.") def disable_rich_output(self): + """Disable rich output for the ECR client.""" self.ecr_client.disable_rich_output = True diff --git a/sebs/aws/config.py b/sebs/aws/config.py index 2d05e842e..4515bc94c 100644 --- a/sebs/aws/config.py +++ b/sebs/aws/config.py @@ -14,7 +14,14 @@ class AWSCredentials(Credentials): + """AWS account credentials.""" def __init__(self, access_key: str, secret_key: str): + """ + Initialize AWS credentials. + + :param access_key: AWS access key ID. + :param secret_key: AWS secret access key. + """ super().__init__() self._access_key = access_key @@ -27,27 +34,48 @@ def __init__(self, access_key: str, secret_key: str): @staticmethod def typename() -> str: + """Return the type name of the credentials class.""" return "AWS.Credentials" @property def access_key(self) -> str: + """Return the AWS access key ID.""" return self._access_key @property def secret_key(self) -> str: + """Return the AWS secret access key.""" return self._secret_key @property def account_id(self) -> str: + """Return the AWS account ID.""" return self._account_id @staticmethod def initialize(dct: dict) -> "AWSCredentials": + """ + Initialize AWSCredentials from a dictionary. + + :param dct: Dictionary containing 'access_key' and 'secret_key'. + :return: AWSCredentials instance. + """ return AWSCredentials(dct["access_key"], dct["secret_key"]) @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: - + """ + Deserialize AWS credentials from configuration or environment variables. + + Prioritizes credentials from the config dictionary, then environment variables. + Checks against cached account ID if available. + + :param config: Configuration dictionary. + :param cache: Cache object for retrieving cached account ID. + :param handlers: Logging handlers. + :return: AWSCredentials instance. + :raises RuntimeError: If credentials are not found or if account ID mismatch with cache. + """ # FIXME: update return types of both functions to avoid cast # needs 3.7+ to support annotations cached_config = cache.get_config("aws") @@ -86,32 +114,63 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden return ret def update_cache(self, cache: Cache): + """ + Update the cache with the AWS account ID. + + :param cache: Cache object. + """ cache.update_config(val=self.account_id, keys=["aws", "credentials", "account_id"]) def serialize(self) -> dict: + """ + Serialize AWS credentials to a dictionary. 
+ + :return: Dictionary containing the account ID. + """ out = {"account_id": self._account_id} return out class AWSResources(Resources): + """Manages AWS resources like ECR repositories, IAM roles, and HTTP APIs.""" class HTTPApi: + """Represents an AWS HTTP API Gateway endpoint.""" def __init__(self, arn: str, endpoint: str): + """ + Initialize HTTPApi. + + :param arn: ARN of the HTTP API. + :param endpoint: Endpoint URL of the HTTP API. + """ self._arn = arn self._endpoint = endpoint @property def arn(self) -> str: + """Return the ARN of the HTTP API.""" return self._arn @property def endpoint(self) -> str: + """Return the endpoint URL of the HTTP API.""" return self._endpoint @staticmethod def deserialize(dct: dict) -> "AWSResources.HTTPApi": + """ + Deserialize HTTPApi from a dictionary. + + :param dct: Dictionary containing 'arn' and 'endpoint'. + :return: HTTPApi instance. + """ return AWSResources.HTTPApi(dct["arn"], dct["endpoint"]) def serialize(self) -> dict: + """ + Serialize HTTPApi to a dictionary. + + :return: Dictionary containing 'arn' and 'endpoint'. + """ out = {"arn": self.arn, "endpoint": self.endpoint} return out @@ -121,6 +180,13 @@ def __init__( username: Optional[str] = None, password: Optional[str] = None, ): + """ + Initialize AWSResources. + + :param registry: Docker registry URL (optional). + :param username: Docker registry username (optional). + :param password: Docker registry password (optional). + """ super().__init__(name="aws") self._docker_registry: Optional[str] = registry if registry != "" else None self._docker_username: Optional[str] = username if username != "" else None @@ -131,25 +197,40 @@ def __init__( @staticmethod def typename() -> str: + """Return the type name of the resources class.""" return "AWS.Resources" @property def docker_registry(self) -> Optional[str]: + """Return the Docker registry URL for ECR.""" return self._docker_registry @property def docker_username(self) -> Optional[str]: + """Return the Docker username for ECR.""" return self._docker_username @property def docker_password(self) -> Optional[str]: + """Return the Docker password for ECR.""" return self._docker_password @property def container_repository(self) -> Optional[str]: + """Return the name of the ECR container repository.""" return self._container_repository def lambda_role(self, boto3_session: boto3.session.Session) -> str: + """ + Get or create the IAM role for Lambda functions. + + If the role 'sebs-lambda-role' doesn't exist, it's created with + necessary trust policies and attached policies for S3 access and + basic Lambda execution. + + :param boto3_session: Boto3 session. + :return: ARN of the Lambda IAM role. + """ if not self._lambda_role: iam_client = boto3_session.client(service_name="iam") trust_policy = { @@ -190,7 +271,18 @@ def lambda_role(self, boto3_session: boto3.session.Session) -> str: def http_api( self, api_name: str, func: LambdaFunction, boto3_session: boto3.session.Session ) -> "AWSResources.HTTPApi": - + """ + Get or create an HTTP API Gateway for a Lambda function. + + If an API with the given name doesn't exist, it's created and + configured to target the specified Lambda function. + + :param api_name: Name of the HTTP API. + :param func: LambdaFunction object to target. + :param boto3_session: Boto3 session. + :return: HTTPApi instance. + :raises RuntimeError: If API creation fails after multiple retries. 
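# Sketch of the role bootstrap described for AWSResources.lambda_role(): create a role
# that Lambda can assume and attach basic-execution and S3 policies. The exact policy
# set is an assumption based on the docstring above.
import json
import boto3

iam = boto3.client("iam")
trust_policy = {
    "Version": "2012-10-17",
    "Statement": [{
        "Effect": "Allow",
        "Principal": {"Service": "lambda.amazonaws.com"},
        "Action": "sts:AssumeRole",
    }],
}
role = iam.create_role(
    RoleName="sebs-lambda-role",
    AssumeRolePolicyDocument=json.dumps(trust_policy),
)
for policy_arn in (
    "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole",
    "arn:aws:iam::aws:policy/AmazonS3FullAccess",
):
    iam.attach_role_policy(RoleName="sebs-lambda-role", PolicyArn=policy_arn)
lambda_role_arn = role["Role"]["Arn"]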
+ """ http_api = self._http_apis.get(api_name) if not http_api: # get apigateway client @@ -245,6 +337,14 @@ def http_api( def check_ecr_repository_exists( self, ecr_client: ECRClient, repository_name: str ) -> Optional[str]: + """ + Check if an ECR repository exists. + + :param ecr_client: Boto3 ECR client. + :param repository_name: Name of the repository. + :return: Repository URI if it exists, None otherwise. + :raises Exception: If an error occurs during the check. + """ try: resp = ecr_client.describe_repositories(repositoryNames=[repository_name]) return resp["repositories"][0]["repositoryUri"] @@ -255,7 +355,15 @@ def check_ecr_repository_exists( raise e def get_ecr_repository(self, ecr_client: ECRClient) -> str: + """ + Get or create an ECR repository for SeBS benchmarks. + The repository name is 'sebs-benchmarks-{resources_id}'. + If it doesn't exist, it's created. + + :param ecr_client: Boto3 ECR client. + :return: Name of the ECR container repository. + """ if self._container_repository is not None: return self._container_repository @@ -281,7 +389,14 @@ def get_ecr_repository(self, ecr_client: ECRClient) -> str: return self._container_repository def ecr_repository_authorization(self, ecr_client: ECRClient) -> Tuple[str, str, str]: + """ + Get ECR authorization token (username, password, registry). + + If not already retrieved, it fetches an authorization token from ECR. + :param ecr_client: Boto3 ECR client. + :return: Tuple containing username, password, and registry URL. + """ if self._docker_password is None: response = ecr_client.get_authorization_token() auth_token = response["authorizationData"][0]["authorizationToken"] @@ -296,7 +411,15 @@ def ecr_repository_authorization(self, ecr_client: ECRClient) -> Tuple[str, str, @staticmethod def initialize(res: Resources, dct: dict): + """ + Initialize AWSResources from a dictionary. + Populates Docker registry details, Lambda role, and HTTP APIs. + + :param res: Resources object to initialize (cast to AWSResources). + :param dct: Dictionary containing resource configurations. + :return: Initialized AWSResources instance. + """ ret = cast(AWSResources, res) super(AWSResources, AWSResources).initialize(ret, dct) @@ -313,6 +436,13 @@ def initialize(res: Resources, dct: dict): return ret def serialize(self) -> dict: + """ + Serialize AWSResources to a dictionary. + + Includes Lambda role, HTTP APIs, Docker details, and container repository. + + :return: Dictionary representation of AWSResources. + """ out = { **super().serialize(), "lambda-role": self._lambda_role, @@ -326,6 +456,14 @@ def serialize(self) -> dict: return out def update_cache(self, cache: Cache): + """ + Update the cache with AWS resource details. + + Saves Docker registry, username, container repository, Lambda role, + and HTTP API configurations. + + :param cache: Cache object. + """ super().update_cache(cache) cache.update_config( val=self.docker_registry, keys=["aws", "resources", "docker", "registry"] @@ -342,6 +480,16 @@ def update_cache(self, cache: Cache): @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: + """ + Deserialize AWSResources from configuration or cache. + + Prioritizes cached configuration if available. + + :param config: Configuration dictionary. + :param cache: Cache object. + :param handlers: Logging handlers. + :return: AWSResources instance. 
+ """ ret = AWSResources() cached_config = cache.get_config("aws") @@ -365,32 +513,60 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour class AWSConfig(Config): + """AWS specific configuration, including credentials and resources.""" def __init__(self, credentials: AWSCredentials, resources: AWSResources): + """ + Initialize AWSConfig. + + :param credentials: AWSCredentials instance. + :param resources: AWSResources instance. + """ super().__init__(name="aws") self._credentials = credentials self._resources = resources @staticmethod def typename() -> str: + """Return the type name of the config class.""" return "AWS.Config" @property def credentials(self) -> AWSCredentials: + """Return the AWS credentials.""" return self._credentials @property def resources(self) -> AWSResources: + """Return the AWS resources configuration.""" return self._resources # FIXME: use future annotations (see sebs/faas/system) @staticmethod def initialize(cfg: Config, dct: dict): + """ + Initialize AWSConfig attributes from a dictionary. + + Sets the AWS region. + + :param cfg: Config object to initialize (cast to AWSConfig). + :param dct: Dictionary containing 'region'. + """ config = cast(AWSConfig, cfg) config._region = dct["region"] @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: + """ + Deserialize AWSConfig from configuration or cache. + Deserializes credentials and resources, then initializes the AWSConfig + object, prioritizing cached configuration. + + :param config: Configuration dictionary. + :param cache: Cache object. + :param handlers: Logging handlers. + :return: AWSConfig instance. + """ cached_config = cache.get_config("aws") # FIXME: use future annotations (see sebs/faas/system) credentials = cast(AWSCredentials, AWSCredentials.deserialize(config, cache, handlers)) @@ -408,19 +584,27 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config resources.region = config_obj.region return config_obj - """ - Update the contents of the user cache. - The changes are directly written to the file system. + def update_cache(self, cache: Cache): + """ + Update the user cache with AWS configuration. - Update values: region. - """ + Saves region, credentials, and resources to the cache. + The changes are directly written to the file system. - def update_cache(self, cache: Cache): + :param cache: Cache object. + """ cache.update_config(val=self.region, keys=["aws", "region"]) self.credentials.update_cache(cache) self.resources.update_cache(cache) def serialize(self) -> dict: + """ + Serialize AWSConfig to a dictionary. + + Includes region, credentials, and resources. + + :return: Dictionary representation of AWSConfig. + """ out = { "name": "aws", "region": self._region, diff --git a/sebs/aws/container.py b/sebs/aws/container.py index e7c2cbe69..90349f0a2 100644 --- a/sebs/aws/container.py +++ b/sebs/aws/container.py @@ -11,12 +11,15 @@ class ECRContainer(DockerContainer): + """Manages Docker container images in AWS Elastic Container Registry (ECR).""" @staticmethod def name(): + """Return the name of the container platform (aws).""" return "aws" @staticmethod def typename() -> str: + """Return the type name of the ECRContainer class.""" return "AWS.ECRContainer" def __init__( @@ -26,19 +29,39 @@ def __init__( config: AWSConfig, docker_client: docker.client.DockerClient, ): - + """ + Initialize ECRContainer. + + :param system_config: SeBS system configuration. + :param session: Boto3 session. 
+ :param config: AWS-specific configuration. + :param docker_client: Docker client instance. + """ super().__init__(system_config, docker_client) self.ecr_client = session.client(service_name="ecr", region_name=config.region) self.config = config @property def client(self) -> ECRClient: + """Return the Boto3 ECR client.""" return self.ecr_client def registry_name( self, benchmark: str, language_name: str, language_version: str, architecture: str ) -> Tuple[str, str, str, str]: - + """ + Generate ECR registry and image names. + + :param benchmark: Name of the benchmark. + :param language_name: Name of the programming language. + :param language_version: Version of the programming language. + :param architecture: CPU architecture of the image. + :return: Tuple containing: + - registry_name (e.g., {account_id}.dkr.ecr.{region}.amazonaws.com) + - repository_name (e.g., sebs-benchmarks-{resources_id}) + - image_tag (e.g., aws-benchmark-python-3.8-x64) + - image_uri (e.g., {registry_name}/{repository_name}:{image_tag}) + """ account_id = self.config.credentials.account_id region = self.config.region registry_name = f"{account_id}.dkr.ecr.{region}.amazonaws.com" @@ -51,7 +74,14 @@ def registry_name( return registry_name, repository_name, image_tag, image_uri - def find_image(self, repository_name, image_tag) -> bool: + def find_image(self, repository_name: str, image_tag: str) -> bool: + """ + Check if an image with a specific tag exists in the ECR repository. + + :param repository_name: Name of the ECR repository. + :param image_tag: Tag of the image. + :return: True if the image exists, False otherwise. + """ try: response = self.ecr_client.describe_images( repositoryName=repository_name, imageIds=[{"imageTag": image_tag}] @@ -63,8 +93,16 @@ def find_image(self, repository_name, image_tag) -> bool: return False - def push_image(self, repository_uri, image_tag): + def push_image(self, repository_uri: str, image_tag: str): + """ + Push a Docker image to the ECR repository. + + Authenticates with ECR using credentials from AWSResources. + :param repository_uri: URI of the ECR repository. + :param image_tag: Tag of the image to push. + :raises RuntimeError: If pushing the image fails. + """ username, password, registry_url = self.config.resources.ecr_repository_authorization( self.client ) diff --git a/sebs/aws/dynamodb.py b/sebs/aws/dynamodb.py index 0f3cc8782..2e6180b81 100644 --- a/sebs/aws/dynamodb.py +++ b/sebs/aws/dynamodb.py @@ -10,12 +10,15 @@ class DynamoDB(NoSQLStorage): + """AWS DynamoDB NoSQL storage implementation.""" @staticmethod def typename() -> str: + """Return the type name of the NoSQL storage implementation.""" return "AWS.DynamoDB" @staticmethod def deployment_name(): + """Return the deployment name for AWS (aws).""" return "aws" def __init__( @@ -27,6 +30,16 @@ def __init__( access_key: str, secret_key: str, ): + """ + Initialize DynamoDB client and internal table mapping. + + :param session: Boto3 session. + :param cache_client: Cache client instance. + :param resources: Cloud resources configuration. + :param region: AWS region. + :param access_key: AWS access key ID. + :param secret_key: AWS secret access key. + """ super().__init__(region, cache_client, resources) self.client = session.client( "dynamodb", @@ -42,7 +55,14 @@ def __init__( self._serializer = TypeSerializer() def retrieve_cache(self, benchmark: str) -> bool: + """ + Retrieve table mapping for a benchmark from the cache. + Populates the internal `_tables` mapping if cached data is found. 
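# Sketch of the naming convention from ECRContainer.registry_name() and the existence
# check from find_image(). Account ID, region, resource ID and tag are placeholders.
import boto3

account_id, region = "123456789012", "us-east-1"
registry_name = f"{account_id}.dkr.ecr.{region}.amazonaws.com"
repository_name = "sebs-benchmarks-abc123"
image_tag = "aws-benchmark-python-3.8-x64"
image_uri = f"{registry_name}/{repository_name}:{image_tag}"

ecr = boto3.client("ecr", region_name=region)
def image_exists() -> bool:
    try:
        ecr.describe_images(repositoryName=repository_name,
                            imageIds=[{"imageTag": image_tag}])
        return True
    except ecr.exceptions.ImageNotFoundException:
        # The patch handles errors more broadly; this narrows it for clarity.
        return False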
+ + :param benchmark: Name of the benchmark. + :return: True if cache was retrieved, False otherwise. + """ if benchmark in self._tables: return True @@ -54,7 +74,11 @@ def retrieve_cache(self, benchmark: str) -> bool: return False def update_cache(self, benchmark: str): + """ + Update the cache with the current table mapping for a benchmark. + :param benchmark: Name of the benchmark. + """ self._cache_client.update_nosql( self.deployment_name(), benchmark, @@ -64,10 +88,22 @@ def update_cache(self, benchmark: str): ) def get_tables(self, benchmark: str) -> Dict[str, str]: + """ + Get the mapping of benchmark-specific table names to actual AWS table names. + + :param benchmark: Name of the benchmark. + :return: Dictionary mapping benchmark table names to AWS table names. + """ return self._tables[benchmark] def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: + """ + Get the actual AWS table name for a given benchmark and table alias. + :param benchmark: Name of the benchmark. + :param table: Alias of the table used within the benchmark. + :return: Actual AWS table name, or None if not found. + """ if benchmark not in self._tables: return None @@ -84,7 +120,17 @@ def write_to_table( primary_key: Tuple[str, str], secondary_key: Optional[Tuple[str, str]] = None, ): + """ + Write data to a DynamoDB table. + Serializes data using TypeSerializer before writing. + + :param benchmark: Name of the benchmark. + :param table: Alias of the table used within the benchmark. + :param data: Dictionary containing the data to write. + :param primary_key: Tuple of (key_name, key_value) for the primary key. + :param secondary_key: Optional tuple for the secondary/sort key. + """ table_name = self._get_table_name(benchmark, table) assert table_name is not None @@ -95,17 +141,26 @@ def write_to_table( serialized_data = {k: self._serializer.serialize(v) for k, v in data.items()} self.client.put_item(TableName=table_name, Item=serialized_data) - """ - AWS: create a DynamoDB Table - - In contrast to the hierarchy of database objects in Azure (account -> database -> container) - and GCP (database per benchmark), we need to create unique table names here. - """ - def create_table( self, benchmark: str, name: str, primary_key: str, secondary_key: Optional[str] = None ) -> str: + """ + Create a DynamoDB table for a benchmark. + + Generates a unique table name using resource ID, benchmark name, and provided name. + Handles cases where the table already exists or is being created. + Uses PAY_PER_REQUEST billing mode. + In contrast to the hierarchy of database objects in Azure (account -> database -> container) + and GCP (database per benchmark), we need to create unique table names here. + + :param benchmark: Name of the benchmark. + :param name: Alias for the table within the benchmark. + :param primary_key: Name of the primary key attribute. + :param secondary_key: Optional name of the secondary/sort key attribute. + :return: Actual name of the created or existing DynamoDB table. + :raises RuntimeError: If table creation fails for an unknown reason. + """ table_name = f"sebs-benchmarks-{self._cloud_resources.resources_id}-{benchmark}-{name}" try: @@ -169,7 +224,23 @@ def create_table( raise RuntimeError(f"Creating DynamoDB failed, unknown reason! Error: {e}") def clear_table(self, name: str) -> str: + """ + Clear all items from a DynamoDB table. + + Note: This method is not implemented. + + :param name: Name of the table to clear. + :raises NotImplementedError: This method is not yet implemented. 
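# Sketch of the serialization step documented for write_to_table(): plain Python values
# are converted to DynamoDB's attribute-value format before put_item(). Table and key
# names are placeholders following the sebs-benchmarks-{resources_id}-... scheme.
import boto3
from boto3.dynamodb.types import TypeSerializer

client = boto3.client("dynamodb", region_name="us-east-1")
serializer = TypeSerializer()

data = {"user_id": "alice", "attempts": 3, "payload": {"status": "ok"}}
item = {k: serializer.serialize(v) for k, v in data.items()}
# e.g. {"user_id": {"S": "alice"}, "attempts": {"N": "3"}, "payload": {"M": {...}}}
client.put_item(TableName="sebs-benchmarks-abc123-benchmark-results", Item=item)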
+ """ raise NotImplementedError() def remove_table(self, name: str) -> str: + """ + Remove a DynamoDB table. + + Note: This method is not implemented. + + :param name: Name of the table to remove. + :raises NotImplementedError: This method is not yet implemented. + """ raise NotImplementedError() diff --git a/sebs/aws/function.py b/sebs/aws/function.py index 27aeb240b..cc15d5ebb 100644 --- a/sebs/aws/function.py +++ b/sebs/aws/function.py @@ -6,6 +6,12 @@ class LambdaFunction(Function): + """ + Represents an AWS Lambda function. + + Extends the base Function class with AWS-specific attributes like ARN, role, + and S3 bucket for code deployment. + """ def __init__( self, name: str, @@ -17,6 +23,18 @@ def __init__( cfg: FunctionConfig, bucket: Optional[str] = None, ): + """ + Initialize a LambdaFunction instance. + + :param name: Name of the Lambda function. + :param benchmark: Name of the benchmark this function belongs to. + :param arn: Amazon Resource Name (ARN) of the Lambda function. + :param code_package_hash: Hash of the deployed code package. + :param runtime: AWS Lambda runtime identifier (e.g., "python3.8"). + :param role: IAM role ARN assumed by the Lambda function. + :param cfg: FunctionConfig object with memory, timeout, etc. + :param bucket: Optional S3 bucket name where the code package is stored (for large functions). + """ super().__init__(benchmark, name, code_package_hash, cfg) self.arn = arn self.role = role @@ -25,9 +43,17 @@ def __init__( @staticmethod def typename() -> str: + """Return the type name of this function implementation.""" return "AWS.LambdaFunction" def serialize(self) -> dict: + """ + Serialize the LambdaFunction instance to a dictionary. + + Includes AWS-specific attributes along with base Function attributes. + + :return: Dictionary representation of the LambdaFunction. + """ return { **super().serialize(), "arn": self.arn, @@ -38,6 +64,14 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "LambdaFunction": + """ + Deserialize a LambdaFunction instance from a dictionary. + + Typically used when loading function details from a cache. + + :param cached_config: Dictionary containing serialized LambdaFunction data. + :return: A new LambdaFunction instance. + """ from sebs.faas.function import Trigger from sebs.aws.triggers import LibraryTrigger, HTTPTrigger @@ -61,6 +95,16 @@ def deserialize(cached_config: dict) -> "LambdaFunction": ret.add_trigger(trigger_type.deserialize(trigger)) return ret - def code_bucket(self, benchmark: str, storage_client: S3): + def code_bucket(self, benchmark: str, storage_client: S3) -> str: + """ + Get or assign the S3 bucket for code deployment. + + If a bucket is not already assigned to this function, it retrieves + the deployment bucket from the S3 storage client. + + :param benchmark: Name of the benchmark (used by storage_client if creating a new bucket, though typically not needed here). + :param storage_client: S3 client instance. + :return: The name of the S3 bucket used for code deployment. 
+ """ self.bucket = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT) return self.bucket diff --git a/sebs/aws/resources.py b/sebs/aws/resources.py index 5913c3928..aabb398ca 100644 --- a/sebs/aws/resources.py +++ b/sebs/aws/resources.py @@ -14,12 +14,15 @@ class AWSSystemResources(SystemResources): + """Manages system-level resources for AWS, such as S3 and DynamoDB clients.""" @staticmethod def typename() -> str: + """Return the type name of the system resources class.""" return "AWS.SystemResources" @property def config(self) -> AWSConfig: + """Return the AWS-specific configuration.""" return cast(AWSConfig, self._config) def __init__( @@ -29,6 +32,14 @@ def __init__( docker_client: docker.client, logger_handlers: LoggingHandlers, ): + """ + Initialize AWSSystemResources. + + :param config: AWS-specific configuration. + :param cache_client: Cache client instance. + :param docker_client: Docker client instance. + :param logger_handlers: Logging handlers. + """ super().__init__(config, cache_client, docker_client) self._session: Optional[boto3.session.Session] = None @@ -37,19 +48,25 @@ def __init__( self._nosql_storage: Optional[DynamoDB] = None def initialize_session(self, session: boto3.session.Session): - self._session = session + """ + Initialize the Boto3 session for AWS clients. - """ - Create a client instance for cloud storage. When benchmark and buckets - parameters are passed, then storage is initialized with required number - of buckets. Buckets may be created or retrieved from cache. - - :param replace_existing: replace existing files in cached buckets? - :return: storage client - """ + :param session: Boto3 session instance. + """ + self._session = session def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStorage: + """ + Get or initialize the S3 persistent storage client. + Creates an S3 client instance if it doesn't exist. When benchmark and buckets + parameters are passed (implicitly via config), storage is initialized with the + required number of buckets. Buckets may be created or retrieved from cache. + + :param replace_existing: If True, replace existing files in cached buckets. + Defaults to False if None. + :return: S3 persistent storage client. + """ if not self._storage: assert self._session is not None self.logging.info("Initialize S3 storage instance.") @@ -68,6 +85,13 @@ def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStor return self._storage def get_nosql_storage(self) -> NoSQLStorage: + """ + Get or initialize the DynamoDB NoSQL storage client. + + Creates a DynamoDB client instance if it doesn't exist. + + :return: DynamoDB NoSQL storage client. 
+ """ if not self._nosql_storage: assert self._session is not None self.logging.info("Initialize DynamoDB NoSQL instance.") diff --git a/sebs/aws/s3.py b/sebs/aws/s3.py index 79ca8905a..c065fd3a1 100644 --- a/sebs/aws/s3.py +++ b/sebs/aws/s3.py @@ -10,20 +10,25 @@ class S3(PersistentStorage): + """AWS S3 persistent storage implementation.""" @staticmethod def typename() -> str: + """Return the type name of the storage implementation.""" return "AWS.S3" @staticmethod def deployment_name(): + """Return the deployment name for AWS (aws).""" return "aws" @property def replace_existing(self) -> bool: + """Flag indicating whether to replace existing files in buckets.""" return self._replace_existing @replace_existing.setter def replace_existing(self, val: bool): + """Set the flag for replacing existing files.""" self._replace_existing = val def __init__( @@ -36,6 +41,17 @@ def __init__( secret_key: str, replace_existing: bool, ): + """ + Initialize S3 client. + + :param session: Boto3 session. + :param cache_client: Cache client instance. + :param resources: Cloud resources configuration. + :param location: AWS region for the S3 client. + :param access_key: AWS access key ID. + :param secret_key: AWS secret access key. + :param replace_existing: Flag to replace existing files in buckets. + """ super().__init__(location, cache_client, resources, replace_existing) self.client = session.client( "s3", @@ -46,11 +62,29 @@ def __init__( self.cached = False def correct_name(self, name: str) -> str: + """ + Return the corrected bucket name (no correction needed for S3). + + :param name: Original bucket name. + :return: Corrected bucket name. + """ return name def _create_bucket( self, name: str, buckets: List[str] = [], randomize_name: bool = False ) -> str: + """ + Create an S3 bucket. + + Handles bucket naming (randomization if requested) and region-specific + creation logic. Checks if a similar bucket already exists. + + :param name: Desired base name for the bucket. + :param buckets: List of existing bucket names to check against. + :param randomize_name: If True, append a random string to the bucket name. + :return: Name of the created or existing bucket. + :raises RuntimeError: If bucket creation fails (e.g., already exists globally). + """ for bucket_name in buckets: if name in bucket_name: self.logging.info( @@ -76,8 +110,8 @@ def _create_bucket( # This is incredible x2 - boto3 will not throw exception if you recreate # a bucket in us-east-1 # https://github.com/boto/boto3/issues/4023 - buckets = self.list_buckets() - if bucket_name in buckets: + existing_buckets = self.list_buckets() + if bucket_name in existing_buckets: self.logging.error( f"The bucket {bucket_name} not successful; it exists already" ) @@ -98,7 +132,17 @@ def _create_bucket( return bucket_name - def uploader_func(self, path_idx, key, filepath): + def uploader_func(self, path_idx: int, key: str, filepath: str): + """ + Upload a file to an S3 bucket, used as a callback for multiprocessing. + + Skips upload if using cached buckets and not replacing existing files. + Constructs the S3 key using input prefixes. + + :param path_idx: Index of the input path/prefix. + :param key: Object key (filename) within the bucket. + :param filepath: Local path to the file to upload. + """ # Skip upload when using cached buckets and not updating storage. 
if self.cached and not self.replace_existing: return @@ -116,21 +160,48 @@ def uploader_func(self, path_idx, key, filepath): self.upload(bucket_name, filepath, key) def upload(self, bucket_name: str, filepath: str, key: str): + """ + Upload a file to a specified S3 bucket. + + :param bucket_name: Name of the S3 bucket. + :param filepath: Local path to the file. + :param key: Object key (path within the bucket). + """ self.logging.info("Upload {} to {}".format(filepath, bucket_name)) self.client.upload_file(Filename=filepath, Bucket=bucket_name, Key=key) def download(self, bucket_name: str, key: str, filepath: str): + """ + Download an object from an S3 bucket to a local file. + + :param bucket_name: Name of the S3 bucket. + :param key: Object key (path within the bucket). + :param filepath: Local path to save the downloaded file. + """ self.logging.info("Download {}:{} to {}".format(bucket_name, key, filepath)) self.client.download_file(Bucket=bucket_name, Key=key, Filename=filepath) def exists_bucket(self, bucket_name: str) -> bool: + """ + Check if an S3 bucket exists. + + :param bucket_name: Name of the S3 bucket. + :return: True if the bucket exists, False otherwise. + """ try: self.client.head_bucket(Bucket=bucket_name) return True except self.client.exceptions.ClientError: return False - def list_bucket(self, bucket_name: str, prefix: str = ""): + def list_bucket(self, bucket_name: str, prefix: str = "") -> List[str]: + """ + List objects in an S3 bucket, optionally filtered by prefix. + + :param bucket_name: Name of the S3 bucket. + :param prefix: Optional prefix to filter objects. + :return: List of object keys. + """ objects_list = self.client.list_objects_v2(Bucket=bucket_name, Prefix=prefix) objects: List[str] if "Contents" in objects_list: @@ -140,6 +211,12 @@ def list_bucket(self, bucket_name: str, prefix: str = ""): return objects def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: + """ + List all S3 buckets, or filter by a partial name. + + :param bucket_name: Optional string to filter bucket names (contains match). + :return: List of bucket names. + """ s3_buckets = self.client.list_buckets()["Buckets"] if bucket_name is not None: return [bucket["Name"] for bucket in s3_buckets if bucket_name in bucket["Name"]] @@ -147,10 +224,20 @@ def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: return [bucket["Name"] for bucket in s3_buckets] def clean_bucket(self, bucket: str): + """ + Delete all objects within an S3 bucket. + + :param bucket: Name of the S3 bucket to clean. + """ objects = self.client.list_objects_v2(Bucket=bucket) if "Contents" in objects: - objects = [{"Key": obj["Key"]} for obj in objects["Contents"]] # type: ignore - self.client.delete_objects(Bucket=bucket, Delete={"Objects": objects}) # type: ignore + objects_to_delete = [{"Key": obj["Key"]} for obj in objects["Contents"]] # type: ignore + self.client.delete_objects(Bucket=bucket, Delete={"Objects": objects_to_delete}) # type: ignore def remove_bucket(self, bucket: str): + """ + Delete an S3 bucket. The bucket must be empty. + + :param bucket: Name of the S3 bucket to delete. + """ self.client.delete_bucket(Bucket=bucket) diff --git a/sebs/aws/triggers.py b/sebs/aws/triggers.py index f18314593..2c9ac20b9 100644 --- a/sebs/aws/triggers.py +++ b/sebs/aws/triggers.py @@ -9,30 +9,49 @@ class LibraryTrigger(Trigger): + """ + Represents a library-based trigger for AWS Lambda, invoking functions directly + using the AWS SDK. 
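# Sketch of the clean-up documented for clean_bucket(): list object keys and delete them
# in batches. Unlike the method above, this sketch paginates explicitly, since
# list_objects_v2 returns at most 1000 keys per call. The bucket name is a placeholder.
import boto3

def clean_bucket(bucket: str):
    s3 = boto3.client("s3")
    for page in s3.get_paginator("list_objects_v2").paginate(Bucket=bucket):
        contents = page.get("Contents", [])
        if contents:
            s3.delete_objects(
                Bucket=bucket,
                Delete={"Objects": [{"Key": obj["Key"]} for obj in contents]},
            )

clean_bucket("sebs-benchmarks-abc123-experiments")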
+ """ def __init__(self, fname: str, deployment_client: Optional[AWS] = None): + """ + Initialize a LibraryTrigger. + + :param fname: Name of the Lambda function. + :param deployment_client: Optional AWS client for deployment and invocation. + """ super().__init__() self.name = fname self._deployment_client = deployment_client @staticmethod def typename() -> str: + """Return the type name of this trigger implementation.""" return "AWS.LibraryTrigger" @property def deployment_client(self) -> AWS: + """AWS client used for deploying and invoking the function.""" assert self._deployment_client return self._deployment_client @deployment_client.setter def deployment_client(self, deployment_client: AWS): + """Set the AWS client.""" self._deployment_client = deployment_client @staticmethod def trigger_type() -> Trigger.TriggerType: + """Return the type of this trigger (LIBRARY).""" return Trigger.TriggerType.LIBRARY def sync_invoke(self, payload: dict) -> ExecutionResult: + """ + Synchronously invoke the Lambda function. + :param payload: Input payload for the function. + :return: ExecutionResult object containing invocation details and metrics. + """ self.logging.debug(f"Invoke function {self.name}") serialized_payload = json.dumps(payload).encode("utf-8") @@ -67,9 +86,19 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: aws_result.parse_benchmark_output(json.loads(function_output["body"])) return aws_result - def async_invoke(self, payload: dict): + def async_invoke(self, payload: dict) -> dict: + """ + Asynchronously invoke the Lambda function. + + Note: The return type 'dict' is based on the boto3 client.invoke response + when InvocationType is 'Event'. It might be beneficial to define a more + specific return type or structure if more details from the response are needed. - # FIXME: proper return type + :param payload: Input payload for the function. + :return: Dictionary containing the response from the Lambda invoke call (e.g., StatusCode, RequestId). + :raises RuntimeError: If the asynchronous invocation fails (status code is not 202). + """ + # FIXME: proper return type - consider a dataclass for the response serialized_payload = json.dumps(payload).encode("utf-8") client = self.deployment_client.get_lambda_client() ret = client.invoke( @@ -85,41 +114,86 @@ def async_invoke(self, payload: dict): return ret def serialize(self) -> dict: + """ + Serialize the LibraryTrigger to a dictionary. + + :return: Dictionary representation of the trigger. + """ return {"type": "Library", "name": self.name} @staticmethod def deserialize(obj: dict) -> Trigger: + """ + Deserialize a LibraryTrigger from a dictionary. + + :param obj: Dictionary representation of the trigger. + :return: A new LibraryTrigger instance. + """ return LibraryTrigger(obj["name"]) class HTTPTrigger(Trigger): + """ + Represents an HTTP-based trigger for AWS Lambda, typically via API Gateway. + """ def __init__(self, url: str, api_id: str): + """ + Initialize an HTTPTrigger. + + :param url: The invocation URL for the HTTP endpoint. + :param api_id: The API ID of the API Gateway. + """ super().__init__() self.url = url self.api_id = api_id @staticmethod def typename() -> str: + """Return the type name of this trigger implementation.""" return "AWS.HTTPTrigger" @staticmethod def trigger_type() -> Trigger.TriggerType: + """Return the type of this trigger (HTTP).""" return Trigger.TriggerType.HTTP def sync_invoke(self, payload: dict) -> ExecutionResult: + """ + Synchronously invoke the Lambda function via its HTTP endpoint. 
+ :param payload: Input payload for the function (will be sent as JSON). + :return: ExecutionResult object containing invocation details and metrics. + """ self.logging.debug(f"Invoke function {self.url}") return self._http_invoke(payload, self.url) def async_invoke(self, payload: dict) -> concurrent.futures.Future: + """ + Asynchronously invoke the Lambda function via its HTTP endpoint. + + Uses a ThreadPoolExecutor to perform the HTTP request in a separate thread. + :param payload: Input payload for the function. + :return: A Future object representing the asynchronous invocation. + """ pool = concurrent.futures.ThreadPoolExecutor() fut = pool.submit(self.sync_invoke, payload) return fut def serialize(self) -> dict: + """ + Serialize the HTTPTrigger to a dictionary. + + :return: Dictionary representation of the trigger. + """ return {"type": "HTTP", "url": self.url, "api-id": self.api_id} @staticmethod def deserialize(obj: dict) -> Trigger: + """ + Deserialize an HTTPTrigger from a dictionary. + + :param obj: Dictionary representation of the trigger. + :return: A new HTTPTrigger instance. + """ return HTTPTrigger(obj["url"], obj["api-id"]) diff --git a/sebs/azure/azure.py b/sebs/azure/azure.py index d848d724a..95c2435ef 100644 --- a/sebs/azure/azure.py +++ b/sebs/azure/azure.py @@ -37,18 +37,22 @@ class Azure(System): @staticmethod def name(): + """Return the name of the cloud provider (azure).""" return "azure" @property def config(self) -> AzureConfig: + """Return the Azure-specific configuration.""" return self._config @staticmethod def function_type() -> Type[Function]: + """Return the type of the function implementation for Azure.""" return AzureFunction @property def cli_instance(self) -> AzureCLI: + """Return the Azure CLI instance.""" return cast(AzureSystemResources, self._system_resources).cli_instance def __init__( @@ -59,6 +63,15 @@ def __init__( docker_client: docker.client, logger_handlers: LoggingHandlers, ): + """ + Initialize Azure provider. + + :param sebs_config: SeBS configuration. + :param config: Azure-specific configuration. + :param cache_client: Function cache instance. + :param docker_client: Docker client instance. + :param logger_handlers: Logging handlers. + """ super().__init__( sebs_config, cache_client, @@ -68,26 +81,33 @@ def __init__( self.logging_handlers = logger_handlers self._config = config - """ - Start the Docker container running Azure CLI tools. - """ - def initialize( self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None, ): + """ + Initialize Azure resources and allocate shared resources. + Starts the Docker container running Azure CLI tools if not already running. + + :param config: Additional configuration parameters (not used). + :param resource_prefix: Optional prefix for resource names. + """ self.initialize_resources(select_prefix=resource_prefix) self.allocate_shared_resource() def shutdown(self): + """Shutdown the Azure provider, including the Azure CLI Docker container.""" cast(AzureSystemResources, self._system_resources).shutdown() super().shutdown() def find_deployments(self) -> List[str]: - """ - Look for duplicated resource groups. + Find existing SeBS deployments (resource groups) in Azure. + + Looks for resource groups matching the pattern "sebs_resource_group_(.*)". + + :return: List of deployment identifiers (resource prefixes). 
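# Sketch of the pattern documented for HTTPTrigger.async_invoke(): the synchronous HTTP
# call is pushed onto a thread pool and a Future is returned immediately. The use of the
# `requests` library and the endpoint URL are assumptions for illustration only.
import concurrent.futures
import requests

def sync_invoke(url: str, payload: dict) -> dict:
    return requests.post(url, json=payload).json()

pool = concurrent.futures.ThreadPoolExecutor()
future = pool.submit(sync_invoke,
                     "https://example.execute-api.us-east-1.amazonaws.com", {"key": "value"})
# ... other work can proceed here ...
result = future.result()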
""" resource_groups = self.config.resources.list_resource_groups(self.cli_instance) deployments = [] @@ -99,22 +119,15 @@ def find_deployments(self) -> List[str]: return deployments - """ - Allow multiple deployment clients share the same settings. - Not an ideal situation, but makes regression testing much simpler. - """ - def allocate_shared_resource(self): + """ + Allocate or retrieve shared resources like the data storage account. + + This allows multiple deployment clients to share the same settings, + simplifying regression testing. + """ self.config.resources.data_storage_account(self.cli_instance) - # Directory structure - # handler - # - source files - # - Azure wrappers - handler, storage - # - additional resources - # - function.json - # host.json - # requirements.txt/package.json def package_code( self, directory: str, @@ -125,7 +138,31 @@ def package_code( is_cached: bool, container_deployment: bool, ) -> Tuple[str, int, str]: - + """ + Package benchmark code for Azure Functions. + + The directory structure for Azure Functions is: + handler/ + - source files (e.g., benchmark code) + - Azure wrappers (handler.py/js, storage.py if used) + - additional resources + - function.json (bindings configuration) + host.json (host configuration) + requirements.txt / package.json (dependencies) + + Python packages are expected to be in .python_packages/ after installation. + + :param directory: Directory containing the benchmark code. + :param language_name: Name of the programming language. + :param language_version: Version of the programming language. + :param architecture: CPU architecture (not used by Azure Functions packaging). + :param benchmark: Name of the benchmark. + :param is_cached: Flag indicating if the code is cached (not directly used in packaging logic). + :param container_deployment: Flag indicating if deploying as a container image (not supported). + :return: Tuple containing the path to the directory (Azure zips it implicitly), + size of the directory in bytes, and an empty container URI string. + :raises NotImplementedError: If container_deployment is True. + """ container_uri = "" if container_deployment: @@ -177,6 +214,10 @@ def package_code( json.dump(default_host_json, open(os.path.join(directory, "host.json"), "w"), indent=2) code_size = Benchmark.directory_size(directory) + # Azure CLI zips the function code automatically during publish. + # We return the directory path and its size. + # The execute command for zipping is not strictly necessary for Azure deployment itself + # but might be kept for consistency or other internal uses. execute("zip -qu -r9 {}.zip * .".format(benchmark), shell=True, cwd=directory) return directory, code_size, container_uri @@ -187,6 +228,20 @@ def publish_function( container_dest: str, repeat_on_failure: bool = False, ) -> str: + """ + Publish function code to Azure Functions. + + Uses Azure CLI (`func azure functionapp publish`) to deploy the code. + Can repeat on failure, which is useful for delays in Azure cache updates. + + :param function: Function object. + :param code_package: Benchmark object containing language details. + :param container_dest: Path within the Azure CLI Docker container where code is mounted. + :param repeat_on_failure: If True, retry publishing if the function app is not found. + :return: URL of the published HTTP-triggered function. + :raises RuntimeError: If publishing fails and repeat_on_failure is False, + or if the function URL cannot be retrieved. 
+ """ success = False url = "" self.logging.info("Attempting publish of function {}".format(function.name)) @@ -245,17 +300,6 @@ def publish_function( raise e return url - """ - Publish function code on Azure. - Boolean flag enables repeating publish operation until it succeeds. - Useful for publish immediately after function creation where it might - take from 30-60 seconds for all Azure caches to be updated. - - :param name: function name - :param repeat_on_failure: keep repeating if command fails on unknown name. - :return: URL to reach HTTP-triggered function - """ - def update_function( self, function: Function, @@ -263,7 +307,18 @@ def update_function( container_deployment: bool, container_uri: str, ): + """ + Update an Azure Function with new code and configuration. + + This involves updating environment variables and then publishing the new code package. + It also ensures an HTTP trigger is correctly associated with the function's URL. + :param function: Function object to update. + :param code_package: Benchmark object with new code and configuration. + :param container_deployment: Flag for container deployment (not supported). + :param container_uri: Container URI (not used). + :raises NotImplementedError: If container_deployment is True. + """ if container_deployment: raise NotImplementedError("Container deployment is not supported in Azure") @@ -294,6 +349,17 @@ def update_function( function.add_trigger(trigger) def update_envs(self, function: Function, code_package: Benchmark, env_variables: dict = {}): + """ + Update environment variables for an Azure Function. + + Sets variables for NoSQL database access (CosmosDB) and Azure Storage + if the benchmark uses them. Preserves existing non-SeBS managed variables. + + :param function: Function object. + :param code_package: Benchmark object. + :param env_variables: Additional environment variables to set/override. + :raises RuntimeError: If retrieving or setting environment variables fails. + """ envs = {} if code_package.uses_nosql: @@ -378,12 +444,29 @@ def update_envs(self, function: Function, code_package: Benchmark, env_variables raise e def update_function_configuration(self, function: Function, code_package: Benchmark): + """ + Update function's memory and timeout configuration. + + Note: Currently, this is not supported for Azure Functions via SeBS. + A warning is logged. + + :param function: Function object. + :param code_package: Benchmark object. + """ # FIXME: this does nothing currently - we don't specify timeout self.logging.warning( "Updating function's memory and timeout configuration is not supported." ) def _mount_function_code(self, code_package: Benchmark) -> str: + """ + Mount the function code package into the Azure CLI Docker container. + + Generates a unique destination path within the container's /mnt/function directory. + + :param code_package: Benchmark object containing the code location. + :return: Destination path within the Docker container. + """ dest = os.path.join("/mnt", "function", uuid.uuid4().hex) self.cli_instance.upload_package(code_package.code_location, dest) return dest @@ -392,11 +475,20 @@ def default_function_name( self, code_package: Benchmark, resources: Optional[Resources] = None ) -> str: """ - Functionapp names must be globally unique in Azure. + Generate a default function name for Azure Functions. + + Function app names must be globally unique in Azure. 
+ The name is constructed using SeBS prefix, resource ID, benchmark name, + language, and version, with dots and underscores replaced by hyphens. + + :param code_package: Benchmark object. + :param resources: Optional Resources object (uses self.config.resources if None). + :return: Default function name string. """ + current_resources = resources if resources else self.config.resources func_name = ( "sebs-{}-{}-{}-{}".format( - self.config.resources.resources_id, + current_resources.resources_id, code_package.benchmark, code_package.language_name, code_package.language_version, @@ -413,7 +505,20 @@ def create_function( container_deployment: bool, container_uri: str, ) -> AzureFunction: - + """ + Create or update an Azure Function app. + + If the function app doesn't exist, it's created with necessary configurations + (resource group, storage account, runtime). Then, the function code is deployed. + If it exists, it's updated. + + :param code_package: Benchmark object. + :param func_name: Desired name for the function app. + :param container_deployment: Flag for container deployment (not supported). + :param container_uri: Container URI (not used). + :return: AzureFunction object. + :raises NotImplementedError: If container_deployment is True. + """ if container_deployment: raise NotImplementedError("Container deployment is not supported in Azure") @@ -497,7 +602,13 @@ def create_function( return function def cached_function(self, function: Function): + """ + Configure a cached Azure Function instance. + Sets up logging handlers and the data storage account for its triggers. + + :param function: Function object (expected to be AzureFunction). + """ data_storage_account = self.config.resources.data_storage_account(self.cli_instance) for trigger in function.triggers_all(): azure_trigger = cast(AzureTrigger, trigger) @@ -512,7 +623,18 @@ def download_metrics( requests: Dict[str, ExecutionResult], metrics: Dict[str, dict], ): + """ + Download invocation metrics from Azure Application Insights. + Queries App Insights for request logs and custom dimensions (like FunctionExecutionTimeMs) + to populate ExecutionResult objects. + + :param function_name: Name of the Azure Function app. + :param start_time: Start timestamp for querying logs. + :param end_time: End timestamp for querying logs. + :param requests: Dictionary of request IDs to ExecutionResult objects. + :param metrics: Dictionary to store additional metrics (not directly used here). + """ self.cli_instance.install_insights() resource_group = self.config.resources.resource_group(self.cli_instance) @@ -585,13 +707,33 @@ def download_metrics( # TODO: query performance counters for mem def _enforce_cold_start(self, function: Function, code_package: Benchmark): + """ + Helper method to enforce a cold start for a single Azure Function. + + Updates function environment variables with a unique 'ForceColdStart' value. + Note: The effectiveness of this method for ensuring cold starts might depend + on Azure's internal behavior and caching. The commented-out `update_function` + call suggests that simply updating envs might not always be sufficient. + :param function: Function object. + :param code_package: Benchmark object. + """ self.update_envs(function, code_package, {"ForceColdStart": str(self.cold_start_counter)}) # FIXME: is this sufficient to enforce cold starts? 
# self.update_function(function, code_package, False, "") def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + """ + Enforce cold starts for a list of Azure Functions. + + Increments a counter and updates function configurations to attempt + to ensure the next invocation is a cold start. A sleep is added to allow + changes to propagate. + + :param functions: List of Function objects. + :param code_package: Benchmark object. + """ self.cold_start_counter += 1 for func in functions: self._enforce_cold_start(func, code_package) @@ -599,12 +741,17 @@ def enforce_cold_start(self, functions: List[Function], code_package: Benchmark) time.sleep(20) - """ - The only implemented trigger at the moment is HTTPTrigger. - It is automatically created for each function. - """ - def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + """ + Create a trigger for an Azure Function. + + Note: This method is not currently implemented. HTTP triggers are typically + created and managed during the function deployment and update process. + + :param function: Function object. + :param trigger_type: Type of trigger to create. + :raises NotImplementedError: This method is not yet implemented for Azure. + """ raise NotImplementedError() diff --git a/sebs/azure/blob_storage.py b/sebs/azure/blob_storage.py index 079e72d33..db8ae46c2 100644 --- a/sebs/azure/blob_storage.py +++ b/sebs/azure/blob_storage.py @@ -10,12 +10,15 @@ class BlobStorage(PersistentStorage): + """Azure Blob Storage persistent storage implementation.""" @staticmethod def typename() -> str: + """Return the type name of the storage implementation.""" return "Azure.BlobStorage" @staticmethod def deployment_name(): + """Return the deployment name for Azure (azure).""" return "azure" def __init__( @@ -26,16 +29,32 @@ def __init__( conn_string: str, replace_existing: bool, ): + """ + Initialize Azure Blob Storage client. + + :param region: Azure region (used by parent class, not directly by BlobServiceClient). + :param cache_client: Cache client instance. + :param resources: Cloud resources configuration. + :param conn_string: Azure Storage connection string. + :param replace_existing: Flag to replace existing files in containers. + """ super().__init__(region, cache_client, resources, replace_existing) self.client: BlobServiceClient = BlobServiceClient.from_connection_string(conn_string) - """ - Internal implementation of creating a new container. - """ - def _create_bucket( self, name: str, containers: List[str] = [], randomize_name: bool = False ) -> str: + """ + Internal implementation of creating a new Azure Blob Storage container. + + Checks if a container with a similar name already exists. + Randomizes name if requested. + + :param name: Desired base name for the container. + :param containers: List of existing container names to check against. + :param randomize_name: If True, append a random string to the container name. + :return: Name of the created or existing container. + """ for c in containers: if name in c: self.logging.info("Container {} for {} already exists, skipping.".format(c, name)) @@ -47,14 +66,25 @@ def _create_bucket( self.logging.info("Created container {}".format(name)) return name - """ - Azure does not allow dots in container names. - """ - def correct_name(self, name: str) -> str: + """ + Correct a bucket/container name to comply with Azure Blob Storage naming rules. + Azure does not allow dots in container names, so they are replaced with hyphens. 
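+
+ Example (illustrative; the bucket name is a placeholder)::
+
+     storage.correct_name("sebs.benchmarks.input")  # -> "sebs-benchmarks-input"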
+ + :param name: Original bucket/container name. + :return: Corrected name with dots replaced by hyphens. + """ return name.replace(".", "-") def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: + """ + List Azure Blob Storage containers. + + Can filter by a starting prefix if `bucket_name` is provided. + + :param bucket_name: Optional prefix to filter container names. + :return: List of container names. + """ if bucket_name is not None: return [ container["name"] @@ -63,7 +93,17 @@ def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: else: return [container["name"] for container in self.client.list_containers()] - def uploader_func(self, container_idx, file, filepath): + def uploader_func(self, container_idx: int, file: str, filepath: str): + """ + Upload a file to an Azure Blob Storage container, used as a callback. + + Skips upload if using cached containers and not replacing existing files. + Constructs the blob key using input prefixes. + + :param container_idx: Index of the input path/prefix. + :param file: Blob name (filename) within the container. + :param filepath: Local path to the file to upload. + """ # Skip upload when using cached containers if self.cached and not self.replace_existing: return @@ -82,51 +122,80 @@ def uploader_func(self, container_idx, file, filepath): client.upload_blob(data=file_data, overwrite=True) self.logging.info("Upload {} to {}".format(filepath, container_name)) - """ - Download file from bucket. - - :param container_name: - :param file: - :param filepath: - """ - def download(self, container_name: str, key: str, filepath: str): + """ + Download a blob from an Azure Blob Storage container to a local file. + + :param container_name: Name of the Azure Blob Storage container. + :param key: Blob key (path within the container). + :param filepath: Local path to save the downloaded file. + """ self.logging.info("Download {}:{} to {}".format(container_name, key, filepath)) client = self.client.get_blob_client(container_name, key) with open(filepath, "wb") as download_file: download_file.write(client.download_blob().readall()) def upload(self, container_name: str, filepath: str, key: str): + """ + Upload a file to a specified Azure Blob Storage container. + + :param container_name: Name of the Azure Blob Storage container. + :param filepath: Local path to the file. + :param key: Blob key (path within the container). + """ self.logging.info("Upload {} to {}".format(filepath, container_name)) client = self.client.get_blob_client(container_name, key) with open(filepath, "rb") as upload_file: - client.upload_blob(upload_file) # type: ignore + client.upload_blob(upload_file, overwrite=True) # type: ignore def exists_bucket(self, container: str) -> bool: + """ + Check if an Azure Blob Storage container exists. + + :param container: Name of the container. + :return: True if the container exists, False otherwise. + """ return self.client.get_container_client(container).exists() - """ - Return list of files in a container. + def list_bucket(self, container: str, prefix: str = "") -> List[str]: + """ + Return list of blobs in an Azure Blob Storage container. - :param container: - :return: list of file names. empty if container empty - """ + Can filter by a prefix. - def list_bucket(self, container: str, prefix: str = ""): + :param container: Name of the container. + :param prefix: Optional prefix to filter blob names. + :return: List of blob names. Empty if container is empty or no blobs match the prefix. 
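+
+ Example (illustrative; container and blob names are placeholders)::
+
+     keys = storage.list_bucket("benchmark-data", prefix="input/")
+     # e.g. ["input/file1.txt", "input/file2.txt"]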
+ """ objects = list( map( lambda x: x["name"], - self.client.get_container_client(container).list_blobs(), + self.client.get_container_client(container).list_blobs(name_starts_with=prefix), ) ) - return [x for x in objects if prefix in x] + # The list_blobs with name_starts_with already filters by prefix at the API level. + # The original list comprehension `[x for x in objects if prefix in x]` + # would do a substring match which might not be the intended behavior + # if prefix is meant to be a path prefix. + # Assuming name_starts_with is sufficient. + return objects def clean_bucket(self, bucket: str): + """ + Delete all blobs within an Azure Blob Storage container. + + :param bucket: Name of the container to clean. + """ self.logging.info("Clean output container {}".format(bucket)) container_client = self.client.get_container_client(bucket) - blobs = list(map(lambda x: x["name"], container_client.list_blobs())) - if len(blobs) > 0: - container_client.delete_blobs(*blobs) + blobs_to_delete = [blob["name"] for blob in container_client.list_blobs()] + if blobs_to_delete: + container_client.delete_blobs(*blobs_to_delete) def remove_bucket(self, bucket: str): + """ + Delete an Azure Blob Storage container. The container must be empty or cleaning should be handled. + + :param bucket: Name of the container to delete. + """ self.client.get_container_client(bucket).delete_container() diff --git a/sebs/azure/cli.py b/sebs/azure/cli.py index b875ee029..a9d43e792 100644 --- a/sebs/azure/cli.py +++ b/sebs/azure/cli.py @@ -10,8 +10,23 @@ class AzureCLI(LoggingBase): + """ + Manages interactions with Azure CLI through a Docker container. + + This class starts a Docker container running the Azure CLI, allowing for + execution of Azure commands, login, and package uploads. + """ def __init__(self, system_config: SeBSConfig, docker_client: docker.client): + """ + Initialize AzureCLI and start the Docker container. + + Pulls the Azure CLI Docker image if not found locally, then runs a + container in detached mode. + :param system_config: SeBS system configuration. + :param docker_client: Docker client instance. + :raises RuntimeError: If Docker image pull fails. + """ super().__init__() repo_name = system_config.docker_repository() @@ -52,14 +67,17 @@ def __init__(self, system_config: SeBSConfig, docker_client: docker.client): @staticmethod def typename() -> str: + """Return the type name of this class.""" return "Azure.CLI" - """ - Execute the given command in Azure CLI. - Throws an exception on failure (commands are expected to execute succesfully). - """ + def execute(self, cmd: str) -> bytes: + """ + Execute a command in the Azure CLI Docker container. - def execute(self, cmd: str): + :param cmd: The command string to execute. + :return: The standard output of the command as bytes. + :raises RuntimeError: If the command execution fails (non-zero exit code). + """ exit_code, out = self.docker_instance.exec_run(cmd, user="docker_user") if exit_code != 0: raise RuntimeError( @@ -69,11 +87,17 @@ def execute(self, cmd: str): ) return out - """ - Run azure login command on Docker instance. - """ - def login(self, appId: str, tenant: str, password: str) -> bytes: + """ + Log in to Azure using service principal credentials. + + Executes `az login` within the Docker container. + + :param appId: Application ID of the service principal. + :param tenant: Tenant ID. + :param password: Password/secret of the service principal. + :return: The output of the login command. 
+ """ result = self.execute( "az login -u {0} --service-principal --tenant {1} -p {2}".format( appId, @@ -85,37 +109,34 @@ def login(self, appId: str, tenant: str, password: str) -> bytes: return result def upload_package(self, directory: str, dest: str): - """ - This is not an efficient and memory-intensive implementation. - So far, we didn't have very large functions that require many gigabytes. + Upload a directory as a tar.gz archive to the Azure CLI Docker container. - Since docker-py does not support a straightforward copy, and we can't - put_archive in chunks. + This implementation reads the entire tar.gz archive into memory before + uploading. This is not efficient for very large function packages. + Potential solutions for large archives include: + 1. Manually calling `docker cp` and decompressing within the container. + 2. Committing the Docker container and restarting with a new mount volume. - If we end up having problems because of the archive size, there are two - potential solutions: - (1) manually call docker cp and decompress - (2) commit the docker container and restart with a new mount volume. + :param directory: Path to the local directory to upload. + :param dest: Destination path within the Docker container. """ handle = io.BytesIO() with tarfile.open(fileobj=handle, mode="w:gz") as tar: for f in os.listdir(directory): tar.add(os.path.join(directory, f), arcname=f) - # shutil.make_archive(, 'zip', directory) # move to the beginning of memory before writing handle.seek(0) self.execute("mkdir -p {}".format(dest)) self.docker_instance.put_archive(path=dest, data=handle.read()) def install_insights(self): + """Install the Application Insights extension for Azure CLI if not already installed.""" if not self._insights_installed: self.execute("az extension add --name application-insights") - - """ - Shutdowns Docker instance. - """ + self._insights_installed = True def shutdown(self): + """Stop the Azure CLI Docker container.""" self.logging.info("Stopping Azure manage Docker instance") self.docker_instance.stop() diff --git a/sebs/azure/cloud_resources.py b/sebs/azure/cloud_resources.py index e0d2a1ddd..715258e0b 100644 --- a/sebs/azure/cloud_resources.py +++ b/sebs/azure/cloud_resources.py @@ -4,26 +4,40 @@ from sebs.azure.cli import AzureCLI """ - Keep a list of deployed special resources in Azure cloud. +Manages information about deployed special resources in Azure, specifically CosmosDB accounts. - Currently, we have here CosmosDB accounts that require special handling. +This module provides a class to encapsulate CosmosDB account details and methods +for querying and serializing this information. """ class CosmosDBAccount: + """ + Represents an Azure CosmosDB account with its name, URL, and credentials. + """ @property def account_name(self) -> str: + """The name of the CosmosDB account.""" return self._account_name @property def url(self) -> str: + """The document endpoint URL of the CosmosDB account.""" return self._url @property def credential(self) -> str: + """The primary master key for accessing the CosmosDB account.""" return self._credential def __init__(self, account_name: str, url: str, credential: str): + """ + Initialize a CosmosDBAccount instance. + + :param account_name: The name of the CosmosDB account. + :param url: The document endpoint URL. + :param credential: The primary master key. 
+ """ super().__init__() self._account_name = account_name self._url = url @@ -31,13 +45,29 @@ def __init__(self, account_name: str, url: str, credential: str): @staticmethod def from_cache(account_name: str, url: str, credential: str) -> "CosmosDBAccount": + """ + Create a CosmosDBAccount instance from cached values. + + :param account_name: The name of the CosmosDB account. + :param url: The document endpoint URL. + :param credential: The primary master key. + :return: A CosmosDBAccount instance. + """ return CosmosDBAccount(account_name, url, credential) @staticmethod def from_allocation( account_name: str, resource_group: str, cli_instance: AzureCLI, url: Optional[str] ) -> "CosmosDBAccount": - + """ + Create a CosmosDBAccount instance by querying Azure for URL and credentials if not provided. + + :param account_name: The name of the CosmosDB account. + :param resource_group: The resource group where the CosmosDB account resides. + :param cli_instance: An instance of AzureCLI for executing commands. + :param url: Optional pre-fetched URL. If None, it will be queried. + :return: A CosmosDBAccount instance. + """ if url is None: url = CosmosDBAccount.query_url( account_name, @@ -55,7 +85,14 @@ def from_allocation( @staticmethod def query_url(account_name: str, resource_group: str, cli_instance: AzureCLI) -> str: - + """ + Query Azure for the document endpoint URL of a CosmosDB account. + + :param account_name: The name of the CosmosDB account. + :param resource_group: The resource group of the CosmosDB account. + :param cli_instance: An AzureCLI instance. + :return: The document endpoint URL string. + """ # Find the endpoint URL ret = cli_instance.execute( f" az cosmosdb show --name {account_name} " f" --resource-group {resource_group} " @@ -65,7 +102,14 @@ def query_url(account_name: str, resource_group: str, cli_instance: AzureCLI) -> @staticmethod def query_credentials(account_name: str, resource_group: str, cli_instance: AzureCLI) -> str: - + """ + Query Azure for the primary master key of a CosmosDB account. + + :param account_name: The name of the CosmosDB account. + :param resource_group: The resource group of the CosmosDB account. + :param cli_instance: An AzureCLI instance. + :return: The primary master key string. + """ # Read the master key to access CosmosDB account ret = cli_instance.execute( f" az cosmosdb keys list --name {account_name} " f" --resource-group {resource_group} " @@ -76,6 +120,11 @@ def query_credentials(account_name: str, resource_group: str, cli_instance: Azur return credential def serialize(self) -> dict: + """ + Serialize the CosmosDBAccount instance to a dictionary. + + :return: A dictionary containing account_name, url, and credential. + """ return { "account_name": self._account_name, "url": self._url, @@ -84,4 +133,10 @@ def serialize(self) -> dict: @staticmethod def deserialize(obj: dict) -> "CosmosDBAccount": + """ + Deserialize a CosmosDBAccount instance from a dictionary. + + :param obj: A dictionary containing 'account_name', 'url', and 'credential'. + :return: A CosmosDBAccount instance. 
+ """ return CosmosDBAccount.from_cache(obj["account_name"], obj["url"], obj["credential"]) diff --git a/sebs/azure/config.py b/sebs/azure/config.py index 9aef0d8c0..5b90b2f21 100644 --- a/sebs/azure/config.py +++ b/sebs/azure/config.py @@ -14,7 +14,7 @@ class AzureCredentials(Credentials): - + """Azure service principal credentials.""" _appId: str _tenant: str _password: str @@ -22,6 +22,14 @@ class AzureCredentials(Credentials): def __init__( self, appId: str, tenant: str, password: str, subscription_id: Optional[str] = None ): + """ + Initialize Azure credentials. + + :param appId: Application ID of the service principal. + :param tenant: Tenant ID. + :param password: Password/secret of the service principal. + :param subscription_id: Optional Azure subscription ID. + """ super().__init__() self._appId = appId self._tenant = tenant @@ -30,24 +38,36 @@ def __init__( @property def appId(self) -> str: + """Application ID of the service principal.""" return self._appId @property def tenant(self) -> str: + """Tenant ID.""" return self._tenant @property def password(self) -> str: + """Password/secret of the service principal.""" return self._password @property def subscription_id(self) -> str: + """Azure subscription ID.""" assert self._subscription_id is not None return self._subscription_id @subscription_id.setter def subscription_id(self, subscription_id: str): + """ + Set the Azure subscription ID. + + Logs an error and raises RuntimeError if the new subscription ID + conflicts with an existing one from the cache. + :param subscription_id: The Azure subscription ID. + :raises RuntimeError: If the new subscription ID conflicts with a cached one. + """ if self._subscription_id is not None and subscription_id != self._subscription_id: self.logging.error( f"The subscription id {subscription_id} from provided " @@ -64,15 +84,34 @@ def subscription_id(self, subscription_id: str): @property def has_subscription_id(self) -> bool: + """Check if the subscription ID has been set.""" return self._subscription_id is not None @staticmethod def initialize(dct: dict, subscription_id: Optional[str]) -> "AzureCredentials": + """ + Initialize AzureCredentials from a dictionary. + + :param dct: Dictionary containing 'appId', 'tenant', and 'password'. + :param subscription_id: Optional Azure subscription ID. + :return: AzureCredentials instance. + """ return AzureCredentials(dct["appId"], dct["tenant"], dct["password"], subscription_id) @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: + """ + Deserialize Azure credentials from configuration or environment variables. + + Prioritizes credentials from the config dictionary, then environment variables. + Uses cached subscription ID if available. + :param config: Configuration dictionary. + :param cache: Cache object for retrieving cached subscription ID. + :param handlers: Logging handlers. + :return: AzureCredentials instance. + :raises RuntimeError: If credentials are not found. + """ cached_config = cache.get_config("azure") ret: AzureCredentials old_subscription_id: Optional[str] = None @@ -101,16 +140,34 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Creden return ret def serialize(self) -> dict: + """ + Serialize Azure credentials to a dictionary. + + :return: Dictionary containing the subscription ID. + """ out = {"subscription_id": self.subscription_id} return out def update_cache(self, cache_client: Cache): + """ + Update the cache with the serialized Azure credentials. 
+ + :param cache_client: Cache client instance. + """ cache_client.update_config(val=self.serialize(), keys=["azure", "credentials"]) class AzureResources(Resources): + """Manages Azure resources like resource groups, storage accounts, and CosmosDB accounts.""" class Storage: + """Represents an Azure Storage account with its name and connection string.""" def __init__(self, account_name: str, connection_string: str): + """ + Initialize an Azure Storage account representation. + + :param account_name: Name of the storage account. + :param connection_string: Connection string for the storage account. + """ super().__init__() self.account_name = account_name self.connection_string = connection_string @@ -118,23 +175,40 @@ def __init__(self, account_name: str, connection_string: str): # FIXME: 3.7+ migration with future annotations @staticmethod def from_cache(account_name: str, connection_string: str) -> "AzureResources.Storage": + """ + Create an AzureResources.Storage instance from cached values. + + :param account_name: Name of the storage account. + :param connection_string: Connection string. + :return: AzureResources.Storage instance. + """ assert connection_string, "Empty connection string for account {}".format(account_name) return AzureResources.Storage(account_name, connection_string) @staticmethod def from_allocation(account_name: str, cli_instance: AzureCLI) -> "AzureResources.Storage": + """ + Create an AzureResources.Storage instance by querying its connection string. + + :param account_name: Name of the storage account. + :param cli_instance: AzureCLI instance. + :return: AzureResources.Storage instance. + """ connection_string = AzureResources.Storage.query_connection_string( account_name, cli_instance ) ret = AzureResources.Storage(account_name, connection_string) return ret - """ - Query the storage string in Azure using selected storage account. - """ - @staticmethod def query_connection_string(account_name: str, cli_instance: AzureCLI) -> str: + """ + Query the connection string for an Azure Storage account. + + :param account_name: Name of the storage account. + :param cli_instance: AzureCLI instance. + :return: Connection string. + """ ret = cli_instance.execute( "az storage account show-connection-string --name {}".format(account_name) ) @@ -143,10 +217,21 @@ def query_connection_string(account_name: str, cli_instance: AzureCLI) -> str: return connection_string def serialize(self) -> dict: + """ + Serialize the Storage instance to a dictionary. + + :return: Dictionary representation of the Storage instance. + """ return vars(self) @staticmethod def deserialize(obj: dict) -> "AzureResources.Storage": + """ + Deserialize an AzureResources.Storage instance from a dictionary. + + :param obj: Dictionary representation. + :return: AzureResources.Storage instance. + """ return AzureResources.Storage.from_cache(obj["account_name"], obj["connection_string"]) # FIXME: 3.7 Python, future annotations @@ -157,6 +242,14 @@ def __init__( data_storage_account: Optional["AzureResources.Storage"] = None, cosmosdb_account: Optional[CosmosDBAccount] = None, ): + """ + Initialize AzureResources. + + :param resource_group: Optional name of the resource group. + :param storage_accounts: List of function storage accounts. + :param data_storage_account: Storage account for benchmark data. + :param cosmosdb_account: CosmosDB account for NoSQL benchmarks. 
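+
+ Example (illustrative; "westeurope" is a placeholder region)::
+
+     resources = AzureResources()
+     resources.set_region("westeurope")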
+ """ super().__init__(name="azure") self._resource_group = resource_group self._storage_accounts = storage_accounts @@ -164,20 +257,30 @@ def __init__( self._cosmosdb_account = cosmosdb_account def set_region(self, region: str): + """ + Set the Azure region for these resources. + + :param region: Azure region name (e.g., "westeurope"). + """ self._region = region @property def storage_accounts(self) -> List["AzureResources.Storage"]: + """List of Azure Storage accounts used for function code deployment.""" return self._storage_accounts - """ - Locate resource group name in config. - If not found, then create a new resource group with uuid-based name. + def resource_group(self, cli_instance: AzureCLI) -> str: + """ + Get or create the Azure Resource Group for SeBS. + + If a resource group name is not already set, it generates one based on + the resource ID and creates it in the configured region if it doesn't exist. Requires Azure CLI instance in Docker. - """ - def resource_group(self, cli_instance: AzureCLI) -> str: + :param cli_instance: AzureCLI instance. + :return: Name of the resource group. + """ # Create resource group if not known if not self._resource_group: # Only underscore and alphanumeric characters are allowed @@ -199,7 +302,15 @@ def resource_group(self, cli_instance: AzureCLI) -> str: return self._resource_group def list_resource_groups(self, cli_instance: AzureCLI) -> List[str]: + """ + List SeBS-related resource groups in the configured region. + + Filters groups starting with "sebs_resource_group_". + :param cli_instance: AzureCLI instance. + :return: List of resource group names. + :raises RuntimeError: If parsing the Azure CLI response fails. + """ ret = cli_instance.execute( "az group list --query " "\"[?starts_with(name,'sebs_resource_group_') && location=='{0}']\"".format( @@ -215,7 +326,14 @@ def list_resource_groups(self, cli_instance: AzureCLI) -> List[str]: raise RuntimeError("Failed to parse response from Azure CLI!") def delete_resource_group(self, cli_instance: AzureCLI, name: str, wait: bool = True): + """ + Delete an Azure Resource Group. + :param cli_instance: AzureCLI instance. + :param name: Name of the resource group to delete. + :param wait: If True, wait for the deletion to complete. + :raises RuntimeError: If deletion fails. + """ cmd = "az group delete -y --name {0}".format(name) if not wait: cmd += " --no-wait" @@ -225,15 +343,19 @@ def delete_resource_group(self, cli_instance: AzureCLI, name: str, wait: bool = self.logging.error(ret.decode()) raise RuntimeError("Failed to delete the resource group!") - """ - Find or create a serverless CosmosDB account. - If not found, then create a new one based on the current resource ID. - Restriction: account names must be globally unique. + def cosmosdb_account(self, cli_instance: AzureCLI) -> CosmosDBAccount: + """ + Get or create a serverless Azure CosmosDB account. + + If an account name is not already set, it generates one based on the + resource ID (globally unique) and creates it if it doesn't exist. Requires Azure CLI instance in Docker. - """ - def cosmosdb_account(self, cli_instance: AzureCLI) -> CosmosDBAccount: + :param cli_instance: AzureCLI instance. + :return: CosmosDBAccount instance. + :raises RuntimeError: If CosmosDB account creation or query fails. 
+ """ # Create resource group if not known if not self._cosmosdb_account: @@ -264,7 +386,7 @@ def cosmosdb_account(self, cli_instance: AzureCLI) -> CosmosDBAccount: self.logging.info(f"Allocated CosmosDB account {account_name}") except Exception: self.logging.error("Failed to parse the response!") - self.logging.error(ret.decode()) + self.logging.error(ret.decode()) # type: ignore raise RuntimeError("Failed to parse response from Azure CLI!") self._cosmosdb_account = CosmosDBAccount.from_allocation( @@ -274,7 +396,15 @@ def cosmosdb_account(self, cli_instance: AzureCLI) -> CosmosDBAccount: return self._cosmosdb_account def list_cosmosdb_accounts(self, cli_instance: AzureCLI) -> Dict[str, str]: + """ + List SeBS-related CosmosDB accounts in the current resource group. + + Filters accounts starting with "sebs-cosmosdb-account". + :param cli_instance: AzureCLI instance. + :return: Dictionary mapping account names to their document endpoint URLs. + :raises RuntimeError: If parsing the Azure CLI response fails. + """ ret = cli_instance.execute( f" az cosmosdb list --resource-group {self._resource_group} " " --query \"[?starts_with(name,'sebs-cosmosdb-account')]\" " @@ -287,13 +417,15 @@ def list_cosmosdb_accounts(self, cli_instance: AzureCLI) -> Dict[str, str]: self.logging.error(ret.decode()) raise RuntimeError("Failed to parse response from Azure CLI!") - """ - Retrieve or create storage account associated with benchmark data. - Last argument allows to override the resource - useful when handling - a single instance through multiple threads using different clients sharing the same cache. - """ - def data_storage_account(self, cli_instance: AzureCLI) -> "AzureResources.Storage": + """ + Retrieve or create the Azure Storage account for benchmark input/output data. + + The account name is derived from the resource ID. + + :param cli_instance: AzureCLI instance. + :return: AzureResources.Storage instance for the data storage account. + """ if not self._data_storage_account: # remove non-numerical and non-alphabetic characters @@ -304,7 +436,13 @@ def data_storage_account(self, cli_instance: AzureCLI) -> "AzureResources.Storag return self._data_storage_account def list_storage_accounts(self, cli_instance: AzureCLI) -> List[str]: + """ + List all storage accounts in the current resource group. + :param cli_instance: AzureCLI instance. + :return: List of storage account names. + :raises RuntimeError: If parsing the Azure CLI response fails. + """ ret = cli_instance.execute( ("az storage account list --resource-group {0}").format( self.resource_group(cli_instance) @@ -318,12 +456,15 @@ def list_storage_accounts(self, cli_instance: AzureCLI) -> List[str]: self.logging.error(ret.decode()) raise RuntimeError("Failed to parse response from Azure CLI!") - """ - Create a new function storage account and add to the list. - """ - def add_storage_account(self, cli_instance: AzureCLI) -> "AzureResources.Storage": + """ + Create a new Azure Storage account for function code and add it to the list. + The account name is generated with a UUID to ensure uniqueness. + + :param cli_instance: AzureCLI instance. + :return: AzureResources.Storage instance for the new function storage account. + """ # Create account. Only alphanumeric characters are allowed # This one is used to store functions code - hence the name. 
uuid_name = str(uuid.uuid1())[0:8] @@ -333,15 +474,19 @@ def add_storage_account(self, cli_instance: AzureCLI) -> "AzureResources.Storage self._storage_accounts.append(account) return account - """ - Internal implementation of creating a new storage account. - The method does NOT update cache and - does NOT add the account to any resource collection. - """ - def _create_storage_account( self, cli_instance: AzureCLI, account_name: str ) -> "AzureResources.Storage": + """ + Internal implementation of creating a new Azure Storage account. + + Uses Standard_LRS SKU. This method does NOT update the cache or + add the account to any resource collection by itself. + + :param cli_instance: AzureCLI instance. + :param account_name: Desired name for the storage account. + :return: AzureResources.Storage instance for the created account. + """ sku = "Standard_LRS" self.logging.info("Starting allocation of storage account {}.".format(account_name)) cli_instance.execute( @@ -358,20 +503,27 @@ def _create_storage_account( self.logging.info("Storage account {} created.".format(account_name)) return AzureResources.Storage.from_allocation(account_name, cli_instance) - """ - Update the contents of the user cache. - The changes are directly written to the file system. + def update_cache(self, cache_client: Cache): + """ + Update the user cache with Azure resource details. - Update values: storage accounts, data storage accounts, resource groups. - """ + Saves storage accounts, data storage account, resource group, + and CosmosDB account configurations. + The changes are directly written to the file system. - def update_cache(self, cache_client: Cache): + :param cache_client: Cache client instance. + """ super().update_cache(cache_client) cache_client.update_config(val=self.serialize(), keys=["azure", "resources"]) @staticmethod def initialize(res: Resources, dct: dict): + """ + Initialize AzureResources from a dictionary (typically from cache or config file). + :param res: Resources object to initialize (cast to AzureResources). + :param dct: Dictionary containing resource configurations. + """ ret = cast(AzureResources, res) super(AzureResources, AzureResources).initialize(ret, dct) @@ -392,6 +544,11 @@ def initialize(res: Resources, dct: dict): ret._cosmosdb_account = CosmosDBAccount.deserialize(dct["cosmosdb_account"]) def serialize(self) -> dict: + """ + Serialize AzureResources to a dictionary. + + :return: Dictionary representation of AzureResources. + """ out = super().serialize() if len(self._storage_accounts) > 0: out["storage_accounts"] = [x.serialize() for x in self._storage_accounts] @@ -405,7 +562,16 @@ def serialize(self) -> dict: @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: + """ + Deserialize AzureResources from configuration or cache. + + Prioritizes cached configuration if available. + :param config: Configuration dictionary. + :param cache: Cache object. + :param handlers: Logging handlers. + :return: AzureResources instance. + """ cached_config = cache.get_config("azure") ret = AzureResources() # Load cached values @@ -426,28 +592,55 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resour class AzureConfig(Config): + """Azure specific configuration, including credentials and resources.""" def __init__(self, credentials: AzureCredentials, resources: AzureResources): + """ + Initialize AzureConfig. + + :param credentials: AzureCredentials instance. + :param resources: AzureResources instance. 
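+
+ Example (illustrative; ``credentials`` and ``resources`` are assumed to be already deserialized)::
+
+     config = AzureConfig(credentials, resources)
+     assert config.credentials is credentials and config.resources is resources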
+ """ super().__init__(name="azure") self._credentials = credentials self._resources = resources @property def credentials(self) -> AzureCredentials: + """Return the Azure credentials.""" return self._credentials @property def resources(self) -> AzureResources: + """Return the Azure resources configuration.""" return self._resources # FIXME: use future annotations (see sebs/faas/system) @staticmethod def initialize(cfg: Config, dct: dict): + """ + Initialize AzureConfig attributes from a dictionary. + + Sets the Azure region. + + :param cfg: Config object to initialize (cast to AzureConfig). + :param dct: Dictionary containing 'region'. + """ config = cast(AzureConfig, cfg) config._region = dct["region"] @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: + """ + Deserialize AzureConfig from configuration or cache. + Deserializes credentials and resources, then initializes the AzureConfig + object, prioritizing cached configuration. + + :param config: Configuration dictionary. + :param cache: Cache object. + :param handlers: Logging handlers. + :return: AzureConfig instance. + """ cached_config = cache.get_config("azure") # FIXME: use future annotations (see sebs/faas/system) credentials = cast(AzureCredentials, AzureCredentials.deserialize(config, cache, handlers)) @@ -465,19 +658,27 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config resources.set_region(config_obj.region) return config_obj - """ - Update the contents of the user cache. - The changes are directly written to the file system. + def update_cache(self, cache: Cache): + """ + Update the user cache with Azure configuration. - Update values: region. - """ + Saves region, credentials, and resources to the cache. + The changes are directly written to the file system. - def update_cache(self, cache: Cache): + :param cache: Cache object. + """ cache.update_config(val=self.region, keys=["azure", "region"]) self.credentials.update_cache(cache) self.resources.update_cache(cache) def serialize(self) -> dict: + """ + Serialize AzureConfig to a dictionary. + + Includes region, credentials, and resources. + + :return: Dictionary representation of AzureConfig. + """ out = { "name": "azure", "region": self._region, diff --git a/sebs/azure/cosmosdb.py b/sebs/azure/cosmosdb.py index 52f8086b1..6cdcf0e1b 100644 --- a/sebs/azure/cosmosdb.py +++ b/sebs/azure/cosmosdb.py @@ -13,30 +13,60 @@ @dataclass class BenchmarkResources: + """ + Dataclass to hold Azure CosmosDB resources specific to a benchmark. + Attributes: + database: Name of the CosmosDB database. + containers: List of container names within the database. + database_client: Optional CosmosDB DatabaseProxy instance (dynamically allocated, not cached). + """ database: str containers: List[str] # We allocate this dynamically - ignore when caching database_client: Optional[DatabaseProxy] = None def serialize(self) -> dict: + """ + Serialize BenchmarkResources to a dictionary for caching. + Excludes the database_client. + + :return: Dictionary with 'database' and 'containers'. + """ return {"database": self.database, "containers": self.containers} @staticmethod def deserialize(config: dict) -> "BenchmarkResources": + """ + Deserialize BenchmarkResources from a dictionary (typically from cache). + + :param config: Dictionary with 'database' and 'containers'. + :return: BenchmarkResources instance. 
+ """ return BenchmarkResources(database=config["database"], containers=config["containers"]) class CosmosDB(NoSQLStorage): + """Azure CosmosDB NoSQL storage implementation.""" @staticmethod def typename() -> str: + """Return the type name of the NoSQL storage implementation.""" return "Azure.CosmosDB" @staticmethod def deployment_name(): + """Return the deployment name for Azure (azure).""" return "azure" def __init__(self, cli: AzureCLI, cache_client: Cache, resources: AzureResources, region: str): + """ + Initialize CosmosDB client and internal resource tracking. + + :param cli: AzureCLI instance. + :param cache_client: Cache client instance. + :param resources: AzureResources instance. + :param region: Azure region. + """ super().__init__(region, cache_client, resources) self._cli_instance = cli self._resource_group = resources.resource_group(self._cli_instance) @@ -45,15 +75,26 @@ def __init__(self, cli: AzureCLI, cache_client: Cache, resources: AzureResources self._cosmos_client: Optional[CosmosClient] = None self._cosmosdb_account: Optional[CosmosDBAccount] = None - """ + def get_tables(self, benchmark: str) -> Dict[str, str]: + """ + Get the mapping of benchmark-specific table names to actual Azure CosmosDB container names. Azure requires no table mappings: the name of container is the same as benchmark name. - """ + Thus, an empty dictionary is returned as the names are directly used. - def get_tables(self, benchmark: str) -> Dict[str, str]: + :param benchmark: Name of the benchmark. + :return: Empty dictionary. + """ return {} def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: + """ + Get the actual Azure CosmosDB container name for a given benchmark and table alias. + In CosmosDB's case, the table alias is the container name if it's registered. + :param benchmark: Name of the benchmark. + :param table: Alias of the table (container name) used within the benchmark. + :return: Actual Azure CosmosDB container name, or None if not found for the benchmark. + """ if benchmark not in self._benchmark_resources: return None @@ -63,7 +104,14 @@ def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: return table def retrieve_cache(self, benchmark: str) -> bool: + """ + Retrieve benchmark-specific CosmosDB resource details (database, containers) from cache. + + Populates `_benchmark_resources` if cached data is found. + :param benchmark: Name of the benchmark. + :return: True if cache was retrieved, False otherwise. + """ if benchmark in self._benchmark_resources: return True @@ -75,13 +123,23 @@ def retrieve_cache(self, benchmark: str) -> bool: return False def update_cache(self, benchmark: str): + """ + Update the cache with the current benchmark-specific CosmosDB resource details. + :param benchmark: Name of the benchmark. + """ self.cache_client.update_nosql( self.deployment_name(), benchmark, self._benchmark_resources[benchmark].serialize() ) def cosmos_client(self) -> CosmosClient: + """ + Get or initialize the Azure CosmosDB client. + Retrieves CosmosDB account details (URL, credentials) if not already available. + + :return: CosmosClient instance. + """ if self._cosmos_client is None: self._cosmosdb_account = cast(AzureResources, self._cloud_resources).cosmosdb_account( @@ -95,13 +153,32 @@ def cosmos_client(self) -> CosmosClient: return self._cosmos_client def has_tables(self, benchmark: str) -> bool: + """ + Check if CosmosDB resources (database, containers) are registered for a benchmark. + + :param benchmark: Name of the benchmark. 
+ :return: True if resources are registered, False otherwise. + """ return benchmark in self._benchmark_resources def benchmark_database(self, benchmark: str) -> str: + """ + Get the name of the CosmosDB database used for a specific benchmark. + + :param benchmark: Name of the benchmark. + :return: Name of the CosmosDB database. + """ return self._benchmark_resources[benchmark].database def credentials(self) -> Tuple[str, str, str]: + """ + Get the credentials for the CosmosDB account. + Retrieves account name, URL, and primary key. Initializes the + CosmosDB account details if not already done. + + :return: Tuple containing (account_name, url, credential_key). + """ # An update of function that uses fully cached data will have # to initialize it separately # There were no prior actions that initialized this variable @@ -124,14 +201,26 @@ def write_to_table( primary_key: Tuple[str, str], secondary_key: Optional[Tuple[str, str]] = None, ): + """ + Write data to an Azure CosmosDB container. + + The secondary key, if provided, is expected to be named 'id' in CosmosDB. + + :param benchmark: Name of the benchmark. + :param table: Name of the container. + :param data: Dictionary containing the data to write. + :param primary_key: Tuple of (partition_key_name, partition_key_value). + :param secondary_key: Optional tuple for the item ID (item_id_name, item_id_value). + The item_id_name is ignored, 'id' is used. + """ res = self._benchmark_resources[benchmark] table_name = self._get_table_name(benchmark, table) assert table_name is not None data[primary_key[0]] = primary_key[1] # secondary key must have that name in CosmosDB - # FIXME: support both options - assert secondary_key is not None + # FIXME: support both options for naming the ID key + assert secondary_key is not None, "CosmosDB requires an 'id' field (secondary_key)." data["id"] = secondary_key[1] if res.database_client is None: @@ -143,24 +232,38 @@ def write_to_table( def create_table( self, benchmark: str, name: str, primary_key: str, _: Optional[str] = None ) -> str: + """ + Create an Azure CosmosDB container within a benchmark-specific database. + + If the database for the benchmark doesn't exist, it's created. + If the container doesn't exist, it's created with the specified primary key + as the partition key. The secondary key parameter is ignored for CosmosDB schema. + For some reason, creating the client is enough to verify existence of db/container. + We need to force the client to make some actions; that's why we call read. + + :param benchmark: Name of the benchmark. + :param name: Name of the container to create. + :param primary_key: Name of the attribute to use as the partition key. + :param _: Secondary key (ignored for CosmosDB container creation). + :return: Name of the created or existing container. + """ benchmark_resources = self._benchmark_resources.get(benchmark, None) if benchmark_resources is not None and name in benchmark_resources.containers: self.logging.info(f"Using cached CosmosDB container {name}") + # Ensure database_client is initialized if loaded from cache + if benchmark_resources.database_client is None: + benchmark_resources.database_client = self.cosmos_client().get_database_client(benchmark) + return name - """ - For some reason, creating the client is enough to verify existence of db/container. - We need to force the client to make some actions; that's why we call read. 
- """ # Each benchmark receives its own CosmosDB database if benchmark_resources is None: - # Get or allocate database try: db_client = self.cosmos_client().get_database_client(benchmark) - db_client.read() - + db_client.read() # Force action to check existence + self.logging.info(f"Using existing CosmosDB database {benchmark}") except CosmosResourceNotFoundError: self.logging.info(f"Creating CosmosDB database {benchmark}") db_client = self.cosmos_client().create_database(benchmark) @@ -169,19 +272,16 @@ def create_table( database=benchmark, database_client=db_client, containers=[] ) self._benchmark_resources[benchmark] = benchmark_resources - - if benchmark_resources.database_client is None: + elif benchmark_resources.database_client is None: # Data loaded from cache will miss database client benchmark_resources.database_client = self.cosmos_client().get_database_client( benchmark ) try: - - # verify it exists + # verify container exists by trying to read it benchmark_resources.database_client.get_container_client(name).read() self.logging.info(f"Using existing CosmosDB container {name}") - except CosmosResourceNotFoundError: self.logging.info(f"Creating CosmosDB container {name}") # no container with such name -> allocate @@ -189,12 +289,29 @@ def create_table( id=name, partition_key=PartitionKey(path=f"/{primary_key}") ) - benchmark_resources.containers.append(name) + if name not in benchmark_resources.containers: + benchmark_resources.containers.append(name) return name def clear_table(self, name: str) -> str: + """ + Clear all items from a CosmosDB container. + + Note: This method is not implemented. + + :param name: Name of the container to clear. + :raises NotImplementedError: This method is not yet implemented. + """ raise NotImplementedError() def remove_table(self, name: str) -> str: + """ + Remove a CosmosDB container. + + Note: This method is not implemented. + + :param name: Name of the container to remove. + :raises NotImplementedError: This method is not yet implemented. + """ raise NotImplementedError() diff --git a/sebs/azure/function.py b/sebs/azure/function.py index 61ef4c578..87cbad2df 100644 --- a/sebs/azure/function.py +++ b/sebs/azure/function.py @@ -3,6 +3,12 @@ class AzureFunction(Function): + """ + Represents an Azure Function. + + Extends the base Function class with Azure-specific attributes like the + storage account associated with the function's code. + """ def __init__( self, name: str, @@ -11,10 +17,27 @@ def __init__( function_storage: AzureResources.Storage, cfg: FunctionConfig, ): + """ + Initialize an AzureFunction instance. + + :param name: Name of the Azure Function app. + :param benchmark: Name of the benchmark this function belongs to. + :param code_hash: Hash of the deployed code package. + :param function_storage: AzureResources.Storage instance for the function's code storage. + :param cfg: FunctionConfig object with memory, timeout, etc. + """ super().__init__(benchmark, name, code_hash, cfg) self.function_storage = function_storage def serialize(self) -> dict: + """ + Serialize the AzureFunction instance to a dictionary. + + Includes Azure-specific attributes (function_storage) along with base + Function attributes. + + :return: Dictionary representation of the AzureFunction. + """ return { **super().serialize(), "function_storage": self.function_storage.serialize(), @@ -22,6 +45,14 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> Function: + """ + Deserialize an AzureFunction instance from a dictionary. 
+ + Typically used when loading function details from a cache. + + :param cached_config: Dictionary containing serialized AzureFunction data. + :return: A new AzureFunction instance. + """ cfg = FunctionConfig.deserialize(cached_config["config"]) ret = AzureFunction( cached_config["name"], diff --git a/sebs/azure/system_resources.py b/sebs/azure/system_resources.py index 0e3494d1c..c2482783b 100644 --- a/sebs/azure/system_resources.py +++ b/sebs/azure/system_resources.py @@ -14,12 +14,15 @@ class AzureSystemResources(SystemResources): + """Manages system-level resources for Azure, such as Blob Storage, CosmosDB, and Azure CLI.""" @staticmethod def typename() -> str: + """Return the type name of the system resources class.""" return "Azure.SystemResources" @property def config(self) -> AzureConfig: + """Return the Azure-specific configuration.""" return cast(AzureConfig, self._config) def __init__( @@ -30,6 +33,15 @@ def __init__( docker_client: docker.client, logger_handlers: LoggingHandlers, ): + """ + Initialize AzureSystemResources. + + :param system_config: SeBS system configuration. + :param config: Azure-specific configuration. + :param cache_client: Cache client instance. + :param docker_client: Docker client instance. + :param logger_handlers: Logging handlers. + """ super().__init__(config, cache_client, docker_client) self._logging_handlers = logger_handlers @@ -38,18 +50,19 @@ def __init__( self._cli_instance: Optional[AzureCLI] = None self._system_config = system_config - """ - Create wrapper object for Azure blob storage. - First ensure that storage account is created and connection string - is known. Then, create wrapper and create request number of buckets. + def get_storage(self, replace_existing: Optional[bool] = None) -> BlobStorage: + """ + Get or initialize the Azure Blob Storage client. - Requires Azure CLI instance in Docker to obtain storage account details. + Ensures that the data storage account is created and its connection string + is known. Creates the BlobStorage wrapper instance. - :param replace_existing: when true, replace existing files in input buckets - :return: Azure storage instance - """ + Requires Azure CLI instance to obtain storage account details if not cached. - def get_storage(self, replace_existing: Optional[bool] = None) -> BlobStorage: + :param replace_existing: If True, replace existing files in input buckets. + Defaults to False if None. + :return: BlobStorage instance. + """ if self._storage is None: self._storage = BlobStorage( self.config.region, @@ -64,6 +77,14 @@ def get_storage(self, replace_existing: Optional[bool] = None) -> BlobStorage: return self._storage def get_nosql_storage(self) -> CosmosDB: + """ + Get or initialize the Azure CosmosDB client. + + Creates a CosmosDB wrapper instance. Requires Azure CLI for initial setup + if resources are not cached. + + :return: CosmosDB instance. + """ if self._nosql_storage is None: self._nosql_storage = CosmosDB( self.cli_instance, self._cache_client, self.config.resources, self.config.region @@ -71,7 +92,14 @@ def get_nosql_storage(self) -> CosmosDB: return self._nosql_storage def _login_cli(self): + """ + Log in to Azure CLI using service principal credentials. + + Retrieves appId, tenant, and password from the configuration. + Sets the subscription ID in the credentials after successful login. + :raises RuntimeError: If no valid subscription is found or if multiple are found. 
+ """ assert self._cli_instance is not None output = self._cli_instance.login( @@ -90,22 +118,45 @@ def _login_cli(self): @property def cli_instance(self) -> AzureCLI: + """ + Get or initialize the Azure CLI wrapper instance. + If the CLI instance doesn't exist, it's created, and a login is performed. + This instance will be stopped on shutdown. + + :return: AzureCLI instance. + """ if self._cli_instance is None: self._cli_instance = AzureCLI(self._system_config, self._docker_client) - self._cli_instance_stop = True + self._cli_instance_stop = True # Mark that this instance owns the CLI lifecycle self._login_cli() return self._cli_instance def initialize_cli(self, cli: AzureCLI, login: bool = False): + """ + Initialize with an externally managed Azure CLI instance. + + This allows sharing a single AzureCLI Docker container across multiple + SeBS instances or components. The provided CLI instance will not be + stopped on shutdown by this AzureSystemResources instance. + + :param cli: An existing AzureCLI instance. + :param login: If True, perform Azure login using this instance's credentials. + """ self._cli_instance = cli - self._cli_instance_stop = False + self._cli_instance_stop = False # Mark that this instance does not own the CLI lifecycle if login: self._login_cli() def shutdown(self) -> None: + """ + Shutdown the Azure system resources. + + Stops the Azure CLI Docker container if it was started and is managed by + this instance. + """ if self._cli_instance and self._cli_instance_stop: self._cli_instance.shutdown() diff --git a/sebs/azure/triggers.py b/sebs/azure/triggers.py index 4296a5880..3aebdffed 100644 --- a/sebs/azure/triggers.py +++ b/sebs/azure/triggers.py @@ -6,41 +6,88 @@ class AzureTrigger(Trigger): + """ + Base class for Azure triggers. + + Stores the data storage account information, which might be needed by + some trigger types or the functions they invoke. + """ def __init__(self, data_storage_account: Optional[AzureResources.Storage] = None): + """ + Initialize an AzureTrigger. + + :param data_storage_account: Optional Azure Storage account for benchmark data. + """ super().__init__() self._data_storage_account = data_storage_account @property def data_storage_account(self) -> AzureResources.Storage: + """The Azure Storage account associated with benchmark data.""" assert self._data_storage_account return self._data_storage_account @data_storage_account.setter def data_storage_account(self, data_storage_account: AzureResources.Storage): + """Set the Azure Storage account for benchmark data.""" self._data_storage_account = data_storage_account class HTTPTrigger(AzureTrigger): + """ + Represents an HTTP trigger for an Azure Function, invoked via a URL. + """ def __init__(self, url: str, data_storage_account: Optional[AzureResources.Storage] = None): + """ + Initialize an HTTPTrigger. + + :param url: The invocation URL for the HTTP-triggered function. + :param data_storage_account: Optional Azure Storage account for benchmark data. + """ super().__init__(data_storage_account) self.url = url @staticmethod def trigger_type() -> Trigger.TriggerType: + """Return the type of this trigger (HTTP).""" return Trigger.TriggerType.HTTP def sync_invoke(self, payload: dict) -> ExecutionResult: + """ + Synchronously invoke the Azure Function via its HTTP endpoint. + :param payload: Input payload for the function (will be sent as JSON). + :return: ExecutionResult object containing invocation details and metrics. 
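+
+        Illustrative usage (the URL and payload are made up)::
+
+            trigger = HTTPTrigger("https://myapp.azurewebsites.net/api/handler")
+            result = trigger.sync_invoke({"size": "test"})  # blocks until the HTTP call returns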
+ """ return self._http_invoke(payload, self.url) def async_invoke(self, payload: dict) -> concurrent.futures.Future: + """ + Asynchronously invoke the Azure Function via its HTTP endpoint. + + Uses a ThreadPoolExecutor to perform the HTTP request in a separate thread. + + :param payload: Input payload for the function. + :return: A Future object representing the asynchronous invocation. + """ pool = concurrent.futures.ThreadPoolExecutor() fut = pool.submit(self.sync_invoke, payload) return fut def serialize(self) -> dict: + """ + Serialize the HTTPTrigger to a dictionary. + + :return: Dictionary representation of the trigger, including type and URL. + """ return {"type": "HTTP", "url": self.url} @staticmethod def deserialize(obj: dict) -> Trigger: + """ + Deserialize an HTTPTrigger from a dictionary. + + :param obj: Dictionary representation of the trigger, must contain 'url'. + :return: A new HTTPTrigger instance. + """ return HTTPTrigger(obj["url"]) diff --git a/sebs/benchmark.py b/sebs/benchmark.py index f159e820c..1b6fef22c 100644 --- a/sebs/benchmark.py +++ b/sebs/benchmark.py @@ -22,10 +22,38 @@ from sebs.faas.function import Language +""" +This module defines the `Benchmark` and `BenchmarkConfig` classes, which are +central to defining, configuring, and managing benchmarks within the SeBS framework. +It also includes an interface for benchmark input generation modules and a helper +function to load them. +""" + + class BenchmarkConfig: + """ + Configuration for a specific benchmark. + + Stores settings like timeout, memory allocation, supported languages, + and required SeBS modules (e.g., for storage or NoSQL access). + + Attributes: + _timeout: Execution timeout for the benchmark function in seconds. + _memory: Memory allocated to the benchmark function in MB. + _languages: List of supported programming languages (Language enums). + _modules: List of SeBS modules required by the benchmark. + """ def __init__( self, timeout: int, memory: int, languages: List["Language"], modules: List[BenchmarkModule] ): + """ + Initialize a BenchmarkConfig instance. + + :param timeout: Function execution timeout in seconds. + :param memory: Memory allocation for the function in MB. + :param languages: List of supported Language enums. + :param modules: List of BenchmarkModule enums required by the benchmark. + """ self._timeout = timeout self._memory = memory self._languages = languages @@ -33,32 +61,44 @@ def __init__( @property def timeout(self) -> int: + """Execution timeout for the benchmark function in seconds.""" return self._timeout @timeout.setter def timeout(self, val: int): + """Set the execution timeout.""" self._timeout = val @property def memory(self) -> int: + """Memory allocated to the benchmark function in MB.""" return self._memory @memory.setter def memory(self, val: int): + """Set the memory allocation.""" self._memory = val @property def languages(self) -> List["Language"]: + """List of supported programming languages for this benchmark.""" return self._languages @property def modules(self) -> List[BenchmarkModule]: + """List of SeBS modules (e.g., storage, nosql) required by this benchmark.""" return self._modules - # FIXME: 3.7+ python with future annotations + # FIXME: 3.7+ python with future annotations - Noted from original. @staticmethod def deserialize(json_object: dict) -> "BenchmarkConfig": - from sebs.faas.function import Language + """ + Deserialize a BenchmarkConfig object from a JSON-like dictionary. 
+ + :param json_object: Dictionary containing benchmark configuration data. + :return: A BenchmarkConfig instance. + """ + from sebs.faas.function import Language # Local import to avoid circular dependency at module level return BenchmarkConfig( json_object["timeout"], @@ -68,118 +108,165 @@ def deserialize(json_object: dict) -> "BenchmarkConfig": ) -""" - Creates code package representing a benchmark with all code and assets - prepared and dependency install performed within Docker image corresponding - to the cloud deployment. - - The behavior of the class depends on cache state: - 1) First, if there's no cache entry, a code package is built. - 2) Otherwise, the hash of the entire benchmark is computed and compared - with the cached value. If changed, then rebuilt then benchmark. - 3) Otherwise, just return the path to cache code. -""" - - class Benchmark(LoggingBase): + """ + Represents a benchmark, managing its code, configuration, and deployment package. + + Handles the lifecycle of a benchmark's code package, including building it + (which involves copying source files, adding deployment-specific wrappers and + dependencies, and installing dependencies via Docker), caching the package, + and preparing benchmark-specific input data. + + The behavior of this class, particularly the `build` method, depends on the + state of the SeBS cache: + 1. If no cache entry exists for the benchmark (for the current language, deployment, etc.), + a new code package is built. + 2. If a cache entry exists, the hash of the benchmark's source directory is computed + and compared with the cached hash. If they differ, or if an update is forced, + the package is rebuilt. + 3. Otherwise (cache entry exists and hash matches), the cached code package is used. + """ @staticmethod def typename() -> str: + """Return the type name of this class.""" return "Benchmark" @property - def benchmark(self): + def benchmark(self) -> str: + """The name of the benchmark (e.g., "010.sleep").""" return self._benchmark @property - def benchmark_path(self): + def benchmark_path(self) -> str: + """The absolute path to the benchmark's source directory.""" return self._benchmark_path @property def benchmark_config(self) -> BenchmarkConfig: + """The BenchmarkConfig object for this benchmark.""" return self._benchmark_config @property - def code_package(self) -> dict: + def code_package(self) -> Optional[dict]: # Can be None if not cached/built + """ + Cached information about the code package, if available. + This typically includes 'location' (relative to cache_dir), 'hash', and 'size'. + """ return self._code_package @property - def functions(self) -> Dict[str, Any]: + def functions(self) -> Dict[str, Any]: # Value can be complex, from Function.deserialize + """ + Cached information about deployed functions associated with this benchmark + for the current deployment, keyed by function name. + """ return self._functions @property - def code_location(self): - if self.code_package: - return os.path.join(self._cache_client.cache_dir, self.code_package["location"]) + def code_location(self) -> str: + """ + The absolute path to the prepared code package. + If cached, it points to the location within the SeBS cache directory. + Otherwise, it points to the build output directory. 
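+
+        Illustrative values (paths are made up)::
+
+            bench.code_location
+            # -> '<cache_dir>/aws/110.dynamic-html/python/code' when a valid cache entry exists,
+            #    otherwise the build output directory for this benchmark variant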
+ """ + if self._code_package and "location" in self._code_package: + return os.path.join(self._cache_client.cache_dir, self._code_package["location"]) else: - return self._code_location + # Before build or if not cached, this might point to the intended output dir + # or could be considered unset. The _output_dir is set in __init__. + return self._output_dir # Changed from self._code_location which was not set @property - def is_cached(self): + def is_cached(self) -> bool: + """True if a code package entry for this benchmark exists in the cache.""" return self._is_cached @is_cached.setter def is_cached(self, val: bool): + """Set the cached status.""" self._is_cached = val @property - def is_cached_valid(self): + def is_cached_valid(self) -> bool: + """ + True if a cached code package exists and its hash matches the current + benchmark source code hash. + """ return self._is_cached_valid @is_cached_valid.setter def is_cached_valid(self, val: bool): + """Set the cache validity status.""" self._is_cached_valid = val @property - def code_size(self): + def code_size(self) -> Optional[int]: # Can be None if not set + """The size of the code package in bytes, if known.""" return self._code_size @property - def container_uri(self) -> str: - assert self._container_uri is not None + def container_uri(self) -> Optional[str]: # Changed from str to Optional[str] + """The URI of the container image, if applicable for containerized deployment.""" return self._container_uri @property def language(self) -> "Language": + """The programming language of this benchmark instance (Language enum).""" return self._language @property def language_name(self) -> str: + """The string name of the programming language (e.g., "python").""" return self._language.value @property - def language_version(self): + def language_version(self) -> str: # Added return type + """The version of the programming language runtime (e.g., "3.8").""" return self._language_version @property def has_input_processed(self) -> bool: + """True if the benchmark's input data has been prepared and processed.""" return self._input_processed @property def uses_storage(self) -> bool: + """True if the benchmark requires object storage (e.g., S3, Minio).""" return self._uses_storage @property def uses_nosql(self) -> bool: + """True if the benchmark requires NoSQL storage (e.g., DynamoDB, ScyllaDB).""" return self._uses_nosql @property def architecture(self) -> str: + """The target CPU architecture for this benchmark instance (e.g., "x64").""" return self._architecture @property - def container_deployment(self): + def container_deployment(self) -> bool: # Added return type + """True if this benchmark instance is intended for containerized deployment.""" return self._container_deployment @property # noqa: A003 - def hash(self): - path = os.path.join(self.benchmark_path, self.language_name) - self._hash_value = Benchmark.hash_directory(path, self._deployment_name, self.language_name) + def hash(self) -> Optional[str]: # Can be None before first calculation + """ + MD5 hash of the benchmark's source code directory for the current language + and deployment, including relevant wrappers. Used for cache validation. 
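+
+        Illustrative usage (the digest is made up)::
+
+            bench.hash                           # computed lazily, e.g. '9b3a0f12...'
+            bench.hash == cached_entry["hash"]   # the comparison performed in query_cache()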
+ """ + # Calculate hash on demand if not already computed + if self._hash_value is None: + path = os.path.join(self.benchmark_path, self.language_name) + self._hash_value = Benchmark.hash_directory(path, self._deployment_name, self.language_name) return self._hash_value @hash.setter # noqa: A003 def hash(self, val: str): """ - Used only for testing purposes. + Set the hash value. Used only for testing purposes. + + :param val: The hash value to set. """ self._hash_value = val @@ -187,37 +274,59 @@ def __init__( self, benchmark: str, deployment_name: str, - config: "ExperimentConfig", - system_config: SeBSConfig, - output_dir: str, + config: "ExperimentConfig", # Experiment-level config + system_config: SeBSConfig, # Global SeBS config + output_dir: str, # Base output directory for SeBS cache_client: Cache, docker_client: docker.client, ): + """ + Initialize a Benchmark instance. + + Loads benchmark configuration, determines language and version from experiment + config, sets up paths, and queries the cache for existing code packages + and function deployments. + + :param benchmark: Name of the benchmark. + :param deployment_name: Name of the target FaaS deployment (e.g., "aws", "local"). + :param config: The active experiment's configuration. + :param system_config: The global SeBS system configuration. + :param output_dir: Base directory for SeBS outputs. + :param cache_client: The SeBS cache client. + :param docker_client: The Docker client instance. + :raises RuntimeError: If the benchmark is not found or not supported for the language. + """ super().__init__() self._benchmark = benchmark self._deployment_name = deployment_name - self._experiment_config = config + self._experiment_config = config # Experiment-specific settings self._language = config.runtime.language self._language_version = config.runtime.version self._architecture = self._experiment_config.architecture self._container_deployment = config.container_deployment - self._benchmark_path = find_benchmark(self.benchmark, "benchmarks") - if not self._benchmark_path: - raise RuntimeError("Benchmark {benchmark} not found!".format(benchmark=self._benchmark)) + + benchmark_fs_path = find_benchmark(self.benchmark, "benchmarks") + if not benchmark_fs_path: + raise RuntimeError(f"Benchmark {self.benchmark} not found!") + self._benchmark_path = benchmark_fs_path + with open(os.path.join(self.benchmark_path, "config.json")) as json_file: - self._benchmark_config: BenchmarkConfig = BenchmarkConfig.deserialize( - json.load(json_file) - ) + self._benchmark_config: BenchmarkConfig = BenchmarkConfig.deserialize(json.load(json_file)) + if self.language not in self.benchmark_config.languages: - raise RuntimeError( - "Benchmark {} not available for language {}".format(self.benchmark, self.language) - ) + raise RuntimeError(f"Benchmark {self.benchmark} not available for language {self.language_name}") + self._cache_client = cache_client self._docker_client = docker_client - self._system_config = system_config - self._hash_value = None + self._system_config = system_config # Global SeBS settings + self._hash_value: Optional[str] = None # Lazily computed + self._code_package: Optional[Dict[str, Any]] = None # From cache + self._code_size: Optional[int] = None # From cache or after build + self._functions: Dict[str, Any] = {} # From cache + + # Directory for this specific benchmark variant's build outputs self._output_dir = os.path.join( - output_dir, + output_dir, # Base SeBS output directory f"{benchmark}_code", self._language.value, 
self._language_version, @@ -225,57 +334,76 @@ def __init__( "container" if self._container_deployment else "package", ) self._container_uri: Optional[str] = None + self._code_location: str = self._output_dir # Default if not cached - # verify existence of function in cache - self.query_cache() - if config.update_code: + self.query_cache() # Populate _code_package, _functions, _is_cached, etc. + if config.update_code: # If user forces code update self._is_cached_valid = False - # Load input module + # Load input generation module for this benchmark + benchmark_data_root = find_benchmark(self._benchmark, "benchmarks-data") + if not benchmark_data_root: + self.logging.warning(f"Data directory for benchmark {self._benchmark} not found.") + # Decide if this is fatal or if benchmark can run without data_dir + self._benchmark_data_path: Optional[str] = benchmark_data_root + self._benchmark_input_module: BenchmarkModuleInterface = load_benchmark_input(self._benchmark_path) - self._benchmark_data_path = find_benchmark(self._benchmark, "benchmarks-data") - self._benchmark_input_module = load_benchmark_input(self._benchmark_path) - - # Check if input has been processed self._input_processed: bool = False - self._uses_storage: bool = False - self._uses_nosql: bool = False - - """ - Compute MD5 hash of an entire directory. - """ + self._uses_storage: bool = False # Determined during input preparation + self._uses_nosql: bool = False # Determined during input preparation @staticmethod - def hash_directory(directory: str, deployment: str, language: str): + def hash_directory(directory: str, deployment: str, language_name: str) -> str: # language -> language_name + """ + Compute an MD5 hash of a directory's contents relevant to a benchmark. + + Includes source files (*.py, requirements.txt for Python; *.js, package.json for Node.js), + non-language-specific files (*.sh, *.json), and deployment-specific wrapper files. + :param directory: The path to the benchmark's language-specific source directory. + :param deployment: The name of the target deployment (e.g., "aws"). + :param language_name: The name of the programming language ("python" or "nodejs"). + :return: MD5 hexdigest string of the relevant directory contents. 
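+
+        Example (the benchmark path is illustrative)::
+
+            digest = Benchmark.hash_directory(
+                "benchmarks/100.webapps/110.dynamic-html/python", "aws", "python"
+            )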
+ """ hash_sum = hashlib.md5() - FILES = { - "python": ["*.py", "requirements.txt*"], + FILES_PATTERNS = { # Renamed from FILES for clarity + "python": ["*.py", "requirements.txt*"], # requirements.txt* for versioned ones "nodejs": ["*.js", "package.json"], } - WRAPPERS = {"python": "*.py", "nodejs": "*.js"} - NON_LANG_FILES = ["*.sh", "*.json"] - selected_files = FILES[language] + NON_LANG_FILES - for file_type in selected_files: - for f in glob.glob(os.path.join(directory, file_type)): - path = os.path.join(directory, f) - with open(path, "rb") as opened_file: - hash_sum.update(opened_file.read()) - # wrappers - wrappers = project_absolute_path( - "benchmarks", "wrappers", deployment, language, WRAPPERS[language] + WRAPPER_PATTERNS = {"python": "*.py", "nodejs": "*.js"} # Renamed from WRAPPERS + COMMON_FILES_PATTERNS = ["*.sh", "*.json"] # Renamed from NON_LANG_FILES + + selected_patterns = FILES_PATTERNS[language_name] + COMMON_FILES_PATTERNS + for pattern in selected_patterns: + for filepath in glob.glob(os.path.join(directory, pattern)): + if os.path.isfile(filepath): # Ensure it's a file + with open(filepath, "rb") as f_obj: + hash_sum.update(f_obj.read()) + + # Include wrappers + wrapper_search_path = project_absolute_path( + "benchmarks", "wrappers", deployment, language_name, WRAPPER_PATTERNS[language_name] ) - for f in glob.glob(wrappers): - path = os.path.join(directory, f) - with open(path, "rb") as opened_file: - hash_sum.update(opened_file.read()) + for filepath in glob.glob(wrapper_search_path): + if os.path.isfile(filepath): + with open(filepath, "rb") as f_obj: + hash_sum.update(f_obj.read()) return hash_sum.hexdigest() def serialize(self) -> dict: + """ + Serialize essential benchmark information (size and hash) for caching. + + :return: Dictionary with "size" and "hash". + """ return {"size": self.code_size, "hash": self.hash} def query_cache(self): - + """ + Query the SeBS cache for existing code packages and function deployments + for this benchmark variant. Updates internal state based on cache findings + (e.g., `_is_cached`, `_is_cached_valid`, `_code_package`, `_functions`). 
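+
+        Sketch of the decision this feeds (see ``build``)::
+
+            bench.query_cache()
+            if bench.is_cached and bench.is_cached_valid:
+                ...  # reuse bench.code_location instead of rebuilding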
+ """ if self.container_deployment: self._code_package = self._cache_client.get_container( deployment=self._deployment_name, @@ -285,7 +413,7 @@ def query_cache(self): architecture=self.architecture, ) if self._code_package is not None: - self._container_uri = self._code_package["image-uri"] + self._container_uri = self._code_package.get("image-uri") # Use .get for safety else: self._code_package = self._cache_client.get_code_package( deployment=self._deployment_name, @@ -298,283 +426,378 @@ def query_cache(self): self._functions = self._cache_client.get_functions( deployment=self._deployment_name, benchmark=self._benchmark, - language=self.language_name, + language=self.language_name, # Assumes functions are per language, not version specific in cache key ) if self._code_package is not None: - # compare hashes - current_hash = self.hash - old_hash = self._code_package["hash"] - self._code_size = self._code_package["size"] + current_hash = self.hash # Ensures hash is computed if not already + old_hash = self._code_package.get("hash") + self._code_size = self._code_package.get("size") self._is_cached = True - self._is_cached_valid = current_hash == old_hash + self._is_cached_valid = (current_hash == old_hash) if old_hash else False + if self._is_cached_valid: + self._code_location = os.path.join(self._cache_client.cache_dir, self._code_package["location"]) else: self._is_cached = False self._is_cached_valid = False + self._code_location = self._output_dir # Default to build output if not cached - def copy_code(self, output_dir): - FILES = { + def copy_code(self, output_dir: str): + """ + Copy benchmark source files (language-specific and common) to the output directory. + + Handles language-specific files (e.g., *.py, requirements.txt for Python) + and versioned package.json for Node.js. + + :param output_dir: The target directory for copying files. + """ + FILES_PATTERNS = { "python": ["*.py", "requirements.txt*"], "nodejs": ["*.js", "package.json"], } - path = os.path.join(self.benchmark_path, self.language_name) - for file_type in FILES[self.language_name]: - for f in glob.glob(os.path.join(path, file_type)): - shutil.copy2(os.path.join(path, f), output_dir) - # support node.js benchmarks with language specific packages - nodejs_package_json = os.path.join(path, f"package.json.{self.language_version}") - if os.path.exists(nodejs_package_json): - shutil.copy2(nodejs_package_json, os.path.join(output_dir, "package.json")) - - def add_benchmark_data(self, output_dir): - cmd = "/bin/bash {benchmark_path}/init.sh {output_dir} false {architecture}" - paths = [ - self.benchmark_path, - os.path.join(self.benchmark_path, self.language_name), + source_path = os.path.join(self.benchmark_path, self.language_name) + for pattern in FILES_PATTERNS[self.language_name]: + for filepath in glob.glob(os.path.join(source_path, pattern)): + if os.path.isfile(filepath): + shutil.copy2(filepath, output_dir) + + # Support Node.js benchmarks with language version-specific package.json + if self.language_name == "nodejs": + versioned_package_json = os.path.join(source_path, f"package.json.{self.language_version}") + if os.path.exists(versioned_package_json): + shutil.copy2(versioned_package_json, os.path.join(output_dir, "package.json")) + + def add_benchmark_data(self, output_dir: str): + """ + Add benchmark-specific data files by running `init.sh` script if it exists. + + Searches for `init.sh` in the benchmark's root and language-specific directories. 
+ The script is executed with the output directory and architecture as arguments. + + :param output_dir: The target directory where data should be placed or referenced. + """ + # The script is expected to handle placing data correctly relative to output_dir + init_script_cmd = "/bin/bash {script_path} {target_dir} false {arch}" + # Check in benchmark root, then language-specific subdir + possible_script_locations = [ + os.path.join(self.benchmark_path, "init.sh"), + os.path.join(self.benchmark_path, self.language_name, "init.sh"), ] - for path in paths: - if os.path.exists(os.path.join(path, "init.sh")): + for script_path in possible_script_locations: + if os.path.exists(script_path): + self.logging.info(f"Executing benchmark data script: {script_path}") subprocess.run( - cmd.format( - benchmark_path=path, - output_dir=output_dir, - architecture=self._experiment_config._architecture, + init_script_cmd.format( + script_path=script_path, + target_dir=output_dir, + arch=self._experiment_config.architecture, # Use architecture from experiment config ), - shell=True, + shell=True, # Be cautious with shell=True stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + check=False # Check return code manually if needed ) + break # Assume only one init.sh should be run + + def add_deployment_files(self, output_dir: str): + """ + Copy deployment-specific wrapper files (e.g., handlers) to the output directory. + + Files are sourced from `benchmarks/wrappers/{deployment_name}/{language_name}/`. - def add_deployment_files(self, output_dir): - handlers_dir = project_absolute_path( + :param output_dir: The target directory for copying wrapper files. + """ + handlers_source_dir = project_absolute_path( "benchmarks", "wrappers", self._deployment_name, self.language_name ) - handlers = [ - os.path.join(handlers_dir, file) - for file in self._system_config.deployment_files( - self._deployment_name, self.language_name - ) - ] - for file in handlers: - shutil.copy2(file, os.path.join(output_dir)) + # Get list of file names, not full paths, from system_config + required_handler_files = self._system_config.deployment_files( + self._deployment_name, self.language_name + ) + for file_name in required_handler_files: + source_file_path = os.path.join(handlers_source_dir, file_name) + if os.path.exists(source_file_path): + shutil.copy2(source_file_path, os.path.join(output_dir, file_name)) + else: + self.logging.warning(f"Deployment wrapper file {source_file_path} not found.") - def add_deployment_package_python(self, output_dir): - destination_file = f"requirements.txt.{self._language_version}" - if not os.path.exists(os.path.join(output_dir, destination_file)): - destination_file = "requirements.txt" + def add_deployment_package_python(self, output_dir: str): + """ + Add deployment-specific Python packages to the requirements.txt file. - # append to the end of requirements file - with open(os.path.join(output_dir, destination_file), "a") as out: + Appends packages listed in SeBS system configuration for the current + deployment, language, and any required benchmark modules. + Handles versioned requirements files (e.g., requirements.txt.3.8). - packages = self._system_config.deployment_packages( + :param output_dir: The directory containing the requirements.txt file. 
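+
+        Illustrative result (package names are examples, not the actual configuration)::
+
+            # requirements.txt after this step
+            jinja2    # benchmark's own dependency
+            boto3     # appended: general deployment package
+            redis     # appended: package required by a benchmark module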
+ """ + # Determine the correct requirements file (e.g., requirements.txt.3.8 or requirements.txt) + versioned_req_file = f"requirements.txt.{self._language_version}" + req_file_path = os.path.join(output_dir, versioned_req_file) + if not os.path.exists(req_file_path): + req_file_path = os.path.join(output_dir, "requirements.txt") + if not os.path.exists(req_file_path): # Create if doesn't exist + with open(req_file_path, "w") as f: + pass # Create empty file + self.logging.info(f"Created empty requirements file at {req_file_path}") + + + with open(req_file_path, "a") as out_f: # Open in append mode + # Add general deployment packages + general_packages = self._system_config.deployment_packages( self._deployment_name, self.language_name ) - for package in packages: - out.write(package) + for package in general_packages: + out_f.write(f"{package}\n") # Ensure newline - module_packages = self._system_config.deployment_module_packages( + # Add packages for specific benchmark modules + module_specific_packages = self._system_config.deployment_module_packages( self._deployment_name, self.language_name ) for bench_module in self._benchmark_config.modules: - if bench_module.value in module_packages: - for package in module_packages[bench_module.value]: - out.write(package) + if bench_module.value in module_specific_packages: + for package in module_specific_packages[bench_module.value]: + out_f.write(f"{package}\n") # Ensure newline + + def add_deployment_package_nodejs(self, output_dir: str): + """ + Add deployment-specific Node.js packages to the package.json file. + + Merges dependencies from SeBS system configuration into the benchmark's + package.json. Handles versioned package.json files (e.g., package.json.12). - def add_deployment_package_nodejs(self, output_dir): - # modify package.json - packages = self._system_config.deployment_packages( + :param output_dir: The directory containing the package.json file. 
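+
+        Illustrative merge (dependency names and versions are examples)::
+
+            # "dependencies" before: {"uuid": "3.4.0"}
+            # after this step:       {"uuid": "3.4.0", "@azure/storage-blob": "^12.0.0"}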
+ """ + # Determine the correct package.json file + versioned_pkg_file = f"package.json.{self._language_version}" + pkg_file_path = os.path.join(output_dir, versioned_pkg_file) + if not os.path.exists(pkg_file_path): + pkg_file_path = os.path.join(output_dir, "package.json") + if not os.path.exists(pkg_file_path): + # Create a default package.json if none exists + default_pkg_json = {"name": self.benchmark, "version": "1.0.0", "dependencies": {}} + with open(pkg_file_path, "w") as f: + json.dump(default_pkg_json, f, indent=2) + self.logging.info(f"Created default package.json at {pkg_file_path}") + + + # Read existing package.json + with open(pkg_file_path, "r") as package_file: + package_json_data = json.load(package_file) + + # Ensure 'dependencies' key exists + if "dependencies" not in package_json_data: + package_json_data["dependencies"] = {} + + # Add general deployment packages + general_packages = self._system_config.deployment_packages( self._deployment_name, self.language_name ) - if len(packages): + package_json_data["dependencies"].update(general_packages) # Merge dependencies + + # Add packages for specific benchmark modules + # This part was missing in original, adding for completeness if modules can have npm deps + module_specific_packages = self._system_config.deployment_module_packages( + self._deployment_name, self.language_name + ) + for bench_module in self._benchmark_config.modules: + if bench_module.value in module_specific_packages: + package_json_data["dependencies"].update(module_specific_packages[bench_module.value]) - package_config = os.path.join(output_dir, f"package.json.{self._language_version}") - if not os.path.exists(package_config): - package_config = os.path.join(output_dir, "package.json") - with open(package_config, "r") as package_file: - package_json = json.load(package_file) - for key, val in packages.items(): - package_json["dependencies"][key] = val - with open(package_config, "w") as package_file: - json.dump(package_json, package_file, indent=2) + # Write updated package.json + with open(pkg_file_path, "w") as package_file: + json.dump(package_json_data, package_file, indent=2) + + def add_deployment_package(self, output_dir: str): + """ + Add deployment-specific packages based on the benchmark's language. - def add_deployment_package(self, output_dir): - from sebs.faas.function import Language + Dispatches to language-specific methods (Python, Node.js). + + :param output_dir: The directory where package files (e.g., requirements.txt) are located. + :raises NotImplementedError: If the language is not supported. + """ + from sebs.faas.function import Language # Local import if self.language == Language.PYTHON: self.add_deployment_package_python(output_dir) elif self.language == Language.NODEJS: self.add_deployment_package_nodejs(output_dir) else: - raise NotImplementedError + raise NotImplementedError(f"Deployment package addition not implemented for language {self.language_name}") @staticmethod - def directory_size(directory: str): - from pathlib import Path + def directory_size(directory: str) -> int: # Return type is int (bytes) + """ + Calculate the total size of all files within a directory (recursive). + :param directory: The path to the directory. + :return: Total size in bytes. 
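+
+        Example (the path is illustrative)::
+
+            size_mb = Benchmark.directory_size("/tmp/110.dynamic-html_code") / 1024 / 1024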
+ """ + from pathlib import Path root = Path(directory) - sizes = [f.stat().st_size for f in root.glob("**/*") if f.is_file()] - return sum(sizes) + return sum(f.stat().st_size for f in root.glob("**/*") if f.is_file()) - def install_dependencies(self, output_dir): - # do we have docker image for this run and language? + def install_dependencies(self, output_dir: str): + """ + Install benchmark dependencies using a Docker container. + + Pulls a pre-built Docker image specific to the deployment, language, and + runtime version. Mounts the output directory into the container and runs + an installer script (`/sebs/installer.sh`) within the container. + Handles fallbacks to unversioned Docker images if versioned ones are not found. + Supports copying files to/from Docker for environments where volume mounting + is problematic (e.g., CircleCI). + + :param output_dir: The directory containing benchmark code and dependency files. + Dependencies will be installed into this directory (or subdirectories + like .python_packages or node_modules). + :raises RuntimeError: If Docker image pull fails or container execution fails. + """ + # Check if a Docker build image is defined for this deployment and language if "build" not in self._system_config.docker_image_types( self._deployment_name, self.language_name ): self.logging.info( - ( - "There is no Docker build image for {deployment} run in {language}, " - "thus skipping the Docker-based installation of dependencies." - ).format(deployment=self._deployment_name, language=self.language_name) - ) - else: - repo_name = self._system_config.docker_repository() - unversioned_image_name = "build.{deployment}.{language}.{runtime}".format( - deployment=self._deployment_name, - language=self.language_name, - runtime=self.language_version, - ) - image_name = "{base_image_name}-{sebs_version}".format( - base_image_name=unversioned_image_name, - sebs_version=self._system_config.version(), + f"No Docker build image for {self._deployment_name} in {self.language_name}, " + "skipping Docker-based dependency installation." 
) + return # Skip if no build image is configured - def ensure_image(name: str) -> None: - try: - self._docker_client.images.get(repo_name + ":" + name) - except docker.errors.ImageNotFound: - try: - self.logging.info( - "Docker pull of image {repo}:{image}".format(repo=repo_name, image=name) - ) - self._docker_client.images.pull(repo_name, name) - except docker.errors.APIError: - raise RuntimeError( - "Docker pull of image {}:{} failed!".format(repo_name, name) - ) + repo_name = self._system_config.docker_repository() + # Construct image names (versioned and unversioned fallback) + unversioned_image_name = f"build.{self._deployment_name}.{self.language_name}.{self.language_version}" + versioned_image_name = f"{unversioned_image_name}-{self._system_config.version()}" + + final_image_name_to_use = versioned_image_name + def _ensure_docker_image(image_name_with_tag: str) -> bool: try: - ensure_image(image_name) - except RuntimeError as e: - self.logging.warning( - "Failed to ensure image {}, falling back to {}: {}".format( - image_name, unversioned_image_name, e - ) - ) + self._docker_client.images.get(f"{repo_name}:{image_name_with_tag}") + return True + except docker.errors.ImageNotFound: try: - ensure_image(unversioned_image_name) - except RuntimeError: - raise - # update `image_name` in the context to the fallback image name - image_name = unversioned_image_name + self.logging.info(f"Pulling Docker image {repo_name}:{image_name_with_tag}") + self._docker_client.images.pull(repo_name, image_name_with_tag) + return True + except docker.errors.APIError as e_pull: + self.logging.warning(f"Docker pull of {repo_name}:{image_name_with_tag} failed: {e_pull}") + return False + + if not _ensure_docker_image(versioned_image_name): + self.logging.warning( + f"Failed to ensure image {versioned_image_name}, falling back to {unversioned_image_name}." + ) + if not _ensure_docker_image(unversioned_image_name): + raise RuntimeError(f"Failed to pull both versioned and unversioned Docker build images.") + final_image_name_to_use = unversioned_image_name + + # Prepare for Docker run + volumes = {} + if not self._experiment_config.check_flag("docker_copy_build_files"): + volumes[os.path.abspath(output_dir)] = {"bind": "/mnt/function", "mode": "rw"} + package_script_path = os.path.abspath( + os.path.join(self._benchmark_path, self.language_name, "package.sh") + ) + if os.path.exists(package_script_path): + volumes[package_script_path] = {"bind": "/mnt/function/package.sh", "mode": "ro"} + + # Check if primary dependency file exists (e.g. requirements.txt) + PACKAGE_MANIFEST_FILES = {"python": "requirements.txt", "nodejs": "package.json"} + dependency_file_path = os.path.join(output_dir, PACKAGE_MANIFEST_FILES[self.language_name]) + if not os.path.exists(dependency_file_path): + self.logging.info(f"No dependency file ({dependency_file_path}) found. 
Skipping dependency installation.") + return + + try: + self.logging.info( + f"Starting Docker-based dependency installation using image {repo_name}:{final_image_name_to_use}" + ) + container_user_id = str(os.getuid()) if hasattr(os, 'getuid') else '1000' + container_group_id = str(os.getgid()) if hasattr(os, 'getgid') else '1000' - # Create set of mounted volumes unless Docker volumes are disabled if not self._experiment_config.check_flag("docker_copy_build_files"): - volumes = {os.path.abspath(output_dir): {"bind": "/mnt/function", "mode": "rw"}} - package_script = os.path.abspath( - os.path.join(self._benchmark_path, self.language_name, "package.sh") + self.logging.info(f"Using Docker volume mount for {os.path.abspath(output_dir)}") + run_stdout = self._docker_client.containers.run( + f"{repo_name}:{final_image_name_to_use}", + volumes=volumes, + environment={ + "CONTAINER_UID": container_user_id, + "CONTAINER_GID": container_group_id, + "CONTAINER_USER": "docker_user", # User inside container + "APP": self.benchmark, + "PLATFORM": self._deployment_name.upper(), + "TARGET_ARCHITECTURE": self._experiment_config.architecture, + }, + remove=True, stdout=True, stderr=True, ) - # does this benchmark has package.sh script? - if os.path.exists(package_script): - volumes[package_script] = { - "bind": "/mnt/function/package.sh", - "mode": "ro", - } + stdout_decoded = run_stdout.decode("utf-8") + else: # Fallback for environments where mounts are problematic (e.g., CI) + self.logging.info("Using Docker cp for file transfer due to 'docker_copy_build_files' flag.") + container = self._docker_client.containers.run( + f"{repo_name}:{final_image_name_to_use}", + environment={"APP": self.benchmark, + "TARGET_ARCHITECTURE": self._experiment_config.architecture}, + user=container_user_id, # Run as current host user if possible + remove=True, detach=True, tty=True, command="/bin/bash", # Keep alive + ) + # Copy files to container + tar_archive_path = os.path.join(os.path.dirname(output_dir), "function_package.tar") + with tarfile.open(tar_archive_path, "w") as tar: + tar.add(output_dir, arcname=".") # Add contents of output_dir to root of tar + with open(tar_archive_path, "rb") as tar_data: + container.put_archive("/mnt/function", tar_data) + os.remove(tar_archive_path) # Clean up tar + + # Execute installer script + exec_result = container.exec_run( + cmd="/bin/bash /sebs/installer.sh", user="docker_user", # Run installer as docker_user + stdout=True, stderr=True + ) + stdout_decoded = exec_result.output.decode("utf-8") + if exec_result.exit_code != 0: + self.logging.error(f"Dependency installation script failed with exit code {exec_result.exit_code}") + self.logging.error(f"Stderr: {stdout_decoded}") # Stderr is part of output here + raise docker.errors.ContainerError(container, exec_result.exit_code, cmd, final_image_name_to_use, stdout_decoded) + + + # Copy results back + # Need to clean output_dir before extracting to avoid issues with old files + # For simplicity, assume installer script places files correctly in /mnt/function + # and we are extracting the entire /mnt/function back. + # This might overwrite source files if installer modifies them, which is usually not intended. + # A safer approach might be to copy only specific dependency dirs (e.g. 
.python_packages) + data_stream, _ = container.get_archive("/mnt/function") + with open(tar_archive_path, "wb") as f_tar_out: + for chunk in data_stream: + f_tar_out.write(chunk) + + # Before extracting, ensure output_dir is clean of old dependency dirs + # Example: shutil.rmtree(os.path.join(output_dir, ".python_packages"), ignore_errors=True) + with tarfile.open(tar_archive_path, "r") as tar_in: + tar_in.extractall(output_dir) # Extracts to output_dir + os.remove(tar_archive_path) + container.stop() + + for line in stdout_decoded.split("\n"): + if "size" in line: # Log any size information from installer + self.logging.info(f"Docker build output: {line}") + + except docker.errors.ContainerError as e: + self.logging.error(f"Dependency installation in Docker failed: {e}") + if hasattr(e, 'stderr') and e.stderr: + self.logging.error(f"Container stderr: {e.stderr.decode('utf-8')}") + raise + + + def recalculate_code_size(self) -> int: # Return type is int (bytes) + """ + Recalculate the size of the code package directory and update `_code_size`. - # run Docker container to install packages - PACKAGE_FILES = {"python": "requirements.txt", "nodejs": "package.json"} - file = os.path.join(output_dir, PACKAGE_FILES[self.language_name]) - if os.path.exists(file): - try: - self.logging.info( - "Docker build of benchmark dependencies in container " - "of image {repo}:{image}".format(repo=repo_name, image=image_name) - ) - uid = os.getuid() - # Standard, simplest build - if not self._experiment_config.check_flag("docker_copy_build_files"): - self.logging.info( - "Docker mount of benchmark code from path {path}".format( - path=os.path.abspath(output_dir) - ) - ) - stdout = self._docker_client.containers.run( - "{}:{}".format(repo_name, image_name), - volumes=volumes, - environment={ - "CONTAINER_UID": str(os.getuid()), - "CONTAINER_GID": str(os.getgid()), - "CONTAINER_USER": "docker_user", - "APP": self.benchmark, - "PLATFORM": self._deployment_name.upper(), - "TARGET_ARCHITECTURE": self._experiment_config._architecture, - }, - remove=True, - stdout=True, - stderr=True, - ) - # Hack to enable builds on platforms where Docker mounted volumes - # are not supported. 
Example: CircleCI docker environment - else: - container = self._docker_client.containers.run( - "{}:{}".format(repo_name, image_name), - environment={"APP": self.benchmark}, - # user="1000:1000", - user=uid, - remove=True, - detach=True, - tty=True, - command="/bin/bash", - ) - # copy application files - import tarfile - - self.logging.info( - "Send benchmark code from path {path} to " - "Docker instance".format(path=os.path.abspath(output_dir)) - ) - tar_archive = os.path.join(output_dir, os.path.pardir, "function.tar") - with tarfile.open(tar_archive, "w") as tar: - for f in os.listdir(output_dir): - tar.add(os.path.join(output_dir, f), arcname=f) - with open(tar_archive, "rb") as data: - container.put_archive("/mnt/function", data.read()) - # do the build step - exit_code, stdout = container.exec_run( - cmd="/bin/bash /sebs/installer.sh", - user="docker_user", - stdout=True, - stderr=True, - ) - # copy updated code with package - data, stat = container.get_archive("/mnt/function") - with open(tar_archive, "wb") as f: - for chunk in data: - f.write(chunk) - with tarfile.open(tar_archive, "r") as tar: - tar.extractall(output_dir) - # docker packs the entire directory with basename function - for f in os.listdir(os.path.join(output_dir, "function")): - shutil.move( - os.path.join(output_dir, "function", f), - os.path.join(output_dir, f), - ) - shutil.rmtree(os.path.join(output_dir, "function")) - container.stop() - - # Pass to output information on optimizing builds. - # Useful for AWS where packages have to obey size limits. - for line in stdout.decode("utf-8").split("\n"): - if "size" in line: - self.logging.info("Docker build: {}".format(line)) - except docker.errors.ContainerError as e: - self.logging.error("Package build failed!") - self.logging.error(e) - self.logging.error(f"Docker mount volumes: {volumes}") - raise e - - def recalculate_code_size(self): + :return: The recalculated code size in bytes. + """ self._code_size = Benchmark.directory_size(self._output_dir) return self._code_size @@ -584,239 +807,331 @@ def build( [str, str, str, str, str, bool, bool], Tuple[str, int, str] ], ) -> Tuple[bool, str, bool, str]: - - # Skip build if files are up to date and user didn't enforce rebuild + """ + Build the benchmark code package. + + This involves copying source code, adding benchmark data and deployment-specific + files, installing dependencies, and then running the FaaS provider's specific + packaging step (e.g., zipping, creating container image). + Updates the cache with the new package information. + + :param deployment_build_step: A callable provided by the FaaS system implementation + that performs the final packaging (e.g., zipping, image build). + Signature: (abs_output_dir, lang_name, lang_version, arch, + benchmark_name, is_cached_valid, is_container_deployment) + Returns: (package_path, package_size_bytes, container_uri_if_any) + :return: Tuple (rebuilt: bool, code_location: str, is_container: bool, container_uri: str). + `rebuilt` is True if the package was newly built/rebuilt. 
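+
+        Illustrative call (``deployment_client`` is hypothetical; on AWS the passed
+        step is ``AWS.package_code``)::
+
+            rebuilt, location, is_container, image_uri = bench.build(
+                deployment_client.package_code
+            )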
+ """ + # Skip build if files are up-to-date and user didn't enforce rebuild if self.is_cached and self.is_cached_valid: self.logging.info( - "Using cached benchmark {} at {}".format(self.benchmark, self.code_location) + f"Using cached benchmark {self.benchmark} at {self.code_location}" ) - if self.container_deployment: - return False, self.code_location, self.container_deployment, self.container_uri + # Ensure container_uri is correctly set from cache if it's a container deployment + container_uri_to_return = self.container_uri if self.container_deployment else "" + return False, self.code_location, self.container_deployment, container_uri_to_return or "" - return False, self.code_location, self.container_deployment, "" - msg = ( - "no cached code package." - if not self.is_cached - else "cached code package is not up to date/build enforced." - ) - self.logging.info("Building benchmark {}. Reason: {}".format(self.benchmark, msg)) - # clear existing cache information - self._code_package = None + reason_for_build = ("no cached code package." if not self.is_cached + else "cached code package is not up-to-date or build enforced.") + self.logging.info(f"Building benchmark {self.benchmark}. Reason: {reason_for_build}") + self._code_package = None # Clear existing cache info as we are rebuilding - # create directory to be deployed + # Create or clear the output directory for this build if os.path.exists(self._output_dir): shutil.rmtree(self._output_dir) os.makedirs(self._output_dir) + # Assemble benchmark code and dependencies self.copy_code(self._output_dir) - self.add_benchmark_data(self._output_dir) + if self._benchmark_data_path: # Only if data path exists + self.add_benchmark_data(self._output_dir) self.add_deployment_files(self._output_dir) self.add_deployment_package(self._output_dir) - self.install_dependencies(self._output_dir) + self.install_dependencies(self._output_dir) # Installs into _output_dir - self._code_location, self._code_size, self._container_uri = deployment_build_step( - os.path.abspath(self._output_dir), + # Perform deployment-specific packaging (e.g., zip, build image) + # deployment_build_step is responsible for the final package/image. + # It operates on the contents of self._output_dir. 
+ package_path, package_size, container_image_uri = deployment_build_step( + os.path.abspath(self._output_dir), # Pass absolute path to build step self.language_name, self.language_version, self.architecture, self.benchmark, - self.is_cached_valid, + self.is_cached_valid, # Pass current validity, though it's being rebuilt self.container_deployment, ) + + self._code_location = package_path # This is the final packaged code location + self._code_size = package_size + self._container_uri = container_image_uri if self.container_deployment else None + self.logging.info( - ( - "Created code package (source hash: {hash}), for run on {deployment}" - + " with {language}:{runtime}" - ).format( - hash=self.hash, - deployment=self._deployment_name, - language=self.language_name, - runtime=self.language_version, - ) + f"Created code package (source hash: {self.hash}), for run on {self._deployment_name} " + f"with {self.language_name}:{self.language_version}" ) - if self.is_cached: - self._cache_client.update_code_package(self._deployment_name, self) + # Update cache with the new package information + if self.container_deployment: + self._cache_client.add_container(self._deployment_name, self) else: - self._cache_client.add_code_package(self._deployment_name, self) - self.query_cache() + if self.is_cached: # If there was an old entry, update it + self._cache_client.update_code_package(self._deployment_name, self) + else: # Otherwise, add a new entry + self._cache_client.add_code_package(self._deployment_name, self) + + self.query_cache() # Refresh internal state from cache - return True, self._code_location, self._container_deployment, self._container_uri + return True, self._code_location, self.container_deployment, self._container_uri or "" - """ - Locates benchmark input generator, inspect how many storage buckets - are needed and launches corresponding storage instance, if necessary. - - :param client: Deployment client - :param benchmark: - :param benchmark_path: - :param size: Benchmark workload size - """ def prepare_input( self, system_resources: SystemResources, size: str, replace_existing: bool = False - ): - + ) -> Dict[str, Any]: # Return type changed to Dict[str, Any] """ - Handle object storage buckets. - """ - if hasattr(self._benchmark_input_module, "buckets_count"): + Prepare input data for the benchmark. - buckets = self._benchmark_input_module.buckets_count() - storage = system_resources.get_storage(replace_existing) - input, output = storage.benchmark_data(self.benchmark, buckets) - - self._uses_storage = len(input) > 0 or len(output) > 0 - - storage_func = storage.uploader_func - bucket = storage.get_bucket(Resources.StorageBucketType.BENCHMARKS) - else: - input = [] - output = [] - storage_func = None - bucket = None + Locates the benchmark's input generator module (`input.py`), determines + storage requirements (object storage buckets, NoSQL tables), and invokes + the `generate_input` function from the module to create and upload + input data. Updates the cache with storage details after successful preparation. + :param system_resources: The SystemResources instance for the current deployment. + :param size: Workload size identifier (e.g., "test", "small", "large"). + :param replace_existing: If True, overwrite existing input data in storage. + :return: A dictionary containing the input configuration for the benchmark invocation. """ - Handle key-value storage. - This part is optional - only selected benchmarks implement this. 
- """ - if hasattr(self._benchmark_input_module, "allocate_nosql"): + input_config_dict: Dict[str, Any] = {} + storage_func: Optional[Callable[[int, str, str], None]] = None + target_bucket_for_input: Optional[str] = None + input_prefixes_list: List[str] = [] + output_prefixes_list: List[str] = [] - nosql_storage = system_resources.get_nosql_storage() - for name, table_properties in self._benchmark_input_module.allocate_nosql().items(): - nosql_storage.create_benchmark_tables( - self._benchmark, - name, - table_properties["primary_key"], - table_properties.get("secondary_key"), + # Handle object storage buckets + if hasattr(self._benchmark_input_module, "buckets_count"): + num_input_buckets, num_output_buckets = self._benchmark_input_module.buckets_count() + if num_input_buckets > 0 or num_output_buckets > 0: + storage_client = system_resources.get_storage(replace_existing) + input_prefixes_list, output_prefixes_list = storage_client.benchmark_data( + self.benchmark, (num_input_buckets, num_output_buckets) ) - - self._uses_nosql = True - nosql_func = nosql_storage.write_to_table + self._uses_storage = True + storage_func = storage_client.uploader_func + target_bucket_for_input = storage_client.get_bucket(Resources.StorageBucketType.BENCHMARKS) + + # Handle NoSQL storage + nosql_func: Optional[Callable] = None # Define with broader scope + if hasattr(self._benchmark_input_module, "allocate_nosql"): + nosql_storage_client = system_resources.get_nosql_storage() + tables_to_allocate = self._benchmark_input_module.allocate_nosql() + if tables_to_allocate: # Only proceed if there are tables defined + for table_name, table_props in tables_to_allocate.items(): + nosql_storage_client.create_benchmark_tables( + self._benchmark, + table_name, + table_props["primary_key"], + table_props.get("secondary_key"), + ) + self._uses_nosql = True + nosql_func = nosql_storage_client.write_to_table + + # Generate input using the benchmark's specific input.py module + if self._benchmark_data_path: # Ensure data path is valid + input_config_dict = self._benchmark_input_module.generate_input( + self._benchmark_data_path, size, target_bucket_for_input, + input_prefixes_list, output_prefixes_list, + storage_func, nosql_func + ) else: - nosql_func = None + self.logging.warning(f"Benchmark data path for {self.benchmark} is not set, cannot generate input.") + # Return empty or default config if data path is missing + input_config_dict = {"default_input": True, "size": size} - # buckets = mod.buckets_count() - # storage.allocate_buckets(self.benchmark, buckets) - # Get JSON and upload data as required by benchmark - input_config = self._benchmark_input_module.generate_input( - self._benchmark_data_path, size, bucket, input, output, storage_func, nosql_func - ) - # Cache only once we data is in the cloud. 
- if hasattr(self._benchmark_input_module, "buckets_count"): + # Update cache after successful data upload/preparation + if self._uses_storage and hasattr(self._benchmark_input_module, "buckets_count"): + # Assuming storage_client is the one from above + storage_client = system_resources.get_storage() self._cache_client.update_storage( - storage.deployment_name(), + storage_client.deployment_name(), self._benchmark, { "buckets": { - "input": storage.input_prefixes, - "output": storage.output_prefixes, - "input_uploaded": True, + "input": storage_client.input_prefixes, + "output": storage_client.output_prefixes, + "input_uploaded": True, # Mark as uploaded } }, ) - - if hasattr(self._benchmark_input_module, "allocate_nosql"): - nosql_storage.update_cache(self._benchmark) + if self._uses_nosql and hasattr(self._benchmark_input_module, "allocate_nosql"): + nosql_storage_client = system_resources.get_nosql_storage() + nosql_storage_client.update_cache(self._benchmark) self._input_processed = True + return input_config_dict - return input_config - - """ - This is used in experiments that modify the size of input package. - This step allows to modify code package without going through the entire pipeline. - """ def code_package_modify(self, filename: str, data: bytes): + """ + Modify a file within the benchmark's code package. + + This is used in experiments that vary code package contents (e.g., size). + Currently only supports modification if the code package is a zip archive. + :param filename: The name of the file within the package to modify/add. + :param data: The new byte content for the file. + :raises NotImplementedError: If the code package is not a zip archive. + """ if self.code_package_is_archive(): self._update_zip(self.code_location, filename, data) - new_size = self.code_package_recompute_size() / 1024.0 / 1024.0 - self.logging.info(f"Modified zip package {self.code_location}, new size {new_size} MB") + new_size_bytes = self.code_package_recompute_size() # Returns float, but should be int + new_size_mb = new_size_bytes / 1024.0 / 1024.0 + self.logging.info(f"Modified zip package {self.code_location}, new size {new_size_mb:.2f} MB") else: - raise NotImplementedError() + # This could be extended to handle modifications to directories for non-archive deployments + raise NotImplementedError("Code package modification is currently only supported for zip archives.") - """ - AWS: .zip file - Azure: directory - """ def code_package_is_archive(self) -> bool: - if os.path.isfile(self.code_location): - extension = os.path.splitext(self.code_location)[1] - return extension in [".zip"] + """ + Check if the benchmark's code package is an archive file (specifically, a .zip file). + + :return: True if the code package is a .zip file, False otherwise. + """ + # Ensure code_location is valid and points to a file + loc = self.code_location + if loc and os.path.isfile(loc): + _, extension = os.path.splitext(loc) + return extension.lower() == ".zip" return False - def code_package_recompute_size(self) -> float: - bytes_size = os.path.getsize(self.code_location) + def code_package_recompute_size(self) -> int: # Changed return to int + """ + Recompute the size of the code package file and update `_code_size`. + + :return: The recomputed code package size in bytes. 
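+
+        Example (typically paired with ``code_package_modify``)::
+
+            bench.code_package_modify("filler.bin", b"0" * 1024)
+            bench.code_package_recompute_size()  # size in bytes of the updated archive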
+ """ + bytes_size = 0 + if self.code_location and os.path.exists(self.code_location): # Check existence + bytes_size = os.path.getsize(self.code_location) self._code_size = bytes_size return bytes_size - # https://stackoverflow.com/questions/25738523/how-to-update-one-file-inside-zip-file-using-python @staticmethod - def _update_zip(zipname: str, filename: str, data: bytes): - import zipfile - import tempfile - - # generate a temp file - tmpfd, tmpname = tempfile.mkstemp(dir=os.path.dirname(zipname)) - os.close(tmpfd) - - # create a temp copy of the archive without filename - with zipfile.ZipFile(zipname, "r") as zin: - with zipfile.ZipFile(tmpname, "w") as zout: - zout.comment = zin.comment # preserve the comment - for item in zin.infolist(): - if item.filename != filename: - zout.writestr(item, zin.read(item.filename)) - - # replace with the temp archive - os.remove(zipname) - os.rename(tmpname, zipname) + def _update_zip(zip_archive_path: str, filename_in_zip: str, data_bytes: bytes): # Renamed args + """ + Update a file within an existing zip archive, or add it if not present. - # now add filename with its new data - with zipfile.ZipFile(zipname, mode="a", compression=zipfile.ZIP_DEFLATED) as zf: - zf.writestr(filename, data) + Creates a temporary zip file, copies all items from the original except + the target file (if it exists), then adds/replaces the target file with + new data. Finally, replaces the original zip with the temporary one. + Based on method from: https://stackoverflow.com/questions/25738523/how-to-update-one-file-inside-zip-file-using-python + :param zip_archive_path: Path to the zip archive to update. + :param filename_in_zip: The internal path/name of the file within the zip archive. + :param data_bytes: The new byte content for the file. + """ + import zipfile + import tempfile -""" - The interface of `input` module of each benchmark. - Useful for static type hinting with mypy. -""" + temp_fd, temp_zip_path = tempfile.mkstemp(dir=os.path.dirname(zip_archive_path), suffix='.zip') + os.close(temp_fd) + + try: + with zipfile.ZipFile(zip_archive_path, "r") as original_zip: + with zipfile.ZipFile(temp_zip_path, "w", compression=zipfile.ZIP_DEFLATED) as temp_new_zip: + temp_new_zip.comment = original_zip.comment # Preserve comment + for item in original_zip.infolist(): + if item.filename != filename_in_zip: + temp_new_zip.writestr(item, original_zip.read(item.filename)) + # Add the new/updated file + temp_new_zip.writestr(filename_in_zip, data_bytes) + + # Replace original with the new zip + os.remove(zip_archive_path) + os.rename(temp_zip_path, zip_archive_path) + except Exception: + # Ensure temp file is removed on error before re-raising + if os.path.exists(temp_zip_path): + os.remove(temp_zip_path) + raise class BenchmarkModuleInterface: + """ + Defines the expected interface for a benchmark's `input.py` module. + + This class is used for static type hinting and documentation purposes. + Benchmark input modules should provide static methods matching this interface. + """ @staticmethod @abstractmethod def buckets_count() -> Tuple[int, int]: + """ + Return the number of input and output storage buckets required by the benchmark. + + :return: Tuple (number_of_input_buckets, number_of_output_buckets). + """ pass @staticmethod @abstractmethod def allocate_nosql() -> dict: + """ + Return a dictionary specifying NoSQL tables to be allocated for the benchmark. + The dictionary format is: { "table_alias_1": {"primary_key": "key_name", "secondary_key": "sort_key_name"}, ... 
} + 'secondary_key' is optional. + + :return: Dictionary describing NoSQL table requirements. + """ pass @staticmethod @abstractmethod def generate_input( - data_dir: str, + data_dir: Optional[str], # data_dir can be None if benchmark doesn't need external data files size: str, - benchmarks_bucket: Optional[str], - input_paths: List[str], - output_paths: List[str], + benchmarks_bucket: Optional[str], # Name of the main benchmark data bucket + input_paths: List[str], # List of input prefixes/paths within the bucket + output_paths: List[str], # List of output prefixes/paths within the bucket + # Type for upload_func: (prefix_idx: int, key_in_prefix: str, local_filepath: str) -> None upload_func: Optional[Callable[[int, str, str], None]], + # Type for nosql_func: (benchmark_name: str, table_alias: str, data_item: dict, + # pk_tuple: Tuple[str,str], sk_tuple: Optional[Tuple[str,str]]) -> None nosql_func: Optional[ Callable[[str, str, dict, Tuple[str, str], Optional[Tuple[str, str]]], None] ], - ) -> Dict[str, str]: + ) -> Dict[str, Any]: # Return type changed to Dict[str, Any] for flexibility + """ + Generate and upload benchmark input data, and prepare the input configuration. + + :param data_dir: Path to the directory containing benchmark-specific data files. + :param size: Workload size identifier (e.g., "test", "small"). + :param benchmarks_bucket: Name of the primary bucket for benchmark data. + :param input_paths: List of input prefixes/paths within the `benchmarks_bucket`. + :param output_paths: List of output prefixes/paths within the `benchmarks_bucket`. + :param upload_func: Optional callback function to upload files to object storage. + Signature: `upload_func(prefix_index, key_relative_to_prefix, local_file_path)` + :param nosql_func: Optional callback function to write items to NoSQL storage. + Signature: `nosql_func(benchmark_name, table_alias, item_data, pk_tuple, sk_tuple)` + :return: Dictionary containing the input configuration for the function invocation. + """ pass def load_benchmark_input(benchmark_path: str) -> BenchmarkModuleInterface: - # Look for input generator file in the directory containing benchmark + """ + Load a benchmark's input generation module (`input.py`) dynamically. + + The `input.py` file is expected to be in the root of the benchmark's + source directory (alongside its language-specific subdirectories). + + :param benchmark_path: Absolute path to the benchmark's root directory. + :return: The loaded module, typed as BenchmarkModuleInterface. + :raises ImportError: If the input.py module cannot be loaded. + """ import importlib.machinery import importlib.util diff --git a/sebs/cache.py b/sebs/cache.py index f690e747a..b9ca5fbb0 100644 --- a/sebs/cache.py +++ b/sebs/cache.py @@ -15,7 +15,18 @@ from sebs.faas.function import Function -def update(d, u): +def update(d: dict, u: dict) -> dict: + """ + Recursively update a dictionary `d` with values from dictionary `u`. + + If a key exists in both dictionaries and both values are mappings (dictionaries), + the function recursively updates the nested dictionary. Otherwise, the value + from `u` overwrites the value in `d`. + + :param d: The dictionary to be updated. + :param u: The dictionary with new values. + :return: The updated dictionary `d`. 
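+
+ Example (doctest-style, illustrative)::
+
+     >>> update({"aws": {"region": "us-east-1"}}, {"aws": {"bucket": "b"}})
+     {'aws': {'region': 'us-east-1', 'bucket': 'b'}}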
+ """ for k, v in u.items(): if isinstance(v, collections.abc.Mapping): d[k] = update(d.get(k, {}), v) @@ -24,102 +35,155 @@ def update(d, u): return d -def update_dict(cfg, val, keys): - def map_keys(obj, val, keys): - if len(keys): - return {keys[0]: map_keys(obj, val, keys[1:])} +def update_dict(cfg: dict, val: Any, keys: List[str]): + """ + Update a nested dictionary `cfg` at a path specified by `keys` with `val`. + + Constructs the nested dictionary structure if it doesn't exist. + + :param cfg: The dictionary to update. + :param val: The value to set at the nested path. + :param keys: A list of keys representing the path to the value. + """ + def map_keys_recursive(current_keys: List[str]) -> Any: # Renamed inner map_keys + if len(current_keys): + # Recursively build the dictionary structure + return {current_keys[0]: map_keys_recursive(current_keys[1:])} else: + # Base case: return the value to be set return val - - update(cfg, map_keys(cfg, val, keys)) + # Start the recursive update + update(cfg, map_keys_recursive(keys)) class Cache(LoggingBase): - cached_config: Dict[str, str] = {} """ - Indicate that cloud offerings updated credentials or settings. - Thus we have to write down changes. + Manages caching of SeBS configurations, benchmark code packages, and function details. + + The cache is stored on the local filesystem in a directory specified by `cache_dir`. + It helps avoid redundant building of code packages and re-fetching of cloud resource + details across SeBS runs. Thread safety for cache access is managed by an RLock. """ - config_updated = False + cached_config: Dict[str, Any] = {} # Stores loaded configurations for different clouds + config_updated: bool = False + """Flag indicating if the in-memory `cached_config` has been modified and needs saving.""" def __init__(self, cache_dir: str, docker_client: docker.DockerClient): + """ + Initialize the Cache instance. + + Creates the cache directory if it doesn't exist and loads existing + cached configurations from JSON files (one per cloud provider). + + :param cache_dir: Path to the directory where cache files are stored. + :param docker_client: Docker client instance (used for some cache operations like image details). + """ super().__init__() self.docker_client = docker_client self.cache_dir = os.path.abspath(cache_dir) - self.ignore_functions: bool = False - self.ignore_storage: bool = False - self._lock = threading.RLock() + self.ignore_functions: bool = False # If True, function caching is bypassed + self.ignore_storage: bool = False # If True, storage configuration caching is bypassed + self._lock = threading.RLock() # For thread-safe access to cache files and memory if not os.path.exists(self.cache_dir): os.makedirs(self.cache_dir, exist_ok=True) - else: - self.load_config() + # Load existing config from files on initialization + self.load_config() @staticmethod def typename() -> str: - return "Benchmark" + """Return the type name of this class (used for logging context).""" + # This seems to be a placeholder or misnamed, as Cache is not a Benchmark. + # It should probably be "Cache" or similar if used for logging context. + return "Cache" # Changed from "Benchmark" for clarity def load_config(self): + """ + Load cached configurations for all supported cloud providers from their + respective JSON files in the cache directory into `self.cached_config`. 
+ """ with self._lock: - for cloud in ["azure", "aws", "gcp", "openwhisk", "local"]: - cloud_config_file = os.path.join(self.cache_dir, "{}.json".format(cloud)) - if os.path.exists(cloud_config_file): - self.cached_config[cloud] = json.load(open(cloud_config_file, "r")) + for cloud_provider in ["azure", "aws", "gcp", "openwhisk", "local"]: + config_file_path = os.path.join(self.cache_dir, f"{cloud_provider}.json") + if os.path.exists(config_file_path): + try: + with open(config_file_path, "r") as f: + self.cached_config[cloud_provider] = json.load(f) + except json.JSONDecodeError as e: + self.logging.error(f"Error decoding JSON from cache file {config_file_path}: {e}") + # Decide behavior: skip this file, delete it, or raise error? + # For now, it will just not load this specific cache. + # else: + # self.logging.debug(f"Cache file for {cloud_provider} not found at {config_file_path}.") + + + def get_config(self, cloud: str) -> Optional[Dict[str, Any]]: + """ + Get the cached configuration for a specific cloud provider. + + :param cloud: Name of the cloud provider (e.g., "aws", "local"). + :return: The cached configuration dictionary, or None if not found. + """ + return self.cached_config.get(cloud) - def get_config(self, cloud): - return self.cached_config[cloud] if cloud in self.cached_config else None + def update_config(self, val: Any, keys: List[str]): + """ + Update a value in the in-memory `cached_config` at a nested path specified by `keys`. - """ - Update config values. Sets flag to save updated content in the end. - val: new value to store - keys: array of consecutive keys for multi-level dictionary - """ + Sets `self.config_updated` to True to indicate that changes need to be + written to disk on shutdown. - def update_config(self, val, keys): + :param val: The new value to store. + :param keys: A list of strings representing the path to the value in the nested dictionary. + Example: `["aws", "resources", "region"]` + """ with self._lock: update_dict(self.cached_config, val, keys) self.config_updated = True def lock(self): + """Acquire the reentrant lock for thread-safe cache operations.""" self._lock.acquire() def unlock(self): + """Release the reentrant lock.""" self._lock.release() def shutdown(self): + """ + Write any updated configurations back to their respective JSON files in the cache directory. + This is typically called at the end of a SeBS run. + """ if self.config_updated: - for cloud in ["azure", "aws", "gcp", "openwhisk", "local"]: - if cloud in self.cached_config: - cloud_config_file = os.path.join(self.cache_dir, "{}.json".format(cloud)) - self.logging.info("Update cached config {}".format(cloud_config_file)) - with open(cloud_config_file, "w") as out: - json.dump(self.cached_config[cloud], out, indent=2) - - """ - Access cached config of a benchmark. - - :param deployment: allowed deployment clouds or local - :param benchmark: - :param language: - - :return: a JSON config or None when not exists - """ - - def get_benchmark_config(self, deployment: str, benchmark: str): - benchmark_dir = os.path.join(self.cache_dir, benchmark) - if os.path.exists(benchmark_dir): - with open(os.path.join(benchmark_dir, "config.json"), "r") as fp: - cfg = json.load(fp) - return cfg[deployment] if deployment in cfg else None - - """ - Access cached version of benchmark code. 
+ with self._lock: # Ensure thread safety during write + for cloud_provider, config_data in self.cached_config.items(): + config_file_path = os.path.join(self.cache_dir, f"{cloud_provider}.json") + self.logging.info(f"Updating cached config file: {config_file_path}") + try: + with open(config_file_path, "w") as out_f: + json.dump(config_data, out_f, indent=2) + except IOError as e: + self.logging.error(f"Error writing cache file {config_file_path}: {e}") + self.config_updated = False # Reset flag after saving + + def get_benchmark_config(self, deployment: str, benchmark: str) -> Optional[Dict[str, Any]]: + """ + Access cached configuration specific to a benchmark and deployment. - :param deployment: allowed deployment clouds or local - :param benchmark: - :param language: + Reads `config.json` from the benchmark's cache directory. - :return: a tuple of JSON config and absolute path to code or None - """ + :param deployment: Name of the deployment (e.g., "aws", "local"). + :param benchmark: Name of the benchmark. + :return: The deployment-specific part of the benchmark's cached config, or None. + """ + benchmark_config_path = os.path.join(self.cache_dir, benchmark, "config.json") + if os.path.exists(benchmark_config_path): + try: + with open(benchmark_config_path, "r") as fp: + cfg = json.load(fp) + return cfg.get(deployment) + except json.JSONDecodeError as e: + self.logging.error(f"Error decoding JSON from {benchmark_config_path}: {e}") + return None def get_code_package( self, @@ -129,13 +193,23 @@ def get_code_package( language_version: str, architecture: str, ) -> Optional[Dict[str, Any]]: - cfg = self.get_benchmark_config(deployment, benchmark) + """ + Retrieve cached information about a benchmark's code package. + Looks for a non-containerized code package. + + :param deployment: Deployment name. + :param benchmark: Benchmark name. + :param language: Programming language name. + :param language_version: Language runtime version. + :param architecture: CPU architecture. + :return: Dictionary with code package details (hash, size, location) or None. + """ + cfg = self.get_benchmark_config(deployment, benchmark) key = f"{language_version}-{architecture}" - if cfg and language in cfg and key in cfg[language]["code_package"]: - return cfg[language]["code_package"][key] - else: - return None + # Path in cache: {deployment}.{language}.code_package.{key} + return cfg.get(language, {}).get("code_package", {}).get(key) if cfg else None + def get_container( self, @@ -145,319 +219,425 @@ def get_container( language_version: str, architecture: str, ) -> Optional[Dict[str, Any]]: + """ + Retrieve cached information about a benchmark's Docker container image. + + :param deployment: Deployment name. + :param benchmark: Benchmark name. + :param language: Programming language name. + :param language_version: Language runtime version. + :param architecture: CPU architecture. + :return: Dictionary with container image details (hash, size, image-uri, image-id) or None. 
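+
+ Example (illustrative; the cache instance and benchmark name are hypothetical)::
+
+     entry = cache.get_container("aws", "110.dynamic-html", "python", "3.9", "x64")
+     if entry is not None:
+         image_uri, image_id = entry["image-uri"], entry["image-id"]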
+ """ cfg = self.get_benchmark_config(deployment, benchmark) - key = f"{language_version}-{architecture}" - if cfg and language in cfg and key in cfg[language]["containers"]: - return cfg[language]["containers"][key] - else: - return None + # Path in cache: {deployment}.{language}.containers.{key} + return cfg.get(language, {}).get("containers", {}).get(key) if cfg else None def get_functions( self, deployment: str, benchmark: str, language: str ) -> Optional[Dict[str, Any]]: - cfg = self.get_benchmark_config(deployment, benchmark) - if cfg and language in cfg and not self.ignore_functions: - return cfg[language]["functions"] - else: - return None + """ + Retrieve cached information about deployed functions for a benchmark. - """ - Access cached storage config of a benchmark. + :param deployment: Deployment name. + :param benchmark: Benchmark name. + :param language: Programming language name. + :return: Dictionary of cached function details, keyed by function name, or None. + """ + if self.ignore_functions: + return None + cfg = self.get_benchmark_config(deployment, benchmark) + # Path in cache: {deployment}.{language}.functions + return cfg.get(language, {}).get("functions") if cfg else None - :param deployment: allowed deployment clouds or local - :param benchmark: - :return: a JSON config or None - """ + def get_storage_config(self, deployment: str, benchmark: str) -> Optional[Dict[str, Any]]: + """ + Access cached storage configuration for a specific benchmark and deployment. - def get_storage_config(self, deployment: str, benchmark: str): + :param deployment: Deployment name. + :param benchmark: Benchmark name. + :return: Cached storage configuration dictionary, or None if not found. + """ return self._get_resource_config(deployment, benchmark, "storage") - def get_nosql_config(self, deployment: str, benchmark: str): + def get_nosql_config(self, deployment: str, benchmark: str) -> Optional[Dict[str, Any]]: + """ + Access cached NoSQL storage configuration for a specific benchmark and deployment. + + :param deployment: Deployment name. + :param benchmark: Benchmark name. + :return: Cached NoSQL storage configuration dictionary, or None if not found. + """ return self._get_resource_config(deployment, benchmark, "nosql") - def _get_resource_config(self, deployment: str, benchmark: str, resource: str): + def _get_resource_config( + self, deployment: str, benchmark: str, resource_type: str + ) -> Optional[Dict[str, Any]]: + """ + Helper to retrieve a specific type of resource configuration from the benchmark's cache. + + :param deployment: Deployment name. + :param benchmark: Benchmark name. + :param resource_type: The type of resource ("storage" or "nosql"). + :return: The resource configuration dictionary, or None. + """ + if self.ignore_storage: # Applies to both storage and nosql types + return None cfg = self.get_benchmark_config(deployment, benchmark) - return cfg[resource] if cfg and resource in cfg and not self.ignore_storage else None + return cfg.get(resource_type) if cfg else None - def update_storage(self, deployment: str, benchmark: str, config: dict): - if self.ignore_storage: - return - self._update_resources(deployment, benchmark, "storage", config) + def update_storage(self, deployment: str, benchmark: str, config_data: dict): + """ + Update the cached storage configuration for a benchmark. - def update_nosql(self, deployment: str, benchmark: str, config: dict): + :param deployment: Deployment name. + :param benchmark: Benchmark name. 
+ :param config_data: New storage configuration data to cache. + """ if self.ignore_storage: return - self._update_resources(deployment, benchmark, "nosql", config) + self._update_resources(deployment, benchmark, "storage", config_data) - def _update_resources(self, deployment: str, benchmark: str, resource: str, config: dict): + def update_nosql(self, deployment: str, benchmark: str, config_data: dict): + """ + Update the cached NoSQL storage configuration for a benchmark. + + :param deployment: Deployment name. + :param benchmark: Benchmark name. + :param config_data: New NoSQL configuration data to cache. + """ if self.ignore_storage: return + self._update_resources(deployment, benchmark, "nosql", config_data) + def _update_resources( + self, deployment: str, benchmark: str, resource_key: str, config_data: dict + ): """ - We are now preparing benchmark data before caching function. - Thus, we have to take over a situation where the cache directory does not exist. - """ + Internal helper to update a resource configuration (storage or NoSQL) in the cache. + + Ensures the benchmark's cache directory exists and updates the `config.json` file + within it. This method is called when preparing benchmark data before function caching. + :param deployment: Deployment name. + :param benchmark: Benchmark name. + :param resource_key: Key for the resource type ("storage" or "nosql"). + :param config_data: Configuration data to save. + """ + # This method is called when input data is prepared, before function itself might be cached. + # Thus, the benchmark's config.json might not exist or might not have the deployment section yet. benchmark_dir = os.path.join(self.cache_dir, benchmark) os.makedirs(benchmark_dir, exist_ok=True) + benchmark_config_path = os.path.join(benchmark_dir, "config.json") with self._lock: - if os.path.exists(os.path.join(benchmark_dir, "config.json")): - with open(os.path.join(benchmark_dir, "config.json"), "r") as fp: - cached_config = json.load(fp) - else: - cached_config = {} + cached_benchmark_config: Dict[str, Any] = {} + if os.path.exists(benchmark_config_path): + try: + with open(benchmark_config_path, "r") as fp: + cached_benchmark_config = json.load(fp) + except json.JSONDecodeError: + self.logging.error(f"Corrupted cache file at {benchmark_config_path}. Re-initializing.") + + # Ensure structure exists: cached_benchmark_config[deployment][resource_key] + if deployment not in cached_benchmark_config: + cached_benchmark_config[deployment] = {} + cached_benchmark_config[deployment][resource_key] = config_data + + with open(benchmark_config_path, "w") as fp: + json.dump(cached_benchmark_config, fp, indent=2) - if deployment in cached_config: - cached_config[deployment][resource] = config - else: - cached_config[deployment] = {resource: config} - - with open(os.path.join(benchmark_dir, "config.json"), "w") as fp: - json.dump(cached_config, fp, indent=2) def add_code_package( self, deployment_name: str, - code_package: "Benchmark", + code_package_benchmark: "Benchmark", # Renamed for clarity ): + """ + Add a new benchmark code package to the cache. + + Copies the code package (directory or zip file) into the cache structure. + Records metadata (hash, size, location, timestamps, image details if container) + in the benchmark's `config.json` within the cache. + + :param deployment_name: Name of the deployment. + :param code_package_benchmark: The Benchmark object representing the code package. 
+ :raises RuntimeError: If a cached entry for this package already exists (use update instead). + """ with self._lock: - language = code_package.language_name - language_version = code_package.language_version - architecture = code_package.architecture - - benchmark_dir = os.path.join(self.cache_dir, code_package.benchmark) - os.makedirs(benchmark_dir, exist_ok=True) - - package_type = "docker" if code_package.container_deployment else "package" - # Check if cache directory for this deployment exist - cached_dir = os.path.join( - benchmark_dir, - deployment_name, - language, - language_version, - architecture, - package_type, + benchmark_name = code_package_benchmark.benchmark + language_name = code_package_benchmark.language_name + language_version = code_package_benchmark.language_version + architecture = code_package_benchmark.architecture + is_container = code_package_benchmark.container_deployment + + benchmark_cache_dir = os.path.join(self.cache_dir, benchmark_name) + os.makedirs(benchmark_cache_dir, exist_ok=True) + + package_subdir_type = "container" if is_container else "package" + # Path for this specific variant of the code package + variant_cache_dir = os.path.join( + benchmark_cache_dir, deployment_name, language_name, + language_version, architecture, package_subdir_type ) - if not os.path.exists(cached_dir): - os.makedirs(cached_dir, exist_ok=True) - - # copy code - if os.path.isdir(code_package.code_location): - cached_location = os.path.join(cached_dir, "code") - shutil.copytree(code_package.code_location, cached_location) - # copy zip file - else: - package_name = os.path.basename(code_package.code_location) - cached_location = os.path.join(cached_dir, package_name) - shutil.copy2(code_package.code_location, cached_dir) - language_config = code_package.serialize() - # don't store absolute path to avoid problems with moving cache dir - relative_cached_loc = os.path.relpath(cached_location, self.cache_dir) - language_config["location"] = relative_cached_loc - - date = str(datetime.datetime.now()) - language_config["date"] = { - "created": date, - "modified": date, - } - - key = f"{language_version}-{architecture}" - if code_package.container_deployment: - image = self.docker_client.images.get(code_package.container_uri) - language_config["image-uri"] = code_package.container_uri - language_config["image-id"] = image.id - - config = { - deployment_name: { - language: { - "containers": {key: language_config}, - "code_package": {}, - "functions": {}, - } - } - } - else: - config = { - deployment_name: { - language: { - "code_package": {key: language_config}, - "containers": {}, - "functions": {}, - } - } - } - - # make sure to not replace other entries - if os.path.exists(os.path.join(benchmark_dir, "config.json")): - with open(os.path.join(benchmark_dir, "config.json"), "r") as fp: - cached_config = json.load(fp) - if deployment_name in cached_config: - # language known, platform known, extend dictionary - if language in cached_config[deployment_name]: - if code_package.container_deployment: - cached_config[deployment_name][language]["containers"][ - key - ] = language_config - else: - cached_config[deployment_name][language]["code_package"][ - key - ] = language_config - - # language unknown, platform known - add new dictionary - else: - cached_config[deployment_name][language] = config[deployment_name][ - language - ] - else: - # language unknown, platform unknown - add new dictionary - cached_config[deployment_name] = config[deployment_name] - config = cached_config - with 
open(os.path.join(benchmark_dir, "config.json"), "w") as fp: - json.dump(config, fp, indent=2) - else: - # TODO: update + if os.path.exists(variant_cache_dir): + # This check might be too strict if we just want to ensure the record is there. + # Original code raised error. Consider logging a warning and proceeding if just metadata update. raise RuntimeError( - "Cached application {} for {} already exists!".format( - code_package.benchmark, deployment_name - ) + f"Attempting to add an already cached code package for {benchmark_name} " + f"({deployment_name}/{language_name}/{language_version}/{architecture}/{package_subdir_type}). " + "Use update_code_package if an update is intended." ) + os.makedirs(variant_cache_dir, exist_ok=True) + + # Copy code to cache + final_cached_code_path: str + if os.path.isdir(code_package_benchmark.code_location): + final_cached_code_path = os.path.join(variant_cache_dir, "code") + shutil.copytree(code_package_benchmark.code_location, final_cached_code_path) + else: # Assuming it's a file (zip) + file_basename = os.path.basename(code_package_benchmark.code_location) + final_cached_code_path = os.path.join(variant_cache_dir, file_basename) + shutil.copy2(code_package_benchmark.code_location, final_cached_code_path) # Use final_cached_code_path for dest + + # Prepare metadata for cache entry + package_metadata = code_package_benchmark.serialize() + # Store path relative to cache_dir + package_metadata["location"] = os.path.relpath(final_cached_code_path, self.cache_dir) + current_time_str = str(datetime.datetime.now()) + package_metadata["date"] = {"created": current_time_str, "modified": current_time_str} + + version_arch_key = f"{language_version}-{architecture}" + if is_container: + docker_image = self.docker_client.images.get(code_package_benchmark.container_uri) + package_metadata["image-uri"] = code_package_benchmark.container_uri + package_metadata["image-id"] = docker_image.id + new_entry_structure = {"containers": {version_arch_key: package_metadata}} + else: + new_entry_structure = {"code_package": {version_arch_key: package_metadata}} + + # Update benchmark's config.json + benchmark_config_path = os.path.join(benchmark_cache_dir, "config.json") + master_config: Dict[str, Any] = {} + if os.path.exists(benchmark_config_path): + with open(benchmark_config_path, "r") as fp: + try: + master_config = json.load(fp) + except json.JSONDecodeError: + self.logging.error(f"Corrupted cache file {benchmark_config_path}. Re-initializing.") + + # Merge new entry carefully + deployment_entry = master_config.setdefault(deployment_name, {}) + language_entry = deployment_entry.setdefault(language_name, {"code_package": {}, "containers": {}, "functions": {}}) + + if is_container: + language_entry.setdefault("containers", {})[version_arch_key] = package_metadata + else: + language_entry.setdefault("code_package", {})[version_arch_key] = package_metadata + + with open(benchmark_config_path, "w") as fp: + json.dump(master_config, fp, indent=2) + def update_code_package( self, deployment_name: str, - code_package: "Benchmark", + code_package_benchmark: "Benchmark", # Renamed for clarity ): + """ + Update an existing benchmark code package in the cache. + + Copies the new code package version over the old one. Updates metadata + (hash, size, modification timestamp, image details if container) in the + benchmark's `config.json`. If the package was not previously cached, + it calls `add_code_package` instead. + + :param deployment_name: Name of the deployment. 
+ :param code_package_benchmark: The Benchmark object with updated code/details. + """ with self._lock: - language = code_package.language_name - language_version = code_package.language_version - architecture = code_package.architecture - benchmark_dir = os.path.join(self.cache_dir, code_package.benchmark) - - package_type = "docker" if code_package.container_deployment else "package" - # Check if cache directory for this deployment exist - cached_dir = os.path.join( - benchmark_dir, - deployment_name, - language, - language_version, - architecture, - package_type, + benchmark_name = code_package_benchmark.benchmark + language_name = code_package_benchmark.language_name + language_version = code_package_benchmark.language_version + architecture = code_package_benchmark.architecture + is_container = code_package_benchmark.container_deployment + benchmark_cache_dir = os.path.join(self.cache_dir, benchmark_name) + + package_subdir_type = "container" if is_container else "package" + variant_cache_dir = os.path.join( + benchmark_cache_dir, deployment_name, language_name, + language_version, architecture, package_subdir_type ) - if os.path.exists(cached_dir): - # copy code - if os.path.isdir(code_package.code_location): - cached_location = os.path.join(cached_dir, "code") - # could be replaced with dirs_exists_ok in copytree - # available in 3.8 - shutil.rmtree(cached_location) - shutil.copytree(src=code_package.code_location, dst=cached_location) - # copy zip file - else: - package_name = os.path.basename(code_package.code_location) - cached_location = os.path.join(cached_dir, package_name) - if code_package.code_location != cached_location: - shutil.copy2(code_package.code_location, cached_dir) - - with open(os.path.join(benchmark_dir, "config.json"), "r") as fp: - config = json.load(fp) - date = str(datetime.datetime.now()) - - key = f"{language_version}-{architecture}" - if code_package.container_deployment: - main_key = "containers" - else: - main_key = "code_package" - - config[deployment_name][language][main_key][key]["date"]["modified"] = date - config[deployment_name][language][main_key][key]["hash"] = code_package.hash - config[deployment_name][language][main_key][key][ - "size" - ] = code_package.code_size - - if code_package.container_deployment: - image = self.docker_client.images.get(code_package.container_uri) - config[deployment_name][language][main_key][key]["image-id"] = image.id - config[deployment_name][language][main_key][key][ - "image-uri" - ] = code_package.container_uri - - with open(os.path.join(benchmark_dir, "config.json"), "w") as fp: - json.dump(config, fp, indent=2) - else: - self.add_code_package(deployment_name, code_package) - - """ - Add new function to cache. - - :param deployment: - :param benchmark: - :param language: - :param code_package: Path to directory/ZIP with code. - :param language_config: Configuration of language and code. - :param storage_config: Configuration of storage buckets. - """ + if not os.path.exists(variant_cache_dir): + # If the specific variant cache dir doesn't exist, it's effectively a new add. + self.logging.info(f"Cache directory {variant_cache_dir} not found. 
Adding as new code package.") + self.add_code_package(deployment_name, code_package_benchmark) + return + + # Directory exists, proceed with update + # Copy new code over old cached code + # Assuming code_package_benchmark.code_location points to the *new* source to be cached + if os.path.isdir(code_package_benchmark.code_location): + cached_code_path = os.path.join(variant_cache_dir, "code") + if os.path.exists(cached_code_path): # Remove old before copying new + shutil.rmtree(cached_code_path) + shutil.copytree(code_package_benchmark.code_location, cached_code_path) + else: # It's a file (zip) + file_basename = os.path.basename(code_package_benchmark.code_location) + cached_code_path = os.path.join(variant_cache_dir, file_basename) + # Ensure source and dest are different if shutil.copy2 is used, + # or handle potential issues if they are the same (though unlikely here). + if os.path.abspath(code_package_benchmark.code_location) != os.path.abspath(cached_code_path): + shutil.copy2(code_package_benchmark.code_location, cached_code_path) + + # Update metadata in config.json + benchmark_config_path = os.path.join(benchmark_cache_dir, "config.json") + if not os.path.exists(benchmark_config_path): + # This case should ideally be handled by add_code_package if we reach here due to missing dir + self.logging.error(f"Benchmark config file {benchmark_config_path} missing during update. This indicates an inconsistent cache state.") + # Attempt to add as new, though this might indicate a deeper issue. + self.add_code_package(deployment_name, code_package_benchmark) + return + + with open(benchmark_config_path, "r+") as fp: # Open for read and write + master_config = json.load(fp) + current_time_str = str(datetime.datetime.now()) + version_arch_key = f"{language_version}-{architecture}" + + main_key = "containers" if is_container else "code_package" + + # Navigate to the specific package entry to update + try: + package_entry = master_config[deployment_name][language_name][main_key][version_arch_key] + except KeyError: + self.logging.error(f"Cache structure error for {benchmark_name} during update. Attempting to add as new.") + # Fallback to add_code_package if structure is missing + # Release lock before calling another method that acquires it + fp.close() # Close file before calling add_code_package + self.add_code_package(deployment_name, code_package_benchmark) + return + + package_entry["date"]["modified"] = current_time_str + package_entry["hash"] = code_package_benchmark.hash + package_entry["size"] = code_package_benchmark.code_size + package_entry["location"] = os.path.relpath(cached_code_path, self.cache_dir) + + + if is_container: + docker_image = self.docker_client.images.get(code_package_benchmark.container_uri) + package_entry["image-id"] = docker_image.id + package_entry["image-uri"] = code_package_benchmark.container_uri + + # Write changes back + fp.seek(0) # Rewind to overwrite + json.dump(master_config, fp, indent=2) + fp.truncate() # Remove any trailing old data if new data is shorter def add_function( self, deployment_name: str, - language_name: str, - code_package: "Benchmark", - function: "Function", + language_name: str, # Already available from code_package_benchmark + code_package_benchmark: "Benchmark", # Renamed for clarity + function_to_add: "Function", # Renamed for clarity ): + """ + Add a new function's deployment details to the cache. + + Stores the serialized function information under its benchmark, deployment, + and language in the respective `config.json`. 
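+
+ The resulting entry in the benchmark's `config.json` has roughly this shape
+ (illustrative sketch; the function name is hypothetical)::
+
+     {"aws": {"python": {"functions": {"my-function": {...}}}}}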
+ + :param deployment_name: Name of the deployment. + :param language_name: Name of the programming language (redundant, use from code_package_benchmark). + :param code_package_benchmark: The Benchmark object this function belongs to. + :param function_to_add: The Function object to cache. + :raises RuntimeError: If the benchmark's code package is not already cached. + """ if self.ignore_functions: return with self._lock: - benchmark_dir = os.path.join(self.cache_dir, code_package.benchmark) - language = code_package.language_name - cache_config = os.path.join(benchmark_dir, "config.json") - - if os.path.exists(cache_config): - functions_config: Dict[str, Any] = {function.name: {**function.serialize()}} - - with open(cache_config, "r") as fp: - cached_config = json.load(fp) - if "functions" not in cached_config[deployment_name][language]: - cached_config[deployment_name][language]["functions"] = functions_config - else: - cached_config[deployment_name][language]["functions"].update( - functions_config - ) - config = cached_config - with open(cache_config, "w") as fp: - fp.write(serialize(config)) - else: + benchmark_name = code_package_benchmark.benchmark + # Language name from code_package_benchmark is more reliable + actual_language_name = code_package_benchmark.language_name + benchmark_cache_dir = os.path.join(self.cache_dir, benchmark_name) + benchmark_config_path = os.path.join(benchmark_cache_dir, "config.json") + + if not os.path.exists(benchmark_config_path): + # This implies that the code package itself was not cached first. raise RuntimeError( - "Can't cache function {} for a non-existing code package!".format(function.name) + f"Cannot cache function {function_to_add.name} for benchmark {benchmark_name} " + "because its code package is not cached. Call add_code_package first." ) - def update_function(self, function: "Function"): + with open(benchmark_config_path, "r+") as fp: # Read and write mode + master_config = json.load(fp) + + # Ensure path exists: master_config[deployment_name][actual_language_name]["functions"] + deployment_entry = master_config.setdefault(deployment_name, {}) + language_entry = deployment_entry.setdefault(actual_language_name, {"code_package": {}, "containers": {}, "functions": {}}) + functions_dict = language_entry.setdefault("functions", {}) + + functions_dict[function_to_add.name] = function_to_add.serialize() + + fp.seek(0) + json.dump(master_config, fp, indent=2) + fp.truncate() + + + def update_function(self, function_to_update: "Function"): # Renamed for clarity + """ + Update an existing function's details in the cache. + + Finds the cached entry for the function and replaces it with the + serialized state of the provided Function object. + + :param function_to_update: The Function object with updated details. + :raises RuntimeError: If the benchmark's code package or the function entry is not found in cache. 
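+
+ Example (illustrative; `func` is a Function previously stored via `add_function`,
+ and the attribute shown is hypothetical)::
+
+     func.config.timeout = 60
+     cache.update_function(func)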
+ """ if self.ignore_functions: return with self._lock: - benchmark_dir = os.path.join(self.cache_dir, function.benchmark) - cache_config = os.path.join(benchmark_dir, "config.json") - - if os.path.exists(cache_config): - with open(cache_config, "r") as fp: - cached_config = json.load(fp) - for deployment, cfg in cached_config.items(): - for language, cfg2 in cfg.items(): - if "functions" not in cfg2: - continue - for name, func in cfg2["functions"].items(): - if name == function.name: - cached_config[deployment][language]["functions"][ - name - ] = function.serialize() - with open(cache_config, "w") as fp: - fp.write(serialize(cached_config)) - else: + benchmark_name = function_to_update.benchmark + # Assuming function's config holds its language details correctly + language_name = function_to_update.config.runtime.language.value + + benchmark_cache_dir = os.path.join(self.cache_dir, benchmark_name) + benchmark_config_path = os.path.join(benchmark_cache_dir, "config.json") + + if not os.path.exists(benchmark_config_path): raise RuntimeError( - "Can't cache function {} for a non-existing code package!".format(function.name) + f"Cannot update function {function_to_update.name} in cache: " + f"config file for benchmark {benchmark_name} does not exist." ) + + with open(benchmark_config_path, "r+") as fp: # Read and write + master_config = json.load(fp) + updated = False + # Iterate to find the correct deployment and language for this function + # This is a bit indirect; ideally, we'd know the deployment_name here. + # Assuming a function name is unique across deployments for a benchmark/language, + # or that this update is called in a context where deployment_name is implicit. + for deployment_key, deployment_data in master_config.items(): + if language_name in deployment_data: + functions_dict = deployment_data[language_name].get("functions", {}) + if function_to_update.name in functions_dict: + functions_dict[function_to_update.name] = function_to_update.serialize() + updated = True + break # Found and updated + if updated: + break + + if updated: + fp.seek(0) + json.dump(master_config, fp, indent=2) + fp.truncate() + else: + self.logging.warning( + f"Function {function_to_update.name} not found in cache for benchmark " + f"{benchmark_name} under any deployment for language {language_name}. " + "Consider using add_function if this is a new function for a deployment." + ) diff --git a/sebs/config.py b/sebs/config.py index c3030ea03..872f058b0 100644 --- a/sebs/config.py +++ b/sebs/config.py @@ -5,66 +5,159 @@ class SeBSConfig: + """ + Global SeBS configuration manager. + + Loads and provides access to system-wide configurations defined in + `config/systems.json`. This includes Docker repository details, + deployment-specific package and file information, supported runtimes, + and image naming conventions. + """ def __init__(self): + """ + Initialize SeBSConfig by loading `systems.json`. + """ with open(project_absolute_path("config", "systems.json"), "r") as cfg: self._system_config = json.load(cfg) - self._image_tag_prefix = "" + self._image_tag_prefix: str = "" @property def image_tag_prefix(self) -> str: + """ + An optional prefix to be added to Docker image tags generated by SeBS. + Default is an empty string. + """ return self._image_tag_prefix @image_tag_prefix.setter def image_tag_prefix(self, tag: str): + """ + Set the Docker image tag prefix. + + :param tag: The prefix string. 
+ """ self._image_tag_prefix = tag def docker_repository(self) -> str: + """ + Get the Docker repository name used for SeBS images. + + :return: Docker repository name (e.g., "user/sebs-images"). + """ return self._system_config["general"]["docker_repository"] def deployment_packages(self, deployment_name: str, language_name: str) -> Dict[str, str]: + """ + Get deployment-specific packages for a given language. + These are packages added by SeBS to the benchmark's requirements. + + :param deployment_name: Name of the FaaS deployment (e.g., "aws", "local"). + :param language_name: Name of the programming language (e.g., "python", "nodejs"). + :return: Dictionary of package names to version strings. + """ return self._system_config[deployment_name]["languages"][language_name]["deployment"][ "packages" ] def deployment_module_packages( self, deployment_name: str, language_name: str - ) -> Dict[str, str]: + ) -> Dict[str, List[str]]: # Corrected return type + """ + Get deployment-specific packages required by SeBS modules (e.g., storage wrappers). + + :param deployment_name: Name of the FaaS deployment. + :param language_name: Name of the programming language. + :return: Dictionary where keys are module names and values are lists of package strings. + """ return self._system_config[deployment_name]["languages"][language_name]["deployment"][ "module_packages" ] def deployment_files(self, deployment_name: str, language_name: str) -> List[str]: + """ + Get list of deployment-specific files (e.g., handlers, wrappers) to be included. + + :param deployment_name: Name of the FaaS deployment. + :param language_name: Name of the programming language. + :return: List of filenames. + """ return self._system_config[deployment_name]["languages"][language_name]["deployment"][ "files" ] def docker_image_types(self, deployment_name: str, language_name: str) -> List[str]: + """ + Get list of Docker image types defined for a deployment and language (e.g., "build", "run"). + + :param deployment_name: Name of the FaaS deployment. + :param language_name: Name of the programming language. + :return: List of image type strings. + """ return self._system_config[deployment_name]["languages"][language_name]["images"] def supported_language_versions( self, deployment_name: str, language_name: str, architecture: str ) -> List[str]: + """ + Get list of supported language versions for a deployment, language, and architecture. + + :param deployment_name: Name of the FaaS deployment. + :param language_name: Name of the programming language. + :param architecture: CPU architecture (e.g., "x64", "arm64"). + :return: List of supported version strings. + """ languages = self._system_config.get(deployment_name, {}).get("languages", {}) base_images = languages.get(language_name, {}).get("base_images", {}) return list(base_images.get(architecture, {}).keys()) def supported_architecture(self, deployment_name: str) -> List[str]: + """ + Get list of supported CPU architectures for a deployment. + + :param deployment_name: Name of the FaaS deployment. + :return: List of architecture strings (e.g., ["x64", "arm64"]). + """ return self._system_config[deployment_name]["architecture"] def supported_package_deployment(self, deployment_name: str) -> bool: + """ + Check if traditional package deployment (e.g., zip files) is supported. + + :param deployment_name: Name of the FaaS deployment. + :return: True if package deployment is supported, False otherwise. 
+ """ return "package" in self._system_config[deployment_name]["deployments"] def supported_container_deployment(self, deployment_name: str) -> bool: + """ + Check if container-based deployment is supported. + + :param deployment_name: Name of the FaaS deployment. + :return: True if container deployment is supported, False otherwise. + """ return "container" in self._system_config[deployment_name]["deployments"] def benchmark_base_images( self, deployment_name: str, language_name: str, architecture: str ) -> Dict[str, str]: + """ + Get mapping of language versions to base Docker image names for benchmarks. + + :param deployment_name: Name of the FaaS deployment. + :param language_name: Name of the programming language. + :param architecture: CPU architecture. + :return: Dictionary mapping version strings to Docker image URIs. + """ return self._system_config[deployment_name]["languages"][language_name]["base_images"][ architecture ] def version(self) -> str: + """ + Get the SeBS framework version. + + :return: SeBS version string, or "unknown". + """ return self._system_config["general"].get("SeBS_version", "unknown") def benchmark_image_name( @@ -76,15 +169,25 @@ def benchmark_image_name( architecture: str, registry: Optional[str] = None, ) -> str: - + """ + Construct the full Docker image name (repository:tag) for a benchmark function. + + :param system: Name of the FaaS system (e.g., "aws", "local"). + :param benchmark: Name of the benchmark. + :param language_name: Name of the programming language. + :param language_version: Version of the language runtime. + :param architecture: CPU architecture. + :param registry: Optional custom Docker registry URL. + :return: Full Docker image name string. + """ tag = self.benchmark_image_tag( system, benchmark, language_name, language_version, architecture ) - repo_name = self.docker_repository() - if registry is not None: - return f"{registry}/{repo_name}:{tag}" + repo = self.docker_repository() + if registry: # Check if registry is not None and not empty + return f"{registry}/{repo}:{tag}" else: - return f"{repo_name}:{tag}" + return f"{repo}:{tag}" def benchmark_image_tag( self, @@ -94,12 +197,33 @@ def benchmark_image_tag( language_version: str, architecture: str, ) -> str: - tag = f"function.{system}.{benchmark}.{language_name}-{language_version}-{architecture}" + """ + Construct the Docker image tag for a benchmark function. + + The tag incorporates system, benchmark, language, version, architecture, + an optional user-defined prefix, and the SeBS version. + Format: function.{system}.{benchmark}.{language_name}-{language_version}-{architecture}[-{prefix}]-{sebs_version} + + :param system: Name of the FaaS system. + :param benchmark: Name of the benchmark. + :param language_name: Name of the programming language. + :param language_version: Version of the language runtime. + :param architecture: CPU architecture. + :return: Docker image tag string. 
+ """ + base_tag = f"function.{system}.{benchmark}.{language_name}-{language_version}-{architecture}" if self.image_tag_prefix: - tag = f"{tag}-{self.image_tag_prefix}" - sebs_version = self._system_config["general"].get("SeBS_version", "unknown") - tag = f"{tag}-{sebs_version}" - return tag + base_tag = f"{base_tag}-{self.image_tag_prefix}" + sebs_version_val = self.version() # Use self.version() to get SeBS version + full_tag = f"{base_tag}-{sebs_version_val}" + return full_tag def username(self, deployment_name: str, language_name: str) -> str: + """ + Get the default username inside Docker build containers for a deployment and language. + + :param deployment_name: Name of the FaaS deployment. + :param language_name: Name of the programming language. + :return: Username string. + """ return self._system_config[deployment_name]["languages"][language_name]["username"] diff --git a/sebs/experiments/config.py b/sebs/experiments/config.py index 26aea9f29..a6c63e207 100644 --- a/sebs/experiments/config.py +++ b/sebs/experiments/config.py @@ -4,7 +4,15 @@ class Config: + """ + Configuration class for SeBS experiments. + + Manages settings related to code and storage updates, container deployment, + result downloading, runtime, architecture, and experiment-specific flags + and configurations. + """ def __init__(self): + """Initialize a new experiment configuration with default values.""" self._update_code: bool = False self._update_storage: bool = False self._container_deployment: bool = False @@ -16,35 +24,58 @@ def __init__(self): @property def update_code(self) -> bool: + """Flag indicating whether to update function code packages.""" return self._update_code @update_code.setter def update_code(self, val: bool): + """Set the flag for updating function code packages.""" self._update_code = val @property def update_storage(self) -> bool: + """Flag indicating whether to update input/output storage resources.""" return self._update_storage def check_flag(self, key: str) -> bool: + """ + Check if a specific experiment flag is set. + + :param key: The name of the flag. + :return: True if the flag is set and True, False otherwise. + """ return False if key not in self._flags else self._flags[key] @property def runtime(self) -> Runtime: + """The target runtime for the experiment (language and version).""" return self._runtime @property def architecture(self) -> str: + """The target CPU architecture for the experiment (e.g., 'x64', 'arm64').""" return self._architecture @property def container_deployment(self) -> bool: + """Flag indicating whether to deploy functions as container images.""" return self._container_deployment def experiment_settings(self, name: str) -> dict: + """ + Get the specific configuration settings for a named experiment. + + :param name: The name of the experiment. + :return: A dictionary containing the experiment's settings. + """ return self._experiment_configs[name] def serialize(self) -> dict: + """ + Serialize the experiment configuration to a dictionary. + + :return: A dictionary representation of the Config object. + """ out = { "update_code": self._update_code, "update_storage": self._update_storage, @@ -60,7 +91,15 @@ def serialize(self) -> dict: # FIXME: 3.7+ python with future annotations @staticmethod def deserialize(config: dict) -> "Config": + """ + Deserialize a Config object from a dictionary. + + Populates the Config instance with values from the dictionary, + including experiment-specific configurations. 
+ :param config: A dictionary containing serialized Config data. + :return: A new Config instance. + """ cfg = Config() cfg._update_code = config["update_code"] cfg._update_storage = config["update_storage"] diff --git a/sebs/experiments/environment.py b/sebs/experiments/environment.py index 86576f113..665f191a6 100644 --- a/sebs/experiments/environment.py +++ b/sebs/experiments/environment.py @@ -3,15 +3,42 @@ from sebs.utils import execute """ - Assumes that all cores are online in the beginning. - TODO: use lscpu to discover online cores +Manages the experiment environment, particularly CPU settings on Linux systems. - Currently supports only Intel CPUs with intel_pstate driver. +This module provides functionality to control CPU core status (online/offline), +hyperthreading, CPU boost, page cache, and CPU frequency scaling. +It is primarily designed for Intel CPUs using the intel_pstate driver. + +Warning: + This module executes commands with `sudo` and directly writes to system files + in `/sys/devices/system/cpu/`. Use with caution and ensure necessary + permissions are granted. Incorrect use can lead to system instability. """ class ExperimentEnvironment: + """ + Manages CPU configurations for benchmarking experiments. + + Provides methods to discover CPU topology, enable/disable hyperthreading, + control CPU boost, manage CPU frequency, and drop page caches. + Assumes all cores are initially online. + Currently supports only Intel CPUs with the intel_pstate driver. + + Attributes: + _cpu_mapping (dict): Maps physical core IDs to lists of logical core descriptions. + _vendor (str): CPU vendor (e.g., "intel"). + _governor (str): CPU frequency scaling driver (e.g., "intel_pstate"). + _prev_boost_status (str): Stores the boost status before disabling it. + _prev_min_freq (str): Stores the minimum performance percentage before setting a new one. + """ def __init__(self): + """ + Initializes the ExperimentEnvironment. + + Discovers CPU topology, vendor, and scaling driver. + Raises NotImplementedError if the CPU vendor or scaling driver is not supported. + """ # find CPU mapping ret = execute('cat /proc/cpuinfo | grep -e "processor" -e "core id"', shell=True) # skip empty line at the end @@ -55,61 +82,159 @@ def __init__(self): raise NotImplementedError() def write_cpu_status(self, cores: List[int], status: int): + """ + Write the online status for the hyperthreads of specified physical cores. + Note: This typically affects the second logical core of a physical core pair. + Core 0's hyperthread is usually not disabled. + + :param cores: List of physical core IDs. + :param status: 0 to disable (offline), 1 to enable (online). + """ cpu_status_path = "/sys/devices/system/cpu/cpu{cpu_id}/online" for core in cores: logical_cores = self._cpu_mapping[core] - for logical_core in logical_cores[1:]: - path = cpu_status_path.format(cpu_id=logical_core["core"]) + # Usually, logical_cores[0] is the primary logical core of a physical core. + # logical_cores[1:] are the hyperthreads if they exist. + for logical_core_info in logical_cores[1:]: + path = cpu_status_path.format(cpu_id=logical_core_info["core"]) execute( cmd="echo {status} | sudo tee {path}".format(status=status, path=path), shell=True, ) def disable_hyperthreading(self, cores: List[int]): + """ + Disable hyperthreading for the specified physical cores by taking their + secondary logical cores offline. + + :param cores: List of physical core IDs. 
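+
+ Example (illustrative; core IDs are hypothetical)::
+
+     env = ExperimentEnvironment()
+     env.disable_hyperthreading([0, 1])
+     # roughly: echo 0 | sudo tee /sys/devices/system/cpu/cpu<sibling_id>/online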
+        """
         self.write_cpu_status(cores, 0)

     def enable_hyperthreading(self, cores: List[int]):
+        """
+        Enable hyperthreading for the specified physical cores by bringing their
+        secondary logical cores online.
+
+        :param cores: List of physical core IDs.
+        """
         self.write_cpu_status(cores, 1)

     def disable_boost(self, cores: List[int]):
+        """
+        Disable CPU boost (e.g., Intel Turbo Boost).
+
+        Currently only implemented for Intel CPUs with the intel_pstate driver.
+        Saves the current boost status so that it can be restored later.
+
+        :param cores: List of physical core IDs (not directly used by intel_pstate).
+        :raises NotImplementedError: If the governor is not 'intel_pstate'.
+        """
         if self._governor == "intel_pstate":
-            boost_path = "/sys/devices/system/cpu/intel_pstate"
-            self._prev_boost_status = execute("cat " + boost_path)
-            execute("echo 0 | sudo tee {path}".format(path=boost_path))
+            # intel_pstate exposes boost control via `no_turbo`:
+            # 0 means boost is enabled, 1 means boost is disabled.
+            # Save the current value so that enable_boost() can restore it.
+            boost_path = "/sys/devices/system/cpu/intel_pstate/no_turbo"
+            self._prev_boost_status = execute("cat " + boost_path).strip()
+            execute("echo 1 | sudo tee {path}".format(path=boost_path))
         else:
-            raise NotImplementedError()
+            raise NotImplementedError("Boost control not implemented for this governor.")

     def enable_boost(self, cores: List[int]):
+        """
+        Enable CPU boost (e.g., Intel Turbo Boost).
+
+        Restores the boost status saved by disable_boost().
+        Currently only implemented for Intel CPUs with the intel_pstate driver.
+
+        :param cores: List of physical core IDs (not directly used by intel_pstate).
+        :raises NotImplementedError: If the governor is not 'intel_pstate'.
+        """
         if self._governor == "intel_pstate":
-            boost_path = "/sys/devices/system/cpu/intel_pstate"
+            boost_path = "/sys/devices/system/cpu/intel_pstate/no_turbo"
+            # Write back the previously saved `no_turbo` value.
             execute(
                 "echo {status} | sudo tee {path}".format(
                     status=self._prev_boost_status, path=boost_path
                 )
             )
         else:
-            raise NotImplementedError()
+            raise NotImplementedError("Boost control not implemented for this governor.")

     def drop_page_cache(self):
+        """Drop the system's page cache, dentries, and inodes."""
         execute("echo 3 | sudo tee /proc/sys/vm/drop_caches")

-    def set_frequency(self, max_freq: int):
+    def set_frequency(self, max_freq_pct: int):
+        """
+        Set the minimum performance percentage for Intel CPUs using the intel_pstate driver.
+
+        With boost disabled (`no_turbo` = 1), raising `min_perf_pct` to 100 pins the cores
+        at their maximum non-boosted frequency.
+
+        Saves the current minimum performance percentage so that it can be restored later.
+
+        :param max_freq_pct: Desired performance percentage (e.g., 100 for the full
+                             non-boosted frequency).
+        """
+        # For intel_pstate, `min_perf_pct` controls the minimum frequency.
+        # To effectively cap the frequency when boost is off (no_turbo=1),
+        # you might also want to set `max_perf_pct`.
+ # The original code only sets `min_perf_pct`. + # If the intention is to run at a fixed non-boosted frequency, + # one might set both min_perf_pct and max_perf_pct to the same value, + # or ensure no_turbo is 1 and then min_perf_pct acts as a ceiling. + # Assuming the intention is to set the *operating* frequency to `max_freq_pct` + # when boost is disabled. path = "/sys/devices/system/cpu/intel_pstate/min_perf_pct" - self._prev_min_freq = execute("cat " + path) - execute("echo {freq} | sudo tee {path}".format(freq=max_freq, path=path)) + self._prev_min_freq = execute("cat " + path).strip() + execute("echo {freq} | sudo tee {path}".format(freq=max_freq_pct, path=path)) + # Optionally, also set max_perf_pct if precise capping is needed: + # max_path = "/sys/devices/system/cpu/intel_pstate/max_perf_pct" + # self._prev_max_freq = execute("cat " + max_path).strip() + # execute("echo {freq} | sudo tee {path}".format(freq=max_freq_pct, path=max_path)) + def unset_frequency(self): + """ + Restore the CPU minimum performance percentage to its previously saved state + for Intel CPUs using the intel_pstate driver. + """ path = "/sys/devices/system/cpu/intel_pstate/min_perf_pct" execute("echo {freq} | sudo tee {path}".format(freq=self._prev_min_freq, path=path)) + # if _prev_max_freq was also set: + # max_path = "/sys/devices/system/cpu/intel_pstate/max_perf_pct" + # execute("echo {freq} | sudo tee {path}".format(freq=self._prev_max_freq, path=max_path)) def setup_benchmarking(self, cores: List[int]): + """ + Prepare the environment for benchmarking. + + Disables CPU boost, disables hyperthreading for specified cores, + sets CPU frequency to maximum non-boosted (100% performance percentage), + and drops page caches. + + :param cores: List of physical core IDs to configure. + """ self.disable_boost(cores) self.disable_hyperthreading(cores) - self.set_frequency(100) + self.set_frequency(100) # Set to 100% of non-boosted performance self.drop_page_cache() def after_benchmarking(self, cores: List[int]): + """ + Restore the environment to its state before benchmarking. + + Enables CPU boost, enables hyperthreading for specified cores, + and unsets the fixed CPU frequency. + + :param cores: List of physical core IDs to restore. + """ self.enable_boost(cores) self.enable_hyperthreading(cores) self.unset_frequency() diff --git a/sebs/experiments/eviction_model.py b/sebs/experiments/eviction_model.py index 30fe7274d..3eaf4c106 100644 --- a/sebs/experiments/eviction_model.py +++ b/sebs/experiments/eviction_model.py @@ -17,7 +17,20 @@ class EvictionModel(Experiment): - + """ + Experiment to model the eviction behavior of FaaS platforms. + + This experiment invokes multiple copies of a function with varying sleep times + between two invocations to determine if the function instance is evicted + from the underlying infrastructure. + + Configuration in `experiments.json` under "eviction-model": + - "invocations": Number of parallel invocation series. + - "sleep": (Not directly used in current logic, but might be intended for future use). + - "repetitions": Number of times to repeat the entire experiment sequence. + - "function_copy_idx": Index to select a subset of function copies for this run. + - "client-port": Starting port number for server replies. + """ times = [ 1, # 2, @@ -43,19 +56,35 @@ class EvictionModel(Experiment): function_copies_per_time = 1 def __init__(self, config: ExperimentConfig): + """ + Initialize the EvictionModel experiment. + + :param config: Experiment configuration. 
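+
+        An illustrative ``experiments.json`` entry for this experiment (the key names
+        follow the class description above; the values are made-up examples)::
+
+            "eviction-model": {
+                "invocations": 1,
+                "repetitions": 5,
+                "sleep": 1,
+                "function_copy_idx": 0,
+                "client-port": 12000
+            }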
+ """ super().__init__(config) @staticmethod def name() -> str: + """Return the name of the experiment.""" return "eviction-model" @staticmethod def typename() -> str: + """Return the type name of this experiment class.""" return "Experiment.EvictionModel" @staticmethod def accept_replies(port: int, invocations: int): + """ + A simple server to accept replies from invoked functions. + + Listens on a specified port for a given number of connections, twice. + This is used by the '040.server-reply' benchmark to confirm function execution. + Writes logs to `server_{invocations}.log`. + :param port: The port number to listen on. + :param invocations: The number of connections to accept in each of the two phases. + """ with open(f"server_{invocations}.log", "w") as f: import socket @@ -95,8 +124,25 @@ def accept_replies(port: int, invocations: int): s.close() @staticmethod - def execute_instance(sleep_time: int, pid: int, tid: int, func: Function, payload: dict): - + def execute_instance( + sleep_time: int, pid: int, tid: int, func: Function, payload: dict + ) -> dict: + """ + Executes a single function instance twice with a specified sleep time in between. + + This function is intended to be run in a separate thread. It performs two + synchronous HTTP invocations of the given function. The first invocation + is checked for cold start status. + + :param sleep_time: The target time in seconds to wait between the end of the + first invocation and the start of the second. + :param pid: Process ID (or an equivalent identifier for the parallel invocation series). + :param tid: Thread ID (or an equivalent identifier for the specific function copy/time). + :param func: The Function object to invoke. + :param payload: The payload for the function invocation. + :return: A dictionary containing the results of both invocations and their timestamps. + :raises RuntimeError: If the first invocation fails. + """ try: print(f"Process {pid} Thread {tid} Invoke function {func.name} with {payload} now!") begin = datetime.now() @@ -111,73 +157,129 @@ def execute_instance(sleep_time: int, pid: int, tid: int, func: Function, payloa logging.error(f"First Invocation Failed at function {func.name}, {e}") raise RuntimeError() - time_spent = float(datetime.now().strftime("%s.%f")) - float(end.strftime("%s.%f")) - seconds_sleep = sleep_time - time_spent - print(f"PID {pid} TID {tid} with time {time}, sleep {seconds_sleep}") - time.sleep(seconds_sleep) + # Calculate actual sleep time needed to match the target `sleep_time` + # This accounts for the time taken by the first invocation and other overheads. + # The original code had `time` instead of `sleep_time` in the print statement, + # which might be a typo. Assuming `sleep_time` is the intended variable. 
+ time_spent_after_invocation = (datetime.now() - end).total_seconds() + seconds_to_sleep_precisely = sleep_time - time_spent_after_invocation + # Ensure sleep is not negative if invocation took longer than sleep_time + actual_sleep_duration = max(0, seconds_to_sleep_precisely) + + print(f"PID {pid} TID {tid} with target sleep {sleep_time}, actual sleep {actual_sleep_duration:.2f}s") + time.sleep(actual_sleep_duration) + second_res = None try: second_begin = datetime.now() second_res = func.triggers(Trigger.TriggerType.HTTP)[0].sync_invoke(payload) second_end = datetime.now() - except Exception: - logging.error(f"Second Invocation Failed at function {func.name}") + except Exception as e: + logging.error(f"Second Invocation Failed at function {func.name}, error: {e}") + # Store failure or partial result if needed, here we just log + # and second_res will remain None or its last assigned value. + # Depending on requirements, one might want to raise an error or return specific failure indicators. + # For now, we'll let it return the partial result. + second_begin = datetime.now() # Placeholder if it failed before starting + second_end = datetime.now() # Placeholder return { "first": res, "first_times": [begin.timestamp(), end.timestamp()], - "second": second_res, + "second": second_res, # This could be None if the second invocation failed "second_times": [second_begin.timestamp(), second_end.timestamp()], - "invocation": pid, + "invocation": pid, # Identifier for the parallel invocation series } @staticmethod def process_function( repetition: int, pid: int, - invocations: int, + invocations_semaphore_val: int, # Renamed from 'invocations' to avoid confusion functions: List[Function], times: List[int], payload: dict, - ): - b = multiprocessing.Semaphore(invocations) + ) -> List[dict]: + """ + Process a set of functions in parallel threads for a single repetition and process ID. + + Each function in the `functions` list is invoked according to `execute_instance` + with a corresponding sleep time from the `times` list. A semaphore is used to + limit the number of concurrent threads to `invocations_semaphore_val`. + + :param repetition: The current repetition number of the experiment. + :param pid: Process ID (or identifier for this parallel set of function tests). + :param invocations_semaphore_val: Value for the semaphore to limit concurrency. + :param functions: List of Function objects to test. + :param times: List of sleep times corresponding to each function. + :param payload: Base payload for function invocations (port will be adjusted per thread). + :return: A list of dictionaries, where each dictionary is the result from `execute_instance`. + :raises RuntimeError: If any of the threaded `execute_instance` calls fail. + """ + # Semaphore to limit concurrency based on the 'invocations' config, + # which seems to mean number of parallel series rather than total invocations here. + semaphore = multiprocessing.Semaphore(invocations_semaphore_val) print(f"Begin at PID {pid}, repetition {repetition}") - threads = len(functions) + num_threads = len(functions) final_results: List[dict] = [] - with ThreadPool(threads) as pool: - results: List[Optional[AsyncResult]] = [None] * threads + with ThreadPool(num_threads) as pool: + async_results: List[Optional[AsyncResult]] = [None] * num_threads """ Invoke multiple functions with different sleep times. Start with the largest sleep time to overlap executions; total time should be equal to maximum execution time. 
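+
+                For example (hypothetical sleep times), with times = [1, 60, 120]
+                the 120-second copy is launched first; the 60-second and 1-second
+                copies run while it waits (given enough concurrent slots), so the
+                whole batch finishes in roughly 120 seconds rather than the 181
+                seconds a sequential schedule would take.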
""" - for idx in reversed(range(0, len(functions))): + for idx in reversed(range(num_threads)): payload_copy = payload.copy() - payload_copy["port"] += idx - b.acquire() - results[idx] = pool.apply_async( - EvictionModel.execute_instance, - args=(times[idx], pid, idx, functions[idx], payload_copy), - ) - - failed = False - for result in results: + payload_copy["port"] += idx # Assign a unique port for each function instance + semaphore.acquire() + try: + async_results[idx] = pool.apply_async( + EvictionModel.execute_instance, + args=(times[idx], pid, idx, functions[idx], payload_copy), + ) + except Exception as e: + semaphore.release() # Ensure semaphore is released on submission error + logging.error(f"Error submitting task for function {functions[idx].name}: {e}") + # Decide how to handle: raise, or mark as failed and continue? + # For now, let's re-raise to indicate a problem with setup/submission. + raise + + failed_tasks = False + for idx, result_handle in enumerate(async_results): try: - assert result - res = result.get() - res["repetition"] = repetition - final_results.append(res) + if result_handle: + res = result_handle.get() + res["repetition"] = repetition + final_results.append(res) + else: + # This case should ideally not happen if apply_async succeeded. + logging.error(f"No result handle for function index {idx}, PID {pid}") + failed_tasks = True except Exception as e: - print(e) - failed = True - if failed: - print("Execution failed!") - raise RuntimeError() + logging.error(f"Task for function index {idx} (PID {pid}) failed: {e}") + failed_tasks = True + finally: + semaphore.release() # Release semaphore once task is processed (get() returns or raises) + + if failed_tasks: + print(f"Execution failed for one or more tasks in PID {pid}, repetition {repetition}!") + # Depending on desired behavior, could raise RuntimeError here or allow partial results. + # For now, we'll allow partial results and print a message. + # raise RuntimeError("One or more threaded tasks failed.") return final_results def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): + """ + Prepare the experiment environment. + + Retrieves the '040.server-reply' benchmark, sets up result storage, + and creates or retrieves function instances based on configured times and copies. + :param sebs_client: The SeBS client instance. + :param deployment_client: The FaaS system client (e.g., AWS, Azure). + """ self._benchmark = sebs_client.get_benchmark( "040.server-reply", deployment_client, self.config ) @@ -201,9 +303,15 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): self.functions.append(deployment_client.get_function(self._benchmark, func_name=fname)) def run(self): + """ + Run the EvictionModel experiment. + Orchestrates the parallel invocation of functions with varying sleep times + across multiple repetitions and processes, collecting and saving the results. + Uses multiprocessing.Pool for parallelism. 
+ """ settings = self.config.experiment_settings(self.name()) - invocations = settings["invocations"] + invocations = settings["invocations"] # Number of parallel series of tests sleep = settings["sleep"] repetitions = settings["repetitions"] invocation_idx = settings["function_copy_idx"] diff --git a/sebs/experiments/experiment.py b/sebs/experiments/experiment.py index ee5a456f3..20f384e25 100644 --- a/sebs/experiments/experiment.py +++ b/sebs/experiments/experiment.py @@ -9,7 +9,18 @@ class Experiment(ABC, LoggingBase): + """ + Abstract base class for all SeBS experiments. + + Provides a common structure and configuration handling for experiments. + Subclasses must implement the `name` and `typename` static methods. + """ def __init__(self, cfg: ExperimentConfig): + """ + Initialize a new Experiment. + + :param cfg: Experiment configuration object. + """ super().__init__() self._config = cfg self._threads = 1 @@ -17,15 +28,28 @@ def __init__(self, cfg: ExperimentConfig): self._invocation_barrier = Semaphore(self._invocations) @property - def config(self): + def config(self) -> ExperimentConfig: + """The configuration object for this experiment.""" return self._config @staticmethod @abstractmethod def name() -> str: + """ + Return the a short, human-readable name of the experiment. + This name is used to identify the experiment in configurations and results. + + :return: The name of the experiment. + """ pass @staticmethod @abstractmethod def typename() -> str: + """ + Return the type name of the experiment class for serialization and deserialization. + Typically in the format "Experiment.ClassName". + + :return: The type name of the experiment class. + """ pass diff --git a/sebs/experiments/invocation_overhead.py b/sebs/experiments/invocation_overhead.py index d7fc56f73..251a8613a 100644 --- a/sebs/experiments/invocation_overhead.py +++ b/sebs/experiments/invocation_overhead.py @@ -15,7 +15,22 @@ class CodePackageSize: + """ + Helper class to manage code package size variations for the InvocationOverhead experiment. + + Generates different code package sizes by creating a file with random data. + """ def __init__(self, deployment_client: FaaSSystem, benchmark: Benchmark, settings: dict): + """ + Initialize CodePackageSize. + + Calculates target points for code package sizes based on experiment settings. + + :param deployment_client: FaaS system client for function updates. + :param benchmark: The benchmark object to modify. + :param settings: Dictionary of experiment settings, expected to contain: + 'code_package_begin', 'code_package_end', 'code_package_points'. + """ import math from numpy import linspace @@ -38,6 +53,15 @@ def __init__(self, deployment_client: FaaSSystem, benchmark: Benchmark, settings self._benchmark = benchmark def before_sample(self, size: int, input_benchmark: dict): + """ + Modify the benchmark's code package to achieve the target size and update the function. + + Creates a file named 'randomdata.bin' with the specified size of random bytes + within the benchmark's code package. Then, updates the function on the deployment. + + :param size: The target size of the random data file in bytes. + :param input_benchmark: Not directly used but part of a common interface. 
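+
+        A usage sketch (the variable name and size are illustrative only)::
+
+            # grow the deployed code package by ~1 MiB of incompressible random data
+            package_size.before_sample(1024 * 1024, {})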
+        """
         arr = bytearray((random.getrandbits(8) for i in range(size)))
         self._benchmark.code_package_modify("randomdata.bin", bytes(arr))
         function = self._deployment_client.get_function(self._benchmark)
@@ -45,17 +69,39 @@ def before_sample(self, size: int, input_benchmark: dict):


 class PayloadSize:
+    """
+    Helper class to manage payload size variations for the InvocationOverhead experiment.
+
+    Generates different payload sizes by creating base64-encoded byte arrays.
+    """
     def __init__(self, settings: dict):
+        """
+        Initialize PayloadSize.
+
+        Calculates target points for payload sizes based on experiment settings.
+
+        :param settings: Dictionary of experiment settings, expected to contain:
+                         'payload_begin', 'payload_end', 'payload_points'.
+        """
         from numpy import linspace

         points = linspace(
             settings["payload_begin"],
             settings["payload_end"],
             settings["payload_points"],
         )
         self.pts = [int(pt) for pt in points]

     def before_sample(self, size: int, input_benchmark: dict):
+        """
+        Modify the input benchmark dictionary to include data of the target size.
+
+        Creates a base64-encoded string of a byte array of the specified size
+        and adds it to the `input_benchmark` dictionary under the key 'data'.
+
+        :param size: The target size of the byte array before base64 encoding.
+        :param input_benchmark: The dictionary to modify with the new payload data.
+        """
         import base64
         from io import BytesIO

@@ -65,12 +111,31 @@ def before_sample(self, size: int, input_benchmark: dict):


 class InvocationOverhead(Experiment):
+    """
+    Experiment to measure invocation overhead by varying code package size or payload size.
+
+    Uses the '030.clock-synchronization' benchmark to establish a baseline and then
+    measures how changes in code/payload size affect invocation times.
+    """
     def __init__(self, config: ExperimentConfig):
+        """
+        Initialize the InvocationOverhead experiment.
+
+        :param config: Experiment configuration.
+        """
         super().__init__(config)
         self.settings = self.config.experiment_settings(self.name())

     def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem):
+        """
+        Prepare the experiment environment.
+
+        Deploys the '030.clock-synchronization' benchmark, prepares its input,
+        and sets up the necessary triggers and output directories.
+
+        :param sebs_client: The SeBS client instance.
+        :param deployment_client: The FaaS system client.
+        """
         # deploy network test function
         from sebs import SeBS  # noqa
         from sebs.faas.function import Trigger
@@ -103,7 +168,13 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem):
         self._deployment_client = deployment_client

     def run(self):
+        """
+        Run the InvocationOverhead experiment.
+
+        Iterates through different sizes (either code package or payload, based on settings)
+        and repetitions, invoking the function and recording timing data.
+        Results, including client-side and server-side timestamps, are saved to CSV files.
+        """
         from requests import get

         ip = get("http://checkip.amazonaws.com/").text.rstrip()
@@ -176,69 +247,129 @@ def process(
         logging_filename: str,
         extend_time_interval: int,
     ):
+        """
+        Process the raw results from the InvocationOverhead experiment.
+
+        Reads client-side timing data and server-side UDP datagram timestamps,
+        calculates Round-Trip Time (RTT) and clock drift, and then computes
+        the adjusted invocation time. Processed results are saved to
+        'result-processed.csv'.
+
+        :param sebs_client: The SeBS client instance.
+        :param deployment_client: The FaaS system client (not directly used in this method).
+        :param directory: The main output directory for SeBS results.
+        :param logging_filename: (Not used in this method).
+        :param extend_time_interval: (Not used in this method).
+        """
         import pandas as pd
         import glob

         from sebs import SeBS  # noqa

-        full_data: Dict[str, pd.Dataframe] = {}
-        for f in glob.glob(
-            os.path.join(directory, "invocation-overhead", self.settings["type"], "*.csv")
-        ):
-
-            if "result.csv" in f or "result-processed.csv" in f:
+        experiment_out_dir = os.path.join(directory, "invocation-overhead", self.settings["type"])
+        full_data: Dict[str, pd.DataFrame] = {}
+
+        # Collect the server-side datagram files (server-{request_id}.csv); each file holds
+        # the per-datagram timestamps recorded by receive_datagrams for one request.
+        for f_path in glob.glob(os.path.join(experiment_out_dir, "server-*.csv")):
+            if "result.csv" in f_path or "result-processed.csv" in f_path:
                 continue
-            request_id = os.path.basename(f).split("-", 1)[1].split(".")[0]
-            data = pd.read_csv(f, sep=",").drop(["id"], axis=1)
-            if request_id in full_data:
-                full_data[request_id] = pd.concat([full_data[request_id], data], axis=1)
-                full_data[request_id]["id"] = request_id
-            else:
+            request_id = os.path.basename(f_path).split("-", 1)[1].split(".")[0]
+            # Drop the 'id' column (assumed to be the datagram sequence number);
+            # only the four timestamp columns are needed for drift estimation.
+            data = pd.read_csv(f_path, sep=",").drop(["id"], axis=1)
+            if request_id not in full_data:
                 full_data[request_id] = data
-        df = pd.concat(full_data.values()).reset_index(drop=True)
-        df["rtt"] = (df["server_rcv"] - df["client_send"]) + (df["client_rcv"] - df["server_send"])
-        df["clock_drift"] = (
-            (df["client_send"] - df["server_rcv"]) + (df["client_rcv"] - df["server_send"])
+            else:
+                # Multiple files for the same request id (e.g., from retries) are appended row-wise.
+                full_data[request_id] = pd.concat([full_data[request_id], data], axis=0)
+
+        # Combine all server-side datagram data into a single DataFrame.
+        if not full_data:
+            self.logging.warning(f"No server datagram files found in {experiment_out_dir}.
Cannot process clock drift.") + return + df_all_server_data = pd.concat(full_data.values(), keys=full_data.keys(), names=['request_id_level', 'original_index']).reset_index() + df_all_server_data.rename(columns={'request_id_level': 'id'}, inplace=True) # 'id' now means request_id + + # Calculate RTT and clock drift for each datagram + df_all_server_data["rtt"] = (df_all_server_data["server_rcv"] - df_all_server_data["client_send"]) + \ + (df_all_server_data["client_rcv"] - df_all_server_data["server_send"]) + df_all_server_data["clock_drift"] = ( + (df_all_server_data["client_send"] - df_all_server_data["server_rcv"]) + + (df_all_server_data["client_rcv"] - df_all_server_data["server_send"]) ) / 2 - with open( - os.path.join(directory, "invocation-overhead", self.settings["type"], "result.csv") - ) as csvfile: - with open( - os.path.join( - directory, - "invocation-overhead", - self.settings["type"], - "result-processed.csv", - ), - "w", - ) as csvfile2: - reader = csv.reader(csvfile, delimiter=",") - writer = csv.writer(csvfile2, delimiter=",") - writer.writerow( - [ - "payload_size", - "repetition", - "is_cold", - "connection_time", - "start_timestamp", - "finish_timestamp", - "request_id", - "clock_drift_mean", - "clock_drift_std", - "invocation_time", - ] - ) - iter2 = iter(reader) - next(iter2) - for row in iter2: - request_id = row[-1] - clock_drift = df[df["id"] == request_id]["clock_drift"].mean() - clock_drift_std = df[df["id"] == request_id]["clock_drift"].std() - invocation_time = float(row[5]) - float(row[4]) - float(row[3]) + clock_drift - writer.writerow(row + [clock_drift, clock_drift_std, invocation_time]) - - def receive_datagrams(self, input_benchmark: dict, repetitions: int, port: int, ip: str): - + # Aggregate clock drift per request_id + clock_drift_stats = df_all_server_data.groupby('id')['clock_drift'].agg(['mean', 'std']).reset_index() + clock_drift_stats.rename(columns={'mean': 'clock_drift_mean', 'std': 'clock_drift_std'}, inplace=True) + + # Process the main client-side result file + client_result_file = os.path.join(experiment_out_dir, "result.csv") + processed_client_result_file = os.path.join(experiment_out_dir, "result-processed.csv") + + try: + df_client = pd.read_csv(client_result_file) + except FileNotFoundError: + self.logging.error(f"Client result file not found: {client_result_file}") + return + + # Merge client results with clock drift stats + df_merged = pd.merge(df_client, clock_drift_stats, on="request_id", how="left") + + # Calculate invocation time adjusted for clock drift + # invocation_time = server_timestamp - client_start_timestamp - connection_time + clock_drift + df_merged["invocation_time"] = (df_merged["finish_timestamp"] - + df_merged["start_timestamp"] - + df_merged["connection_time"] + + df_merged["clock_drift_mean"]) + + # Save the processed data + output_columns = [ + "size", # 'payload_size' in original, but 'size' in the client data writer + "repetition", + "is_cold", + "connection_time", + "start_timestamp", + "finish_timestamp", + "request_id", + "clock_drift_mean", + "clock_drift_std", + "invocation_time", + ] + # Ensure all columns exist, fill with NaN if not (e.g. 
if clock_drift stats are missing for a request_id) + for col in output_columns: + if col not in df_merged.columns: + df_merged[col] = pd.NA + + df_merged.to_csv(processed_client_result_file, columns=output_columns, index=False, na_rep='NaN') + self.logging.info(f"Processed results saved to {processed_client_result_file}") + + def receive_datagrams(self, input_benchmark: dict, repetitions: int, port: int, ip: str) -> list: + """ + Receive UDP datagrams from the invoked function for clock synchronization. + + Opens a UDP socket, triggers an asynchronous function invocation, and then + listens for a specified number of datagrams. Records timestamps for + received and sent datagrams. Saves server-side timestamps to a CSV file + named `server-{request_id}.csv`. + + :param input_benchmark: The input payload for the benchmark function. + Will be modified with 'server-port'. + :param repetitions: The number of datagrams expected from the function. + :param port: The local port number to bind the UDP socket to. + :param ip: (Not directly used in this function, but part of the calling context). + :return: A list containing: [is_cold (int), connection_time (float), + client_begin_timestamp (float), server_timestamp (float from function output), + request_id (str)]. + :raises RuntimeError: If the function invocation fails. + """ import socket input_benchmark["server-port"] = port @@ -306,8 +437,10 @@ def receive_datagrams(self, input_benchmark: dict, repetitions: int, port: int, @staticmethod def name() -> str: + """Return the name of the experiment.""" return "invocation-overhead" @staticmethod def typename() -> str: + """Return the type name of this experiment class.""" return "Experiment.InvocOverhead" diff --git a/sebs/experiments/network_ping_pong.py b/sebs/experiments/network_ping_pong.py index 6c44f8480..dd407143e 100644 --- a/sebs/experiments/network_ping_pong.py +++ b/sebs/experiments/network_ping_pong.py @@ -20,11 +20,32 @@ class NetworkPingPong(Experiment): + """ + Experiment to measure network RTT (Round-Trip Time) using a ping-pong mechanism. + + Deploys the '020.network-benchmark' which typically involves a function that + echoes back UDP datagrams. The experiment sends a series of datagrams and + measures the time taken for each to return. + """ def __init__(self, config: ExperimentConfig): + """ + Initialize the NetworkPingPong experiment. + + :param config: Experiment configuration. + """ super().__init__(config) def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): + """ + Prepare the experiment environment. + + Deploys the '020.network-benchmark', prepares its input, + and ensures an HTTP trigger is available for the function. + Sets up the output directory for results. + :param sebs_client: The SeBS client instance. + :param deployment_client: The FaaS system client. + """ benchmark = sebs_client.get_benchmark( "020.network-benchmark", deployment_client, self.config ) @@ -43,10 +64,19 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): triggers = self._function.triggers(Trigger.TriggerType.HTTP) if len(triggers) == 0: + # Assuming create_trigger returns the created trigger, + # though it's not explicitly used later in this prepare method. deployment_client.create_trigger(self._function, Trigger.TriggerType.HTTP) def run(self): - + """ + Run the NetworkPingPong experiment. 
+ + Retrieves the public IP, then starts multiple threads (based on 'threads' + setting) each running `receive_datagrams` for a number of invocations + (based on 'invocations' setting) on different ports. + After all threads complete, downloads benchmark output from storage. + """ from requests import get ip = get("http://checkip.amazonaws.com/").text.rstrip() @@ -67,36 +97,85 @@ def run(self): self._storage.download_bucket(self.benchmark_input["output-bucket"], self._out_dir) def process(self, directory: str): + """ + Process the results of the NetworkPingPong experiment. + + Reads all server-*.csv files from the experiment's output directory, + concatenates them, calculates RTT for each datagram, and prints + various statistics (mean, std, CV, percentiles). + Also generates and saves a histogram of RTT values. + + :param directory: The base directory where experiment results are stored. + This method will look into `directory/network-ping-pong/`. + """ + full_data: Dict[str, pd.DataFrame] = {} # Type hint for clarity + results_path = os.path.join(directory, "network-ping-pong") + for f_path in glob.glob(os.path.join(results_path, "server-*.csv")): # server-request_id.csv + try: + request_id = os.path.basename(f_path).split("-", 1)[1].split(".")[0] + # Assuming 'id' column in csv is sequence number, not request_id + data = pd.read_csv(f_path, sep=",").drop(["id"], axis=1) + # The original logic for concatenating data for the same request_id + # (if multiple files existed per request_id) was `axis=1` (column-wise), + # which is unusual. If multiple files per request_id are possible and + # represent different sets of datagrams, row-wise (axis=0) makes more sense. + # For simplicity, assuming one file per request_id or that files are structured + # such that simple row-wise concatenation is intended if merging. + # However, the current loop structure processes one file per request_id into `data`. + # The `full_data` dict then stores one DataFrame per request_id. + # The final `pd.concat(full_data.values())` makes sense if each DataFrame + # in `full_data` has the same columns. 
+ full_data[request_id] = data + except Exception as e: + self.logging.error(f"Error processing file {f_path}: {e}") + continue - full_data: Dict[str, pd.Dataframe] = {} - for f in glob.glob(os.path.join(directory, "network-ping-pong", "*.csv")): + if not full_data: + self.logging.warning(f"No data files found in {results_path} to process.") + return - request_id = os.path.basename(f).split("-", 1)[1].split(".")[0] - data = pd.read_csv(f, sep=",").drop(["id"], axis=1) - if request_id in full_data: - full_data[request_id] = pd.concat([full_data[request_id], data], axis=1) - else: - full_data[request_id] = data df = pd.concat(full_data.values()).reset_index(drop=True) df["rtt"] = (df["server_rcv"] - df["client_send"]) + (df["client_rcv"] - df["server_send"]) - print("Rows: ", df.shape[0]) - print("Mean: ", df["rtt"].mean()) - print("STD: ", df["rtt"].std()) - print("CV: ", df["rtt"].std() / df["rtt"].mean()) - print("P50: ", df["rtt"].quantile(0.5)) - print("P75: ", df["rtt"].quantile(0.75)) - print("P95: ", df["rtt"].quantile(0.95)) - print("P99: ", df["rtt"].quantile(0.99)) - print("P99,9: ", df["rtt"].quantile(0.999)) - ax = df["rtt"].hist(bins=2000) - # ax.set_xlim([0.01, 0.04]) - fig = ax.get_figure() - fig.savefig(os.path.join(directory, "histogram.png")) + print("Network Ping Pong Results:") + print(f" Processed {df.shape[0]} datagrams.") + print(f" Mean RTT: {df['rtt'].mean():.6f} s") + print(f" STD RTT: {df['rtt'].std():.6f} s") + if df['rtt'].mean() != 0: # Avoid division by zero + print(f" CV RTT: {df['rtt'].std() / df['rtt'].mean():.4f}") + else: + print(" CV RTT: N/A (mean is zero)") + print(f" P50 RTT: {df['rtt'].quantile(0.5):.6f} s") + print(f" P75 RTT: {df['rtt'].quantile(0.75):.6f} s") + print(f" P95 RTT: {df['rtt'].quantile(0.95):.6f} s") + print(f" P99 RTT: {df['rtt'].quantile(0.99):.6f} s") + print(f" P99.9 RTT: {df['rtt'].quantile(0.999):.6f} s") + + try: + ax = df["rtt"].hist(bins=2000) + # ax.set_xlim([0.01, 0.04]) # Optional: set x-axis limits if needed + fig = ax.get_figure() + fig.savefig(os.path.join(results_path, "rtt_histogram.png")) + self.logging.info(f"Saved RTT histogram to {os.path.join(results_path, 'rtt_histogram.png')}") + except Exception as e: + self.logging.error(f"Error generating histogram: {e}") - def receive_datagrams(self, repetitions: int, port: int, ip: str): + def receive_datagrams(self, repetitions: int, port: int, ip: str): + """ + Receive UDP datagrams from the invoked function for network ping-pong. + + Binds a UDP socket to the specified port, asynchronously invokes the + network benchmark function (which should send datagrams to this host/port), + and then listens for `repetitions` number of datagrams. Records timestamps + for received and sent datagrams and saves them to a CSV file named + `server-{request_id}.csv` in the experiment's output directory. + + :param repetitions: The number of datagrams expected from the function. + :param port: The local port number to bind the UDP socket to. + :param ip: The public IP address of this host, to be passed to the function. 
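+
+        The resulting CSV is assumed to contain one row per datagram with the
+        timestamp columns later consumed by process(), roughly::
+
+            id, client_send, client_rcv, server_rcv, server_send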
+ """ print(f"Starting invocation with {repetitions} repetitions on port {port}") - socket.setdefaulttimeout(2) + socket.setdefaulttimeout(2) # Timeout for socket operations server_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) server_socket.bind(("", port)) @@ -143,8 +222,10 @@ def receive_datagrams(self, repetitions: int, port: int, ip: str): @staticmethod def name() -> str: + """Return the name of the experiment.""" return "network-ping-pong" @staticmethod def typename() -> str: + """Return the type name of this experiment class.""" return "Experiment.NetworkPingPong" diff --git a/sebs/experiments/perf_cost.py b/sebs/experiments/perf_cost.py index 7b940f8df..6a787b5af 100644 --- a/sebs/experiments/perf_cost.py +++ b/sebs/experiments/perf_cost.py @@ -19,28 +19,52 @@ class PerfCost(Experiment): + """ + Experiment to measure performance and cost across different configurations. + + This experiment can run benchmarks in various modes (warm, cold, burst, sequential) + and across different memory sizes to evaluate their impact on performance metrics + like execution time and cold start overhead, as well as billing implications. + """ def __init__(self, config: ExperimentConfig): + """ + Initialize the PerfCost experiment. + + :param config: Experiment configuration. + """ super().__init__(config) @staticmethod def name() -> str: + """Return the name of the experiment.""" return "perf-cost" @staticmethod def typename() -> str: + """Return the type name of this experiment class.""" return "Experiment.PerfCost" class RunType(Enum): + """Enumeration for different types of experimental runs.""" WARM = 0 COLD = 1 BURST = 2 SEQUENTIAL = 3 def str(self) -> str: + """Return the lower-case string representation of the run type.""" return self.name.lower() def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): + """ + Prepare the experiment environment. + + Retrieves the specified benchmark, prepares its input, deploys the function, + and sets up an HTTP trigger. Also creates the output directory for results. + :param sebs_client: The SeBS client instance. + :param deployment_client: The FaaS system client. + """ # create benchmark instance settings = self.config.experiment_settings(self.name()) self._benchmark = sebs_client.get_benchmark( @@ -72,7 +96,13 @@ def prepare(self, sebs_client: "SeBS", deployment_client: FaaSSystem): self._sebs_client = sebs_client def run(self): + """ + Run the main experiment logic. + Iterates through configured memory sizes (if any) and runs the benchmark + configuration for each. If no memory sizes are specified, runs with the + default function memory. + """ settings = self.config.experiment_settings(self.name()) # Execution on systems where memory configuration is not provided @@ -88,7 +118,15 @@ def run(self): self.run_configuration(settings, settings["repetitions"], suffix=str(memory)) def compute_statistics(self, times: List[float]): + """ + Compute and log basic statistics and confidence intervals for a list of timings. + + Calculates mean, median, standard deviation, coefficient of variation, + and parametric (Student's t-distribution) and non-parametric (Le Boudec) + confidence intervals for 95% and 99% confidence levels. + :param times: A list of floating-point execution times in milliseconds. 
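+
+        Example call (the timings are made-up values in milliseconds)::
+
+            self.compute_statistics([103.2, 98.7, 101.5, 99.9, 100.3])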
+ """ mean, median, std, cv = basic_stats(times) self.logging.info(f"Mean {mean} [ms], median {median} [ms], std {std}, CV {cv}") for alpha in [0.95, 0.99]: @@ -117,7 +155,18 @@ def _run_configuration( repetitions: int, suffix: str = "", ): + """ + Execute a specific configuration of the performance/cost experiment. + + Manages cold starts, invokes functions multiple times (sequentially or in parallel + based on `invocations` in `settings`), collects results, and computes statistics. + :param run_type: The type of run (COLD, WARM, BURST, SEQUENTIAL). + :param settings: Experiment-specific settings from the configuration. + :param invocations: Number of concurrent invocations for parallel runs. + :param repetitions: Total number of valid samples to gather. + :param suffix: Suffix to append to the results file name (e.g., memory size). + """ # Randomize starting value to ensure that it's not the same # as in the previous run. # Otherwise we could not change anything and containers won't be killed. @@ -125,9 +174,6 @@ def _run_configuration( self._deployment_client.cold_start_counter = randrange(100) - """ - Cold experiment: schedule all invocations in parallel. - """ file_name = ( f"{run_type.str()}_results_{suffix}.json" if suffix @@ -227,7 +273,17 @@ def _run_configuration( ) def run_configuration(self, settings: dict, repetitions: int, suffix: str = ""): + """ + Run the experiment for all types specified in the configuration (cold, warm, etc.). + Iterates through the experiment types defined in `settings["experiments"]` + and calls `_run_configuration` for each. + + :param settings: Experiment-specific settings from the configuration. + :param repetitions: Total number of valid samples to gather for each type. + :param suffix: Suffix to append to the results file name (e.g., memory size). + :raises RuntimeError: If an unknown experiment type is specified. + """ for experiment_type in settings["experiments"]: if experiment_type == "cold": self._run_configuration( @@ -268,7 +324,19 @@ def process( logging_filename: str, extend_time_interval: int, ): + """ + Process the raw JSON results from the PerfCost experiment. + Reads each JSON result file, downloads detailed metrics if necessary (e.g., from cloud provider logs), + calculates additional statistics, and aggregates results into a summary CSV file. + Processed JSON files are saved with a "-processed" suffix. + + :param sebs_client: The SeBS client instance. + :param deployment_client: The FaaS system client. + :param directory: The main output directory for SeBS results. + :param logging_filename: Name for the logging file during processing. + :param extend_time_interval: Interval in minutes to extend log querying time window if needed. + """ import glob import csv diff --git a/sebs/experiments/result.py b/sebs/experiments/result.py index b28de75c5..28a68db0f 100644 --- a/sebs/experiments/result.py +++ b/sebs/experiments/result.py @@ -9,6 +9,12 @@ class Result: + """ + Represents the results of a SeBS experiment. + + Stores experiment and deployment configurations, invocation details, + metrics, and timing information. + """ def __init__( self, experiment_config: ExperimentConfig, @@ -17,6 +23,15 @@ def __init__( metrics: Optional[Dict[str, dict]] = None, result_bucket: Optional[str] = None, ): + """ + Initialize a new Result object. + + :param experiment_config: The configuration of the experiment. + :param deployment_config: The configuration of the FaaS deployment. 
+ :param invocations: Optional dictionary of invocation results, keyed by function name and request ID. + :param metrics: Optional dictionary of additional metrics, keyed by function name. + :param result_bucket: Optional name of the bucket where results are stored. + """ self.config = { "experiments": experiment_config, "deployment": deployment_config, @@ -30,20 +45,44 @@ def __init__( else: self._metrics = metrics self.result_bucket = result_bucket + self.begin_time: Optional[float] = None + self.end_time: Optional[float] = None def begin(self): + """Records the start time of the experiment.""" self.begin_time = datetime.now().timestamp() def end(self): + """Records the end time of the experiment.""" self.end_time = datetime.now().timestamp() - def times(self) -> Tuple[int, int]: + def times(self) -> Tuple[Optional[float], Optional[float]]: + """ + Return the begin and end timestamps of the experiment. + + :return: Tuple containing (begin_timestamp, end_timestamp). + Timestamps can be None if begin() or end() haven't been called. + """ return self.begin_time, self.end_time def add_result_bucket(self, result_bucket: str): + """ + Set the name of the S3/Blob bucket where results are stored. + + :param result_bucket: Name of the bucket. + """ self.result_bucket = result_bucket def add_invocation(self, func: Function, invocation: ExecutionResult): + """ + Add an invocation result to this experiment result. + + If the invocation has no request_id (e.g., due to failure), a synthetic + ID is generated. + + :param func: The Function object that was invoked. + :param invocation: The ExecutionResult of the invocation. + """ # the function has most likely failed, thus no request id if invocation.request_id: req_id = invocation.request_id @@ -56,18 +95,47 @@ def add_invocation(self, func: Function, invocation: ExecutionResult): self._invocations[func.name] = {req_id: invocation} def functions(self) -> List[str]: + """ + Return a list of function names for which invocations have been recorded. + + :return: List of function names. + """ return list(self._invocations.keys()) def invocations(self, func: str) -> Dict[str, ExecutionResult]: + """ + Return all recorded invocations for a specific function. + + :param func: The name of the function. + :return: Dictionary of ExecutionResult objects, keyed by request ID. + """ return self._invocations[func] def metrics(self, func: str) -> dict: + """ + Return or initialize the metrics dictionary for a specific function. + + If no metrics exist for the function, an empty dictionary is created. + + :param func: The name of the function. + :return: Dictionary of metrics for the function. + """ if func not in self._metrics: self._metrics[func] = {} return self._metrics[func] @staticmethod def deserialize(cached_config: dict, cache: Cache, handlers: LoggingHandlers) -> "Result": + """ + Deserialize a Result object from a dictionary (typically from a cache or JSON file). + + Reconstructs ExperimentConfig, DeploymentConfig, and ExecutionResult objects. + + :param cached_config: Dictionary containing serialized Result data. + :param cache: Cache client instance, used for deserializing DeploymentConfig. + :param handlers: Logging handlers, used for deserializing DeploymentConfig. + :return: A new Result instance. 
+ """ invocations: Dict[str, dict] = {} for func, func_invocations in cached_config["_invocations"].items(): invocations[func] = {} diff --git a/sebs/experiments/startup_time.py b/sebs/experiments/startup_time.py index 3b7e95205..ed7b63748 100644 --- a/sebs/experiments/startup_time.py +++ b/sebs/experiments/startup_time.py @@ -3,13 +3,26 @@ class StartupTime(Experiment): + """ + Experiment to measure function startup time. + + This class currently serves as a placeholder or base for startup time experiments. + Further implementation would be needed to define the actual measurement logic. + """ def __init__(self, config: ExperimentConfig): + """ + Initialize the StartupTime experiment. + + :param config: Experiment configuration. + """ super().__init__(config) @staticmethod def name() -> str: + """Return the name of the experiment.""" return "startup-time" @staticmethod def typename() -> str: + """Return the type name of this experiment class.""" return "Experiment.StartupTime" diff --git a/sebs/faas/config.py b/sebs/faas/config.py index 19c7d3abe..a8220d2a9 100644 --- a/sebs/faas/config.py +++ b/sebs/faas/config.py @@ -12,131 +12,209 @@ # https://stackoverflow.com/questions/33533148/how-do-i-specify-that-the-return-type-of-a-method-is-the-same-as-the-class-itsel """ - Credentials for FaaS system used to authorize operations on functions - and other resources. +Abstract base classes for FaaS system configurations, credentials, and resources. - The order of credentials initialization: - 1. Load credentials from cache. - 2. If any new values are provided in the config, they override cache values. - 3. If nothing is provided, initialize using environmental variables. - 4. If no information is provided, then failure is reported. +This module defines the structure for managing FaaS provider-specific details, +including authorization, allocated cloud resources (like storage buckets, IAM roles), +and general deployment settings. """ class Credentials(ABC, LoggingBase): - def __init__(self): - super().__init__() - """ - Create credentials instance from user config and cached values. + Abstract base class for FaaS system credentials. + + Credentials are used to authorize operations on functions and other resources. + The typical order of initialization is: + 1. Load credentials from cache. + 2. Override cached values with any new values provided in the configuration. + 3. If nothing is provided, attempt to initialize using environment variables. + 4. Report failure if no credential information can be found. """ + def __init__(self): + """Initialize a new Credentials object.""" + super().__init__() @staticmethod @abstractmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Credentials": - pass + """ + Deserialize credentials from a dictionary, potentially using cached values. - """ - Serialize to JSON for storage in cache. - """ + Implementations should handle merging provided configuration with cached data + and falling back to environment variables if necessary. - @abstractmethod - def serialize(self) -> dict: + :param config: Dictionary containing credential information. + :param cache: Cache object for retrieving cached credentials. + :param handlers: Logging handlers. + :return: An instance of a Credentials subclass. + """ pass + @abstractmethod + def serialize(self) -> dict: + """ + Serialize credentials to a dictionary for storage in cache. -""" - Class grouping resources allocated at the FaaS system to execute functions - and deploy various services. 
Examples might include IAM roles and API gateways - for HTTP triggers. - - Storage resources are handled seperately. -""" + :return: A dictionary representation of the credentials. + """ + pass class Resources(ABC, LoggingBase): + """ + Abstract base class for managing resources allocated on a FaaS system. + + This includes resources like IAM roles, API Gateways for HTTP triggers, + and storage buckets. Storage resources (buckets) are specifically managed here. + """ class StorageBucketType(str, Enum): + """Enumeration for types of storage buckets used by SeBS.""" DEPLOYMENT = "deployment" BENCHMARKS = "benchmarks" EXPERIMENTS = "experiments" @staticmethod def deserialize(val: str) -> Resources.StorageBucketType: + """ + Deserialize a string value to a StorageBucketType enum member. + + :param val: The string value of the bucket type (e.g., "deployment"). + :return: The corresponding StorageBucketType enum member. + :raises Exception: If the value does not match any known bucket type. + """ for member in Resources.StorageBucketType: if member.value == val: return member raise Exception(f"Unknown storage bucket type type {val}") def __init__(self, name: str): + """ + Initialize a new Resources object. + + :param name: The name of the FaaS provider (e.g., "aws", "azure"). + """ super().__init__() self._name = name self._buckets: Dict[Resources.StorageBucketType, str] = {} self._resources_id: Optional[str] = None + self._region: Optional[str] = None # Added to store region @property def resources_id(self) -> str: - assert self._resources_id is not None + """A unique identifier for this set of resources.""" + assert self._resources_id is not None, "Resources ID not set!" return self._resources_id @resources_id.setter def resources_id(self, resources_id: str): + """Set the unique identifier for these resources.""" self._resources_id = resources_id @property def has_resources_id(self) -> bool: + """Check if a resource ID has been set.""" return self._resources_id is not None @property def region(self) -> str: + """The cloud region where these resources are located.""" + assert self._region is not None, "Region not set for resources!" return self._region @region.setter def region(self, region: str): + """Set the cloud region for these resources.""" self._region = region def get_storage_bucket(self, bucket_type: Resources.StorageBucketType) -> Optional[str]: + """ + Get the name of a specific type of storage bucket. + + :param bucket_type: The type of the bucket (DEPLOYMENT, BENCHMARKS, EXPERIMENTS). + :return: The bucket name if set, otherwise None. + """ return self._buckets.get(bucket_type) def get_storage_bucket_name(self, bucket_type: Resources.StorageBucketType) -> str: - return f"sebs-{bucket_type.value}-{self._resources_id}" + """ + Generate the expected name for a storage bucket of a given type. + + The name is typically in the format "sebs-{bucket_type_value}-{resources_id}". + + :param bucket_type: The type of the bucket. + :return: The generated bucket name. + """ + return f"sebs-{bucket_type.value}-{self.resources_id}" def set_storage_bucket(self, bucket_type: Resources.StorageBucketType, bucket_name: str): + """ + Set the name for a specific type of storage bucket. + + :param bucket_type: The type of the bucket. + :param bucket_name: The actual name of the bucket in the cloud storage. 
+ """ self._buckets[bucket_type] = bucket_name @staticmethod @abstractmethod def initialize(res: Resources, dct: dict): + """ + Initialize resource attributes from a dictionary (typically from cache or config). + Subclasses should call `super().initialize(res, dct)` if they override this. + This base implementation handles `resources_id` and `storage_buckets`. + + :param res: The Resources instance to initialize. + :param dct: Dictionary containing resource configurations. + """ if "resources_id" in dct: - res._resources_id = dct["resources_id"] + res.resources_id = dct["resources_id"] # Use setter for potential validation if "storage_buckets" in dct: for key, value in dct["storage_buckets"].items(): - res._buckets[Resources.StorageBucketType.deserialize(key)] = value - - """ - Create credentials instance from user config and cached values. - """ + res.set_storage_bucket(Resources.StorageBucketType.deserialize(key), value) @staticmethod @abstractmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Resources": - pass + """ + Deserialize a Resources object from a dictionary. - """ - Serialize to JSON for storage in cache. - """ + Implementations should handle provider-specific resource details. + + :param config: Dictionary containing resource information. + :param cache: Cache object. + :param handlers: Logging handlers. + :return: An instance of a Resources subclass. + """ + pass @abstractmethod def serialize(self) -> dict: + """ + Serialize resources to a dictionary for storage in cache. + + Subclasses should call `super().serialize()` and extend the dictionary. + This base implementation serializes `resources_id` and `storage_buckets`. + + :return: A dictionary representation of the resources. + """ out = {} if self.has_resources_id: out["resources_id"] = self.resources_id - for key, value in self._buckets.items(): - out[key.value] = value + # Serialize buckets using their string value as key + out["storage_buckets"] = {key.value: value for key, value in self._buckets.items()} return out def update_cache(self, cache: Cache): + """ + Update the cache with the current resource configurations. + + Saves `resources_id` and storage bucket names. + + :param cache: Cache object. + """ if self.has_resources_id: cache.update_config( val=self.resources_id, keys=[self._name, "resources", "resources_id"] @@ -147,43 +225,71 @@ def update_cache(self, cache: Cache): ) -""" - FaaS system config defining cloud region (if necessary), credentials and - resources allocated. -""" - - class Config(ABC, LoggingBase): + """ + Abstract base class for FaaS system configurations. + Defines the structure for cloud region, credentials, and allocated resources. + """ _region: str def __init__(self, name: str): + """ + Initialize a new FaaS system Config. + + :param name: The name of the FaaS provider (e.g., "aws", "azure"). + """ super().__init__() self._region = "" self._name = name @property def region(self) -> str: + """The cloud region for the FaaS deployment (e.g., "us-east-1").""" return self._region @property @abstractmethod def credentials(self) -> Credentials: + """The credentials for accessing the FaaS system.""" pass @property @abstractmethod def resources(self) -> Resources: + """The resources allocated on the FaaS system.""" pass @staticmethod @abstractmethod def initialize(cfg: Config, dct: dict): + """ + Initialize config attributes from a dictionary. + + Subclasses should call `super().initialize(cfg, dct)` if they override this. 
+ This base implementation initializes the `_region`. + + :param cfg: The Config instance to initialize. + :param dct: Dictionary containing configuration values. + """ cfg._region = dct["region"] @staticmethod @abstractmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: + """ + Deserialize a Config object from a dictionary, dispatching to the correct subclass. + + Determines the FaaS provider from the 'name' field in the config and calls + the appropriate subclass's deserialize method. + + :param config: Dictionary containing the FaaS system configuration. + Expected to have a 'name' field indicating the provider. + :param cache: Cache object. + :param handlers: Logging handlers. + :return: An instance of a Config subclass. + :raises AssertionError: If the FaaS provider name is unknown. + """ from sebs.local.config import LocalConfig name = config["name"] @@ -205,13 +311,31 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config implementations["openwhisk"] = OpenWhiskConfig.deserialize func = implementations.get(name) - assert func, "Unknown config type!" - return func(config[name] if name in config else config, cache, handlers) + assert func, f"Unknown config type: {name}!" + # Pass the provider-specific part of the config, or the whole thing if not nested + provider_config = config.get(name, config) + return func(provider_config, cache, handlers) @abstractmethod def serialize(self) -> dict: + """ + Serialize the FaaS system configuration to a dictionary. + + Subclasses should call `super().serialize()` and extend the dictionary. + This base implementation serializes `name` and `region`. + + :return: A dictionary representation of the configuration. + """ return {"name": self._name, "region": self._region} @abstractmethod def update_cache(self, cache: Cache): + """ + Update the cache with the current FaaS system configuration. + + Subclasses should call `super().update_cache(cache)`. + This base implementation updates the `region`. + + :param cache: Cache object. + """ cache.update_config(val=self.region, keys=[self._name, "region"]) diff --git a/sebs/faas/container.py b/sebs/faas/container.py index b17525f7b..b64e02b81 100644 --- a/sebs/faas/container.py +++ b/sebs/faas/container.py @@ -13,25 +13,46 @@ class DockerContainer(LoggingBase): + """ + Abstract base class for managing Docker container images for FaaS deployments. + + Provides functionalities for finding, building, and pushing Docker images + to container registries. Specific FaaS providers should subclass this to + implement provider-specific details like registry naming. + """ @staticmethod @abstractmethod def name() -> str: + """ + Return the name of the FaaS platform this container manager is for (e.g., "aws", "azure"). + + :return: Name of the FaaS platform. + """ pass @property def disable_rich_output(self) -> bool: + """Flag to disable rich progress bar output during image push.""" return self._disable_rich_output @disable_rich_output.setter def disable_rich_output(self, val: bool): + """Set the flag to disable rich progress bar output.""" self._disable_rich_output = val def __init__( self, system_config: SeBSConfig, - docker_client, + docker_client: docker.client, # Explicitly type docker_client experimental_manifest: bool = False, ): + """ + Initialize the DockerContainer manager. + + :param system_config: SeBS system configuration. + :param docker_client: Docker client instance. 
+ :param experimental_manifest: Flag to use experimental Docker manifest features (default: False). + """ super().__init__() self.docker_client = docker_client @@ -39,8 +60,17 @@ def __init__( self.system_config = system_config self._disable_rich_output = False - def find_image(self, repository_name, image_tag) -> bool: + def find_image(self, repository_name: str, image_tag: str) -> bool: + """ + Check if a Docker image exists in the registry. + + Can use experimental `docker manifest inspect` or fall back to `docker pull` + if experimental features are not enabled or supported. + :param repository_name: Name of the Docker repository. + :param image_tag: Tag of the Docker image. + :return: True if the image is found, False otherwise. + """ if self.experimental_manifest: try: # This requires enabling experimental Docker features @@ -58,7 +88,16 @@ def find_image(self, repository_name, image_tag) -> bool: return False def show_progress(self, txt: str, progress: Progress, layer_tasks: dict): + """ + Parse Docker push progress messages and update a `rich.progress` display. + + Handles messages for layer pushing status, completion, and errors. + :param txt: JSON string or dictionary containing Docker progress line. + :param progress: `rich.progress.Progress` instance to update. + :param layer_tasks: Dictionary to store progress task IDs for each layer. + :raises Exception: If an error is reported in the progress line. + """ if isinstance(txt, str): line = json.loads(txt) else: @@ -88,42 +127,62 @@ def show_progress(self, txt: str, progress: Progress, layer_tasks: dict): elif "error" in line: raise Exception(line["error"]) - def push_image(self, repository_uri, image_tag): - try: + def push_image(self, repository_uri: str, image_tag: str): + """ + Push a Docker image to the specified repository and tag. - if not self.disable_rich_output: + Displays a progress bar using `rich.progress` unless `disable_rich_output` is True. + :param repository_uri: URI of the Docker repository. + :param image_tag: Tag of the image to push. + :raises RuntimeError: If Docker API reports an error during push. + :raises Exception: If any other Docker API error occurs. + """ + try: + if not self.disable_rich_output: layer_tasks = {} - with Progress() as progress: - + with Progress() as progress_display: # Renamed to avoid conflict self.logging.info(f"Pushing image {image_tag} to {repository_uri}") - ret = self.docker_client.images.push( + ret_stream = self.docker_client.images.push( repository=repository_uri, tag=image_tag, stream=True, decode=True ) - for line in ret: - self.show_progress(line, progress, layer_tasks) - + for line in ret_stream: + self.show_progress(line, progress_display, layer_tasks) else: self.logging.info(f"Pushing image {image_tag} to {repository_uri}") - ret = self.docker_client.images.push( + ret_stream = self.docker_client.images.push( repository=repository_uri, tag=image_tag, stream=True, decode=True ) - - for val in ret: + for val in ret_stream: if "error" in val: self.logging.error(f"Failed to push the image to registry {repository_uri}") - raise RuntimeError(val) + raise RuntimeError(val["error"]) # Raise the error message except docker.errors.APIError as e: self.logging.error( f"Failed to push the image to registry {repository_uri}. 
Error: {str(e)}" ) - raise e + raise e # Re-raise the original Docker APIError @abstractmethod def registry_name( self, benchmark: str, language_name: str, language_version: str, architecture: str ) -> Tuple[str, str, str, str]: + """ + Generate provider-specific registry and image names. + + This method must be implemented by subclasses for specific FaaS providers. + + :param benchmark: Name of the benchmark. + :param language_name: Name of the programming language. + :param language_version: Version of the programming language. + :param architecture: CPU architecture of the image. + :return: Tuple containing: + - registry_name (e.g., docker.io/user or provider-specific registry) + - repository_name (e.g., benchmark-name or provider-specific repo name) + - image_tag (e.g., language-version-architecture) + - image_uri (fully qualified image URI) + """ pass def build_base_image( @@ -136,16 +195,33 @@ def build_base_image( is_cached: bool, ) -> Tuple[bool, str]: """ - When building function for the first time (according to SeBS cache), - check if Docker image is available in the registry. - If yes, then skip building. - If no, then continue building. - - For every subsequent build, we rebuild image and push it to the - registry. These are triggered by users modifying code and enforcing - a build. + Build and push a base Docker image for a benchmark function. + + Workflow: + 1. If `is_cached` is True (meaning this is not the first build for this exact code + and configuration according to SeBS's benchmark cache) and the image already + exists in the registry, skip building and return the existing image URI. + 2. If `is_cached` is True but the image is NOT in the registry (e.g., manually deleted + from registry), proceed to build and push. + 3. If `is_cached` is False (first time building this specific version), proceed to + build and push. + 4. Subsequent builds (e.g., user modifies code, `is_cached` would be False for the + new hash) will always rebuild and push. + + The Dockerfile used is expected to be named 'Dockerfile.function' and located in + `DOCKER_DIR/{self.name()}/{language_name}/`. + + :param directory: Base directory of the benchmark code. A 'build' subdirectory + will be created here. + :param language_name: Name of the programming language. + :param language_version: Version of the programming language. + :param architecture: Target CPU architecture for the image. + :param benchmark: Name of the benchmark. + :param is_cached: Boolean indicating if this benchmark version is already cached by SeBS. + :return: Tuple (image_built_and_pushed: bool, image_uri: str). + The boolean is True if a new image was built and pushed, False if an + existing image from the registry was used. """ - registry_name, repository_name, image_tag, image_uri = self.registry_name( benchmark, language_name, language_version, architecture ) diff --git a/sebs/faas/function.py b/sebs/faas/function.py index 0fab7bcf4..ca2bbebcf 100644 --- a/sebs/faas/function.py +++ b/sebs/faas/function.py @@ -13,12 +13,27 @@ from sebs.utils import LoggingBase """ - Times are reported in microseconds. +Defines core classes for representing FaaS functions, their configurations, +triggers, and execution results within the SeBS framework. + +All times are reported in microseconds unless otherwise specified. """ class ExecutionTimes: - + """ + Stores various client-side and benchmark-internal execution timings. + All times are in microseconds. + + Attributes: + client: Total client-perceived execution time. 
+ client_begin: Timestamp when the client initiated the request. + client_end: Timestamp when the client received the full response. + benchmark: Execution time measured within the benchmark code itself. + initialization: Initialization time, often part of cold start (provider-reported or inferred). + http_startup: Time until TCP connection is established for HTTP triggers. + http_first_byte_return: Time until the first byte of the HTTP response is received. + """ client: int client_begin: datetime client_end: datetime @@ -28,95 +43,163 @@ class ExecutionTimes: http_first_byte_return: int def __init__(self): + """Initialize ExecutionTimes with default zero values.""" self.client = 0 self.initialization = 0 self.benchmark = 0 + # Ensure all attributes are initialized, even if not set to 0 by default. + # For datetime objects, None might be more appropriate if not immediately known. + # However, the current deserialize method assumes they exist. + # For now, we'll leave them uninitialized here if not explicitly set to 0. + # Consider adding default None or now() for datetime if appropriate. @staticmethod def deserialize(cached_obj: dict) -> "ExecutionTimes": + """ + Deserialize an ExecutionTimes object from a dictionary. + + :param cached_obj: Dictionary containing ExecutionTimes data. + :return: An ExecutionTimes instance. + """ ret = ExecutionTimes() ret.__dict__.update(cached_obj) return ret class ProviderTimes: - + """ + Stores execution and initialization times as reported by the FaaS provider. + All times are in microseconds. + + Attributes: + initialization: Initialization duration (e.g., cold start init phase). + execution: Execution duration of the function code. + """ initialization: int execution: int def __init__(self): + """Initialize ProviderTimes with default zero values.""" self.execution = 0 self.initialization = 0 @staticmethod def deserialize(cached_obj: dict) -> "ProviderTimes": + """ + Deserialize a ProviderTimes object from a dictionary. + + :param cached_obj: Dictionary containing ProviderTimes data. + :return: A ProviderTimes instance. + """ ret = ProviderTimes() ret.__dict__.update(cached_obj) return ret class ExecutionStats: - + """ + Stores statistical information about a function execution. + + Attributes: + memory_used: Memory used by the function execution (in MB or other provider unit). + cold_start: Boolean indicating if this was a cold start. + failure: Boolean indicating if the invocation failed. + """ memory_used: Optional[float] cold_start: bool failure: bool def __init__(self): + """Initialize ExecutionStats with default values.""" self.memory_used = None self.cold_start = False self.failure = False @staticmethod def deserialize(cached_obj: dict) -> "ExecutionStats": + """ + Deserialize an ExecutionStats object from a dictionary. + + :param cached_obj: Dictionary containing ExecutionStats data. + :return: An ExecutionStats instance. + """ ret = ExecutionStats() ret.__dict__.update(cached_obj) return ret class ExecutionBilling: - + """ + Stores billing-related information for a function execution. + + Attributes: + memory: Configured memory for the function (in MB or provider unit). + billed_time: Duration for which the execution was billed (in provider-specific units, often ms). + gb_seconds: A common billing unit, calculated as (memory_GB * billed_duration_seconds). + """ _memory: Optional[int] _billed_time: Optional[int] - _gb_seconds: int + _gb_seconds: int # Should this also be optional or default to 0? 
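# A minimal illustrative sketch, not taken from the diff above: how the
# gb_seconds value documented for ExecutionBilling could be derived from the
# other billing fields. It assumes memory is configured in MB and billed_time
# is reported in milliseconds (the usual AWS Lambda convention); the
# provider-specific result parsers are what populate the real attributes.
def estimate_gb_seconds(memory_mb: int, billed_time_ms: int) -> float:
    """Approximate GB-seconds: (memory in GB) * (billed duration in seconds)."""
    return (memory_mb / 1024.0) * (billed_time_ms / 1000.0)

# Example: a 256 MB function billed for 1200 ms consumes 0.3 GB-seconds.
assert abs(estimate_gb_seconds(256, 1200) - 0.3) < 1e-9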
def __init__(self): - self.memory = None - self.billed_time = None - self.gb_seconds = 0 + """Initialize ExecutionBilling with default values.""" + self._memory = None # Use underscore to indicate it's managed by property + self._billed_time = None + self._gb_seconds = 0 @property def memory(self) -> Optional[int]: + """Configured memory for the function (e.g., in MB).""" return self._memory @memory.setter def memory(self, val: int): + """Set the configured memory.""" self._memory = val @property def billed_time(self) -> Optional[int]: + """Billed duration for the execution (e.g., in milliseconds).""" return self._billed_time @billed_time.setter def billed_time(self, val: int): + """Set the billed duration.""" self._billed_time = val @property def gb_seconds(self) -> int: + """Computed GB-seconds for the execution.""" return self._gb_seconds @gb_seconds.setter def gb_seconds(self, val: int): + """Set the computed GB-seconds.""" self._gb_seconds = val @staticmethod def deserialize(cached_obj: dict) -> "ExecutionBilling": + """ + Deserialize an ExecutionBilling object from a dictionary. + + :param cached_obj: Dictionary containing ExecutionBilling data. + :return: An ExecutionBilling instance. + """ ret = ExecutionBilling() - ret.__dict__.update(cached_obj) + # Handle cases where keys might be missing in older cached_obj + ret._memory = cached_obj.get("_memory") + ret._billed_time = cached_obj.get("_billed_time") + ret._gb_seconds = cached_obj.get("_gb_seconds", 0) # Default to 0 if missing return ret class ExecutionResult: + """ + Encapsulates all results from a single function invocation. + Includes benchmark output, request ID, various timings, provider-specific times, + execution statistics, and billing information. + """ output: dict request_id: str times: ExecutionTimes @@ -125,6 +208,7 @@ class ExecutionResult: billing: ExecutionBilling def __init__(self): + """Initialize an empty ExecutionResult.""" self.output = {} self.request_id = "" self.times = ExecutionTimes() @@ -134,6 +218,15 @@ def __init__(self): @staticmethod def from_times(client_time_begin: datetime, client_time_end: datetime) -> "ExecutionResult": + """ + Create an ExecutionResult instance initialized with client begin and end times. + + Calculates the total client-perceived duration. + + :param client_time_begin: Timestamp when the client initiated the request. + :param client_time_end: Timestamp when the client received the full response. + :return: An ExecutionResult instance. + """ ret = ExecutionResult() ret.times.client_begin = client_time_begin ret.times.client_end = client_time_end @@ -141,21 +234,47 @@ def from_times(client_time_begin: datetime, client_time_end: datetime) -> "Execu return ret def parse_benchmark_output(self, output: dict): + """ + Parse the output from the benchmark function. + + Extracts standard fields like 'is_cold', 'begin', and 'end' timestamps + to populate `stats.cold_start` and `times.benchmark`. + + :param output: The dictionary returned by the benchmark function. + :raises RuntimeError: If 'is_cold' is not in the output, indicating a potential failure. + """ self.output = output # FIXME: temporary handling of errorenous invocation if "is_cold" not in self.output: - raise RuntimeError(f"Invocation failed! Reason: {output['result']}") + # More informative error message + error_reason = output.get('result', output.get('body', str(output))) + raise RuntimeError(f"Invocation failed! 
Output: {error_reason}") self.stats.cold_start = self.output["is_cold"] - self.times.benchmark = int( - ( - datetime.fromtimestamp(float(self.output["end"])) - - datetime.fromtimestamp(float(self.output["begin"])) - ) - / timedelta(microseconds=1) - ) + # Ensure 'begin' and 'end' are present and are valid numbers before conversion + if "begin" in self.output and "end" in self.output: + try: + begin_ts = float(self.output["begin"]) + end_ts = float(self.output["end"]) + self.times.benchmark = int( + (datetime.fromtimestamp(end_ts) - datetime.fromtimestamp(begin_ts)) + / timedelta(microseconds=1) + ) + except (ValueError, TypeError) as e: + self.logging.error(f"Could not parse benchmark begin/end times from output: {e}") + self.times.benchmark = 0 # Or some other indicator of parsing failure + else: + self.logging.warning("Benchmark begin/end times not found in output.") + self.times.benchmark = 0 + @staticmethod def deserialize(cached_config: dict) -> "ExecutionResult": + """ + Deserialize an ExecutionResult object from a dictionary. + + :param cached_config: Dictionary containing ExecutionResult data. + :return: An ExecutionResult instance. + """ ret = ExecutionResult() ret.times = ExecutionTimes.deserialize(cached_config["times"]) ret.billing = ExecutionBilling.deserialize(cached_config["billing"]) @@ -166,28 +285,49 @@ def deserialize(cached_config: dict) -> "ExecutionResult": return ret -""" - Function trigger and implementation of invocation. - - FIXME: implement a generic HTTP invocation and specialize input and output - processing in classes. -""" +class Trigger(ABC, LoggingBase): + """ + Abstract base class for function triggers. + Defines the interface for different trigger types (e.g., HTTP, Library, Storage). + Includes a helper method for HTTP invocations using pycurl. -class Trigger(ABC, LoggingBase): + FIXME: implement a generic HTTP invocation and specialize input and output + processing in classes. (This comment is from the original code) + """ class TriggerType(Enum): + """Enumeration of supported trigger types.""" HTTP = "http" LIBRARY = "library" STORAGE = "storage" @staticmethod def get(name: str) -> "Trigger.TriggerType": + """ + Get a TriggerType enum member by its string name. Case-insensitive. + + :param name: The string name of the trigger type (e.g., "http"). + :return: The corresponding TriggerType enum member. + :raises Exception: If the name does not match any known trigger type. + """ for member in Trigger.TriggerType: if member.value.lower() == name.lower(): return member - raise Exception("Unknown trigger type {}".format(member)) + raise Exception("Unknown trigger type {}".format(member)) # Original used member, should be name def _http_invoke(self, payload: dict, url: str, verify_ssl: bool = True) -> ExecutionResult: + """ + Perform an HTTP POST request to the given URL with the provided payload. + + Uses pycurl for the HTTP request. Parses the JSON response and populates + an ExecutionResult object. + + :param payload: Dictionary to be sent as JSON in the request body. + :param url: The URL to invoke. + :param verify_ssl: Whether to verify SSL certificates (default: True). + :return: An ExecutionResult object. + :raises RuntimeError: If the invocation fails (e.g., non-200 status, JSON decode error). + """ import pycurl from io import BytesIO @@ -236,71 +376,129 @@ def _http_invoke(self, payload: dict, url: str, verify_ssl: bool = True) -> Exec self.logging.error("No output provided!") raise RuntimeError(f"Failed invocation of function! 
Output: {data.getvalue().decode()}") + # FIXME: 3.7+, future annotations # FIXME: 3.7+, future annotations @staticmethod @abstractmethod def trigger_type() -> "Trigger.TriggerType": + """Return the type of this trigger (e.g., HTTP, LIBRARY).""" pass @abstractmethod def sync_invoke(self, payload: dict) -> ExecutionResult: + """ + Synchronously invoke the function with the given payload. + + :param payload: The payload to send to the function. + :return: An ExecutionResult object. + """ pass @abstractmethod def async_invoke(self, payload: dict) -> concurrent.futures.Future: + """ + Asynchronously invoke the function with the given payload. + + :param payload: The payload to send to the function. + :return: A Future object representing the asynchronous invocation. + """ pass @abstractmethod def serialize(self) -> dict: + """ + Serialize the trigger's state to a dictionary. + + :return: A dictionary representation of the trigger. + """ pass @staticmethod @abstractmethod def deserialize(cached_config: dict) -> "Trigger": + """ + Deserialize a Trigger object from a dictionary. + + :param cached_config: Dictionary containing Trigger data. + :return: A Trigger instance. + """ pass class Language(Enum): + """Enumeration of supported programming languages for FaaS functions.""" PYTHON = "python" NODEJS = "nodejs" # FIXME: 3.7+ python with future annotations @staticmethod def deserialize(val: str) -> Language: + """ + Deserialize a string value to a Language enum member. Case-insensitive. + + :param val: The string name of the language (e.g., "python"). + :return: The corresponding Language enum member. + :raises Exception: If the value does not match any known language. + """ for member in Language: - if member.value == val: + if member.value.lower() == val.lower(): # Make comparison case-insensitive return member - raise Exception(f"Unknown language type {member}") + raise Exception(f"Unknown language type {val}") class Architecture(Enum): + """Enumeration of supported CPU architectures for FaaS functions.""" X86 = "x64" ARM = "arm64" def serialize(self) -> str: + """Serialize the Architecture enum member to its string value.""" return self.value @staticmethod def deserialize(val: str) -> Architecture: + """ + Deserialize a string value to an Architecture enum member. Case-insensitive. + + :param val: The string name of the architecture (e.g., "x64"). + :return: The corresponding Architecture enum member. + :raises Exception: If the value does not match any known architecture. + """ for member in Architecture: - if member.value == val: + if member.value.lower() == val.lower(): # Make comparison case-insensitive return member - raise Exception(f"Unknown architecture type {member}") + raise Exception(f"Unknown architecture type {val}") @dataclass class Runtime: + """ + Represents the runtime environment of a FaaS function. + Attributes: + language: The programming language (Language enum). + version: The specific version string of the language runtime (e.g., "3.8", "12"). + """ language: Language version: str def serialize(self) -> dict: + """ + Serialize the Runtime to a dictionary. + + :return: Dictionary with "language" and "version". + """ return {"language": self.language.value, "version": self.version} @staticmethod def deserialize(config: dict) -> Runtime: - languages = {"python": Language.PYTHON, "nodejs": Language.NODEJS} - return Runtime(language=languages[config["language"]], version=config["version"]) + """ + Deserialize a Runtime object from a dictionary. 
+ + :param config: Dictionary with "language" and "version". + :return: A Runtime instance. + """ + return Runtime(language=Language.deserialize(config["language"]), version=config["version"]) T = TypeVar("T", bound="FunctionConfig") @@ -308,15 +506,33 @@ def deserialize(config: dict) -> Runtime: @dataclass class FunctionConfig: + """ + Dataclass for storing the configuration of a FaaS function. + + Attributes: + timeout: Function execution timeout in seconds. + memory: Memory allocated to the function (in MB or provider-specific units). + runtime: The Runtime environment for the function. + architecture: The CPU architecture for the function (default: X86). + """ timeout: int memory: int runtime: Runtime - architecture: Architecture = Architecture.X86 + architecture: Architecture = Architecture.X86 # Default to X86 @staticmethod def _from_benchmark(benchmark: Benchmark, obj_type: Type[T]) -> T: + """ + Internal helper to create a FunctionConfig (or subclass) from a Benchmark object. + + :param benchmark: The Benchmark instance. + :param obj_type: The specific FunctionConfig class type to instantiate. + :return: An instance of obj_type. + """ runtime = Runtime(language=benchmark.language, version=benchmark.language_version) - architecture = Architecture.deserialize(benchmark._experiment_config._architecture) + # Ensure benchmark._experiment_config._architecture is available and valid + architecture_str = getattr(getattr(benchmark, '_experiment_config', object()), '_architecture', 'x64') + architecture = Architecture.deserialize(architecture_str) cfg = obj_type( timeout=benchmark.benchmark_config.timeout, memory=benchmark.benchmark_config.memory, @@ -327,28 +543,67 @@ def _from_benchmark(benchmark: Benchmark, obj_type: Type[T]) -> T: @staticmethod def from_benchmark(benchmark: Benchmark) -> FunctionConfig: + """ + Create a FunctionConfig instance from a Benchmark object. + + :param benchmark: The Benchmark instance. + :return: A FunctionConfig instance. + """ return FunctionConfig._from_benchmark(benchmark, FunctionConfig) @staticmethod def deserialize(data: dict) -> FunctionConfig: - keys = list(FunctionConfig.__dataclass_fields__.keys()) - data = {k: v for k, v in data.items() if k in keys} - data["runtime"] = Runtime.deserialize(data["runtime"]) - return FunctionConfig(**data) + """ + Deserialize a FunctionConfig object from a dictionary. + + :param data: Dictionary containing FunctionConfig data. + :return: A FunctionConfig instance. + """ + # Filter for known fields to avoid errors with extra keys in data + known_keys = {field.name for field in FunctionConfig.__dataclass_fields__.values()} + filtered_data = {k: v for k, v in data.items() if k in known_keys} + + filtered_data["runtime"] = Runtime.deserialize(filtered_data["runtime"]) + if "architecture" in filtered_data: # Handle optional architecture + filtered_data["architecture"] = Architecture.deserialize(filtered_data["architecture"]) + else: # Default if not present + filtered_data["architecture"] = Architecture.X86 + return FunctionConfig(**filtered_data) def serialize(self) -> dict: - return self.__dict__ + """ + Serialize the FunctionConfig to a dictionary. + Converts Runtime and Architecture to their serializable forms. + + :return: A dictionary representation of the FunctionConfig. 
+ """ + # Manually construct dict to ensure enums are serialized correctly + return { + "timeout": self.timeout, + "memory": self.memory, + "runtime": self.runtime.serialize(), + "architecture": self.architecture.serialize() + } -""" - Abstraction base class for FaaS function. Contains a list of associated triggers - and might implement non-trigger execution if supported by the SDK. - Example: direct function invocation through AWS boto3 SDK. -""" +class Function(LoggingBase, ABC): # Added ABC + """ + Abstract base class for a FaaS function. -class Function(LoggingBase): + Represents a deployable unit of code on a FaaS platform. Contains details + about the benchmark it belongs to, its name, code hash, configuration, + and associated triggers. Subclasses implement provider-specific details. + """ def __init__(self, benchmark: str, name: str, code_hash: str, cfg: FunctionConfig): + """ + Initialize a new Function. + + :param benchmark: The name of the benchmark this function implements. + :param name: The name of the function on the FaaS platform. + :param code_hash: A hash of the function's code package, for change detection. + :param cfg: The FunctionConfig object for this function. + """ super().__init__() self._benchmark = benchmark self._name = name @@ -359,48 +614,74 @@ def __init__(self, benchmark: str, name: str, code_hash: str, cfg: FunctionConfi @property def config(self) -> FunctionConfig: + """The configuration of this function (timeout, memory, runtime, etc.).""" return self._cfg @property - def name(self): + def name(self) -> str: # Added return type hint + """The name of the function on the FaaS platform.""" return self._name @property - def benchmark(self): + def benchmark(self) -> str: # Added return type hint + """The name of the benchmark this function belongs to.""" return self._benchmark @property - def code_package_hash(self): + def code_package_hash(self) -> str: # Added return type hint + """A hash of the function's code package.""" return self._code_package_hash @code_package_hash.setter def code_package_hash(self, new_hash: str): + """Set a new code package hash (e.g., after an update).""" self._code_package_hash = new_hash @property def updated_code(self) -> bool: + """Flag indicating if the function's code has been updated since last deployment.""" return self._updated_code @updated_code.setter def updated_code(self, val: bool): + """Set the updated_code flag.""" self._updated_code = val def triggers_all(self) -> List[Trigger]: + """Return a list of all triggers associated with this function.""" return [trig for trigger_type, triggers in self._triggers.items() for trig in triggers] def triggers(self, trigger_type: Trigger.TriggerType) -> List[Trigger]: + """ + Return a list of triggers of a specific type associated with this function. + + :param trigger_type: The type of triggers to retrieve. + :return: A list of Trigger objects, or an empty list if none of that type exist. + """ try: return self._triggers[trigger_type] except KeyError: return [] def add_trigger(self, trigger: Trigger): + """ + Add a trigger to this function. + + :param trigger: The Trigger object to add. + """ if trigger.trigger_type() not in self._triggers: self._triggers[trigger.trigger_type()] = [trigger] else: self._triggers[trigger.trigger_type()].append(trigger) def serialize(self) -> dict: + """ + Serialize the Function's state to a dictionary. + + Includes name, hash, benchmark, config, and all triggers. + + :return: A dictionary representation of the Function. 
+ """ return { "name": self._name, "hash": self._code_package_hash, @@ -414,4 +695,12 @@ def serialize(self) -> dict: @staticmethod @abstractmethod def deserialize(cached_config: dict) -> "Function": + """ + Deserialize a Function object from a dictionary. + + This method must be implemented by FaaS provider-specific subclasses. + + :param cached_config: Dictionary containing Function data. + :return: A Function instance. + """ pass diff --git a/sebs/faas/nosql.py b/sebs/faas/nosql.py index 16f9ab119..068d3e0be 100644 --- a/sebs/faas/nosql.py +++ b/sebs/faas/nosql.py @@ -8,62 +8,124 @@ class NoSQLStorage(ABC, LoggingBase): + """ + Abstract base class for NoSQL storage services used by benchmarks. + + Provides an interface for creating, managing, and interacting with + NoSQL tables/containers across different FaaS providers. + """ @staticmethod @abstractmethod def deployment_name() -> str: + """ + Return the name of the FaaS deployment this NoSQL storage belongs to + (e.g., "aws", "azure"). + + :return: Deployment name string. + """ pass @property def cache_client(self) -> Cache: + """The cache client instance for storing/retrieving NoSQL configurations.""" return self._cache_client @property - def region(self): + def region(self) -> str: # Added return type + """The cloud region where the NoSQL storage is located.""" return self._region def __init__(self, region: str, cache_client: Cache, resources: Resources): + """ + Initialize the NoSQLStorage instance. + + :param region: The cloud region. + :param cache_client: The cache client instance. + :param resources: The cloud resources configuration object. + """ super().__init__() self._cache_client = cache_client - self._cached = False + self._cached = False # Indicates if current benchmark's table info is from cache self._region = region self._cloud_resources = resources @abstractmethod def get_tables(self, benchmark: str) -> Dict[str, str]: + """ + Get a mapping of benchmark-defined table names to actual cloud provider table names. + + For some providers, this might be an empty dictionary if names are directly used. + + :param benchmark: The name of the benchmark. + :return: Dictionary mapping logical table names to actual table names. + """ pass @abstractmethod def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: + """ + Get the actual cloud provider table name for a given benchmark and logical table name. + + :param benchmark: The name of the benchmark. + :param table: The logical name of the table within the benchmark. + :return: The actual table name in the cloud, or None if not found. + """ pass @abstractmethod def retrieve_cache(self, benchmark: str) -> bool: + """ + Retrieve NoSQL table configurations for a benchmark from the cache. + + Implementations should populate internal structures with cached table names/details. + + :param benchmark: The name of the benchmark. + :return: True if cached data was successfully retrieved, False otherwise. + """ pass @abstractmethod def update_cache(self, benchmark: str): + """ + Update the cache with the current NoSQL table configurations for a benchmark. + + :param benchmark: The name of the benchmark. + """ pass def envs(self) -> dict: - return {} + """ + Return a dictionary of environment variables that might be needed by functions + to access this NoSQL storage (e.g., connection strings, table names). 
- """ - Each table name follow this pattern: - sebs-benchmarks-{resource_id}-{benchmark-name}-{table-name} - - Each implementation should do the following - (1) Retrieve cached data - (2) Create missing table that do not exist - (3) Update cached data if anything new was created -> this is done separately - in benchmark.py once the data is uploaded by the benchmark. - """ + Default implementation returns an empty dictionary. Subclasses should override + if they need to expose environment variables. + + :return: Dictionary of environment variables. + """ + return {} def create_benchmark_tables( self, benchmark: str, name: str, primary_key: str, secondary_key: Optional[str] = None ): - + """ + Ensure that a NoSQL table/container required by a benchmark exists. + + Table names typically follow the pattern: + `sebs-benchmarks-{resource_id}-{benchmark-name}-{table-name}` + + Workflow: + 1. Attempt to retrieve table information from cache. + 2. If cached and table exists, do nothing further for that specific table. + 3. If not cached or table doesn't exist, proceed to create it using `create_table`. + 4. Cache update is handled separately after data upload by the benchmark. + + :param benchmark: The name of the benchmark. + :param name: The logical name of the table within the benchmark. + :param primary_key: The name of the primary/partition key for the table. + :param secondary_key: Optional name of the secondary/sort key. + """ if self.retrieve_cache(benchmark): - table_name = self._get_table_name(benchmark, name) if table_name is not None: self.logging.info( @@ -72,20 +134,26 @@ def create_benchmark_tables( return self.logging.info(f"Preparing to create a NoSQL table {name} for benchmark {benchmark}") - self.create_table(benchmark, name, primary_key, secondary_key) - """ - - AWS: DynamoDB Table - Azure: CosmosDB Container - Google Cloud: Firestore in Datastore Mode, Database - """ - @abstractmethod def create_table( self, benchmark: str, name: str, primary_key: str, secondary_key: Optional[str] = None ) -> str: + """ + Create a NoSQL table/container. + + Provider-specific implementation details: + - AWS: DynamoDB Table + - Azure: CosmosDB Container + - Google Cloud: Firestore in Datastore Mode, Database/Collection + + :param benchmark: The name of the benchmark. + :param name: The logical name of the table/container. + :param primary_key: The name of the primary/partition key. + :param secondary_key: Optional name of the secondary/sort key. + :return: The actual name of the created table/container in the cloud. + """ pass @abstractmethod @@ -97,22 +165,38 @@ def write_to_table( primary_key: Tuple[str, str], secondary_key: Optional[Tuple[str, str]] = None, ): + """ + Write an item/document to the specified table/container. + + :param benchmark: The name of the benchmark. + :param table: The logical name of the table/container. + :param data: The data to write (as a dictionary). + :param primary_key: A tuple (key_name, key_value) for the primary/partition key. + :param secondary_key: Optional tuple for the secondary/sort key or item ID. + """ pass - """ - - AWS DynamoDB: Removing & recreating table is the cheapest & fastest option - - Azure CosmosDB: recreate container - - Google Cloud: also likely recreate - - """ - @abstractmethod def clear_table(self, name: str) -> str: + """ + Clear all items from a table/container. + + Provider-specific implementation details: + - AWS DynamoDB: Removing & recreating table is often the cheapest & fastest option. 
+ - Azure CosmosDB: Recreate container or use specific API to delete items. + - Google Cloud: Likely recreate collection or use specific API. + + :param name: The actual name of the table/container in the cloud. + :return: Status or confirmation message. + """ pass @abstractmethod def remove_table(self, name: str) -> str: + """ + Remove/delete a table/container completely. + + :param name: The actual name of the table/container in the cloud. + :return: Status or confirmation message. + """ pass diff --git a/sebs/faas/resources.py b/sebs/faas/resources.py index 140a719e6..ddcbd4ac2 100644 --- a/sebs/faas/resources.py +++ b/sebs/faas/resources.py @@ -11,33 +11,49 @@ class SystemResources(ABC, LoggingBase): + """ + Abstract base class for managing system-level resources for a FaaS deployment. + + This includes access to persistent storage (like S3 or Azure Blob Storage) + and NoSQL storage (like DynamoDB or CosmosDB). Implementations are specific + to each FaaS provider. + """ def __init__(self, config: Config, cache_client: Cache, docker_client: docker.client): + """ + Initialize SystemResources. + :param config: The FaaS system configuration object. + :param cache_client: The cache client instance. + :param docker_client: The Docker client instance. + """ super().__init__() self._config = config self._cache_client = cache_client self._docker_client = docker_client - """ - Access persistent storage instance. - It might be a remote and truly persistent service (AWS S3, Azure Blob..), - or a dynamically allocated local instance. - - :param replace_existing: replace benchmark input data if exists already - """ - @abstractmethod def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStorage: - pass + """ + Get an instance of the persistent storage client for the FaaS provider. - """ - Access instance of NoSQL storage. - It might be a remote and truly persistent service (AWS DynamoDB, Azure CosmosDB..), - or a dynamically allocated local instance (ScyllaDB). + This storage might be a remote service (e.g., AWS S3, Azure Blob Storage) + or a local equivalent. - """ + :param replace_existing: If True, any existing benchmark input data in + the storage should be replaced. Defaults to False. + :return: An instance of a PersistentStorage subclass. + """ + pass @abstractmethod def get_nosql_storage(self) -> NoSQLStorage: + """ + Get an instance of the NoSQL storage client for the FaaS provider. + + This storage might be a remote service (e.g., AWS DynamoDB, Azure CosmosDB) + or a local equivalent (e.g., a ScyllaDB container). + + :return: An instance of a NoSQLStorage subclass. + """ pass diff --git a/sebs/faas/storage.py b/sebs/faas/storage.py index 5b93c0539..29491976f 100644 --- a/sebs/faas/storage.py +++ b/sebs/faas/storage.py @@ -10,38 +10,66 @@ from sebs.utils import LoggingBase """ - Abstract class +Abstract base class for persistent storage services used by FaaS benchmarks. + +This class defines the interface for interacting with storage services like +AWS S3, Azure Blob Storage, etc., for managing benchmark data, code packages, +and experiment results. """ class PersistentStorage(ABC, LoggingBase): + """ + Abstract base class for FaaS persistent storage. + + Manages buckets/containers for benchmark data, deployment packages, and experiment results. + Handles caching of storage configurations and interaction with the cloud provider's + storage service. 
+ """ @staticmethod @abstractmethod def deployment_name() -> str: + """ + Return the name of the FaaS deployment this storage belongs to (e.g., "aws", "azure"). + + :return: Deployment name string. + """ pass @property def cache_client(self) -> Cache: + """The cache client instance for storing/retrieving storage configurations.""" return self._cache_client @property - def replace_existing(self): + def replace_existing(self) -> bool: # Made getter more explicit + """Flag indicating whether to replace existing files in buckets.""" return self._replace_existing @replace_existing.setter def replace_existing(self, val: bool): + """Set the flag for replacing existing files.""" self._replace_existing = val @property - def region(self): + def region(self) -> str: # Added return type + """The cloud region where the storage is located.""" return self._region def __init__( self, region: str, cache_client: Cache, resources: Resources, replace_existing: bool ): + """ + Initialize the PersistentStorage instance. + + :param region: The cloud region. + :param cache_client: The cache client instance. + :param resources: The cloud resources configuration object. + :param replace_existing: Flag to control overwriting existing files. + """ super().__init__() self._cache_client = cache_client - self.cached = False + self.cached = False # Indicates if current benchmark's data/bucket info is from cache self._input_prefixes: List[str] = [] self._output_prefixes: List[str] = [] self.input_prefixes_files: List[List[str]] = [] @@ -51,18 +79,32 @@ def __init__( @property def input_prefixes(self) -> List[str]: + """List of input prefixes (paths within the benchmark data bucket).""" return self._input_prefixes @property def output_prefixes(self) -> List[str]: + """List of output prefixes (paths within the benchmark data bucket).""" return self._output_prefixes @abstractmethod def correct_name(self, name: str) -> str: + """ + Correct a bucket/container name to comply with provider-specific naming rules. + + :param name: The proposed name. + :return: A valid name for the provider. + """ pass def find_deployments(self) -> List[str]: + """ + Find existing SeBS deployments by listing buckets that match the benchmark bucket pattern. + Looks for buckets named "sebs-benchmarks-*". + + :return: List of deployment identifiers (resource prefixes). + """ deployments = [] buckets = self.list_buckets() for bucket in buckets: @@ -70,126 +112,154 @@ def find_deployments(self) -> List[str]: deployment_search = re.match("sebs-benchmarks-(.*)", bucket) if deployment_search: deployments.append(deployment_search.group(1)) - return deployments @abstractmethod def _create_bucket( self, name: str, buckets: List[str] = [], randomize_name: bool = False ) -> str: - pass + """ + Internal implementation to create a new bucket/container. - """ - Download a file from a bucket. + Should handle provider-specific creation logic, including name randomization + and checking against existing buckets if necessary. - :param bucket_name: - :param key: storage source filepath - :param filepath: local destination filepath - """ + :param name: The desired base name for the bucket. + :param buckets: Optional list of existing bucket names to check against. + :param randomize_name: If True, append a random string to the bucket name. + :return: The name of the created bucket. + """ + pass @abstractmethod def download(self, bucket_name: str, key: str, filepath: str) -> None: - pass - - """ - Upload a file to a bucket with by passing caching. 
- Useful for uploading code package to storage (when required). + """ + Download a file from a bucket/container. - :param bucket_name: - :param filepath: local source filepath - :param key: storage destination filepath - """ + :param bucket_name: Name of the bucket/container. + :param key: The key/path of the object in the storage. + :param filepath: The local path where the file should be saved. + """ + pass @abstractmethod def upload(self, bucket_name: str, filepath: str, key: str): - pass + """ + Upload a file to a bucket/container, bypassing caching logic if necessary. - """ - Retrieves list of files in a bucket. + Useful for uploading code packages or other essential files. - :param bucket_name: - :return: list of files in a given bucket - """ + :param bucket_name: Name of the bucket/container. + :param filepath: Local path of the file to upload. + :param key: The key/path where the object will be stored in the storage. + """ + pass @abstractmethod def list_bucket(self, bucket_name: str, prefix: str = "") -> List[str]: + """ + List files/objects in a given bucket/container, optionally filtered by prefix. + + :param bucket_name: Name of the bucket/container. + :param prefix: Optional prefix to filter the listing. + :return: A list of object keys/names. + """ pass @abstractmethod def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: + """ + List all buckets/containers, or filter by a partial name. + + :param bucket_name: Optional string to filter bucket names (e.g., contains match). + :return: List of bucket/container names. + """ pass @abstractmethod def exists_bucket(self, bucket_name: str) -> bool: + """ + Check if a bucket/container with the given name exists. + + :param bucket_name: Name of the bucket/container. + :return: True if it exists, False otherwise. + """ pass @abstractmethod def clean_bucket(self, bucket_name: str): + """ + Delete all objects within a specified bucket/container. + + :param bucket_name: Name of the bucket/container to clean. + """ pass @abstractmethod def remove_bucket(self, bucket: str): - pass - - """ - Allocate a set of input/output buckets for the benchmark. - The routine checks the cache first to verify that buckets have not - been allocated first. + """ + Delete an entire bucket/container. The bucket must typically be empty. - :param benchmark: benchmark name - :param buckets: number of input and number of output buckets - """ + :param bucket: Name of the bucket/container to delete. + """ + pass def benchmark_data( self, benchmark: str, requested_buckets: Tuple[int, int] ) -> Tuple[List[str], List[str]]: - - """ - Add an input path inside benchmarks bucket. - Bucket name format: name-idx-input """ - for i in range(0, requested_buckets[0]): - self.input_prefixes.append("{}-{}-input".format(benchmark, i)) + Prepare input and output prefixes for a benchmark within the benchmark data bucket. - """ - Add an input path inside benchmarks bucket. - Bucket name format: name-idx-output - """ - for i in range(0, requested_buckets[1]): - self.output_prefixes.append("{}-{}-output".format(benchmark, i)) + Checks cache for existing configurations and lists files if not cached or if + input data is marked as not uploaded. Updates cache with current prefix info. 
- cached_storage = self.cache_client.get_storage_config(self.deployment_name(), benchmark) - self.cached = True + Input prefixes are in the format: `{benchmark}-{idx}-input` + Output prefixes are in the format: `{benchmark}-{idx}-output` - if cached_storage is not None: + :param benchmark: The name of the benchmark. + :param requested_buckets: A tuple (num_input_prefixes, num_output_prefixes). + :return: A tuple containing (list_of_input_prefixes, list_of_output_prefixes). + """ + # Generate input prefixes + for i in range(requested_buckets[0]): + self.input_prefixes.append(f"{benchmark}-{i}-input") - cached_storage = cached_storage["buckets"] + # Generate output prefixes + for i in range(requested_buckets[1]): + self.output_prefixes.append(f"{benchmark}-{i}-output") - # verify the input is up to date - for prefix in self.input_prefixes: - if prefix not in cached_storage["input"]: - self.cached = False + cached_storage = self.cache_client.get_storage_config(self.deployment_name(), benchmark) + self.cached = True # Assume cached initially - for prefix in self.output_prefixes: - if prefix not in cached_storage["output"]: + if cached_storage and "buckets" in cached_storage: + cached_buckets_info = cached_storage["buckets"] + # Verify if all requested input prefixes are in cache + for prefix in self.input_prefixes: + if prefix not in cached_buckets_info.get("input", []): self.cached = False + break + # Verify if all requested output prefixes are in cache + if self.cached: # Only check if still considered cached + for prefix in self.output_prefixes: + if prefix not in cached_buckets_info.get("output", []): + self.cached = False + break + # Check if input was marked as uploaded + if self.cached and not cached_buckets_info.get("input_uploaded", False): + self.cached = False else: - self.cached = False - - if self.cached is True and cached_storage["input_uploaded"] is False: - self.cached = False - - # query buckets if the input prefixes changed, or the input is not up to date. 
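# Illustrative sketch of the cache-validation rule spelled out above for
# benchmark_data(): a cached entry is trusted only when every requested input
# and output prefix is present and the input data was marked as uploaded.
# The helper below is hypothetical and exists only to demonstrate the rule;
# the key names ("input", "output", "input_uploaded") mirror the diff.
from typing import Dict, List, Optional

def cache_is_valid(
    cached_buckets_info: Optional[Dict],
    input_prefixes: List[str],
    output_prefixes: List[str],
) -> bool:
    if not cached_buckets_info:
        return False
    if any(p not in cached_buckets_info.get("input", []) for p in input_prefixes):
        return False
    if any(p not in cached_buckets_info.get("output", []) for p in output_prefixes):
        return False
    return bool(cached_buckets_info.get("input_uploaded", False))

# Example, using the `{benchmark}-{idx}-input` naming convention:
# cache_is_valid({"input": ["bench-0-input"], "output": [], "input_uploaded": True},
#                ["bench-0-input"], []) -> True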
- if self.cached is False: + self.cached = False # No cache entry or no buckets in cache + # If not fully cached or input needs re-uploading, list files for input prefixes + if not self.cached: + self.input_prefixes_files = [] # Reset + benchmark_data_bucket = self.get_bucket(Resources.StorageBucketType.BENCHMARKS) for prefix in self.input_prefixes: self.input_prefixes_files.append( - self.list_bucket( - self.get_bucket(Resources.StorageBucketType.BENCHMARKS), - self.input_prefixes[-1], - ) + self.list_bucket(benchmark_data_bucket, prefix) ) + # Update cache with current state self._cache_client.update_storage( self.deployment_name(), benchmark, @@ -197,109 +267,77 @@ def benchmark_data( "buckets": { "input": self.input_prefixes, "output": self.output_prefixes, - "input_uploaded": self.cached, + "input_uploaded": self.cached, # Mark as uploaded if we didn't need to list files } }, ) - return self.input_prefixes, self.output_prefixes - # def allocate_buckets(self, benchmark: str, requested_buckets: Tuple[int, int]): - - # benchmarks_bucket = self.benchmarks_bucket() - - # Load cached information - # cached_buckets = self.cache_client.get_storage_config(self.deployment_name(), benchmark) - # if cached_buckets: - # cache_valid = True - # for bucket in [ - # *cached_buckets["buckets"]["input"], - # *cached_buckets["buckets"]["output"], - # ]: - # if not self.exists_bucket(bucket): - # cache_valid = False - # self.logging.info(f"Cached storage buckets {bucket} does not exist.") - # break - - # if cache_valid: - # self.input_buckets = cached_buckets["buckets"]["input"] - # for bucket in self.input_buckets: - # self.input_buckets_files.append(self.list_bucket(bucket)) - # self.output_buckets = cached_buckets["buckets"]["output"] - # # for bucket in self.output_buckets: - # # self.clean_bucket(bucket) - # self.cached = True - # self.logging.info( - # "Using cached storage input buckets {}".format(self.input_buckets) - # ) - # self.logging.info( - # "Using cached storage output buckets {}".format(self.output_buckets) - # ) - # return - # else: - # self.logging.info("Cached storage buckets are no longer valid, creating new ones.") - - # buckets = self.list_buckets(self.correct_name(benchmark)) - # for i in range(0, requested_buckets[0]): - # self.input_buckets.append( - # self._create_bucket(self.correct_name("{}-{}-input".format(benchmark, i)), buckets) - # ) - # self.input_buckets_files.append(self.list_bucket(self.input_buckets[-1])) - # for i in range(0, requested_buckets[1]): - # self.output_buckets.append( - # self._create_bucket(self.correct_name("{}-{}-output".format(benchmark, i)), buckets) - # ) - # self.save_storage(benchmark) - def get_bucket(self, bucket_type: Resources.StorageBucketType) -> str: + """ + Get or create a standard SeBS bucket of a specific type (BENCHMARKS, EXPERIMENTS, DEPLOYMENT). + + Checks if the bucket is already known in `_cloud_resources`. If not, + generates the expected name, checks if it exists in the cloud, creates it + if necessary, and then stores it in `_cloud_resources`. + :param bucket_type: The type of bucket to get/create. + :return: The name of the bucket. 
+ """ bucket = self._cloud_resources.get_storage_bucket(bucket_type) if bucket is None: - description = { + description_map = { # Renamed from `description` to avoid conflict Resources.StorageBucketType.BENCHMARKS: "benchmarks", Resources.StorageBucketType.EXPERIMENTS: "experiment results", Resources.StorageBucketType.DEPLOYMENT: "code deployment", } + bucket_purpose_description = description_map[bucket_type] name = self._cloud_resources.get_storage_bucket_name(bucket_type) + corrected_name = self.correct_name(name) # Ensure name is valid - if not self.exists_bucket(name): - self.logging.info(f"Initialize a new bucket for {description[bucket_type]}") + if not self.exists_bucket(corrected_name): + self.logging.info(f"Initialize a new bucket for {bucket_purpose_description}") bucket = self._create_bucket( - self.correct_name(name), - randomize_name=False, + corrected_name, + randomize_name=False, # Standard SeBS buckets are not randomized ) else: - self.logging.info(f"Using existing bucket {name} for {description[bucket_type]}") - bucket = name + self.logging.info(f"Using existing bucket {corrected_name} for {bucket_purpose_description}") + bucket = corrected_name self._cloud_resources.set_storage_bucket(bucket_type, bucket) - return bucket - """ - Implements a handy routine for uploading input data by benchmarks. - It should skip uploading existing files unless storage client has been - initialized to override existing data. - - :param bucket_idx: index of input bucket - :param file: name of file to upload - :param filepath: filepath in the storage - """ - @abstractmethod def uploader_func(self, bucket_idx: int, file: str, filepath: str) -> None: - pass + """ + Abstract method for a function to upload a single file to a specific input prefix. - """ - Download all files in a storage bucket. - Warning: assumes flat directory in a bucket! Does not handle bucket files - with directory marks in a name, e.g. 'dir1/dir2/file' - """ + This is often used as a target for multiprocessing uploads. Implementations + should handle skipping existing files if `self.replace_existing` is False. + + :param bucket_idx: Index of the input prefix (from `self.input_prefixes`). + :param file: Name of the file to upload (becomes part of the key). + :param filepath: Local path of the file to upload. + """ + pass def download_bucket(self, bucket_name: str, output_dir: str): + """ + Download all files from a given bucket/container to a local directory. + Warning: Assumes a flat directory structure within the bucket; does not + handle objects with directory markers (e.g., 'dir1/dir2/file') correctly + in terms of creating local subdirectories. Files are downloaded to `output_dir`. + + :param bucket_name: Name of the bucket/container to download from. + :param output_dir: Local directory to save downloaded files. + """ files = self.list_bucket(bucket_name) - for f in files: - output_file = os.path.join(output_dir, f) + for f_key in files: # Renamed f to f_key for clarity + # Ensure the output path is just the filename part of the key + # to avoid issues with keys containing paths if list_bucket returns full keys. 
+ local_filename = os.path.basename(f_key) + output_file = os.path.join(output_dir, local_filename) if not os.path.exists(output_file): - self.download(bucket_name, f, output_file) + self.download(bucket_name, f_key, output_file) diff --git a/sebs/faas/system.py b/sebs/faas/system.py index 9fbe0e273..56c995d47 100644 --- a/sebs/faas/system.py +++ b/sebs/faas/system.py @@ -16,15 +16,22 @@ from .config import Config """ - This class provides basic abstractions for the FaaS system. - It provides the interface for initialization of the system and storage - services, creation and update of serverless functions and querying - logging and measurements services to obtain error messages and performance - measurements. +This module defines the abstract base class `System` for FaaS (Function-as-a-Service) +systems. It provides a common interface for initializing the system, managing +storage services, creating and updating serverless functions, and querying +logging and measurement services to obtain error messages and performance data. """ class System(ABC, LoggingBase): + """ + Abstract base class for FaaS system interactions. + + This class defines the core functionalities required to interact with a FaaS + platform, such as deploying functions, managing resources, and invoking functions. + Subclasses implement these functionalities for specific FaaS providers (e.g., AWS Lambda, + Azure Functions). + """ def __init__( self, system_config: SeBSConfig, @@ -32,60 +39,96 @@ def __init__( docker_client: docker.client, system_resources: SystemResources, ): + """ + Initialize a FaaS System instance. + + :param system_config: The global SeBS configuration. + :param cache_client: The cache client for storing and retrieving deployment information. + :param docker_client: The Docker client for image and container operations. + :param system_resources: Provider-specific system resources manager. + """ super().__init__() self._system_config = system_config self._docker_client = docker_client self._cache_client = cache_client - self._cold_start_counter = randrange(100) - + self._cold_start_counter = randrange(100) # Used to try and force cold starts self._system_resources = system_resources @property def system_config(self) -> SeBSConfig: + """The global SeBS configuration.""" return self._system_config @property def docker_client(self) -> docker.client: + """The Docker client instance.""" return self._docker_client @property def cache_client(self) -> Cache: + """The cache client instance.""" return self._cache_client @property def cold_start_counter(self) -> int: + """ + A counter used in attempts to enforce cold starts. + Its value might be incorporated into function environment variables. + """ return self._cold_start_counter @cold_start_counter.setter def cold_start_counter(self, val: int): + """Set the cold start counter.""" self._cold_start_counter = val @property @abstractmethod def config(self) -> Config: + """Provider-specific configuration for this FaaS system.""" pass @property def system_resources(self) -> SystemResources: + """Provider-specific system resources manager.""" return self._system_resources @staticmethod @abstractmethod def function_type() -> "Type[Function]": + """ + Return the concrete Function subclass associated with this FaaS system. + + :return: The class type of the function (e.g., AWSLambdaFunction, AzureFunction). + """ pass def find_deployments(self) -> List[str]: - - """ - Default implementation that uses storage buckets. - data storage accounts. 
- This can be overriden, e.g., in Azure that looks for unique """ + Find existing SeBS deployments on the FaaS platform. + + Default implementation uses storage buckets (e.g., S3, Azure Blob) to identify + deployments by looking for buckets matching a SeBS naming pattern. + This can be overridden by subclasses if a different discovery mechanism is needed + (e.g., Azure uses resource groups). + :return: A list of deployment identifiers (resource prefixes). + """ return self.system_resources.get_storage().find_deployments() def initialize_resources(self, select_prefix: Optional[str]): + """ + Initialize or select resources for the current SeBS deployment. + + If a resource ID is already configured or found in the cache, it's used. + Otherwise, it searches for existing deployments. If a `select_prefix` is given, + it tries to match an existing deployment. If no suitable existing deployment + is found or specified, a new unique resource ID is generated. + Ensures that the benchmark storage bucket is created, which often allocates + the new resource set if one was generated. + :param select_prefix: Optional prefix to select an existing deployment. + """ # User provided resources or found in cache if self.config.resources.has_resources_id: self.logging.info( @@ -98,7 +141,6 @@ def initialize_resources(self, select_prefix: Optional[str]): # If a prefix is specified, we find the first matching resource ID if select_prefix is not None: - for dep in deployments: if select_prefix in dep: self.logging.info( @@ -128,44 +170,19 @@ def initialize_resources(self, select_prefix: Optional[str]): # ensure that the bucket is created - this allocates the new resource self.system_resources.get_storage().get_bucket(Resources.StorageBucketType.BENCHMARKS) - """ - Initialize the system. After the call the local or remote - FaaS system should be ready to allocate functions, manage - storage resources and invoke functions. + def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): + """ + Initialize the FaaS system. - :param config: systems-specific parameters - """ + After this call, the local or remote FaaS system should be ready to + allocate functions, manage storage resources, and invoke functions. + Subclasses should override this to perform provider-specific initialization. - def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): + :param config: System-specific parameters (currently not widely used by subclasses). + :param resource_prefix: Optional prefix for naming/selecting resources. + """ pass - """ - Apply the system-specific code packaging routine to build benchmark. - The benchmark creates a code directory with the following structure: - - [benchmark sources] - - [benchmark resources] - - [dependence specification], e.g. 
requirements.txt or package.json - - [handlers implementation for the language and deployment] - - This step allows us to change the structure above to fit different - deployment requirements, Example: a zip file for AWS or a specific - - Args: - directory: Path to the code directory - language_name: Programming language name - language_version: Programming language version - architecture: Target architecture (e.g., 'x64', 'arm64') - benchmark: Benchmark name - is_cached: Whether the code is cached - container_deployment: Whether to package for container deployment - - Returns: - Tuple containing: - - Path to packaged code - - Size of the package - - Container URI - """ - @abstractmethod def package_code( self, @@ -177,6 +194,31 @@ def package_code( is_cached: bool, container_deployment: bool, ) -> Tuple[str, int, str]: + """ + Apply the system-specific code packaging routine to build a benchmark. + + The benchmark build process creates a code directory with a standard structure: + - Benchmark source files + - Benchmark resource files + - Dependency specification (e.g., requirements.txt, package.json) + - Language-specific handlers for the FaaS platform + + This method adapts this standard structure to fit the specific deployment + requirements of the FaaS provider (e.g., creating a zip file for AWS Lambda, + arranging files for Azure Functions). + + :param directory: Path to the code directory prepared by the benchmark build. + :param language_name: Programming language name (e.g., "python"). + :param language_version: Programming language version (e.g., "3.8"). + :param architecture: Target CPU architecture (e.g., "x64", "arm64"). + :param benchmark: Name of the benchmark. + :param is_cached: Whether the benchmark code is considered cached by SeBS. + :param container_deployment: Whether to package for container-based deployment. + :return: A tuple containing: + - Path to the packaged code (e.g., path to zip file or prepared directory). + - Size of the package in bytes. + - Container image URI if `container_deployment` is True, else an empty string. + """ pass @abstractmethod @@ -187,28 +229,31 @@ def create_function( container_deployment: bool, container_uri: str, ) -> Function: - """ - Create a new function in the FaaS platform. - The implementation is responsible for creating all necessary - cloud resources. + Create a new function on the FaaS platform. - Args: - code_package: Benchmark containing the function code - func_name: Name of the function - container_deployment: Whether to deploy as a container - container_uri: URI of the container image + The implementation is responsible for creating all necessary cloud resources + (e.g., function definition, IAM roles, triggers if applicable). - Returns: - Function: Created function instance - - Raises: - NotImplementedError: If container deployment is requested but not supported + :param code_package: Benchmark object containing code and configuration. + :param func_name: The desired name for the function on the FaaS platform. + :param container_deployment: True if deploying as a container image. + :param container_uri: URI of the container image if `container_deployment` is True. + :return: A Function object representing the created function. + :raises NotImplementedError: If container deployment is requested but not supported. """ pass @abstractmethod def cached_function(self, function: Function): + """ + Perform any necessary setup or validation for a function retrieved from cache. 
+ + This might involve, for example, re-initializing transient client objects + or ensuring associated resources (like triggers) are correctly configured. + + :param function: The Function object retrieved from cache. + """ pass @abstractmethod @@ -220,34 +265,36 @@ def update_function( container_uri: str, ): """ - Update an existing function in the FaaS platform. - - Args: - function: Existing function instance to update - code_package: New benchmark containing the function code - container_deployment: Whether to deploy as a container - container_uri: URI of the container image + Update an existing function on the FaaS platform with new code or configuration. - Raises: - NotImplementedError: If container deployment is requested but not supported + :param function: The existing Function object to update. + :param code_package: Benchmark object containing the new code and configuration. + :param container_deployment: True if deploying as a container image. + :param container_uri: URI of the new container image if `container_deployment` is True. + :raises NotImplementedError: If container deployment is requested but not supported. """ pass - """ - a) if a cached function with given name is present and code has not changed, - then just return function name - b) if a cached function is present and the cloud code has a different - code version, then upload new code - c) if no cached function is present, then create code package and - either create new function or update an existing but uncached one - - Benchmark rebuild is requested but will be skipped if source code is - not changed and user didn't request update. - - """ - def get_function(self, code_package: Benchmark, func_name: Optional[str] = None) -> Function: - + """ + Get or create a FaaS function for a given benchmark. + + Handles the following logic: + a) If a cached function with the given name exists and its code hash matches + the current benchmark code, return the cached function (after potential + configuration checks/updates via `cached_function` and `is_configuration_changed`). + b) If a cached function exists but its code hash differs or if `code_package.build` + indicates a rebuild occurred, update the function in the cloud. + c) If no cached function is found, create a new function. + + Benchmark code is built (via `code_package.build`) before these steps. + The build might be skipped if source code hasn't changed and no update is forced. + + :param code_package: The Benchmark object. + :param func_name: Optional name for the function. If None, a default name is generated. + :return: The Function object (either retrieved, updated, or newly created). + :raises Exception: If the language version is not supported by the FaaS system. 
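To make the cache decision in (a)-(c) concrete, the check essentially reduces to comparing the cached code hash against the hash of the freshly built package. A standalone sketch of that comparison (illustrative only; hashing the archive with md5 is an assumption here, not necessarily how SeBS computes `code_package.hash`):

import hashlib
import pathlib

def package_hash(package_path: str) -> str:
    # Hash the packaged archive so code changes between runs are detected.
    return hashlib.md5(pathlib.Path(package_path).read_bytes()).hexdigest()

def needs_cloud_update(cached_hash: str, package_path: str, rebuilt: bool) -> bool:
    # Update the deployed function when the code changed or a rebuild was forced.
    return rebuilt or cached_hash != package_hash(package_path)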
+        """
         if code_package.language_version not in self.system_config.supported_language_versions(
             self.name(), code_package.language_name, code_package.architecture
         ):
@@ -273,22 +320,23 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None)
         functions = code_package.functions

         is_function_cached = not (not functions or func_name not in functions)
+        function: Optional[Function] = None  # Ensure function is defined for the later assert
         if is_function_cached:
             # retrieve function
-            cached_function = functions[func_name]
+            cached_function_data = functions[func_name]  # type: ignore
             code_location = code_package.code_location

             try:
-                function = self.function_type().deserialize(cached_function)
+                function = self.function_type().deserialize(cached_function_data)
             except RuntimeError as e:
-                self.logging.error(
-                    f"Cached function {cached_function['name']} is no longer available."
+                self.logging.error(
+                    f"Cached function {cached_function_data['name']} is no longer available."
                 )
                 self.logging.error(e)
                 is_function_cached = False

-        if not is_function_cached:
+        if not is_function_cached or function is None:
             msg = (
                 "function name not provided."
                 if not func_name
@@ -307,11 +355,10 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None)
             code_package.query_cache()
             return function
         else:
-
-            assert function is not None
+            assert function is not None  # For mypy: the else branch implies a deserialized function
            self.cached_function(function)
             self.logging.info(
-                "Using cached function {fname} in {loc}".format(fname=func_name, loc=code_location)
+                "Using cached function {fname} in {loc}".format(fname=func_name, loc=code_package.code_location)
             )
             # is the function up-to-date?
             if function.code_package_hash != code_package.hash or rebuilt:
@@ -320,7 +367,7 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None)
                     f"Cached function {func_name} with hash "
                     f"{function.code_package_hash} is not up to date with "
                     f"current build {code_package.hash} in "
-                    f"{code_location}, updating cloud version!"
+                    f"{code_package.code_location}, updating cloud version!"
                 )
                 if rebuilt:
                     self.logging.info(
@@ -349,15 +396,27 @@ def get_function(self, code_package: Benchmark, func_name: Optional[str] = None)

     @abstractmethod
     def update_function_configuration(self, cached_function: Function, benchmark: Benchmark):
-        pass
+        """
+        Update the configuration of an existing cached function on the FaaS platform.

-    """
-    This function checks for common function parameters to verify if their value is
-    still up to date.
-    """
+        This is called when the function's code hasn't changed, but its configuration
+        (e.g., memory, timeout, environment variables) needs to be updated based on
+        the current benchmark settings.
+
+        :param cached_function: The Function object (retrieved from cache) to update.
+        :param benchmark: The Benchmark object providing the new configuration.
+        """
+        pass

     def is_configuration_changed(self, cached_function: Function, benchmark: Benchmark) -> bool:
+        """
+        Check if common function parameters (timeout, memory, runtime) have changed
+        between a cached function and the current benchmark configuration.
+        :param cached_function: The cached Function object.
+        :param benchmark: The current Benchmark object.
+        :return: True if any configuration parameter has changed, False otherwise.
+ """ changed = False for attr in ["timeout", "memory"]: new_val = getattr(benchmark.benchmark_config, attr) @@ -370,29 +429,57 @@ def is_configuration_changed(self, cached_function: Function, benchmark: Benchma changed = True setattr(cached_function.config, attr, new_val) - for lang_attr in [["language"] * 2, ["language_version", "version"]]: - new_val = getattr(benchmark, lang_attr[0]) - old_val = getattr(cached_function.config.runtime, lang_attr[1]) - if new_val != old_val: - # FIXME: should this even happen? we should never pick the function with - # different runtime - that should be encoded in the name - self.logging.info( - f"Updating function configuration due to changed runtime attribute {attr}: " - f"cached function has value {old_val} whereas {new_val} has been requested." - ) - changed = True - setattr(cached_function.config.runtime, lang_attr[1], new_val) + # Check language and language_version from benchmark against runtime in FunctionConfig + # The lang_attr mapping was a bit complex; simplifying the logic. + # benchmark.language is Language enum, cached_function.config.runtime.language is Language enum + if benchmark.language != cached_function.config.runtime.language: + self.logging.info( + f"Updating function configuration due to changed runtime attribute language: " + f"cached function has value {cached_function.config.runtime.language.value} " + f"whereas {benchmark.language.value} has been requested." + ) + changed = True + # This change might be problematic if the runtime object is shared or immutable in parts + # For a dataclass, direct assignment should be fine if Runtime is mutable or a new one is set. + cached_function.config.runtime.language = benchmark.language + if benchmark.language_version != cached_function.config.runtime.version: + self.logging.info( + f"Updating function configuration due to changed runtime attribute version: " + f"cached function has value {cached_function.config.runtime.version} " + f"whereas {benchmark.language_version} has been requested." + ) + changed = True + cached_function.config.runtime.version = benchmark.language_version + # FIXME: Also need to check architecture: benchmark._experiment_config._architecture vs cached_function.config.architecture return changed @abstractmethod def default_function_name( self, code_package: Benchmark, resources: Optional[Resources] = None ) -> str: + """ + Generate a default name for a function based on the benchmark and resources. + + Provider-specific naming conventions should be applied here. + + :param code_package: The Benchmark object. + :param resources: Optional Resources object (may influence naming, e.g., resource prefix). + :return: The generated default function name. + """ pass @abstractmethod def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + """ + Attempt to enforce a cold start for the next invocation of the given functions. + + The mechanism for this is provider-specific and may involve updating + environment variables, redeploying, or other techniques. + + :param functions: A list of Function objects for which to enforce cold starts. + :param code_package: The Benchmark object (may be used to pass unique values). + """ pass @abstractmethod @@ -404,13 +491,37 @@ def download_metrics( requests: Dict[str, ExecutionResult], metrics: dict, ): + """ + Download provider-specific performance metrics for function invocations. 
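As one concrete example of what an implementation queries, AWS writes a REPORT log line per invocation to CloudWatch Logs with billed duration and memory usage. A hedged sketch of fetching those lines with boto3 (timestamps in milliseconds, first result page only; this illustrates the idea rather than the exact SeBS code):

import boto3

def fetch_lambda_report_lines(function_name: str, start_ms: int, end_ms: int) -> list:
    # One REPORT line per invocation; parsing the duration/memory fields is left out.
    logs = boto3.client("logs")
    events = logs.filter_log_events(
        logGroupName=f"/aws/lambda/{function_name}",
        startTime=start_ms,
        endTime=end_ms,
        filterPattern="REPORT",
    )
    return [e["message"] for e in events.get("events", [])]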
+ + This typically involves querying a logging or monitoring service (e.g., CloudWatch, + Application Insights) for details like actual execution duration, memory usage, etc., + and populating the `requests` (ExecutionResult objects) and `metrics` dictionaries. + + :param function_name: The name of the function. + :param start_time: The start timestamp of the time window for metric querying. + :param end_time: The end timestamp of the time window. + :param requests: Dictionary of request IDs to ExecutionResult objects to be updated. + :param metrics: Dictionary to store any additional aggregated metrics. + """ pass @abstractmethod def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + """ + Create a new trigger of a specific type for the given function. + + :param function: The Function object to which the trigger will be attached. + :param trigger_type: The type of trigger to create (e.g., HTTP, STORAGE). + :return: The created Trigger object. + """ pass def disable_rich_output(self): + """ + Disable rich progress bar outputs, e.g., during Docker image pushes. + Useful for environments where rich output is not supported or desired. + """ pass # @abstractmethod @@ -418,12 +529,15 @@ def disable_rich_output(self): # start_time: int, end_time: int): # pass - """ - Shutdown local FaaS instances, connections and clients. - """ - @abstractmethod def shutdown(self) -> None: + """ + Clean up and shut down the FaaS system interface. + + This should release any acquired resources, stop any running local services + (like Docker containers started by SeBS for CLI interactions), and update + the cache with the final system configuration. + """ try: self.cache_client.lock() self.config.update_cache(self.cache_client) @@ -433,4 +547,9 @@ def shutdown(self) -> None: @staticmethod @abstractmethod def name() -> str: + """ + Return the name of the FaaS provider (e.g., "aws", "azure", "gcp", "local"). + + :return: The provider name string. + """ pass diff --git a/sebs/gcp/cli.py b/sebs/gcp/cli.py index 65ca33bc2..6ea5d2369 100644 --- a/sebs/gcp/cli.py +++ b/sebs/gcp/cli.py @@ -9,14 +9,31 @@ class GCloudCLI(LoggingBase): + """ + Manages interactions with Google Cloud CLI (gcloud) through a Docker container. + + This class starts a Docker container running the gcloud CLI, allowing for + execution of gcloud commands, authentication, and other operations. + """ @staticmethod def typename() -> str: + """Return the type name of this class.""" return "GCP.CLI" def __init__( self, credentials: GCPCredentials, system_config: SeBSConfig, docker_client: docker.client ): + """ + Initialize GCloudCLI and start the Docker container. + + Pulls the gcloud CLI Docker image if not found locally, then runs a + container in detached mode with credentials mounted. + :param credentials: GCPCredentials object containing the path to service account JSON file. + :param system_config: SeBS system configuration. + :param docker_client: Docker client instance. + :raises RuntimeError: If Docker image pull fails. + """ super().__init__() repo_name = system_config.docker_repository() @@ -56,12 +73,14 @@ def __init__( # except StopIteration: # pass - """ - Execute the given command in Azure CLI. - Throws an exception on failure (commands are expected to execute succesfully). - """ + def execute(self, cmd: str) -> bytes: + """ + Execute a command in the gcloud CLI Docker container. - def execute(self, cmd: str): + :param cmd: The command string to execute. 
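For illustration, a typical session with this helper looks as follows (assuming valid credentials and a running Docker daemon; the listed gcloud command is an example, not something SeBS issues verbatim):

cli = GCloudCLI(credentials, system_config, docker_client)
cli.login(credentials.project_name)
output = cli.execute("gcloud functions list --format=json")
print(output.decode())
cli.shutdown()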
+ :return: The standard output of the command as bytes. + :raises RuntimeError: If the command execution fails (non-zero exit code). + """ exit_code, out = self.docker_instance.exec_run(cmd) if exit_code != 0: raise RuntimeError( @@ -71,27 +90,27 @@ def execute(self, cmd: str): ) return out - """ - Run gcloud auth command on Docker instance. - - Important: we cannot run "init" as this always requires authenticating through a browser. - Instead, we authenticate as a service account. + def login(self, project_name: str): + """ + Log in to gcloud CLI using a service account and set the project. - Setting cloud project will show a warning about missing permissions - for Cloud Resource Manager API: I don't know why, we don't seem to need it. + Authenticates using the mounted credentials file (`/credentials.json` in + the container) and then sets the active Google Cloud project. + Handles potential interactive prompts when setting the project by passing "Y". - Because of that, it will ask for verification to continue - which we do by passing "Y". - """ + Important: + - `gcloud init` is not used as it requires browser-based authentication. + - Setting the project might show warnings about Cloud Resource Manager API + permissions, which are generally not needed for SeBS operations. - def login(self, project_name: str): + :param project_name: The Google Cloud project name/ID to set as active. + """ self.execute("gcloud auth login --cred-file=/credentials.json") + # Pass "Y" to confirm setting the project if prompted, especially if APIs are not enabled. self.execute(f"/bin/bash -c 'gcloud config set project {project_name} <<< Y'") self.logging.info("gcloud CLI login succesful") - """ - Shuts down the Docker instance. - """ - def shutdown(self): + """Stop the gcloud CLI Docker container.""" self.logging.info("Stopping gcloud CLI manage Docker instance") self.docker_instance.stop() diff --git a/sebs/gcp/config.py b/sebs/gcp/config.py index 56d3b5c41..e8e9d388a 100644 --- a/sebs/gcp/config.py +++ b/sebs/gcp/config.py @@ -10,121 +10,182 @@ # https://stackoverflow.com/questions/33533148/how-do-i-specify-that-the-return-type-of-a-method-is-the-same-as-the-class-itsel """ - Credentials for FaaS system used to authorize operations on functions - and other resources. +Configuration classes for Google Cloud Platform (GCP) FaaS deployments. - The order of credentials initialization: - 1. Load credentials from cache. - 2. If any new values are provided in the config, they override cache values. - 3. If nothing is provided, initialize using environmental variables. - 4. If no information is provided, then failure is reported. +This module defines how GCP credentials, resources (like Cloud Storage buckets), +and general deployment settings are managed within SeBS. """ class GCPCredentials(Credentials): - def __init__(self, gcp_credentials: str): - super().__init__() + """ + GCP service account credentials. - self._gcp_credentials = gcp_credentials + The order of credentials initialization: + 1. Load credentials from cache. + 2. If new values are provided in the config (path to JSON), they override cache values. + 3. If nothing is provided, initialize using `GOOGLE_APPLICATION_CREDENTIALS` environment variable. + 4. Fallback to `GCP_SECRET_APPLICATION_CREDENTIALS` environment variable. + 5. If no information is provided, then failure is reported. + """ + def __init__(self, gcp_credentials_path: str): + """ + Initialize GCP credentials. 
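For example, constructing the credentials directly from a key file (the path is a placeholder) exposes the project ID parsed from the JSON:

creds = GCPCredentials("/path/to/service-account.json")
print(creds.project_name)     # project_id field of the key file
print(creds.gcp_credentials)  # path passed in above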
- gcp_data = json.load(open(self._gcp_credentials, "r")) + :param gcp_credentials_path: Path to the GCP service account JSON credentials file. + """ + super().__init__() + self._gcp_credentials = gcp_credentials_path + with open(self._gcp_credentials, "r") as f: + gcp_data = json.load(f) self._project_id = gcp_data["project_id"] @property def gcp_credentials(self) -> str: + """Path to the GCP service account JSON credentials file.""" return self._gcp_credentials @property def project_name(self) -> str: + """Google Cloud project ID extracted from the credentials file.""" return self._project_id @staticmethod - def initialize(gcp_credentials: str) -> "GCPCredentials": - return GCPCredentials(gcp_credentials) + def initialize(gcp_credentials_path: str) -> "GCPCredentials": + """ + Initialize GCPCredentials from a given path to the credentials JSON file. + + :param gcp_credentials_path: Path to the GCP service account JSON file. + :return: GCPCredentials instance. + """ + return GCPCredentials(gcp_credentials_path) @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: - + """ + Deserialize GCP credentials from configuration, cache, or environment variables. + + Sets `GOOGLE_APPLICATION_CREDENTIALS` environment variable if credentials + are loaded from SeBS config or SeBS-specific environment variables. + + :param config: Configuration dictionary. + :param cache: Cache object. + :param handlers: Logging handlers. + :return: GCPCredentials instance. + :raises RuntimeError: If credentials are not found or if project ID mismatch with cache. + """ cached_config = cache.get_config("gcp") ret: GCPCredentials - project_id: Optional[str] = None + cached_project_id: Optional[str] = None - # Load cached values if cached_config and "credentials" in cached_config: - project_id = cached_config["credentials"]["project_id"] + cached_project_id = cached_config["credentials"].get("project_id") - # Check for new config - if "credentials" in config and "credentials-json" in config["credentials"]: - ret = GCPCredentials.initialize(config["credentials"]["credentials-json"]) + creds_path_from_config = config.get("credentials", {}).get("credentials-json") + env_gac = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") + env_sebs_gac = os.environ.get("GCP_SECRET_APPLICATION_CREDENTIALS") + + if creds_path_from_config: + ret = GCPCredentials.initialize(creds_path_from_config) os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ret.gcp_credentials - # Look for default GCP credentials - elif "GOOGLE_APPLICATION_CREDENTIALS" in os.environ: - ret = GCPCredentials(os.environ["GOOGLE_APPLICATION_CREDENTIALS"]) - # Look for our environment variables - elif "GCP_SECRET_APPLICATION_CREDENTIALS" in os.environ: - ret = GCPCredentials(os.environ["GCP_SECRET_APPLICATION_CREDENTIALS"]) + elif env_gac: + ret = GCPCredentials(env_gac) + elif env_sebs_gac: + ret = GCPCredentials(env_sebs_gac) os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = ret.gcp_credentials else: raise RuntimeError( "GCP login credentials are missing! Please set the path to .json " - "with cloud credentials in config or in the GCP_SECRET_APPLICATION_CREDENTIALS " - "environmental variable" + "with cloud credentials in config ('credentials-json') or in the " + "GOOGLE_APPLICATION_CREDENTIALS or GCP_SECRET_APPLICATION_CREDENTIALS " + "environmental variable." 
) ret.logging_handlers = handlers - if project_id is not None and project_id != ret._project_id: + if cached_project_id is not None and cached_project_id != ret.project_name: ret.logging.error( - f"The project id {ret._project_id} from provided " - f"credentials is different from the ID {project_id} in the cache! " + f"The project id {ret.project_name} from provided " + f"credentials is different from the ID {cached_project_id} in the cache! " "Please change your cache directory or create a new one!" ) raise RuntimeError( - f"GCP login credentials do not match the project {project_id} in cache!" + f"GCP login credentials do not match the project {cached_project_id} in cache!" ) - return ret - """ - Serialize to JSON for storage in cache. - """ - def serialize(self) -> dict: + """ + Serialize GCP credentials to a dictionary for storage in cache. + + Only stores the project_id, as the path to credentials might change or be + environment-dependent. The actual credential path is expected to be resolved + during deserialization. + + :return: Dictionary containing the project ID. + """ out = {"project_id": self._project_id} return out def update_cache(self, cache: Cache): - cache.update_config(val=self._project_id, keys=["gcp", "credentials", "project_id"]) - + """ + Update the cache with the GCP project ID. -""" - Class grouping resources allocated at the FaaS system to execute functions - and deploy various services. Examples might include IAM roles and API gateways - for HTTP triggers. - - Storage resources are handled seperately. -""" + :param cache: Cache object. + """ + cache.update_config(val=self._project_id, keys=["gcp", "credentials", "project_id"]) class GCPResources(Resources): + """ + Manages GCP resources allocated for SeBS. + + Currently, this class primarily inherits functionality from the base `Resources` + class, as GCP-specific resources beyond standard storage buckets (handled by base) + are not explicitly managed here yet (e.g., specific IAM roles if needed beyond + service account permissions, or API Gateway configurations). + """ def __init__(self): + """Initialize GCPResources.""" super().__init__(name="gcp") @staticmethod def initialize(res: Resources, dct: dict): + """ + Initialize GCPResources from a dictionary. + + Calls the parent class's initialize method. + + :param res: Resources object to initialize (cast to GCPResources). + :param dct: Dictionary containing resource configurations. + :return: Initialized GCPResources instance. + """ ret = cast(GCPResources, res) super(GCPResources, GCPResources).initialize(ret, dct) + # GCP-specific resource initialization can be added here if needed. return ret - """ - Serialize to JSON for storage in cache. - """ - def serialize(self) -> dict: + """ + Serialize GCPResources to a dictionary for storage in cache. + + Calls the parent class's serialize method. + + :return: Dictionary representation of GCPResources. + """ return super().serialize() @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Resources": + """ + Deserialize GCPResources from configuration or cache. + + Prioritizes cached configuration if available. + :param config: Configuration dictionary. + :param cache: Cache object. + :param handlers: Logging handlers. + :return: GCPResources instance. 
+ """ cached_config = cache.get_config("gcp") ret = GCPResources() if cached_config and "resources" in cached_config: @@ -132,55 +193,78 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Resou ret.logging_handlers = handlers ret.logging.info("Using cached resources for GCP") else: - if "resources" in config: GCPResources.initialize(ret, config["resources"]) ret.logging_handlers = handlers ret.logging.info("No cached resources for GCP found, using user configuration.") else: + # Initialize with empty dict if no specific resources config is provided GCPResources.initialize(ret, {}) ret.logging_handlers = handlers ret.logging.info("No resources for GCP found, initialize!") - return ret def update_cache(self, cache: Cache): - super().update_cache(cache) + """ + Update the cache with GCP resource details. + Calls the parent class's update_cache method. -""" - FaaS system config defining cloud region (if necessary), credentials and - resources allocated. -""" + :param cache: Cache object. + """ + super().update_cache(cache) class GCPConfig(Config): - - _project_name: str + """GCP specific configuration, including credentials, resources, and project name.""" + _project_name: str # While project_name is a property, this might be for internal use or type hinting def __init__(self, credentials: GCPCredentials, resources: GCPResources): + """ + Initialize GCPConfig. + + :param credentials: GCPCredentials instance. + :param resources: GCPResources instance. + """ super().__init__(name="gcp") self._credentials = credentials self._resources = resources + # self._project_name = credentials.project_name # Initialize if needed, though property accesses it @property def region(self) -> str: + """The GCP region for the deployment (e.g., "us-central1").""" return self._region @property def project_name(self) -> str: + """The Google Cloud project ID/name.""" return self.credentials.project_name @property def credentials(self) -> GCPCredentials: + """Return the GCP credentials.""" return self._credentials @property def resources(self) -> GCPResources: + """Return the GCP resources configuration.""" return self._resources @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Config": + """ + Deserialize GCPConfig from configuration or cache. + + Deserializes credentials and resources, then initializes the GCPConfig + object, prioritizing cached configuration. It also handles updates to + cached values if the user provides different ones in the input configuration. + + :param config: Configuration dictionary. + :param cache: Cache object. + :param handlers: Logging handlers. + :return: GCPConfig instance. 
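A minimal user-provided GCP section that this deserialization path accepts could look like the following sketch (values are placeholders; `cache` and `handlers` are assumed to be existing Cache and LoggingHandlers instances):

gcp_section = {
    "region": "us-central1",
    "credentials": {"credentials-json": "/path/to/service-account.json"},
}
config = GCPConfig.deserialize(gcp_section, cache, handlers)
print(config.project_name, config.region)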
+ """ cached_config = cache.get_config("gcp") credentials = cast(GCPCredentials, GCPCredentials.deserialize(config, cache, handlers)) resources = cast(GCPResources, GCPResources.deserialize(config, cache, handlers)) @@ -189,35 +273,52 @@ def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> "Confi if cached_config: config_obj.logging.info("Loading cached config for GCP") - GCPConfig.initialize(config_obj, cached_config) + GCPConfig.initialize(config_obj, cached_config) # Initialize with cached values first else: - config_obj.logging.info("Using user-provided config for GCP") - GCPConfig.initialize(config_obj, config) - - # mypy makes a mistake here - updated_keys: List[Tuple[str, Tuple[str]]] = [["region", ["gcp", "region"]]] # type: ignore - # for each attribute here, check if its version is different than the one provided by - # user; if yes, then update the value - for config_key, keys in updated_keys: - - old_value = getattr(config_obj, config_key) - # ignore empty values - if getattr(config_obj, config_key) != config[config_key] and config[config_key]: - config_obj.logging.info( - f"Updating cached key {config_key} with {old_value} " - f"to user-provided value {config[config_key]}." - ) - setattr(config_obj, f"_{config_key}", config[config_key]) - cache.update_config(val=getattr(config_obj, config_key), keys=keys) + config_obj.logging.info("Using user-provided config for GCP as no cache found") + GCPConfig.initialize(config_obj, config) # Initialize with user config + + # Update cached values if user provided different ones, only for specific keys like region + # The original logic for updated_keys seems specific and might need review for generality. + # Assuming 'region' is the primary updatable field here from user config over cache. + user_provided_region = config.get("region") + if user_provided_region and config_obj.region != user_provided_region: + config_obj.logging.info( + f"Updating cached region {config_obj.region} " + f"to user-provided value {user_provided_region}." + ) + config_obj._region = user_provided_region # Directly update the backing field + # The cache update for region is handled by the main update_cache method. + # Ensure resources have the correct region set, especially if config_obj.region changed + config_obj.resources.region = config_obj.region return config_obj @staticmethod def initialize(cfg: Config, dct: dict): + """ + Initialize GCPConfig attributes from a dictionary. + + Sets the GCP region. + + :param cfg: Config object to initialize (cast to GCPConfig). + :param dct: Dictionary containing 'region'. + """ config = cast(GCPConfig, cfg) config._region = dct["region"] + # Ensure resources also get the region if being initialized here + if hasattr(config, '_resources') and config._resources: + config._resources.region = dct["region"] + def serialize(self) -> dict: + """ + Serialize GCPConfig to a dictionary. + + Includes region, credentials, and resources. + + :return: Dictionary representation of GCPConfig. + """ out = { "name": "gcp", "region": self._region, @@ -227,6 +328,13 @@ def serialize(self) -> dict: return out def update_cache(self, cache: Cache): + """ + Update the user cache with GCP configuration. + + Saves region, credentials (project_id), and resources to the cache. + + :param cache: Cache object. 
+ """ cache.update_config(val=self.region, keys=["gcp", "region"]) self.credentials.update_cache(cache) self.resources.update_cache(cache) diff --git a/sebs/gcp/datastore.py b/sebs/gcp/datastore.py index ae747fb17..1bcc793f5 100644 --- a/sebs/gcp/datastore.py +++ b/sebs/gcp/datastore.py @@ -11,48 +11,90 @@ @dataclass class BenchmarkResources: + """ + Dataclass to hold Google Cloud Datastore resources specific to a benchmark. + Attributes: + database: Name of the Datastore database (Firestore in Datastore mode). + kinds: List of Datastore "kind" names (analogous to tables) used by the benchmark. + database_client: Optional Datastore client instance (dynamically allocated, not cached). + """ database: str kinds: List[str] # We allocate this dynamically - ignore when caching database_client: Optional[datastore.Client] = None def serialize(self) -> dict: + """ + Serialize BenchmarkResources to a dictionary for caching. + Excludes the database_client. + + :return: Dictionary with 'database' and 'kinds'. + """ return {"database": self.database, "kinds": self.kinds} @staticmethod def deserialize(config: dict) -> "BenchmarkResources": + """ + Deserialize BenchmarkResources from a dictionary (typically from cache). + + :param config: Dictionary with 'database' and 'kinds'. + :return: BenchmarkResources instance. + """ return BenchmarkResources(database=config["database"], kinds=config["kinds"]) class Datastore(NoSQLStorage): + """Google Cloud Datastore (Firestore in Datastore mode) NoSQL storage implementation.""" @staticmethod def typename() -> str: + """Return the type name of the NoSQL storage implementation.""" return "GCP.Datastore" @staticmethod def deployment_name(): + """Return the deployment name for GCP (gcp).""" return "gcp" def __init__( self, cli_instance: GCloudCLI, cache_client: Cache, resources: Resources, region: str ): + """ + Initialize Datastore client and internal resource tracking. + + :param cli_instance: GCloudCLI instance. + :param cache_client: Cache client instance. + :param resources: GCPResources instance. + :param region: GCP region. + """ super().__init__(region, cache_client, resources) self._cli_instance = cli_instance self._region = region - # Mapping: benchmark -> Datastore database + # Mapping: benchmark -> Datastore database and kinds self._benchmark_resources: Dict[str, BenchmarkResources] = {} - """ - GCP requires no table mappings: the name of "kind" is the same as benchmark name. - """ - def get_tables(self, benchmark: str) -> Dict[str, str]: + """ + Get the mapping of benchmark-specific table names to actual Datastore kinds. + GCP Datastore uses "kinds" which are directly named; no explicit mapping is stored here. + Thus, an empty dictionary is returned as the names are directly used. + + :param benchmark: Name of the benchmark. + :return: Empty dictionary. + """ return {} def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: + """ + Get the actual Datastore kind name for a given benchmark and table alias. + In Datastore's case, the table alias is the kind name if it's registered + for the benchmark. + :param benchmark: Name of the benchmark. + :param table: Alias of the table (kind name) used within the benchmark. + :return: Actual Datastore kind name, or None if not found for the benchmark. 
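For context on how entities are keyed in the methods below, a standalone sketch using the google-cloud-datastore client (database, kind, and ID values are placeholders, not names SeBS creates):

from google.cloud import datastore

client = datastore.Client(database="sebs-benchmarks-example")
parent = client.key("input", "110.dynamic-html")             # ancestor key (entity group)
key = client.key("results", "request-0001", parent=parent)   # kind + entity name
entity = datastore.Entity(key=key)
entity.update({"payload": "...", "size": 128})
client.put(entity)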
+ """ if benchmark not in self._benchmark_resources: return None @@ -62,7 +104,14 @@ def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: return table def retrieve_cache(self, benchmark: str) -> bool: + """ + Retrieve benchmark-specific Datastore resource details (database, kinds) from cache. + + Populates `_benchmark_resources` if cached data is found. + :param benchmark: Name of the benchmark. + :return: True if cache was retrieved, False otherwise. + """ if benchmark in self._benchmark_resources: return True @@ -74,12 +123,22 @@ def retrieve_cache(self, benchmark: str) -> bool: return False def update_cache(self, benchmark: str): + """ + Update the cache with the current benchmark-specific Datastore resource details. + :param benchmark: Name of the benchmark. + """ self._cache_client.update_nosql( self.deployment_name(), benchmark, self._benchmark_resources[benchmark].serialize() ) def benchmark_database(self, benchmark: str) -> str: + """ + Get the name of the Datastore database used for a specific benchmark. + + :param benchmark: Name of the benchmark. + :return: Name of the Datastore database. + """ return self._benchmark_resources[benchmark].database def write_to_table( @@ -90,91 +149,138 @@ def write_to_table( primary_key: Tuple[str, str], secondary_key: Optional[Tuple[str, str]] = None, ): - + """ + Write data to a Google Cloud Datastore kind (table). + + Uses the secondary key as the entity's key name/ID and the primary key + as part of the parent key for organizational purposes if provided. + The actual partitioning/sharding is handled by Datastore based on the key structure. + + :param benchmark: Name of the benchmark. + :param table: Name of the Datastore kind. + :param data: Dictionary containing the data to write. + :param primary_key: Tuple of (ancestor_kind, ancestor_id) for the parent key. + Datastore uses this for entity groups, not strict partitioning like DynamoDB. + :param secondary_key: Tuple (kind_for_id, id_value) where id_value is used as the entity's ID/name. + The kind_for_id is used for the parent key. + :raises AssertionError: If secondary_key is None, as it's used for the entity ID. + """ res = self._benchmark_resources[benchmark] - table_name = self._get_table_name(benchmark, table) + kind_name = self._get_table_name(benchmark, table) # 'table' is the kind name + assert kind_name is not None, f"Kind {table} not registered for benchmark {benchmark}" - # FIXME: support both options - assert secondary_key is not None + # In Datastore, the entity's own key can have an ID (name or integer). + # The secondary_key's value is used as this ID. + # The primary_key (if representing an ancestor) helps group entities. + assert secondary_key is not None, "Datastore entity requires an ID/name from secondary_key." + entity_id = secondary_key[1] if res.database_client is None: res.database_client = datastore.Client(database=res.database) - parent_key = res.database_client.key(secondary_key[0], secondary_key[1]) - key = res.database_client.key( - # kind determines the table - table_name, - # main ID key - secondary_key[1], - # organization key - parent=parent_key, - ) + # Construct the entity key. + # If primary_key is (ancestor_kind, ancestor_id), it forms the parent. 
+ parent_key = None + if primary_key and primary_key[0] and primary_key[1]: + # Assuming primary_key[0] is ancestor kind, primary_key[1] is ancestor id/name + parent_key = res.database_client.key(primary_key[0], primary_key[1]) + + entity_key = res.database_client.key(kind_name, entity_id, parent=parent_key) - val = datastore.Entity(key=key) - val.update(data) - res.database_client.put(val) + entity = datastore.Entity(key=entity_key) + entity.update(data) + res.database_client.put(entity) def create_table( self, benchmark: str, name: str, primary_key: str, _: Optional[str] = None ) -> str: - + """ + Ensure a Datastore "kind" (analogous to a table) is noted for a benchmark + and that its associated database (Firestore in Datastore mode) exists. + + If the database for the benchmark doesn't exist, it's created using gcloud CLI. + Datastore kinds are schemaless and created implicitly when an entity of that + kind is first written. This method primarily ensures the database exists and + registers the kind name for the benchmark. The `primary_key` is noted but + not directly used to create schema for the kind itself, as Datastore is schemaless. + The secondary_key parameter is ignored. + + :param benchmark: Name of the benchmark. + :param name: Name of the Datastore kind to register/use. + :param primary_key: Name of the attribute often used as a primary/partition key conceptually. + :param _: Secondary key (ignored for Datastore kind creation). + :return: The name of the kind (which is `name`). + :raises RuntimeError: If database creation or query fails. + """ benchmark_resources = self._benchmark_resources.get(benchmark, None) if benchmark_resources is not None and name in benchmark_resources.kinds: self.logging.info(f"Using cached Datastore kind {name}") + # Ensure database_client is initialized if loaded from cache + if benchmark_resources.database_client is None: + benchmark_resources.database_client = datastore.Client(database=benchmark_resources.database) return name - """ - No data for this benchmark -> we need to allocate a new Datastore database. 
- """ - + # If no resources registered for this benchmark, means we need to ensure/create the database if benchmark_resources is None: - database_name = f"sebs-benchmarks-{self._cloud_resources.resources_id}-{benchmark}" - try: - + # Check if database exists self._cli_instance.execute( - "gcloud firestore databases describe " - f" --database='{database_name}' " - " --format='json'" + f"gcloud firestore databases describe --database='{database_name}' --format='json'" ) - + self.logging.info(f"Using existing Firestore database {database_name} in Datastore mode.") except RuntimeError as e: - if "NOT_FOUND" in str(e): - - """ - Allocate a new Firestore database, in datastore mode - """ - - self.logging.info(f"Allocating a new Firestore database {database_name}") + # Allocate a new Firestore database in Datastore mode + self.logging.info(f"Allocating a new Firestore database {database_name} in Datastore mode.") self._cli_instance.execute( - "gcloud firestore databases create " - f" --database='{database_name}' " - f" --location={self.region} " - f" --type='datastore-mode' " + f"gcloud firestore databases create --database='{database_name}' " + f"--location={self.region} --type='datastore-mode'" ) self.logging.info(f"Allocated a new Firestore database {database_name}") - else: - - self.logging.error("Couldn't query Datastore instances!") - self.logging.error(e) - raise RuntimeError("Couldn't query Datastore instances!") + self.logging.error(f"Couldn't query Datastore database {database_name}: {e}") + raise RuntimeError(f"Couldn't query Datastore database {database_name}!") db_client = datastore.Client(database=database_name) benchmark_resources = BenchmarkResources( database=database_name, kinds=[], database_client=db_client ) self._benchmark_resources[benchmark] = benchmark_resources + elif benchmark_resources.database_client is None: # Ensure client if benchmark_resources existed but client was not set + benchmark_resources.database_client = datastore.Client(database=benchmark_resources.database) + - benchmark_resources.kinds.append(name) + # Add kind to the list for this benchmark if not already present + if name not in benchmark_resources.kinds: + benchmark_resources.kinds.append(name) + self.logging.info(f"Registered kind {name} for benchmark {benchmark} in database {benchmark_resources.database}") return name def clear_table(self, name: str) -> str: + """ + Clear all entities from a Datastore kind. + + Note: This method is not implemented. Deleting all entities from a kind + efficiently requires careful implementation, often involving batched deletes + or specific Datastore APIs. + + :param name: Name of the kind to clear. + :raises NotImplementedError: This method is not yet implemented. + """ raise NotImplementedError() def remove_table(self, name: str) -> str: + """ + Remove a Datastore kind (conceptually, as kinds are schemaless). + + Note: This method is not implemented. Removing a "kind" in Datastore + means deleting all entities of that kind. There isn't a direct "drop kind" + operation like "drop table". + + :param name: Name of the kind to remove/clear. + :raises NotImplementedError: This method is not yet implemented. + """ raise NotImplementedError() diff --git a/sebs/gcp/function.py b/sebs/gcp/function.py index 6736c1cae..fc5c66808 100644 --- a/sebs/gcp/function.py +++ b/sebs/gcp/function.py @@ -6,6 +6,12 @@ class GCPFunction(Function): + """ + Represents a Google Cloud Function. 
+ + Extends the base Function class with GCP-specific attributes like the + Cloud Storage bucket used for code deployment. + """ def __init__( self, name: str, @@ -14,14 +20,31 @@ def __init__( cfg: FunctionConfig, bucket: Optional[str] = None, ): + """ + Initialize a GCPFunction instance. + + :param name: Name of the Google Cloud Function. + :param benchmark: Name of the benchmark this function belongs to. + :param code_package_hash: Hash of the deployed code package. + :param cfg: FunctionConfig object with memory, timeout, etc. + :param bucket: Optional Cloud Storage bucket name where the code package is stored. + """ super().__init__(benchmark, name, code_package_hash, cfg) self.bucket = bucket @staticmethod def typename() -> str: + """Return the type name of this function implementation.""" return "GCP.GCPFunction" def serialize(self) -> dict: + """ + Serialize the GCPFunction instance to a dictionary. + + Includes GCP-specific attributes (bucket) along with base Function attributes. + + :return: Dictionary representation of the GCPFunction. + """ return { **super().serialize(), "bucket": self.bucket, @@ -29,6 +52,14 @@ def serialize(self) -> dict: @staticmethod def deserialize(cached_config: dict) -> "GCPFunction": + """ + Deserialize a GCPFunction instance from a dictionary. + + Typically used when loading function details from a cache. + + :param cached_config: Dictionary containing serialized GCPFunction data. + :return: A new GCPFunction instance. + """ from sebs.faas.function import Trigger from sebs.gcp.triggers import LibraryTrigger, HTTPTrigger @@ -49,7 +80,17 @@ def deserialize(cached_config: dict) -> "GCPFunction": ret.add_trigger(trigger_type.deserialize(trigger)) return ret - def code_bucket(self, benchmark: str, storage_client: GCPStorage): + def code_bucket(self, benchmark: str, storage_client: GCPStorage) -> Optional[str]: + """ + Get or assign the Google Cloud Storage bucket for code deployment. + + If a bucket is not already assigned to this function, it retrieves + the deployment bucket from the GCPStorage client. + + :param benchmark: Name of the benchmark (used by storage_client if creating a new bucket). + :param storage_client: GCPStorage client instance. + :return: The name of the Cloud Storage bucket used for code deployment, or None if not set. + """ if not self.bucket: self.bucket = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT) return self.bucket diff --git a/sebs/gcp/gcp.py b/sebs/gcp/gcp.py index 187d8cda8..ea1740df6 100644 --- a/sebs/gcp/gcp.py +++ b/sebs/gcp/gcp.py @@ -26,15 +26,20 @@ from sebs.utils import LoggingHandlers """ - This class provides basic abstractions for the FaaS system. - It provides the interface for initialization of the system and storage - services, creation and update of serverless functions and querying - logging and measurements services to obtain error messages and performance - measurements. +Google Cloud Platform (GCP) FaaS system implementation. + +This class provides the SeBS interface for interacting with Google Cloud Functions, +including function deployment, invocation, resource management, and metrics collection. +It utilizes the Google Cloud Client Libraries and gcloud CLI (via Docker) for its operations. """ class GCP(System): + """ + Google Cloud Platform (GCP) FaaS system implementation. + + Manages functions and resources on Google Cloud Functions. 
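For orientation, the discovery-based client created in `initialize` below is used through chained resource calls. A sketch of a direct function lookup (the resource name format and response fields follow the public Cloud Functions v1 API and are shown only for illustration):

from googleapiclient.discovery import build

client = build("cloudfunctions", "v1", cache_discovery=False)
name = "projects/my-project/locations/us-central1/functions/sebs_1234_110_dynamic_html_python_3_8"
response = client.projects().locations().functions().get(name=name).execute()
print(response.get("status"), response.get("entryPoint"))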
+ """ def __init__( self, system_config: SeBSConfig, @@ -43,6 +48,15 @@ def __init__( docker_client: docker.client, logging_handlers: LoggingHandlers, ): + """ + Initialize GCP FaaS system. + + :param system_config: SeBS system configuration. + :param config: GCP-specific configuration. + :param cache_client: Function cache instance. + :param docker_client: Docker client instance. + :param logging_handlers: Logging handlers. + """ super().__init__( system_config, cache_client, @@ -53,45 +67,68 @@ def __init__( ) self._config = config self.logging_handlers = logging_handlers + self.function_client = None # Will be initialized in initialize() @property def config(self) -> GCPConfig: + """Return the GCP-specific configuration.""" return self._config @staticmethod - def name(): + def name() -> str: + """Return the name of the cloud provider (gcp).""" return "gcp" @staticmethod - def typename(): + def typename() -> str: + """Return the type name of the cloud provider (GCP).""" return "GCP" @staticmethod def function_type() -> "Type[Function]": + """Return the type of the function implementation for GCP.""" return GCPFunction - """ - Initialize the system. After the call the local or remote - FaaS system should be ready to allocate functions, manage - storage resources and invoke functions. + def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): + """ + Initialize the GCP system. - :param config: systems-specific parameters - """ + Initializes the Google Cloud Functions API client and system resources. + After this call, the GCP system should be ready to allocate functions, + manage storage, and invoke functions. - def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): + :param config: System-specific parameters (not currently used by GCP implementation). + :param resource_prefix: Optional prefix for naming/selecting resources. + """ self.function_client = build("cloudfunctions", "v1", cache_discovery=False) self.initialize_resources(select_prefix=resource_prefix) - def get_function_client(self): + def get_function_client(self): # No type hint for googleapiclient.discovery.Resource + """ + Return the Google Cloud Functions API client. + + The client is initialized during the `initialize` phase. + + :return: Google Cloud Functions API client instance. + """ return self.function_client def default_function_name( self, code_package: Benchmark, resources: Optional[Resources] = None ) -> str: - # Create function name - resource_id = resources.resources_id if resources else self.config.resources.resources_id + """ + Generate a default function name for Google Cloud Functions. + + The name is constructed using SeBS prefix, resource ID, benchmark name, + language, and version, formatted according to GCP naming rules. + + :param code_package: Benchmark object. + :param resources: Optional Resources object (uses self.config.resources if None). + :return: Default function name string. + """ + current_resources = resources if resources else self.config.resources func_name = "sebs-{}-{}-{}-{}".format( - resource_id, + current_resources.resources_id, code_package.benchmark, code_package.language_name, code_package.language_version, @@ -100,27 +137,21 @@ def default_function_name( @staticmethod def format_function_name(func_name: str) -> str: + """ + Format the function name to comply with GCP naming rules. + + Replaces hyphens and dots with underscores. GCP function names must + start with a letter, but SeBS typically prepends "sebs-". 
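As a concrete illustration of how `default_function_name` and `format_function_name` combine (the resource ID and benchmark values are made up):

raw_name = "sebs-{}-{}-{}-{}".format("a1b2c3", "110.dynamic-html", "python", "3.8")
print(raw_name)                            # sebs-a1b2c3-110.dynamic-html-python-3.8
print(GCP.format_function_name(raw_name))  # sebs_a1b2c3_110_dynamic_html_python_3_8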
+ + :param func_name: Original function name. + :return: Formatted function name. + """ # GCP functions must begin with a letter # however, we now add by default `sebs` in the beginning func_name = func_name.replace("-", "_") func_name = func_name.replace(".", "_") return func_name - """ - Apply the system-specific code packaging routine to build benchmark. - The benchmark creates a code directory with the following structure: - - [benchmark sources] - - [benchmark resources] - - [dependence specification], e.g. requirements.txt or package.json - - [handlers implementation for the language and deployment] - - This step allows us to change the structure above to fit different - deployment requirements, Example: a zip file for AWS or a specific - directory structure for Azure. - - :return: path to packaged code and its size - """ - def package_code( self, directory: str, @@ -131,57 +162,92 @@ def package_code( is_cached: bool, container_deployment: bool, ) -> Tuple[str, int, str]: - + """ + Package benchmark code for Google Cloud Functions. + + The standard SeBS code directory structure is adapted: + - Files are moved into a 'function' subdirectory. + - The main handler file (e.g., handler.py) is renamed (e.g., to main.py for Python). + - The entire directory is then zipped for deployment. + + Note: Container deployment is not currently supported for GCP in SeBS. + + :param directory: Path to the code directory. + :param language_name: Programming language name. + :param language_version: Programming language version. + :param architecture: Target architecture (not directly used in GCP packaging for zip). + :param benchmark: Benchmark name. + :param is_cached: Whether the code is cached (not directly used in packaging logic). + :param container_deployment: Whether to package for container deployment. + :return: Tuple containing: + - Path to the packaged zip file. + - Size of the zip file in bytes. + - Empty string for container URI (as not supported). + :raises NotImplementedError: If container_deployment is True. + """ container_uri = "" if container_deployment: raise NotImplementedError("Container Deployment is not supported in GCP") CONFIG_FILES = { - "python": ["handler.py", ".python_packages"], - "nodejs": ["handler.js", "node_modules"], + "python": ["handler.py", ".python_packages"], # Original handler.py is moved/renamed + "nodejs": ["handler.js", "node_modules"], # Original handler.js is moved/renamed } - HANDLER = { + # GCP requires specific entry point file names (main.py for python, index.js for nodejs) + # The original handler.py/handler.js from SeBS benchmark template will be renamed. + HANDLER_RENAMES = { "python": ("handler.py", "main.py"), "nodejs": ("handler.js", "index.js"), } - package_config = CONFIG_FILES[language_name] - function_dir = os.path.join(directory, "function") - os.makedirs(function_dir) - for file in os.listdir(directory): - if file not in package_config: - file = os.path.join(directory, file) - shutil.move(file, function_dir) - - # rename handler function.py since in gcp it has to be caled main.py - old_name, new_name = HANDLER[language_name] - old_path = os.path.join(directory, old_name) - new_path = os.path.join(directory, new_name) - shutil.move(old_path, new_path) - - """ - zip the whole directory (the zip-file gets uploaded to gcp later) - - Note that the function GCP.recursive_zip is slower than the use of e.g. 
-            `utils.execute("zip -qu -r9 {}.zip * .".format(benchmark), shell=True)`
-            or `shutil.make_archive(benchmark_archive, direcory, directory)`
-            But both of the two alternatives need a chance of directory
-            (shutil.make_archive does the directorychange internaly)
-            which leads to a "race condition" when running several benchmarks
-            in parallel, since a change of the current directory is NOT Thread specfic.
-        """
-        benchmark_archive = "{}.zip".format(os.path.join(directory, benchmark))
-        GCP.recursive_zip(directory, benchmark_archive)
-        logging.info("Created {} archive".format(benchmark_archive))
-
-        bytes_size = os.path.getsize(benchmark_archive)
+        package_config_exclusions = CONFIG_FILES[language_name]
+
+        # Keep the handler and dependency files at the root of the package and move
+        # the remaining benchmark sources and resources into a 'function' subdirectory.
+        function_dir = os.path.join(directory, "function")
+        os.makedirs(function_dir)
+        for file in os.listdir(directory):
+            if file not in package_config_exclusions:
+                shutil.move(os.path.join(directory, file), function_dir)
+
+        # GCP expects the entry point file at the root of the zip archive, so rename
+        # the SeBS handler (handler.py -> main.py, handler.js -> index.js).
+        sebs_handler_name, gcp_handler_name = HANDLER_RENAMES[language_name]
+        shutil.move(
+            os.path.join(directory, sebs_handler_name),
+            os.path.join(directory, gcp_handler_name),
+        )
+
+        # Zip the whole directory for deployment. `recursive_zip` stores paths relative
+        # to `directory` without changing the process working directory, which keeps
+        # packaging safe when several benchmarks are built in parallel.
+        benchmark_archive_path = os.path.join(directory, f"{benchmark}.zip")
+        GCP.recursive_zip(directory, benchmark_archive_path)
+        logging.info(f"Created {benchmark_archive_path} archive")
+
+        bytes_size = os.path.getsize(benchmark_archive_path)
         mbytes = bytes_size / 1024.0 / 1024.0
-        logging.info("Zip archive size {:2f} MB".format(mbytes))
+        logging.info(f"Zip archive size {mbytes:.2f} MB")
 
-        # rename the main.py back to handler.py
-        shutil.move(new_path, old_path)
+        # Restore the original handler name so the local code directory stays unchanged.
+ shutil.move(os.path.join(directory, gcp_handler_name), os.path.join(directory, sebs_handler_name)) - return os.path.join(directory, "{}.zip".format(benchmark)), bytes_size, container_uri + return benchmark_archive_path, bytes_size, container_uri def create_function( self, @@ -190,161 +256,253 @@ def create_function( container_deployment: bool, container_uri: str, ) -> "GCPFunction": - + """ + Create or update a Google Cloud Function. + + If the function doesn't exist, it's created. If it exists, it's updated. + The function code is uploaded to a Cloud Storage bucket before deployment. + Permissions are set to allow unauthenticated invocations for HTTP triggers. + + :param code_package: Benchmark object with code and configuration. + :param func_name: Desired name for the function. + :param container_deployment: Flag for container deployment (not supported for GCP). + :param container_uri: Container URI (not used). + :return: GCPFunction object representing the deployed function. + :raises NotImplementedError: If container_deployment is True. + :raises RuntimeError: If function creation or permission setting fails. + """ if container_deployment: raise NotImplementedError("Container deployment is not supported in GCP") - package = code_package.code_location - benchmark = code_package.benchmark - language_runtime = code_package.language_version - timeout = code_package.benchmark_config.timeout - memory = code_package.benchmark_config.memory - code_bucket: Optional[str] = None - storage_client = self._system_resources.get_storage() - location = self.config.region - project_name = self.config.project_name - function_cfg = FunctionConfig.from_benchmark(code_package) - architecture = function_cfg.architecture.value - - code_package_name = cast(str, os.path.basename(package)) - code_package_name = f"{architecture}-{code_package_name}" - code_bucket = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT) - code_prefix = os.path.join(benchmark, code_package_name) - storage_client.upload(code_bucket, package, code_prefix) - - self.logging.info("Uploading function {} code to {}".format(func_name, code_bucket)) - - full_func_name = GCP.get_full_function_name(project_name, location, func_name) - get_req = self.function_client.projects().locations().functions().get(name=full_func_name) + package_path = code_package.code_location # Path to the zip file + benchmark_name = code_package.benchmark + language_runtime_version = code_package.language_version + timeout_seconds = code_package.benchmark_config.timeout + memory_mb = code_package.benchmark_config.memory + storage_client = cast(GCPStorage, self._system_resources.get_storage()) + region = self.config.region + project_id = self.config.project_name + function_config = FunctionConfig.from_benchmark(code_package) + target_architecture = function_config.architecture.value # 'x64' or 'arm64' + + # Prepare code package name for Cloud Storage + # Include architecture in the object name for clarity if needed, though GCP Functions Gen1 might not use it for zip + base_code_package_name = os.path.basename(package_path) + gcs_code_object_name = f"{target_architecture}-{base_code_package_name}" + deployment_bucket_name = storage_client.get_bucket(Resources.StorageBucketType.DEPLOYMENT) + # Path in GCS: benchmark_name/architecture-zipfilename.zip + gcs_code_prefix = os.path.join(benchmark_name, gcs_code_object_name) + storage_client.upload(deployment_bucket_name, package_path, gcs_code_prefix) + source_archive_url = 
f"gs://{deployment_bucket_name}/{gcs_code_prefix}" + + self.logging.info(f"Uploaded function {func_name} code to {source_archive_url}") + + full_function_name_path = GCP.get_full_function_name(project_id, region, func_name) + get_request = self.function_client.projects().locations().functions().get(name=full_function_name_path) try: - get_req.execute() - except HttpError: - - envs = self._generate_function_envs(code_package) - - create_req = ( - self.function_client.projects() - .locations() - .functions() - .create( - location="projects/{project_name}/locations/{location}".format( - project_name=project_name, location=location - ), - body={ - "name": full_func_name, - "entryPoint": "handler", - "runtime": code_package.language_name + language_runtime.replace(".", ""), - "availableMemoryMb": memory, - "timeout": str(timeout) + "s", - "httpsTrigger": {}, - "ingressSettings": "ALLOW_ALL", - "sourceArchiveUrl": "gs://" + code_bucket + "/" + code_prefix, - "environmentVariables": envs, - }, - ) + get_request.execute() + # Function exists, update it + self.logging.info(f"Function {func_name} exists on GCP, updating the instance.") + gcp_function = GCPFunction( + name=func_name, + benchmark=benchmark_name, + code_package_hash=code_package.hash, + cfg=function_config, + bucket=deployment_bucket_name, # Store the bucket used for deployment ) - create_req.execute() - self.logging.info(f"Function {func_name} has been created!") - - allow_unauthenticated_req = ( - self.function_client.projects() - .locations() - .functions() - .setIamPolicy( - resource=full_func_name, - body={ - "policy": { - "bindings": [ - {"role": "roles/cloudfunctions.invoker", "members": ["allUsers"]} - ] - } - }, + self.update_function(gcp_function, code_package, container_deployment, container_uri) + except HttpError as e: + if e.resp.status == 404: + # Function does not exist, create it + self.logging.info(f"Function {func_name} does not exist, creating new one.") + environment_variables = self._generate_function_envs(code_package) + # GCP runtime format: {language}{major_version_only} e.g. python38, nodejs16 + gcp_runtime_str = code_package.language_name + language_runtime_version.replace(".", "") + + create_body = { + "name": full_function_name_path, + "entryPoint": "handler", # Default SeBS entry point + "runtime": gcp_runtime_str, + "availableMemoryMb": memory_mb, + "timeout": f"{timeout_seconds}s", + "httpsTrigger": {}, # Creates an HTTP trigger + "ingressSettings": "ALLOW_ALL", # Allow all traffic for HTTP trigger + "sourceArchiveUrl": source_archive_url, + "environmentVariables": environment_variables, + } + # GCP Gen 2 functions allow specifying architecture, Gen 1 does not directly via this API for zip. + # If targeting Gen 2 or specific features, the API call might differ or use beta. + # For now, assuming Gen 1 compatible zip deployment. + + create_request = ( + self.function_client.projects() + .locations() + .functions() + .create( + location=f"projects/{project_id}/locations/{region}", + body=create_body, + ) ) - ) - - # Avoid infinite loop - MAX_RETRIES = 5 - counter = 0 - while counter < MAX_RETRIES: - try: - allow_unauthenticated_req.execute() - break - except HttpError: - - self.logging.info( - "Sleeping for 5 seconds because the created functions is not yet available!" 
+                create_request.execute()  # Submits the request and returns a long-running operation
+                self.logging.info(f"Function {func_name} creation initiated.")
+                self._wait_for_operation_done(create_request)  # Helper to wait for the operation
+                self.logging.info(f"Function {func_name} has been created.")
+
+                # Set IAM policy to allow unauthenticated invocations of the HTTP trigger
+                set_iam_policy_request = (
+                    self.function_client.projects()
+                    .locations()
+                    .functions()
+                    .setIamPolicy(
+                        resource=full_function_name_path,
+                        body={"policy": {"bindings": [{"role": "roles/cloudfunctions.invoker", "members": ["allUsers"]}]}},
                    )
                )
+                # Retry setting the IAM policy, as the freshly created function might not be fully ready yet
+                MAX_RETRIES = 5
+                for attempt in range(MAX_RETRIES):
+                    try:
+                        set_iam_policy_request.execute()
+                        self.logging.info(f"Function {func_name} now accepts unauthenticated invocations.")
+                        break
+                    except HttpError as iam_error:
+                        if iam_error.resp.status == 400 and "Policy can't be set" in str(iam_error):
+                            # The function is still being updated; wait and retry
+                            self.logging.info(
+                                f"Waiting for function {func_name} to be ready for IAM policy update "
+                                f"(attempt {attempt + 1}/{MAX_RETRIES})."
+                            )
+                            time.sleep(5 + attempt * 2)  # linearly increasing backoff
+                        elif iam_error.resp.status == 404 and attempt < MAX_RETRIES - 1:
+                            # The function might not be discoverable by IAM yet
+                            self.logging.info(
+                                f"Function {func_name} not found by IAM, retrying "
+                                f"(attempt {attempt + 1}/{MAX_RETRIES})."
+                            )
+                            time.sleep(5 + attempt * 2)
+                        else:
+                            raise RuntimeError(
+                                f"Failed to set IAM policy for {full_function_name_path}: {iam_error}"
+                            )
+                else:  # loop exhausted without a successful call
+                    raise RuntimeError(
+                        f"Failed to set IAM policy for {full_function_name_path} "
+                        f"after {MAX_RETRIES} attempts."
+                    )
+
+                gcp_function = GCPFunction(
+                    func_name, benchmark_name, code_package.hash, function_config, deployment_bucket_name
+                )
+            else:
+                # Any other HttpError is unexpected, so re-raise it
+                raise e
 
-            self.logging.info(f"Function {func_name} accepts now unauthenticated invocations!")
-
-            function = GCPFunction(
-                func_name, benchmark, code_package.hash, function_cfg, code_bucket
-            )
-        else:
-            # if result is not empty, then function does exists
-            self.logging.info("Function {} exists on GCP, update the instance.".format(func_name))
-            function = GCPFunction(
-                name=func_name,
-                benchmark=benchmark,
-                code_package_hash=code_package.hash,
-                cfg=function_cfg,
-                bucket=code_bucket,
-            )
-            self.update_function(function, code_package, container_deployment, container_uri)
-
-        # Add LibraryTrigger to a new function
+        # Add the default LibraryTrigger
         from sebs.gcp.triggers import LibraryTrigger
+        library_trigger = LibraryTrigger(func_name, self)
+        library_trigger.logging_handlers = self.logging_handlers
+        gcp_function.add_trigger(library_trigger)
+
+        # An HTTP trigger is created implicitly; make sure it is represented in SeBS.
+        # Its URL becomes available once the function is ACTIVE.
+        self.create_trigger(gcp_function, Trigger.TriggerType.HTTP)
+
+        return gcp_function
+
+    def _wait_for_operation_done(self, operation_request):
+        """Helper to wait for a Google Cloud API operation to complete."""
+        # Cloud Functions create/patch requests return a long-running operation;
+        # execute() only submits the request, so a complete implementation would
+        # poll the operation until it is done, as in the sketch below.
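For reference, a minimal polling sketch of what a full `_wait_for_operation_done` could look like, assuming the `googleapiclient` discovery client built above for the Cloud Functions v1 API and the operation dict returned by `create(...).execute()` or `patch(...).execute()`; the helper name and timeout values are illustrative, not part of the patch:

import time

def wait_for_gcf_operation(function_client, operation: dict, timeout_s: int = 300, poll_s: int = 5):
    """Poll a Cloud Functions v1 long-running operation until it reports done."""
    deadline = time.time() + timeout_s
    op_name = operation["name"]  # e.g. "operations/..."
    while time.time() < deadline:
        result = function_client.operations().get(name=op_name).execute()
        if result.get("done"):
            if "error" in result:
                raise RuntimeError(f"Operation {op_name} failed: {result['error']}")
            return result.get("response")
        time.sleep(poll_s)
    raise TimeoutError(f"Operation {op_name} did not complete within {timeout_s}s")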
+ # If it returns an operation object, one would poll operation.name with operations.get. + self.logging.info("Waiting for operation to complete...") + # Placeholder: actual Google API operations might require polling. + # Example: + # op_service = build('cloudfunctions', 'v1').operations() + # while True: + # op_result = op_service.get(name=operation_name_from_response).execute() + # if op_result.get('done'): + # if op_result.get('error'): raise Exception(op_result['error']) + # break + # time.sleep(5) + time.sleep(10) # Generic wait, replace with actual polling if needed. - trigger = LibraryTrigger(func_name, self) - trigger.logging_handlers = self.logging_handlers - function.add_trigger(trigger) - - return function def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + """ + Create a trigger for a Google Cloud Function. + + Currently, only HTTP triggers are explicitly created and associated here, + retrieving the function's HTTPS trigger URL. Library triggers are added + by default during function creation. + + :param function: The GCPFunction object. + :param trigger_type: The type of trigger to create. + :return: The created Trigger object. + :raises RuntimeError: If an unsupported trigger type is requested or if URL retrieval fails. + """ from sebs.gcp.triggers import HTTPTrigger if trigger_type == Trigger.TriggerType.HTTP: - - location = self.config.region - project_name = self.config.project_name - full_func_name = GCP.get_full_function_name(project_name, location, function.name) - self.logging.info(f"Function {function.name} - waiting for deployment...") - our_function_req = ( - self.function_client.projects().locations().functions().get(name=full_func_name) + gcp_function = cast(GCPFunction, function) + full_func_name_path = GCP.get_full_function_name( + self.config.project_name, self.config.region, gcp_function.name ) - deployed = False - while not deployed: - status_res = our_function_req.execute() - if status_res["status"] == "ACTIVE": - deployed = True - else: - time.sleep(3) - self.logging.info(f"Function {function.name} - deployed!") - invoke_url = status_res["httpsTrigger"]["url"] - - trigger = HTTPTrigger(invoke_url) + self.logging.info(f"Function {gcp_function.name} - waiting for HTTP trigger URL...") + + # Retry mechanism to get the function details, as it might take time to become ACTIVE + # and for httpsTrigger to be populated. + MAX_RETRIES = 12 # Approx 1 minute with increasing sleep + invoke_url = None + for attempt in range(MAX_RETRIES): + try: + func_details_req = ( + self.function_client.projects().locations().functions().get(name=full_func_name_path) + ) + func_details = func_details_req.execute() + if func_details.get("status") == "ACTIVE" and func_details.get("httpsTrigger", {}).get("url"): + invoke_url = func_details["httpsTrigger"]["url"] + self.logging.info(f"Function {gcp_function.name} HTTP trigger URL: {invoke_url}") + break + else: + self.logging.info(f"Function {gcp_function.name} not yet active or URL not available (attempt {attempt+1}/{MAX_RETRIES}). 
Status: {func_details.get('status')}") + time.sleep(5 + attempt) # Simple backoff + except HttpError as e: + self.logging.warning(f"Error getting function details for {gcp_function.name} (attempt {attempt+1}/{MAX_RETRIES}): {e}") + time.sleep(5 + attempt) # Simple backoff + + if not invoke_url: + raise RuntimeError(f"Could not retrieve HTTP trigger URL for function {gcp_function.name} after {MAX_RETRIES} attempts.") + + http_trigger = HTTPTrigger(invoke_url) + http_trigger.logging_handlers = self.logging_handlers + gcp_function.add_trigger(http_trigger) # Add to the function object + self.cache_client.update_function(gcp_function) # Update cache with the new trigger + return http_trigger + elif trigger_type == Trigger.TriggerType.LIBRARY: + # Library triggers are typically added during function creation/deserialization + # and don't require a separate cloud resource creation step here. + # If one needs to be dynamically added, ensure it's correctly associated. + existing_lib_triggers = function.triggers(Trigger.TriggerType.LIBRARY) + if existing_lib_triggers: + return existing_lib_triggers[0] # Return existing if found + else: + # This case should ideally be handled by ensuring LibraryTrigger is added when func is created/loaded + from sebs.gcp.triggers import LibraryTrigger + self.logging.warning(f"Dynamically adding LibraryTrigger for {function.name}, usually added at creation.") + lib_trigger = LibraryTrigger(function.name, self) + lib_trigger.logging_handlers = self.logging_handlers + function.add_trigger(lib_trigger) + self.cache_client.update_function(function) + return lib_trigger + else: - raise RuntimeError("Not supported!") + raise RuntimeError(f"Unsupported trigger type {trigger_type.value} for GCP.") - trigger.logging_handlers = self.logging_handlers - function.add_trigger(trigger) - self.cache_client.update_function(function) - return trigger def cached_function(self, function: Function): + """ + Configure a cached GCPFunction instance. - from sebs.faas.function import Trigger - from sebs.gcp.triggers import LibraryTrigger + Sets up logging handlers for its library triggers and associates the + deployment client. + + :param function: The GCPFunction object retrieved from cache. + """ + from sebs.faas.function import Trigger # Already imported at top level + from sebs.gcp.triggers import LibraryTrigger # Already imported at top level for trigger in function.triggers(Trigger.TriggerType.LIBRARY): gcp_trigger = cast(LibraryTrigger, trigger) @@ -358,185 +516,243 @@ def update_function( container_deployment: bool, container_uri: str, ): - + """ + Update an existing Google Cloud Function with new code and/or configuration. + + The function's code package is uploaded to Cloud Storage, and then the + function is patched with the new source URL and any updated settings + (memory, timeout, environment variables). Waits for the update operation + to complete. + + :param function: The GCPFunction object to update. + :param code_package: Benchmark object with new code and configuration. + :param container_deployment: Flag for container deployment (not supported for GCP). + :param container_uri: Container URI (not used). + :raises NotImplementedError: If container_deployment is True. + :raises RuntimeError: If the function update fails after multiple retries. 
+ """ if container_deployment: raise NotImplementedError("Container deployment is not supported in GCP") - function = cast(GCPFunction, function) - language_runtime = code_package.language_version - - function_cfg = FunctionConfig.from_benchmark(code_package) - architecture = function_cfg.architecture.value - code_package_name = os.path.basename(code_package.code_location) + gcp_function = cast(GCPFunction, function) + language_runtime_version = code_package.language_version + function_cfg = FunctionConfig.from_benchmark(code_package) # Get latest config from benchmark + target_architecture = function_cfg.architecture.value + + # Upload new code package storage = cast(GCPStorage, self._system_resources.get_storage()) - code_package_name = f"{architecture}-{code_package_name}" - - bucket = function.code_bucket(code_package.benchmark, storage) - storage.upload(bucket, code_package.code_location, code_package_name) - - envs = self._generate_function_envs(code_package) + base_code_package_name = os.path.basename(code_package.code_location) + gcs_code_object_name = f"{target_architecture}-{base_code_package_name}" + + # Ensure the function has a bucket associated, or get default deployment bucket + deployment_bucket_name = gcp_function.code_bucket(code_package.benchmark, storage) + if not deployment_bucket_name: # Should not happen if function was created properly + raise RuntimeError(f"No deployment bucket found for function {gcp_function.name}") + + gcs_code_prefix = os.path.join(code_package.benchmark, gcs_code_object_name) + storage.upload(deployment_bucket_name, code_package.code_location, gcs_code_prefix) + source_archive_url = f"gs://{deployment_bucket_name}/{gcs_code_prefix}" + self.logging.info(f"Uploaded new code package to {source_archive_url}") + + environment_variables = self._generate_function_envs(code_package) + # Ensure existing envs are preserved if not overridden + environment_variables = self._update_envs( + GCP.get_full_function_name(self.config.project_name, self.config.region, gcp_function.name), + environment_variables + ) + gcp_runtime_str = code_package.language_name + language_runtime_version.replace(".", "") - self.logging.info(f"Uploaded new code package to {bucket}/{code_package_name}") - full_func_name = GCP.get_full_function_name( - self.config.project_name, self.config.region, function.name + full_func_name_path = GCP.get_full_function_name( + self.config.project_name, self.config.region, gcp_function.name ) - req = ( + patch_body = { + "name": full_func_name_path, # Name is required in body for patch by some APIs, though part of URL + "entryPoint": "handler", + "runtime": gcp_runtime_str, + "availableMemoryMb": function_cfg.memory, # Use updated config + "timeout": f"{function_cfg.timeout}s", # Use updated config + # HTTP trigger settings should ideally be preserved or re-applied if necessary. + # Assuming httpsTrigger: {} is sufficient if it means "keep existing or default HTTP trigger". + # If specific HTTP settings were changed, they'd need to be included. 
+ "httpsTrigger": {}, + "sourceArchiveUrl": source_archive_url, + "environmentVariables": environment_variables, + } + + patch_request = ( self.function_client.projects() .locations() .functions() - .patch( - name=full_func_name, - body={ - "name": full_func_name, - "entryPoint": "handler", - "runtime": code_package.language_name + language_runtime.replace(".", ""), - "availableMemoryMb": function.config.memory, - "timeout": str(function.config.timeout) + "s", - "httpsTrigger": {}, - "sourceArchiveUrl": "gs://" + bucket + "/" + code_package_name, - "environmentVariables": envs, - }, - ) + .patch(name=full_func_name_path, body=patch_body) ) - res = req.execute() - versionId = res["metadata"]["versionId"] - retries = 0 - last_version = -1 - while retries < 100: - is_deployed, last_version = self.is_deployed(function.name, versionId) - if not is_deployed: - time.sleep(5) - retries += 1 - else: - break - if retries > 0 and retries % 10 == 0: - self.logging.info(f"Waiting for function deployment, {retries} retries.") - if retries == 100: - raise RuntimeError( - "Failed to publish new function code after 10 attempts. " - f"Version {versionId} has not been published, last version {last_version}." - ) + + operation = patch_request.execute() + self.logging.info(f"Function {gcp_function.name} update initiated. Operation: {operation.get('name')}") + self._wait_for_operation_done(patch_request) # Helper to wait self.logging.info("Published new function code and configuration.") + + # Update local function object's hash and config + gcp_function.code_package_hash = code_package.hash + gcp_function._cfg = function_cfg # Update the config object itself - def _update_envs(self, full_function_name: str, envs: dict) -> dict: - get_req = ( - self.function_client.projects().locations().functions().get(name=full_function_name) - ) - response = get_req.execute() + def _update_envs(self, full_function_name: str, envs: dict) -> dict: + """ + Merge new environment variables with existing ones for a function. - # preserve old variables while adding new ones. - # but for conflict, we select the new one - if "environmentVariables" in response: - envs = {**response["environmentVariables"], **envs} + Fetches the function's current configuration to retrieve existing + environment variables, then merges them with the provided `envs`. + New values in `envs` will overwrite existing ones if keys conflict. + :param full_function_name: The fully qualified name of the function. + :param envs: Dictionary of new or updated environment variables. + :return: Merged dictionary of environment variables. + """ + try: + get_req = ( + self.function_client.projects().locations().functions().get(name=full_function_name) + ) + response = get_req.execute() + if "environmentVariables" in response: + return {**response["environmentVariables"], **envs} + except HttpError as e: + self.logging.warning(f"Could not retrieve existing environment variables for {full_function_name}: {e}. Proceeding with provided envs only.") return envs def _generate_function_envs(self, code_package: Benchmark) -> dict: + """ + Generate basic environment variables for a function based on benchmark requirements. + + Currently sets `NOSQL_STORAGE_DATABASE` if the benchmark uses NoSQL storage. + :param code_package: The Benchmark object. + :return: Dictionary of environment variables. 
+ """ envs = {} if code_package.uses_nosql: - - db = ( - cast(GCPSystemResources, self._system_resources) - .get_nosql_storage() - .benchmark_database(code_package.benchmark) - ) - envs["NOSQL_STORAGE_DATABASE"] = db - + # Ensure NoSQL storage is initialized to get database name + nosql_storage = cast(GCPSystemResources, self._system_resources).get_nosql_storage() + db_name = nosql_storage.benchmark_database(code_package.benchmark) + envs["NOSQL_STORAGE_DATABASE"] = db_name return envs def update_function_configuration( self, function: Function, code_package: Benchmark, env_variables: dict = {} ): + """ + Update the configuration (memory, timeout, environment variables) of an existing GCP function. - assert code_package.has_input_processed - - function = cast(GCPFunction, function) - full_func_name = GCP.get_full_function_name( - self.config.project_name, self.config.region, function.name - ) + Patches the function with new settings. Waits for the update operation to complete. - envs = self._generate_function_envs(code_package) - envs = {**envs, **env_variables} - # GCP might overwrite existing variables - # If we modify them, we need to first read existing ones and append. - if len(envs) > 0: - envs = self._update_envs(full_func_name, envs) - - if len(envs) > 0: - - req = ( - self.function_client.projects() - .locations() - .functions() - .patch( - name=full_func_name, - updateMask="availableMemoryMb,timeout,environmentVariables", - body={ - "availableMemoryMb": function.config.memory, - "timeout": str(function.config.timeout) + "s", - "environmentVariables": envs, - }, - ) - ) + :param function: The GCPFunction object to update. + :param code_package: Benchmark object providing baseline config (memory, timeout). + :param env_variables: Additional environment variables to merge with generated ones. + :return: The version ID of the updated function. + :raises RuntimeError: If the configuration update fails. 
+ """ + assert code_package.has_input_processed # Ensure benchmark input processing is done - else: + gcp_function = cast(GCPFunction, function) + full_func_name_path = GCP.get_full_function_name( + self.config.project_name, self.config.region, gcp_function.name + ) - req = ( - self.function_client.projects() - .locations() - .functions() - .patch( - name=full_func_name, - updateMask="availableMemoryMb,timeout", - body={ - "availableMemoryMb": function.config.memory, - "timeout": str(function.config.timeout) + "s", - }, - ) - ) + # Generate base envs from benchmark, then merge with explicitly provided ones + current_envs = self._generate_function_envs(code_package) + merged_envs = {**current_envs, **env_variables} + # Ensure existing envs are preserved if not overridden + final_envs = self._update_envs(full_func_name_path, merged_envs) + + # Use the function's current config as base, potentially modified by is_config_changed + updated_config = gcp_function.config + + patch_body: Dict[str, Any] = { + "availableMemoryMb": updated_config.memory, + "timeout": f"{updated_config.timeout}s", + } + update_mask_parts = ["availableMemoryMb", "timeout"] - res = req.execute() - versionId = res["metadata"]["versionId"] - retries = 0 - last_version = -1 - while retries < 100: - is_deployed, last_version = self.is_deployed(function.name, versionId) - if not is_deployed: - time.sleep(5) - retries += 1 - else: - break - if retries > 0 and retries % 10 == 0: - self.logging.info(f"Waiting for function deployment, {retries} retries.") - if retries == 100: - raise RuntimeError( - "Failed to publish new function code after 10 attempts. " - f"Version {versionId} has not been published, last version {last_version}." - ) - self.logging.info("Published new function configuration.") + if final_envs: # Only include environmentVariables if there are some to set/update + patch_body["environmentVariables"] = final_envs + update_mask_parts.append("environmentVariables") + + update_mask = ",".join(update_mask_parts) + patch_request = ( + self.function_client.projects() + .locations() + .functions() + .patch(name=full_func_name_path, updateMask=update_mask, body=patch_body) + ) + + operation = patch_request.execute() + self.logging.info(f"Function {gcp_function.name} configuration update initiated. Operation: {operation.get('name')}") + self._wait_for_operation_done(patch_request) # Helper to wait + + # Extract versionId from the operation's response metadata if available, + # or re-fetch function details to get the new versionId. + # The structure of 'operation' can vary. A common pattern is that 'operation.metadata.target' + # might contain the function resource name, and 'operation.response' (if operation is done) + # or a final GET on the function would yield the new versionId. + # For simplicity, let's assume we might need to re-fetch. + func_details = self.function_client.projects().locations().functions().get(name=full_func_name_path).execute() + versionId = func_details.get("versionId", "unknown") # Fallback if versionId not found + + self.logging.info(f"Published new function configuration for {gcp_function.name}, new version ID: {versionId}.") return versionId + @staticmethod - def get_full_function_name(project_name: str, location: str, func_name: str): + def get_full_function_name(project_name: str, location: str, func_name: str) -> str: + """ + Construct the fully qualified function name for GCP API calls. 
+
+        Format: `projects/{project_name}/locations/{location}/functions/{func_name}`
+
+        :param project_name: Google Cloud project ID.
+        :param location: GCP region (e.g., "us-central1").
+        :param func_name: The short name of the function.
+        :return: Fully qualified function name.
+        """
         return f"projects/{project_name}/locations/{location}/functions/{func_name}"
 
-    def prepare_experiment(self, benchmark):
-        logs_bucket = self._system_resources.get_storage().add_output_bucket(
-            benchmark, suffix="logs"
+    def prepare_experiment(self, benchmark: Benchmark) -> str:
+        """
+        Prepare resources for an experiment, specifically the logs bucket.
+
+        Ensures a bucket for storing experiment logs is available via the storage manager.
+
+        :param benchmark: The Benchmark object for which to prepare.
+        :return: The name of the logs bucket.
+        """
+        # Logs are written to the shared experiments bucket; a per-benchmark
+        # output prefix, if needed, is left to the storage layer.
+        logs_bucket = self._system_resources.get_storage().get_bucket(
+            Resources.StorageBucketType.EXPERIMENTS
         )
         return logs_bucket
 
     def shutdown(self) -> None:
+        """Shutdown the GCP system client and update cache."""
        cast(GCPSystemResources, self._system_resources).shutdown()
        super().shutdown()
 
     def download_metrics(
         self, function_name: str, start_time: int, end_time: int, requests: dict, metrics: dict
     ):
+        """
+        Download performance metrics for function invocations from Google Cloud Monitoring and Logging.
+
+        Queries Cloud Logging for execution times and Cloud Monitoring for metrics such as
+        memory usage and network egress.
+
+        :param function_name: Name of the Google Cloud Function.
+        :param start_time: Start timestamp (Unix epoch) for querying metrics.
+        :param end_time: End timestamp (Unix epoch) for querying metrics.
+        :param requests: Dictionary of request IDs to ExecutionResult objects to be updated.
+        :param metrics: Dictionary to store additional aggregated metrics.
+        """
         from google.api_core import exceptions
         from time import sleep
 
@@ -550,235 +766,338 @@ def wrapper(gen):
                     self.logging.info("Google Cloud resources exhausted, sleeping 30s")
                     sleep(30)
 
-        """
-            Use GCP's logging system to find execution time of each function invocation.
-
-            There shouldn't be problem of waiting for complete results,
-            since logs appear very quickly here.
-        """
+        # Fetch execution times from Cloud Logging.
         import google.cloud.logging as gcp_logging
 
+        # This logger name matches 1st gen Cloud Functions; 2nd gen functions log
+        # through Cloud Run ('run.googleapis.com%2Fstdout' / '%2Fstderr') instead.
         logging_client = gcp_logging.Client()
         logger = logging_client.logger("cloudfunctions.googleapis.com%2Fcloud-functions")
 
-        """
-            GCP accepts only single date format: 'YYYY-MM-DDTHH:MM:SSZ'.
-            Thus, we first convert timestamp to UTC timezone.
-            Then, we generate correct format.
-
-            Add 1 second to end time to ensure that removing
-            milliseconds doesn't affect query.
- """ - timestamps = [] - for timestamp in [start_time, end_time + 1]: - utc_date = datetime.fromtimestamp(timestamp, tz=timezone.utc) - timestamps.append(utc_date.strftime("%Y-%m-%dT%H:%M:%SZ")) + # Format timestamps for GCP Logging query (RFC3339 UTC 'Z') + start_utc = datetime.fromtimestamp(start_time, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + # Add 1 sec to end_time to ensure all logs within the original second are included + end_utc = datetime.fromtimestamp(end_time + 1, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") - invocations = logger.list_entries( - filter_=( - f'resource.labels.function_name = "{function_name}" ' - f'timestamp >= "{timestamps[0]}" ' - f'timestamp <= "{timestamps[1]}"' - ), - page_size=1000, + log_filter = ( + f'resource.labels.function_name = "{function_name}" ' + f'AND resource.labels.region = "{self.config.region}" ' # Filter by region + f'AND timestamp >= "{start_utc}" AND timestamp <= "{end_utc}"' ) + self.logging.debug(f"GCP Log filter: {log_filter}") + + log_entries = logger.list_entries(filter_=log_filter, page_size=1000) + invocations_processed = 0 - if hasattr(invocations, "pages"): - pages = list(wrapper(invocations.pages)) - else: - pages = [list(wrapper(invocations))] - entries = 0 - for page in pages: # invocations.pages: - for invoc in page: - entries += 1 - if "execution took" in invoc.payload: - execution_id = invoc.labels["execution_id"] - # might happen that we get invocation from another experiment - if execution_id not in requests: + total_log_entries = 0 + + # Iterate through log entries using the wrapper for retries on ResourceExhausted + # The .pages attribute might not exist directly or behave as expected with the wrapper. + # A common pattern is to iterate directly on list_entries if it's an iterator, + # or handle pagination if it returns a Pager object. + # Assuming list_entries returns an iterable/pager that wrapper can handle. + # For robustness, directly iterate and handle potential pagination if `wrapper` isn't sufficient. + + # Simplified iteration for clarity, actual pagination might be needed if `wrapper` doesn't cover it. + # The original code had a complex pagination handling; google-cloud-logging typically returns an iterator. 
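As a point of comparison for the pagination handling discussed above, a minimal sketch of the plain-iterator idiom, assuming a google-cloud-logging `Logger` and the `log_filter` string built above (the helper name is illustrative):

import re

def collect_execution_times(logger, log_filter: str) -> dict:
    """Map execution_id -> reported execution time in ms from Cloud Functions logs."""
    times = {}
    # list_entries returns a lazy iterator; additional pages are fetched on demand.
    for entry in logger.list_entries(filter_=log_filter, page_size=1000):
        payload = str(entry.payload)
        if "execution took" not in payload:
            continue
        match = re.search(r"(\d+) ms", payload)
        execution_id = entry.labels.get("execution_id")
        if match and execution_id:
            times[execution_id] = int(match.group(1))
    return times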
+ page_iterator = log_entries.pages if hasattr(log_entries, "pages") else [log_entries] + for page in page_iterator: + for entry in wrapper(iter(page)): # Apply wrapper to iterator of the page + total_log_entries += 1 + if "execution took" in entry.payload: # Standard log message for execution time + execution_id = entry.labels.get("execution_id") + if not execution_id: + self.logging.warning(f"Log entry missing execution_id: {entry.payload}") continue - # find number of miliseconds - regex_result = re.search(r"\d+ ms", invoc.payload) - assert regex_result - exec_time = regex_result.group().split()[0] - # convert into microseconds - requests[execution_id].provider_times.execution = int(exec_time) * 1000 - invocations_processed += 1 + + if execution_id in requests: + # Extract execution time in milliseconds + match = re.search(r"(\d+) ms", entry.payload) + if match: + exec_time_ms = int(match.group(1)) + requests[execution_id].provider_times.execution = exec_time_ms * 1000 # Convert to microseconds + invocations_processed += 1 + else: + self.logging.warning(f"Could not parse execution time from log: {entry.payload}") + # else: + # self.logging.debug(f"Execution ID {execution_id} from logs not in tracked requests.") + self.logging.info( - f"GCP: Received {entries} entries, found time metrics for {invocations_processed} " + f"GCP: Processed {total_log_entries} log entries, " + f"found time metrics for {invocations_processed} " f"out of {len(requests.keys())} invocations." ) - """ - Use metrics to find estimated values for maximum memory used, active instances - and network traffic. - https://cloud.google.com/monitoring/api/metrics_gcp#gcp-cloudfunctions - """ + # Fetch metrics from Cloud Monitoring + monitoring_client = monitoring_v3.MetricServiceClient() + gcp_project_path = f"projects/{self.config.project_name}" # Corrected from common_project_path - # Set expected metrics here - available_metrics = ["execution_times", "user_memory_bytes", "network_egress"] - - client = monitoring_v3.MetricServiceClient() - project_name = client.common_project_path(self.config.project_name) - - end_time_nanos, end_time_seconds = math.modf(end_time) - start_time_nanos, start_time_seconds = math.modf(start_time) - - interval = monitoring_v3.TimeInterval( - { - "end_time": {"seconds": int(end_time_seconds) + 60}, - "start_time": {"seconds": int(start_time_seconds)}, - } + # Monitoring API expects interval end_time to be exclusive, start_time inclusive. + # Adding a small buffer to end_time for safety, e.g., 60 seconds. + monitoring_interval = monitoring_v3.TimeInterval( + end_time={"seconds": int(end_time) + 60}, # Ensure it's integer + start_time={"seconds": int(start_time)} # Ensure it's integer ) - for metric in available_metrics: - - metrics[metric] = [] - - list_request = monitoring_v3.ListTimeSeriesRequest( - name=project_name, - filter='metric.type = "cloudfunctions.googleapis.com/function/{}"'.format(metric), - interval=interval, - ) + # Metrics to query + # Note: 'network_egress' might be 'network/sent_bytes_count' or similar depending on function generation. + # 'execution_times' is often derived from logs, but a metric also exists. + # 'user_memory_bytes' is 'memory/usage'. + # Check official GCP metric names for Cloud Functions. 
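As an aside, a minimal Cloud Monitoring sketch of an aggregated query, assuming the `monitoring_v3.MetricServiceClient` created above; the metric type (`execution_count`), aligner, and helper name are chosen for illustration only:

from google.cloud import monitoring_v3

def count_executions(client, project_id: str, function_name: str, interval) -> int:
    """Sum per-minute execution counts for one function over the given interval."""
    aggregation = monitoring_v3.Aggregation(
        {
            "alignment_period": {"seconds": 60},
            "per_series_aligner": monitoring_v3.Aggregation.Aligner.ALIGN_SUM,
        }
    )
    series = client.list_time_series(
        request={
            "name": f"projects/{project_id}",
            "filter": (
                'metric.type = "cloudfunctions.googleapis.com/function/execution_count" '
                f'AND resource.labels.function_name = "{function_name}"'
            ),
            "interval": interval,
            "aggregation": aggregation,
        }
    )
    return sum(int(point.value.int64_value) for ts in series for point in ts.points)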
+ # Example: cloudfunctions.googleapis.com/function/execution_times + # cloudfunctions.googleapis.com/function/user_memory_bytes + # cloudfunctions.googleapis.com/function/sent_bytes_count (for egress) + + # Simplified metric list based on original, adjust names if needed for current GCP API + monitoring_metric_types = { + "execution_times": "cloudfunctions.googleapis.com/function/execution_times", + "user_memory_bytes": "cloudfunctions.googleapis.com/function/user_memory_bytes", + # "network_egress": "cloudfunctions.googleapis.com/function/sent_bytes_count" # Example + } - results = client.list_time_series(list_request) - for result in results: - if result.resource.labels.get("function_name") == function_name: - for point in result.points: - metrics[metric] += [ - { + for metric_key, metric_type_full in monitoring_metric_types.items(): + metrics[metric_key] = [] # Initialize list for this metric + try: + results = monitoring_client.list_time_series( + name=gcp_project_path, # Use 'name' for project path + filter=( + f'metric.type = "{metric_type_full}" AND ' + f'resource.labels.function_name = "{function_name}" AND ' + f'resource.labels.region = "{self.config.region}"' + ), + interval=monitoring_interval, + # Aggregation might be needed for some metrics, e.g. ALIGN_SUM or ALIGN_MEAN + # view=monitoring_v3.ListTimeSeriesRequest.TimeSeriesView.FULL # For detailed points + ) + for time_series in wrapper(results): # Apply wrapper for retries + # Assuming point.value.distribution_value for distribution metrics like execution_times + # or point.value.int64_value / double_value for gauge/cumulative + for point in time_series.points: + if hasattr(point.value, 'distribution_value'): + metrics[metric_key].append({ "mean_value": point.value.distribution_value.mean, "executions_count": point.value.distribution_value.count, - } - ] + # Add point interval if needed: point.interval.start_time, point.interval.end_time + }) + elif hasattr(point.value, 'int64_value'): + metrics[metric_key].append({"value": point.value.int64_value}) + elif hasattr(point.value, 'double_value'): + metrics[metric_key].append({"value": point.value.double_value}) + self.logging.info(f"Fetched {len(metrics[metric_key])} data points for metric {metric_key}.") + except Exception as e: + self.logging.error(f"Error fetching metric {metric_key}: {e}") + + + def _enforce_cold_start(self, function: Function, code_package: Benchmark) -> str: + """ + Attempt to enforce a cold start for a GCP function by updating its configuration. - def _enforce_cold_start(self, function: Function, code_package: Benchmark): + Increments a 'cold_start' environment variable. This change forces GCP + to create a new function instance version. + :param function: The GCPFunction to update. + :param code_package: The associated Benchmark object. + :return: The new version ID of the function after update. + """ self.cold_start_counter += 1 - new_version = self.update_function_configuration( + new_version_id = self.update_function_configuration( function, code_package, {"cold_start": str(self.cold_start_counter)} ) - - return new_version + return new_version_id # Return type changed to str as versionId is usually string def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): + """ + Enforce cold starts for a list of GCP functions. + + Updates each function's configuration to include a new 'cold_start' + environment variable value, then waits for all functions to be redeployed + with the new version. 
- new_versions = [] + :param functions: List of GCPFunction objects. + :param code_package: The associated Benchmark object. + """ + new_versions_map: Dict[str, str] = {} # Store func_name -> version_id for func in functions: - new_versions.append((self._enforce_cold_start(func, code_package), func)) - self.cold_start_counter -= 1 - - # verify deployment - undeployed_functions = [] - deployment_done = False - while not deployment_done: - for versionId, func in new_versions: - is_deployed, last_version = self.is_deployed(func.name, versionId) - if not is_deployed: - undeployed_functions.append((versionId, func)) - deployed = len(new_versions) - len(undeployed_functions) - self.logging.info(f"Redeployed {deployed} out of {len(new_versions)}") - if deployed == len(new_versions): - deployment_done = True - break - time.sleep(5) - new_versions = undeployed_functions - undeployed_functions = [] + version_id = self._enforce_cold_start(func, code_package) + new_versions_map[func.name] = version_id + # Original code decremented counter here, which seems counterintuitive if + # each function needs a *distinct* change to guarantee a new instance. + # Keeping it as is for now, but might need review if cold starts aren't forced. + + # Wait for all functions to be updated to their new versions + self.logging.info("Waiting for all functions to be redeployed for cold start enforcement...") + all_deployed = False + attempts = 0 + MAX_ATTEMPTS = 24 # e.g., 2 minutes if sleep is 5s + + while not all_deployed and attempts < MAX_ATTEMPTS: + all_deployed = True + for func_name, expected_version_id in new_versions_map.items(): + is_active, current_version_id_str = self.is_deployed(func_name) + # Version ID from API is int, expected_version_id from update_function_configuration is str + if not is_active or str(current_version_id_str) != expected_version_id: + all_deployed = False + self.logging.debug(f"Function {func_name} not yet updated to version {expected_version_id} (current: {current_version_id_str}, active: {is_active}).") + break + if not all_deployed: + attempts += 1 + self.logging.info(f"Waiting for function deployments... (attempt {attempts}/{MAX_ATTEMPTS})") + time.sleep(5) + + if not all_deployed: + self.logging.error("Failed to confirm deployment of all functions for cold start enforcement.") + else: + self.logging.info("All functions successfully redeployed for cold start.") - self.cold_start_counter += 1 + # Global counter incremented once after all operations for this batch + # self.cold_start_counter += 1 # Moved increment to _enforce_cold_start for per-function uniqueness def get_functions(self, code_package: Benchmark, function_names: List[str]) -> List["Function"]: + """ + Retrieve multiple function instances, ensuring they are deployed and active. - functions: List["Function"] = [] - undeployed_functions_before = [] + Calls `get_function` for each name, then waits until all are reported as active. + + :param code_package: The Benchmark object. + :param function_names: List of function names to retrieve. + :return: List of active Function objects. 
+ """ + functions: List[Function] = [] for func_name in function_names: func = self.get_function(code_package, func_name) functions.append(func) - undeployed_functions_before.append(func) - - # verify deployment - undeployed_functions = [] - deployment_done = False - while not deployment_done: - for func in undeployed_functions_before: - is_deployed, last_version = self.is_deployed(func.name) - if not is_deployed: - undeployed_functions.append(func) - deployed = len(undeployed_functions_before) - len(undeployed_functions) - self.logging.info(f"Deployed {deployed} out of {len(undeployed_functions_before)}") - if deployed == len(undeployed_functions_before): - deployment_done = True + + # Wait for all functions to be active + self.logging.info(f"Verifying deployment status for {len(functions)} functions...") + attempts = 0 + MAX_ATTEMPTS = 24 # e.g., 2 minutes + + functions_to_check = list(functions) + while attempts < MAX_ATTEMPTS and functions_to_check: + fully_deployed_functions = [] + for func in functions_to_check: + is_active, _ = self.is_deployed(func.name) + if is_active: + fully_deployed_functions.append(func) + + for deployed_func in fully_deployed_functions: + functions_to_check.remove(deployed_func) + + if not functions_to_check: # All functions are active break + + attempts += 1 + self.logging.info( + f"Waiting for {len(functions_to_check)} functions to become active... " + f"(attempt {attempts}/{MAX_ATTEMPTS}, remaining: {[f.name for f in functions_to_check]})" + ) time.sleep(5) - undeployed_functions_before = undeployed_functions - undeployed_functions = [] - self.logging.info(f"Waiting on {undeployed_functions_before}") + if functions_to_check: + self.logging.error(f"Failed to confirm active deployment for functions: {[f.name for f in functions_to_check]}") + else: + self.logging.info("All requested functions are active.") + return functions + def is_deployed(self, func_name: str, versionId: int = -1) -> Tuple[bool, int]: - name = GCP.get_full_function_name(self.config.project_name, self.config.region, func_name) + """ + Check if a Google Cloud Function is deployed and active, optionally for a specific version. + + :param func_name: The short name of the function. + :param versionId: Optional version ID to check against. If -1, checks current status. + :return: Tuple (is_active_and_matches_version: bool, current_version_id: int). + The boolean is True if status is "ACTIVE" and versionId matches (if provided). 
+ """ + full_func_name_path = GCP.get_full_function_name( + self.config.project_name, self.config.region, func_name + ) function_client = self.get_function_client() - status_req = function_client.projects().locations().functions().get(name=name) - status_res = status_req.execute() - if versionId == -1: - return (status_res["status"] == "ACTIVE", status_res["versionId"]) - else: - return (status_res["versionId"] == versionId, status_res["versionId"]) + try: + status_req = function_client.projects().locations().functions().get(name=full_func_name_path) + status_res = status_req.execute() + current_version_id = int(status_res.get("versionId", 0)) # versionId is string in response + is_active = status_res.get("status") == "ACTIVE" + + if versionId == -1: # Check only if active + return is_active, current_version_id + else: # Check if active AND version matches + return is_active and current_version_id == versionId, current_version_id + except HttpError as e: + self.logging.warning(f"Error checking deployment status for {func_name}: {e}") + return False, -1 # Indicate error or not found def deployment_version(self, func: Function) -> int: - name = GCP.get_full_function_name(self.config.project_name, self.config.region, func.name) + """ + Get the deployed version ID of a Google Cloud Function. + + :param func: The Function object. + :return: The integer version ID of the deployed function. + """ + full_func_name_path = GCP.get_full_function_name( + self.config.project_name, self.config.region, func.name + ) function_client = self.get_function_client() - status_req = function_client.projects().locations().functions().get(name=name) + status_req = function_client.projects().locations().functions().get(name=full_func_name_path) status_res = status_req.execute() - return int(status_res["versionId"]) - - # @abstractmethod - # def get_invocation_error(self, function_name: str, - # start_time: int, end_time: int): - # pass - - # @abstractmethod - # def download_metrics(self): - # pass - - """ - Helper method for recursive_zip - - :param base_directory: path to directory to be zipped - :param path: path to file of subdirectory to be zipped - :param archive: ZipFile object - """ + return int(status_res.get("versionId", 0)) # versionId is string @staticmethod def helper_zip(base_directory: str, path: str, archive: zipfile.ZipFile): + """ + Helper function for `recursive_zip` to add files and directories to a zip archive. + + Recursively adds contents of `path` to `archive`, maintaining relative paths + from `base_directory`. + + :param base_directory: The root directory from which relative paths are calculated. + :param path: The current directory or file path to add to the archive. + :param archive: The `zipfile.ZipFile` object to write to. 
+        """
         paths = os.listdir(path)
-        for p in paths:
-            directory = os.path.join(path, p)
-            if os.path.isdir(directory):
-                GCP.helper_zip(base_directory, directory, archive)
+        for p_item in paths:
+            current_path = os.path.join(path, p_item)
+            if os.path.isdir(current_path):
+                GCP.helper_zip(base_directory, current_path, archive)
             else:
-                if directory != archive.filename:  # prevent form including itself
-                    archive.write(directory, os.path.relpath(directory, base_directory))
-
-    """
-        https://gist.github.com/felixSchl/d38b455df8bf83a78d3d
+                # Never add the archive itself to the archive
+                if os.path.abspath(current_path) != os.path.abspath(archive.filename):
+                    archive.write(current_path, os.path.relpath(current_path, base_directory))
 
-        Zip directory with relative paths given an absolute path
-        If the archive exists only new files are added and updated.
-        If the archive does not exist a new one is created.
+    @staticmethod
+    def recursive_zip(directory: str, archname: str) -> bool:
+        """
+        Create a zip archive of a directory with relative paths.
 
-        :param path: absolute path to the directory to be zipped
-        :param archname: path to the zip file
-    """
+        If the archive file already exists, it is overwritten.
+        Based on https://gist.github.com/felixSchl/d38b455df8bf83a78d3d
 
-    @staticmethod
-    def recursive_zip(directory: str, archname: str):
-        archive = zipfile.ZipFile(archname, "w", zipfile.ZIP_DEFLATED, compresslevel=9)
-        if os.path.isdir(directory):
-            GCP.helper_zip(directory, directory, archive)
-        else:
-            # if the passed directory is actually a file we just add the file to the zip archive
-            _, name = os.path.split(directory)
-            archive.write(directory, name)
-        archive.close()
+        :param directory: Absolute path to the directory to be zipped.
+        :param archname: Path to the output zip file.
+        :return: True if successful.
+        """
+        # zipfile.ZipFile accepts compresslevel together with ZIP_DEFLATED on Python 3.7+,
+        # as the original code used; the default deflate level is sufficient here.
+        compression_method = zipfile.ZIP_DEFLATED
+ + archive = zipfile.ZipFile(archname, "w", compression=compression_method) + try: + if os.path.isdir(directory): + GCP.helper_zip(directory, directory, archive) + else: + # if the passed directory is actually a file, just add the file + _, name = os.path.split(directory) + archive.write(directory, name) + finally: + archive.close() return True diff --git a/sebs/gcp/resources.py b/sebs/gcp/resources.py index 0a7d5c14d..18cf6d08c 100644 --- a/sebs/gcp/resources.py +++ b/sebs/gcp/resources.py @@ -13,12 +13,18 @@ class GCPSystemResources(SystemResources): + """ + Manages system-level resources for Google Cloud Platform (GCP), + such as Cloud Storage, Datastore, and the GCloud CLI wrapper. + """ @staticmethod def typename() -> str: + """Return the type name of the system resources class.""" return "GCP.SystemResources" @property def config(self) -> GCPConfig: + """Return the GCP-specific configuration.""" return cast(GCPConfig, self._config) def __init__( @@ -29,23 +35,34 @@ def __init__( docker_client: docker.client, logger_handlers: LoggingHandlers, ): + """ + Initialize GCPSystemResources. + + :param system_config: SeBS system configuration. + :param config: GCP-specific configuration. + :param cache_client: Cache client instance. + :param docker_client: Docker client instance. + :param logger_handlers: Logging handlers. + """ super().__init__(config, cache_client, docker_client) self._logging_handlers = logger_handlers self._storage: Optional[GCPStorage] = None self._nosql_storage: Optional[Datastore] = None self._cli_instance: Optional[GCloudCLI] = None + self._cli_instance_stop: bool = False # Flag to indicate if this instance owns the CLI self._system_config = system_config - """ - Access persistent storage instance. - It might be a remote and truly persistent service (AWS S3, Azure Blob..), - or a dynamically allocated local instance. + def get_storage(self, replace_existing: Optional[bool] = None) -> GCPStorage: + """ + Get or initialize the GCP Cloud Storage client. - :param replace_existing: replace benchmark input data if exists already - """ + Creates a GCPStorage client instance if it doesn't exist. - def get_storage(self, replace_existing: Optional[bool] = None) -> GCPStorage: + :param replace_existing: If True, replace existing files in input buckets. + Defaults to False if None. + :return: GCPStorage instance. + """ if not self._storage: self._storage = GCPStorage( self.config.region, @@ -59,6 +76,14 @@ def get_storage(self, replace_existing: Optional[bool] = None) -> GCPStorage: return self._storage def get_nosql_storage(self) -> Datastore: + """ + Get or initialize the GCP Datastore client. + + Creates a Datastore client instance if it doesn't exist. + Requires GCloud CLI for initial setup if resources are not cached. + + :return: Datastore instance. + """ if not self._nosql_storage: self._nosql_storage = Datastore( self.cli_instance, self._cache_client, self.config.resources, self.config.region @@ -67,19 +92,42 @@ def get_nosql_storage(self) -> Datastore: @property def cli_instance(self) -> GCloudCLI: + """ + Get or initialize the GCloud CLI wrapper instance. + + If the CLI instance doesn't exist, it's created, and a login is performed + using the configured credentials and project name. This instance will be + stopped on shutdown if it was created by this method. + + :return: GCloudCLI instance. 
+ """ if self._cli_instance is None: self._cli_instance = GCloudCLI( self.config.credentials, self._system_config, self._docker_client ) - self._cli_instance_stop = True + self._cli_instance_stop = True # This instance manages the CLI lifecycle self._cli_instance.login(self.config.credentials.project_name) return self._cli_instance def initialize_cli(self, cli: GCloudCLI): + """ + Initialize with an externally managed GCloud CLI instance. + + This allows sharing a single GCloudCLI Docker container. The provided + CLI instance will not be stopped on shutdown by this GCPSystemResources instance. + + :param cli: An existing GCloudCLI instance. + """ self._cli_instance = cli - self._cli_instance_stop = False + self._cli_instance_stop = False # This instance does not manage the CLI lifecycle def shutdown(self) -> None: + """ + Shutdown the GCP system resources. + + Stops the GCloud CLI Docker container if it was started and is managed by + this instance. + """ if self._cli_instance and self._cli_instance_stop: self._cli_instance.shutdown() diff --git a/sebs/gcp/storage.py b/sebs/gcp/storage.py index c578966f1..881c9246c 100644 --- a/sebs/gcp/storage.py +++ b/sebs/gcp/storage.py @@ -12,104 +12,254 @@ class GCPStorage(PersistentStorage): + """Google Cloud Storage persistent storage implementation.""" @staticmethod def typename() -> str: + """Return the type name of the storage implementation.""" return "GCP.GCPStorage" @staticmethod def deployment_name(): + """Return the deployment name for GCP (gcp).""" return "gcp" @property def replace_existing(self) -> bool: + """Flag indicating whether to replace existing files in buckets.""" return self._replace_existing @replace_existing.setter def replace_existing(self, val: bool): + """Set the flag for replacing existing files.""" self._replace_existing = val def __init__( self, region: str, cache_client: Cache, resources: Resources, replace_existing: bool ): + """ + Initialize GCPStorage client. + + :param region: GCP region (used by parent class, not directly by Storage client). + :param cache_client: Cache client instance. + :param resources: Cloud resources configuration. + :param replace_existing: Flag to replace existing files in buckets. + """ super().__init__(region, cache_client, resources, replace_existing) - self.replace_existing = replace_existing + # self.replace_existing is already set by super().__init__ if PersistentStorage handles it + # or it might be intended to be explicitly set here. + # Assuming parent class handles it based on the call. self.client = gcp_storage.Client() - self.cached = False + self.cached = False # State for benchmark_data caching logic def correct_name(self, name: str) -> str: + """ + Return the corrected bucket name (no correction typically needed for GCP Storage, + but adheres to provider-specific rules if any). + + :param name: Original bucket name. + :return: Corrected bucket name. + """ + # GCP bucket names have their own rules (globally unique, DNS compliant). + # This method could enforce SeBS specific parts or global uniqueness checks if needed. + # For now, assuming names passed are either compliant or corrected by `_create_bucket`. return name - def _create_bucket(self, name, buckets: List[str] = [], randomize_name: bool = False): - found_bucket = False - for bucket_name in buckets: - if name in bucket_name: - found_bucket = True - break + def _create_bucket( + self, name: str, buckets: List[str] = [], randomize_name: bool = False + ) -> str: + """ + Create a Google Cloud Storage bucket. 
- if not found_bucket: + Checks if a bucket with a similar name (if `name` is a prefix) already exists + in the provided `buckets` list. If `randomize_name` is True, appends a + random string to make the name unique. - if randomize_name: - random_name = str(uuid.uuid4())[0:16] - bucket_name = "{}-{}".format(name, random_name).replace(".", "_") - else: - bucket_name = name + :param name: Desired base name for the bucket. + :param buckets: List of existing bucket names to check against (prefix match). + :param randomize_name: If True, append a random string to the bucket name. + :return: Name of the created or existing bucket. + """ + # Check if a bucket with `name` as a prefix already exists in the provided list + # This logic is a bit different from just checking `name in buckets`. + # It implies `buckets` might contain names like `name-suffix`. + # If an exact match or suitable existing bucket is found, use it. + bucket_to_use = name + found_existing = False + if not randomize_name: # Only check for existing if not randomizing + for existing_bucket_name in buckets: + if name == existing_bucket_name or existing_bucket_name.startswith(f"{name}-"): + logging.info(f"Bucket similar to {name} (found {existing_bucket_name}) already listed, using it.") + bucket_to_use = existing_bucket_name + found_existing = True + break + + if found_existing and self.client.bucket(bucket_to_use).exists(): + logging.info(f"Bucket {bucket_to_use} already exists, skipping creation.") + return bucket_to_use - self.client.create_bucket(bucket_name) - logging.info("Created bucket {}".format(bucket_name)) - return bucket_name + # If not found or needs randomization, create a new one + if randomize_name: + random_suffix = str(uuid.uuid4())[0:16] + # GCP bucket names cannot contain dots, replace with underscore or hyphen. + # Hyphen is more common in DNS-style names. + bucket_to_use = f"{name}-{random_suffix}".replace(".", "-").lower() else: - logging.info("Bucket {} for {} already exists, skipping.".format(bucket_name, name)) - return bucket_name + # Ensure the non-randomized name is also compliant + bucket_to_use = name.replace(".", "-").lower() + + try: + created_bucket = self.client.create_bucket(bucket_to_use, location=self.region) + logging.info(f"Created bucket {created_bucket.name} in region {self.region}.") + return created_bucket.name + except exceptions.Conflict: + logging.info(f"Bucket {bucket_to_use} already exists (Conflict during creation). Using existing.") + return bucket_to_use + except Exception as e: + logging.error(f"Failed to create bucket {bucket_to_use}: {e}") + raise def download(self, bucket_name: str, key: str, filepath: str) -> None: - logging.info("Download {}:{} to {}".format(bucket_name, key, filepath)) + """ + Download an object from a Google Cloud Storage bucket to a local file. + + :param bucket_name: Name of the GCS bucket. + :param key: Object key (path within the bucket). + :param filepath: Local path to save the downloaded file. + """ + logging.info(f"Download {bucket_name}:{key} to {filepath}") bucket_instance = self.client.bucket(bucket_name) blob = bucket_instance.blob(key) blob.download_to_filename(filepath) def upload(self, bucket_name: str, filepath: str, key: str): - logging.info("Upload {} to {}".format(filepath, bucket_name)) + """ + Upload a file to a specified Google Cloud Storage bucket. 
+ + Sets a chunk size for resumable uploads and includes a workaround + for potential connection timeouts with large files by adjusting + `_MAX_MULTIPART_SIZE` (though this is an internal variable and might + not be stable across library versions). + + :param bucket_name: Name of the GCS bucket. + :param filepath: Local path to the file. + :param key: Object key (path within the bucket). + """ + logging.info(f"Upload {filepath} to {bucket_name}/{key}") bucket_instance = self.client.bucket(bucket_name) - blob = bucket_instance.blob(key, chunk_size=4 * 1024 * 1024) - gcp_storage.blob._MAX_MULTIPART_SIZE = 5 * 1024 * 1024 # workaround for connection timeout + blob = bucket_instance.blob(key, chunk_size=4 * 1024 * 1024) # Resumable uploads + + # Workaround for potential connection timeouts, may not be needed or could change. + # Accessing internal library variables like _MAX_MULTIPART_SIZE is risky. + # Consider if this is still necessary or if official ways to handle timeouts exist. + # For now, keeping original logic but noting its potential fragility. + if hasattr(gcp_storage.blob, '_MAX_MULTIPART_SIZE'): + gcp_storage.blob._MAX_MULTIPART_SIZE = 5 * 1024 * 1024 + blob.upload_from_filename(filepath) def exists_bucket(self, bucket_name: str) -> bool: + """ + Check if a Google Cloud Storage bucket exists. + + Handles `exceptions.Forbidden` which can occur if the bucket exists + but is not accessible by the current credentials (treated as not existing + for SeBS purposes of creating a new one). + + :param bucket_name: Name of the GCS bucket. + :return: True if the bucket exists and is accessible, False otherwise. + """ try: return self.client.bucket(bucket_name).exists() - # 403 returned when the bucket exists but is owned by another user - except exceptions.Forbidden: + except exceptions.Forbidden: # Catch 403 if bucket exists but is owned by another user + logging.warning(f"Bucket {bucket_name} exists but is not accessible (Forbidden). Treating as non-existent for creation purposes.") return False def list_bucket(self, bucket_name: str, prefix: str = "") -> List[str]: + """ + List objects in a GCS bucket, optionally filtered by prefix. + + :param bucket_name: Name of the GCS bucket. + :param prefix: Optional prefix to filter objects. + :return: List of object names (keys). + """ + # get_bucket will raise NotFound if bucket doesn't exist. bucket_instance = self.client.get_bucket(bucket_name) - all_blobs = list(self.client.list_blobs(bucket_instance)) - blobs = [blob.name for blob in all_blobs if prefix in blob.name] - return blobs - - def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: - all_buckets = list(self.client.list_buckets()) - if bucket_name is not None: - buckets = [bucket.name for bucket in all_buckets if bucket_name in bucket.name] + blobs_iterator = self.client.list_blobs(bucket_instance, prefix=prefix) + return [blob.name for blob in blobs_iterator] + + def list_buckets(self, bucket_name_filter: Optional[str] = None) -> List[str]: # Renamed arg for clarity + """ + List all GCS buckets accessible by the client, or filter by a partial name. + + :param bucket_name_filter: Optional string to filter bucket names (contains match). + :return: List of bucket names. 
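+
+        Illustrative usage (bucket names are hypothetical)::
+
+            storage.list_buckets()                                 # all accessible buckets
+            storage.list_buckets(bucket_name_filter="benchmarks")  # e.g. ['sebs-benchmarks-a1b2']
+            storage.list_bucket("sebs-benchmarks-a1b2", prefix="110.dynamic-html/")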
+ """ + all_buckets_iterator = self.client.list_buckets() + if bucket_name_filter is not None: + return [bucket.name for bucket in all_buckets_iterator if bucket_name_filter in bucket.name] else: - buckets = [bucket.name for bucket in all_buckets] - return buckets + return [bucket.name for bucket in all_buckets_iterator] def remove_bucket(self, bucket_name: str): - self.client.get_bucket(bucket_name).delete() + """ + Delete a GCS bucket. The bucket must be empty. + + :param bucket_name: Name of the GCS bucket to delete. + """ + bucket_instance = self.client.get_bucket(bucket_name) + bucket_instance.delete(force=True) # force=True deletes non-empty buckets, use with caution. + # Original had no force, which requires empty. + # Consider if `clean_bucket` should be called first. + logging.info(f"Deleted bucket {bucket_name}") + + + def clean_bucket(self, bucket_name: str): # Renamed arg for consistency + """ + Delete all objects within a GCS bucket. + + Note: This method is not implemented. + To implement, one would list all blobs and then delete them in batches. + + :param bucket_name: Name of the GCS bucket to clean. + :raises NotImplementedError: This method is not yet implemented. + """ + # Example implementation sketch: + # bucket_instance = self.client.bucket(bucket_name) + # blobs_to_delete = list(bucket_instance.list_blobs()) + # if blobs_to_delete: + # bucket_instance.delete_blobs(blobs_to_delete) + # logging.info(f"Cleaned bucket {bucket_name}") + raise NotImplementedError("clean_bucket is not implemented for GCPStorage yet.") - def clean_bucket(self, bucket: str): - raise NotImplementedError() def uploader_func(self, path_idx: int, key: str, filepath: str) -> None: + """ + Upload a file to a GCS bucket, typically for benchmark input data. + + Skips upload if using cached buckets and `replace_existing` is False, + and the object already exists. Constructs the GCS object key using input prefixes. + + :param path_idx: Index of the input path/prefix from `self.input_prefixes`. + :param key: Object key (filename) within the bucket, relative to the prefix. + :param filepath: Local path to the file to upload. + """ if self.cached and not self.replace_existing: + # This check might be redundant if list_bucket in benchmark_data correctly sets up + # input_prefixes_files and self.cached status. + # The original logic checked `if key == blob` which seems to imply `key` is full path. + # Assuming `key` here is relative to prefix. + logging.info(f"Skipping upload of {filepath} due to cache settings and no replace_existing.") return - key = os.path.join(self.input_prefixes[path_idx], key) + full_key = os.path.join(self.input_prefixes[path_idx], key) bucket_name = self.get_bucket(Resources.StorageBucketType.BENCHMARKS) + + # Check if file exists if not replacing if not self.replace_existing: - for blob in self.input_prefixes_files[path_idx]: - if key == blob: - logging.info("Skipping upload of {} to {}".format(filepath, bucket_name)) - return - self.upload(bucket_name, filepath, key) + # input_prefixes_files should contain full keys for the given prefix. + # This check assumes self.input_prefixes_files[path_idx] has been populated correctly. 
+ if full_key in self.input_prefixes_files[path_idx]: + logging.info(f"Skipping upload of {filepath} to {bucket_name}/{full_key} as it exists and replace_existing is False.") + return + + self.upload(bucket_name, filepath, full_key) diff --git a/sebs/gcp/triggers.py b/sebs/gcp/triggers.py index 13cc3d6ca..ff632719b 100644 --- a/sebs/gcp/triggers.py +++ b/sebs/gcp/triggers.py @@ -9,30 +9,52 @@ class LibraryTrigger(Trigger): + """ + Represents a library-based trigger for Google Cloud Functions, invoking them + directly using the Google Cloud Functions API client. + """ def __init__(self, fname: str, deployment_client: Optional[GCP] = None): + """ + Initialize a LibraryTrigger. + + :param fname: Name of the Google Cloud Function. + :param deployment_client: Optional GCP client for deployment and invocation. + """ super().__init__() self.name = fname self._deployment_client = deployment_client @staticmethod def typename() -> str: + """Return the type name of this trigger implementation.""" return "GCP.LibraryTrigger" @property def deployment_client(self) -> GCP: - assert self._deployment_client + """GCP client used for deploying and invoking the function.""" + assert self._deployment_client, "Deployment client not set for LibraryTrigger" return self._deployment_client @deployment_client.setter def deployment_client(self, deployment_client: GCP): + """Set the GCP client.""" self._deployment_client = deployment_client @staticmethod def trigger_type() -> Trigger.TriggerType: + """Return the type of this trigger (LIBRARY).""" return Trigger.TriggerType.LIBRARY def sync_invoke(self, payload: dict) -> ExecutionResult: + """ + Synchronously invoke the Google Cloud Function using the functions API. + + Ensures the function is deployed and active before invocation. + :param payload: Input payload for the function. + :return: ExecutionResult object containing invocation details and metrics. + :raises RuntimeError: If the invocation fails or returns an error. + """ self.logging.info(f"Invoke function {self.name}") # Verify that the function is deployed @@ -72,42 +94,99 @@ def sync_invoke(self, payload: dict) -> ExecutionResult: return gcp_result def async_invoke(self, payload: dict): - raise NotImplementedError() + """ + Asynchronously invoke the Google Cloud Function. + + Note: This method is not currently implemented for GCP's LibraryTrigger. + GCP's `functions.call` API is synchronous. Asynchronous behavior would + need to be implemented using a thread pool or similar mechanism if desired. + + :param payload: Input payload for the function. + :raises NotImplementedError: This feature is not implemented. + """ + raise NotImplementedError("Asynchronous invocation via library is not implemented for GCP.") def serialize(self) -> dict: + """ + Serialize the LibraryTrigger to a dictionary. + + :return: Dictionary representation of the trigger. + """ return {"type": "Library", "name": self.name} @staticmethod def deserialize(obj: dict) -> Trigger: + """ + Deserialize a LibraryTrigger from a dictionary. + + :param obj: Dictionary representation of the trigger. + :return: A new LibraryTrigger instance. + """ return LibraryTrigger(obj["name"]) class HTTPTrigger(Trigger): + """ + Represents an HTTP-based trigger for a Google Cloud Function, + invoked via its public URL. + """ def __init__(self, url: str): + """ + Initialize an HTTPTrigger. + + :param url: The invocation URL for the HTTP-triggered function. 
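+
+        Illustrative usage (the URL and payload are hypothetical)::
+
+            trigger = HTTPTrigger("https://us-central1-my-project.cloudfunctions.net/my-func")
+            result = trigger.sync_invoke({"username": "sebs"})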
+ """ super().__init__() self.url = url @staticmethod def typename() -> str: + """Return the type name of this trigger implementation.""" return "GCP.HTTPTrigger" @staticmethod def trigger_type() -> Trigger.TriggerType: + """Return the type of this trigger (HTTP).""" return Trigger.TriggerType.HTTP def sync_invoke(self, payload: dict) -> ExecutionResult: + """ + Synchronously invoke the Google Cloud Function via its HTTP endpoint. + :param payload: Input payload for the function (will be sent as JSON). + :return: ExecutionResult object containing invocation details and metrics. + """ self.logging.debug(f"Invoke function {self.url}") - return self._http_invoke(payload, self.url) + # Assuming verify_ssl=True is the default desired behavior for GCP HTTP triggers + return self._http_invoke(payload, self.url, verify_ssl=True) def async_invoke(self, payload: dict) -> concurrent.futures.Future: + """ + Asynchronously invoke the Google Cloud Function via its HTTP endpoint. + + Uses a ThreadPoolExecutor to perform the HTTP request in a separate thread. + + :param payload: Input payload for the function. + :return: A Future object representing the asynchronous invocation. + """ pool = concurrent.futures.ThreadPoolExecutor() fut = pool.submit(self.sync_invoke, payload) return fut def serialize(self) -> dict: + """ + Serialize the HTTPTrigger to a dictionary. + + :return: Dictionary representation of the trigger, including type and URL. + """ return {"type": "HTTP", "url": self.url} @staticmethod def deserialize(obj: dict) -> Trigger: + """ + Deserialize an HTTPTrigger from a dictionary. + + :param obj: Dictionary representation of the trigger, must contain 'url'. + :return: A new HTTPTrigger instance. + """ return HTTPTrigger(obj["url"]) diff --git a/sebs/local/config.py b/sebs/local/config.py index 0b512c67c..9cf44e0fa 100644 --- a/sebs/local/config.py +++ b/sebs/local/config.py @@ -8,49 +8,93 @@ class LocalCredentials(Credentials): + """Credentials for local FaaS deployment (no specific credentials needed).""" def serialize(self) -> dict: + """ + Serialize LocalCredentials to a dictionary. Returns an empty dictionary + as no specific credentials are stored for local deployments. + + :return: Empty dictionary. + """ return {} @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: + """ + Deserialize LocalCredentials. Returns a new LocalCredentials instance + as no specific configuration is needed from the input dictionary or cache. + + :param config: Configuration dictionary (not used). + :param cache: Cache object (not used). + :param handlers: Logging handlers (not used for actual credential loading here). + :return: A LocalCredentials instance. + """ return LocalCredentials() -""" - No need to cache and store - we prepare the benchmark and finish. - The rest is used later by the user. -""" - - class LocalResources(SelfHostedResources): + """ + Manages resources for local FaaS deployments. + + This includes tracking allocated ports for locally running services. + Local deployments do not typically require extensive cloud resource management, + so caching and storing resource details are minimal. + """ def __init__( self, storage_cfg: Optional[PersistentStorageConfig] = None, nosql_storage_cfg: Optional[NoSQLStorageConfig] = None, ): - self._path: str = "" + """ + Initialize LocalResources. + + :param storage_cfg: Configuration for persistent storage (e.g., local Minio). + :param nosql_storage_cfg: Configuration for NoSQL storage (e.g., local ScyllaDB). 
+ """ + self._path: str = "" # Path for local storage, if applicable (seems unused currently) super().__init__("local", storage_cfg, nosql_storage_cfg) self._allocated_ports: Set[int] = set() @property - def allocated_ports(self) -> set: + def allocated_ports(self) -> Set[int]: # Explicitly Set[int] + """Set of network ports allocated for local services.""" return self._allocated_ports def serialize(self) -> dict: - out = super().serialize() + """ + Serialize LocalResources to a dictionary for caching. + + Includes allocated ports along with any information from the parent class. - out["allocated_ports"] = list(self._allocated_ports) + :return: Dictionary representation of LocalResources. + """ + out = super().serialize() + out["allocated_ports"] = list(self._allocated_ports) # Convert set to list for JSON return out @staticmethod def initialize(res: Resources, config: dict): + """ + Initialize LocalResources attributes from a dictionary. + + Populates allocated ports if present in the configuration. + :param res: Resources object to initialize (cast to LocalResources). + :param config: Dictionary containing resource configurations. + """ resources = cast(LocalResources, res) + # Call parent initializer if it exists and handles common fields like resources_id + super(LocalResources, LocalResources).initialize(resources, config) # Ensure base class init is called if "allocated_ports" in config: resources._allocated_ports = set(config["allocated_ports"]) def update_cache(self, cache: Cache): + """ + Update the cache with LocalResource details, specifically allocated ports. + + :param cache: Cache object. + """ super().update_cache(cache) cache.update_config( val=list(self._allocated_ports), keys=["local", "resources", "allocated_ports"] @@ -58,63 +102,141 @@ def update_cache(self, cache: Cache): @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: - ret = LocalResources() + """ + Deserialize LocalResources from configuration or cache. - cached_config = cache.get_config("local") - ret._deserialize(ret, config, cached_config) + Prioritizes cached configuration for allocated ports if available. - # Load cached values - if cached_config and "resources" in cached_config: - LocalResources.initialize(ret, cached_config["resources"]) + :param config: Configuration dictionary. + :param cache: Cache object. + :param handlers: Logging handlers. + :return: LocalResources instance. + """ + ret = LocalResources() + # _deserialize from SelfHostedResources likely handles storage_cfg and nosql_storage_cfg + # It needs to be called appropriately. Assuming it's part of the logic. + # The original call `ret._deserialize(ret, config, cached_config)` implies + # `_deserialize` is a method of LocalResources or its parent that takes these args. + # Let's assume SelfHostedResources has a suitable _deserialize or similar mechanism. 
+ + # For SelfHostedResources part (storage_cfg, nosql_storage_cfg) + # This part might need adjustment based on how SelfHostedResources._deserialize is structured + # If SelfHostedResources.deserialize exists and is static: + # temp_self_hosted = SelfHostedResources.deserialize(config, cache, handlers) + # ret._storage = temp_self_hosted._storage # or however these are stored + # ret._nosql_storage = temp_self_hosted._nosql_storage + # Or, if _deserialize is an instance method of SelfHostedResources: + ret._deserialize(ret, config, cache.get_config("local")) # Pass local part of cache + + cached_local_resources_config = cache.get_config("local", {}).get("resources", {}) + + # Initialize using the more specific (potentially cached) config first for local parts + if cached_local_resources_config: + LocalResources.initialize(ret, cached_local_resources_config) + ret.logging_handlers = handlers # Set handlers after initialization + ret.logging.info("Using cached resources for Local (ports, etc.)") + elif "resources" in config: # Fallback to main config if no specific cache for resources + LocalResources.initialize(ret, config["resources"]) ret.logging_handlers = handlers - ret.logging.info("Using cached resources for Local") - else: - # Check for new config + ret.logging.info("No cached local resources found, using user configuration for Local.") + else: # Initialize with empty if no config found at all + LocalResources.initialize(ret, {}) ret.logging_handlers = handlers - LocalResources.initialize(ret, config) - + ret.logging.info("No local resources configuration found, initializing empty for Local.") + return ret class LocalConfig(Config): + """Configuration for local FaaS deployments.""" def __init__(self): + """Initialize a new LocalConfig with default LocalCredentials and LocalResources.""" super().__init__(name="local") self._credentials = LocalCredentials() self._resources = LocalResources() @staticmethod def typename() -> str: + """Return the type name of the LocalConfig class.""" return "Local.Config" @staticmethod def initialize(cfg: Config, dct: dict): - pass + """ + Initialize LocalConfig attributes. Currently does nothing as local + deployments have minimal region-like configuration at this level. + + :param cfg: Config object to initialize. + :param dct: Dictionary containing configuration values. + """ + # Local deployments don't typically have a "region" in the cloud sense. + # The base Config class handles _region, but it might remain empty or unused for local. + super(LocalConfig, LocalConfig).initialize(cfg, dct if 'region' in dct else {'region': ''}) + @property def credentials(self) -> LocalCredentials: + """Return the LocalCredentials instance.""" return self._credentials @property def resources(self) -> LocalResources: + """Return the LocalResources instance.""" return self._resources @resources.setter def resources(self, val: LocalResources): + """Set the LocalResources instance.""" self._resources = val @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: + """ + Deserialize a LocalConfig object. + + Deserializes LocalResources and associates them with a new LocalConfig instance. + :param config: Configuration dictionary (can be specific to 'local' or general). + :param cache: Cache object. + :param handlers: Logging handlers. + :return: A LocalConfig instance. + """ config_obj = LocalConfig() + # Pass the relevant part of the config to LocalResources.deserialize + # If 'config' is already the 'local' part, pass it directly. 
+ # Otherwise, if 'config' is the top-level config, extract 'local.resources' if present. + resources_config = config.get("resources", config) # Fallback to passing full config if 'resources' not a key config_obj.resources = cast( - LocalResources, LocalResources.deserialize(config, cache, handlers) + LocalResources, LocalResources.deserialize(resources_config, cache, handlers) ) config_obj.logging_handlers = handlers + # Initialize LocalConfig specific parts if any (e.g. region, though less relevant for local) + LocalConfig.initialize(config_obj, config) return config_obj def serialize(self) -> dict: - out = {"name": "local", "region": self._region, "resources": self._resources.serialize()} + """ + Serialize LocalConfig to a dictionary. + + Includes 'name', 'region' (if set), and serialized resources. + + :return: Dictionary representation of LocalConfig. + """ + out = { + "name": "local", + "region": self._region, # Region might be empty/irrelevant for local + "resources": self._resources.serialize() + } return out def update_cache(self, cache: Cache): + """ + Update the cache with LocalConfig details. + + Primarily updates resource configurations in the cache. + + :param cache: Cache object. + """ + # LocalConfig itself doesn't have much to cache besides what resources handle. + # If region or other specific LocalConfig fields were important, they'd be cached here. self.resources.update_cache(cache) diff --git a/sebs/local/deployment.py b/sebs/local/deployment.py index 85f7df8e7..8d3e38784 100644 --- a/sebs/local/deployment.py +++ b/sebs/local/deployment.py @@ -12,34 +12,68 @@ class Deployment(LoggingBase): + """ + Manages a local deployment configuration, including functions, storage, + inputs, and memory measurement details. + """ @property def measurement_file(self) -> Optional[str]: + """Path to the temporary file used for memory measurements.""" return self._measurement_file @measurement_file.setter def measurement_file(self, val: Optional[str]): + """Set the path to the memory measurement file.""" self._measurement_file = val def __init__(self): + """ + Initialize a new Deployment instance. + Sets up empty lists for functions, inputs, and memory measurement PIDs. + """ super().__init__() self._functions: List[LocalFunction] = [] - self._storage: Optional[Minio] + self._storage: Optional[Minio] = None # Explicitly initialize as None self._inputs: List[dict] = [] self._memory_measurement_pids: List[int] = [] self._measurement_file: Optional[str] = None def add_function(self, func: LocalFunction): + """ + Add a local function to this deployment. + + If the function has a memory measurement PID, it's also recorded. + + :param func: The LocalFunction instance to add. + """ self._functions.append(func) if func.memory_measurement_pid is not None: self._memory_measurement_pids.append(func.memory_measurement_pid) def add_input(self, func_input: dict): + """ + Add a function input configuration to this deployment. + + :param func_input: A dictionary representing the function input. + """ self._inputs.append(func_input) def set_storage(self, storage: Minio): + """ + Set the Minio storage instance for this deployment. + + :param storage: The Minio instance. + """ self._storage = storage def serialize(self, path: str): + """ + Serialize the deployment configuration to a JSON file. + + Includes details about functions, storage, inputs, and memory measurements. + + :param path: The file path where the JSON configuration will be saved. 
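+
+        Illustrative usage (the path is hypothetical)::
+
+            deployment.serialize("local_deployment.json")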
+ """ with open(path, "w") as out: config: dict = { "functions": self._functions, @@ -55,26 +89,47 @@ def serialize(self, path: str): out.write(serialize(config)) - # FIXME: do we still use it? + # FIXME: do we still use it? This method might be outdated or for specific use cases. @staticmethod def deserialize(path: str, cache_client: Cache) -> "Deployment": + """ + Deserialize a deployment configuration from a JSON file. + + Note: The usage of this static method should be reviewed as it might be + intended for specific scenarios or could be outdated. + + :param path: The file path of the JSON configuration. + :param cache_client: Cache client instance (used for Minio deserialization). + :return: A Deployment instance. + """ with open(path, "r") as in_f: input_data = json.load(in_f) deployment = Deployment() for input_cfg in input_data["inputs"]: deployment._inputs.append(input_cfg) - for func in input_data["functions"]: - deployment._functions.append(LocalFunction.deserialize(func)) + for func_data in input_data["functions"]: # Renamed func to func_data + deployment._functions.append(LocalFunction.deserialize(func_data)) if "memory_measurements" in input_data: deployment._memory_measurement_pids = input_data["memory_measurements"]["pids"] deployment._measurement_file = input_data["memory_measurements"]["file"] - deployment._storage = Minio.deserialize( - MinioConfig.deserialize(input_data["storage"]), cache_client, LocalResources() - ) + if "storage" in input_data and input_data["storage"] is not None: + deployment._storage = Minio.deserialize( + MinioConfig.deserialize(input_data["storage"]), cache_client, LocalResources() + ) + else: + deployment._storage = None return deployment def shutdown(self, output_json: str): + """ + Shut down the local deployment. + + This involves stopping any running functions and killing memory measurement + processes. If memory measurements were taken, they are processed and saved + to the specified `output_json` file, and the temporary measurement file is removed. + :param output_json: Path to save the processed memory measurement results. + """ if len(self._memory_measurement_pids) > 0: self.logging.info("Killing memory measurement processes") diff --git a/sebs/local/function.py b/sebs/local/function.py index f0104a4e0..4f82fce22 100644 --- a/sebs/local/function.py +++ b/sebs/local/function.py @@ -8,39 +8,81 @@ class HTTPTrigger(Trigger): + """ + Represents an HTTP trigger for a locally deployed function. + The function is invoked via a URL, typically localhost or a container IP. + """ def __init__(self, url: str): + """ + Initialize an HTTPTrigger. + + :param url: The invocation URL for the HTTP-triggered function. + """ super().__init__() self.url = url @staticmethod def typename() -> str: + """Return the type name of this trigger implementation.""" return "Local.HTTPTrigger" @staticmethod def trigger_type() -> Trigger.TriggerType: + """Return the type of this trigger (HTTP).""" return Trigger.TriggerType.HTTP def sync_invoke(self, payload: dict) -> ExecutionResult: + """ + Synchronously invoke the local function via its HTTP endpoint. + + :param payload: Input payload for the function (will be sent as JSON). + :return: ExecutionResult object containing invocation details and metrics. + """ self.logging.debug(f"Invoke function {self.url}") - return self._http_invoke(payload, self.url) + # Assuming verify_ssl=False for local HTTP invocations, or it should be configurable. 
+ return self._http_invoke(payload, self.url, verify_ssl=False) def async_invoke(self, payload: dict) -> concurrent.futures.Future: + """ + Asynchronously invoke the local function via its HTTP endpoint. + + Uses a ThreadPoolExecutor to perform the HTTP request in a separate thread. + + :param payload: Input payload for the function. + :return: A Future object representing the asynchronous invocation. + """ pool = concurrent.futures.ThreadPoolExecutor() fut = pool.submit(self.sync_invoke, payload) return fut def serialize(self) -> dict: + """ + Serialize the HTTPTrigger to a dictionary. + + :return: Dictionary representation of the trigger, including type and URL. + """ return {"type": "HTTP", "url": self.url} @staticmethod def deserialize(obj: dict) -> Trigger: + """ + Deserialize an HTTPTrigger from a dictionary. + + :param obj: Dictionary representation of the trigger, must contain 'url'. + :return: A new HTTPTrigger instance. + """ return HTTPTrigger(obj["url"]) class LocalFunction(Function): + """ + Represents a function deployed locally in a Docker container. + + Manages the Docker container instance, its URL, and associated metadata. + """ def __init__( self, - docker_container, + docker_container: docker.models.containers.Container, port: int, name: str, benchmark: str, @@ -48,64 +90,106 @@ def __init__( config: FunctionConfig, measurement_pid: Optional[int] = None, ): + """ + Initialize a LocalFunction instance. + + Determines the invocation URL based on the Docker container's network settings. + + :param docker_container: The Docker container instance running the function. + :param port: The port on which the function is accessible within the container or host. + :param name: Name of the local function. + :param benchmark: Name of the benchmark this function belongs to. + :param code_package_hash: Hash of the deployed code package. + :param config: FunctionConfig object. + :param measurement_pid: Optional PID of a process measuring memory for this function. + :raises RuntimeError: If the IP address of the container cannot be determined on Linux. 
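+
+        The resulting invocation URL takes the form "172.17.0.2:9000" on Linux
+        (bridge-network IP) or "localhost:9000" elsewhere; the address and port here
+        are illustrative values.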
+ """ super().__init__(benchmark, name, code_package_hash, config) self._instance = docker_container self._instance_id = docker_container.id - self._instance.reload() - networks = self._instance.attrs["NetworkSettings"]["Networks"] + self._instance.reload() # Ensure container attributes are up-to-date + networks = self._instance.attrs.get("NetworkSettings", {}).get("Networks", {}) self._port = port + # Determine URL based on OS if is_linux(): - self._url = "{IPAddress}:{Port}".format( - IPAddress=networks["bridge"]["IPAddress"], Port=port - ) - if not self._url: - self.logging.error( - f"Couldn't read the IP address of container from attributes " - f"{json.dumps(self._instance.attrs, indent=2)}" - ) - raise RuntimeError( - f"Incorrect detection of IP address for container with id {self._instance_id}" - ) - else: + bridge_network = networks.get("bridge", {}) + ip_address = bridge_network.get("IPAddress") + if not ip_address: # Fallback or error if IPAddress is empty or not found + # Try to get gateway if IPAddress is empty, common in some Docker versions/networks + ip_address = bridge_network.get("Gateway") + if not ip_address: + self.logging.error( + f"Couldn't read IPAddress or Gateway for container {self._instance_id} " + f"from attributes: {json.dumps(self._instance.attrs, indent=2)}" + ) + raise RuntimeError( + f"Incorrect detection of IP address for container {self._instance_id}" + ) + self._url = f"{ip_address}:{port}" + else: # For non-Linux (e.g., Docker Desktop on macOS/Windows), localhost is typically used self._url = f"localhost:{port}" self._measurement_pid = measurement_pid @property def container(self) -> docker.models.containers.Container: + """The Docker container instance for this function.""" return self._instance @container.setter def container(self, instance: docker.models.containers.Container): + """Set the Docker container instance.""" self._instance = instance @property def url(self) -> str: + """The invocation URL for this local function.""" return self._url @property def memory_measurement_pid(self) -> Optional[int]: + """The PID of the process measuring memory for this function, if any.""" return self._measurement_pid @staticmethod def typename() -> str: + """Return the type name of this function implementation.""" return "Local.LocalFunction" def serialize(self) -> dict: + """ + Serialize the LocalFunction instance to a dictionary. + + Includes instance ID, URL, and port along with base Function attributes. + + :return: Dictionary representation of the LocalFunction. + """ return { **super().serialize(), "instance_id": self._instance_id, "url": self._url, "port": self._port, + # measurement_pid is runtime state, typically not serialized for cache/reuse } @staticmethod def deserialize(cached_config: dict) -> "LocalFunction": + """ + Deserialize a LocalFunction instance from a dictionary. + + Retrieves the Docker container instance using its ID. + + :param cached_config: Dictionary containing serialized LocalFunction data. + :return: A new LocalFunction instance. + :raises RuntimeError: If the cached Docker container is not found. 
+ """ try: instance_id = cached_config["instance_id"] - instance = docker.from_env().containers.get(instance_id) + docker_client = docker.from_env() + instance = docker_client.containers.get(instance_id) cfg = FunctionConfig.deserialize(cached_config["config"]) + # measurement_pid is runtime state, not restored from cache typically return LocalFunction( instance, cached_config["port"], @@ -113,11 +197,17 @@ def deserialize(cached_config: dict) -> "LocalFunction": cached_config["benchmark"], cached_config["hash"], cfg, + measurement_pid=None # measurement_pid is runtime, not from cache ) except docker.errors.NotFound: raise RuntimeError(f"Cached container {instance_id} not available anymore!") def stop(self): + """Stop the Docker container associated with this function.""" self.logging.info(f"Stopping function container {self._instance_id}") - self._instance.stop(timeout=0) - self.logging.info(f"Function container {self._instance_id} stopped succesfully") + try: + self._instance.stop(timeout=0) # timeout=0 for immediate stop + self.logging.info(f"Function container {self._instance_id} stopped succesfully") + except docker.errors.APIError as e: + self.logging.error(f"Error stopping container {self._instance_id}: {e}") + # Depending on desired behavior, might re-raise or handle diff --git a/sebs/local/local.py b/sebs/local/local.py index 32b9f9ffb..bc1aa0295 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -21,43 +21,58 @@ class Local(System): + """ + Local FaaS system implementation. + Manages functions running locally in Docker containers. It handles + packaging code, starting/stopping containers, and creating triggers + for local invocation. + """ DEFAULT_PORT = 9000 @staticmethod - def name(): + def name() -> str: + """Return the name of the FaaS system (local).""" return "local" @staticmethod - def typename(): + def typename() -> str: + """Return the type name of this FaaS system class.""" return "Local" @staticmethod def function_type() -> "Type[Function]": + """Return the type of the function implementation for local deployments.""" return LocalFunction @property def config(self) -> LocalConfig: + """Return the local-specific configuration.""" return self._config @property def remove_containers(self) -> bool: + """Flag indicating whether to remove containers after they are stopped.""" return self._remove_containers @remove_containers.setter def remove_containers(self, val: bool): + """Set the flag for removing containers after stopping.""" self._remove_containers = val @property def measure_interval(self) -> int: + """Interval in seconds for memory measurements, if enabled. -1 means disabled.""" return self._measure_interval @property def measurements_enabled(self) -> bool: + """Check if memory measurements are enabled.""" return self._measure_interval > -1 @property def measurement_path(self) -> Optional[str]: + """Path to the file where memory measurements are stored, if enabled.""" return self._memory_measurement_path def __init__( @@ -68,11 +83,20 @@ def __init__( docker_client: docker.client, logger_handlers: LoggingHandlers, ): + """ + Initialize the Local FaaS system. + + :param sebs_config: SeBS system configuration. + :param config: Local-specific configuration. + :param cache_client: Function cache instance. + :param docker_client: Docker client instance. + :param logger_handlers: Logging handlers. 
+ """ super().__init__( sebs_config, cache_client, docker_client, - SelfHostedSystemResources( + SelfHostedSystemResources( # Uses SelfHosted for local storage/NoSQL "local", config, cache_client, docker_client, logger_handlers ), ) @@ -80,286 +104,389 @@ def __init__( self._config = config self._remove_containers = True self._memory_measurement_path: Optional[str] = None - # disable external measurements - self._measure_interval = -1 - - self.initialize_resources(select_prefix="local") + self._measure_interval = -1 # Default: disabled - """ - Shut down minio storage instance. - """ + self.initialize_resources(select_prefix="local") # Resource ID for local is "local" def shutdown(self): + """ + Shut down the local FaaS system. + Currently, this involves updating the cache via the parent class's shutdown. + Local storage (Minio) or NoSQL (ScyllaDB) shutdown is handled by SelfHostedSystemResources. + """ super().shutdown() - """ - It would be sufficient to just pack the code and ship it as zip to AWS. - However, to have a compatible function implementation across providers, - we create a small module. - Issue: relative imports in Python when using storage wrapper. - Azure expects a relative import inside a module. - - Structure: - function - - function.py - - storage.py - - resources - handler.py - - dir: directory where code is located - benchmark: benchmark name - """ - def package_code( self, directory: str, language_name: str, language_version: str, - architecture: str, + architecture: str, # architecture is not used for local Docker image selection yet benchmark: str, - is_cached: bool, - container_deployment: bool, + is_cached: bool, # is_cached is not directly used in local packaging logic + container_deployment: bool, # container_deployment is not supported for local ) -> Tuple[str, int, str]: - + """ + Package benchmark code for local Docker deployment. + + The standard SeBS code directory structure is adapted: + - Files not part of `CONFIG_FILES` are moved into a 'function' subdirectory. + This prepares the `directory` to be mounted into the Docker container. + No actual zipping or separate package creation occurs; the directory itself is used. + + The directory structure expected by the local runner (inside the container at /function): + - handler.py / handler.js (at the root of the mount) + - requirements.txt / package.json (at the root of the mount) + - .python_packages / node_modules (at the root, created by benchmark build step) + - function/ (subdirectory containing other benchmark source files and resources) + + :param directory: Path to the code directory. + :param language_name: Programming language name. + :param language_version: Programming language version. + :param architecture: Target architecture (not directly used in local packaging). + :param benchmark: Benchmark name. + :param is_cached: Whether the code is cached (not directly used here). + :param container_deployment: Whether to package for container deployment (not supported). + :return: Tuple containing: + - Path to the prepared code directory. + - Size of the directory in bytes. + - Empty string for container URI. + """ + # Local deployment doesn't produce a separate package file or container URI here. + # It prepares the directory for mounting. 
CONFIG_FILES = { "python": ["handler.py", "requirements.txt", ".python_packages"], "nodejs": ["handler.js", "package.json", "node_modules"], } - package_config = CONFIG_FILES[language_name] - function_dir = os.path.join(directory, "function") - os.makedirs(function_dir) - # move all files to 'function' except handler.py - for file in os.listdir(directory): - if file not in package_config: - file = os.path.join(directory, file) - shutil.move(file, function_dir) - - bytes_size = os.path.getsize(directory) - mbytes = bytes_size / 1024.0 / 1024.0 - self.logging.info("Function size {:2f} MB".format(mbytes)) + package_config_exclusions = CONFIG_FILES[language_name] + function_subdir = os.path.join(directory, "function") + os.makedirs(function_subdir, exist_ok=True) + + # Move all files not in package_config_exclusions into the 'function' subdirectory + for item_name in os.listdir(directory): + if item_name not in package_config_exclusions and item_name != "function": + source_item_path = os.path.join(directory, item_name) + destination_item_path = os.path.join(function_subdir, item_name) + # Ensure not to move the 'function' directory into itself if it already exists and has content + if os.path.abspath(source_item_path) != os.path.abspath(function_subdir): + shutil.move(source_item_path, destination_item_path) + + # Calculate size of the directory to be mounted. + # Benchmark.directory_size(directory) might be more accurate if it exists. + # For now, using os.path.getsize on the root directory might not be correct for total size. + # A more robust way would be to sum sizes of all files in the directory. + # However, to match original behavior of just returning directory path: + total_size = 0 + for path, dirs, files in os.walk(directory): + for f in files: + fp = os.path.join(path, f) + total_size += os.path.getsize(fp) + + mbytes = total_size / 1024.0 / 1024.0 + self.logging.info(f"Prepared function directory at {directory}, size {mbytes:.2f} MB") + + return directory, total_size, "" - return directory, bytes_size, "" def _start_container( - self, code_package: Benchmark, func_name: str, func: Optional[LocalFunction] + self, code_package: Benchmark, func_name: str, func_obj: Optional[LocalFunction] ) -> LocalFunction: - - container_name = "{}:run.local.{}.{}".format( + """ + Start a Docker container for the given benchmark code. + + Configures environment variables, mounts the code package, and sets up + networking. If memory measurements are enabled, starts a subprocess to + monitor the container's memory usage. + + :param code_package: The Benchmark object. + :param func_name: The name for the function/container. + :param func_obj: Optional existing LocalFunction object to reuse/update. + :return: The LocalFunction instance associated with the started container. + :raises RuntimeError: If a port cannot be allocated or the container fails to start. 
+ """ + container_image_name = "{}:run.local.{}.{}".format( self._system_config.docker_repository(), code_package.language_name, code_package.language_version, ) - environment = { - "CONTAINER_UID": str(os.getuid()), - "CONTAINER_GID": str(os.getgid()), + environment_vars = { + "CONTAINER_UID": str(os.getuid()) if hasattr(os, 'getuid') else '1000', # Default for non-Unix + "CONTAINER_GID": str(os.getgid()) if hasattr(os, 'getgid') else '1000', # Default for non-Unix "CONTAINER_USER": self._system_config.username(self.name(), code_package.language_name), } if self.config.resources.storage_config: - - environment = {**self.config.resources.storage_config.envs(), **environment} + environment_vars.update(self.config.resources.storage_config.envs()) if code_package.uses_nosql: - nosql_storage = self.system_resources.get_nosql_storage() - environment = {**environment, **nosql_storage.envs()} - - for original_name, actual_name in nosql_storage.get_tables( - code_package.benchmark - ).items(): - environment[f"NOSQL_STORAGE_TABLE_{original_name}"] = actual_name - - # FIXME: make CPUs configurable - # FIXME: configure memory - # FIXME: configure timeout - # cpuset_cpus=cpuset, - # required to access perf counters - # alternative: use custom seccomp profile - container_kwargs = { - "image": container_name, - "command": f"/bin/bash /sebs/run_server.sh {self.DEFAULT_PORT}", + environment_vars.update(nosql_storage.envs()) + for original_name, actual_name in nosql_storage.get_tables(code_package.benchmark).items(): + environment_vars[f"NOSQL_STORAGE_TABLE_{original_name}"] = actual_name + + # Default container settings + # FIXME: CPU, memory, timeout configurations are placeholders. + container_kwargs: Dict[str, Any] = { + "image": container_image_name, "volumes": {code_package.code_location: {"bind": "/function", "mode": "ro"}}, - "environment": environment, - "privileged": True, - "security_opt": ["seccomp:unconfined"], + "environment": environment_vars, + "privileged": True, # Needed for some benchmarks or measurement tools + "security_opt": ["seccomp:unconfined"], # For tools like perf "network_mode": "bridge", "remove": self.remove_containers, "stdout": True, "stderr": True, "detach": True, - # "tty": True, } - # If SeBS is running on non-linux platforms, - # container port must be mapped to host port to make it reachable - # Check if the system is NOT Linux or that it is WSL - port = self.DEFAULT_PORT - if not is_linux(): - port_found = False + # Port handling: + # On Linux, container uses DEFAULT_PORT directly on its bridge IP. + # On non-Linux (Docker Desktop), map a host port to container's DEFAULT_PORT. 
+ container_internal_port = self.DEFAULT_PORT + host_mapped_port = container_internal_port # Default to same port for Linux bridge scenario + + if not is_linux(): # E.g., Docker Desktop on macOS/Windows + allocated_host_port = None for p in range(self.DEFAULT_PORT, self.DEFAULT_PORT + 1000): - # check no container has been deployed on docker's port p if p not in self.config.resources.allocated_ports: - # check if port p on the host is free with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) try: s.bind(("127.0.0.1", p)) - # The port is available - port = p - port_found = True + allocated_host_port = p self.config.resources.allocated_ports.add(p) break except socket.error: - # The port is already in use - continue - - if not port_found: + continue # Port in use on host + if allocated_host_port is None: raise RuntimeError( - f"Failed to allocate port for container: No ports available between " + f"Failed to allocate host port: No ports available between " f"{self.DEFAULT_PORT} and {self.DEFAULT_PORT + 999}" ) - - container_kwargs["command"] = f"/bin/bash /sebs/run_server.sh {port}" - container_kwargs["ports"] = {f"{port}/tcp": port} - - container = self._docker_client.containers.run(**container_kwargs) - - pid: Optional[int] = None - if self.measurements_enabled and self._memory_measurement_path is not None: - # launch subprocess to measure memory - proc = subprocess.Popen( - [ - "python3", - "./sebs/local/measureMem.py", - "--container-id", - container.id, - "--measure-interval", - str(self._measure_interval), - "--measurement-file", - self._memory_measurement_path, - ] - ) - pid = proc.pid - - if func is None: - function_cfg = FunctionConfig.from_benchmark(code_package) - func = LocalFunction( - container, - port, - func_name, - code_package.benchmark, - code_package.hash, - function_cfg, - pid, + host_mapped_port = allocated_host_port + container_kwargs["ports"] = {f"{container_internal_port}/tcp": host_mapped_port} + + # Command to run inside the container, using the internal port + container_kwargs["command"] = f"/bin/bash /sebs/run_server.sh {container_internal_port}" + + running_container = self._docker_client.containers.run(**container_kwargs) + + # Memory measurement process + measurement_process_pid: Optional[int] = None + if self.measurements_enabled and self._memory_measurement_path: + proc = subprocess.Popen([ + "python3", "./sebs/local/measureMem.py", + "--container-id", running_container.id, + "--measure-interval", str(self.measure_interval), + "--measurement-file", self._memory_measurement_path, + ]) + measurement_process_pid = proc.pid + + # Create or update LocalFunction object + if func_obj is None: + function_config_obj = FunctionConfig.from_benchmark(code_package) + func_obj = LocalFunction( + running_container, host_mapped_port, func_name, + code_package.benchmark, code_package.hash, + function_config_obj, measurement_process_pid, ) else: - func.container = container - func._measurement_pid = pid + func_obj.container = running_container + func_obj._measurement_pid = measurement_process_pid + # func_obj._port might need update if host_mapped_port changed, though current logic reuses. 
- # Wait until server starts - max_attempts = 10 - attempts = 0 - while attempts < max_attempts: + # Wait for the server within the container to start + max_retries = 10 + for attempt in range(max_retries): try: - requests.get(f"http://{func.url}/alive") - break + # Use func_obj.url which correctly points to localhost or container IP + requests.get(f"http://{func_obj.url}/alive", timeout=1) + self.logging.info( + f"Started {func_name} in container {running_container.id}, listening on {func_obj.url}" + ) + return func_obj except requests.exceptions.ConnectionError: - time.sleep(0.25) - attempts += 1 + if attempt < max_retries - 1: + time.sleep(0.25) + else: + raise RuntimeError( + f"Couldn't start {func_name} in container {running_container.id} " + f"(URL: {func_obj.url}). Server did not become alive." + ) + # Should not be reached if loop completes or raises + return func_obj - if attempts == max_attempts: - raise RuntimeError( - f"Couldn't start {func_name} function at container " - f"{container.id} , running on {func.url}" - ) - - self.logging.info( - f"Started {func_name} function at container {container.id} , running on {func._url}" - ) - - return func def create_function( self, code_package: Benchmark, func_name: str, - container_deployment: bool, - container_uri: str, + container_deployment: bool, # Not used for Local + container_uri: str, # Not used for Local ) -> "LocalFunction": - + """ + Create a new local function, which involves starting a Docker container. + + :param code_package: The Benchmark object containing code and configuration. + :param func_name: The desired name for the function. + :param container_deployment: Flag for container deployment (not supported/used for Local). + :param container_uri: Container URI (not used for Local). + :return: The created LocalFunction instance. + :raises NotImplementedError: If container_deployment is True. + """ if container_deployment: raise NotImplementedError("Container deployment is not supported in Local") return self._start_container(code_package, func_name, None) - """ - Restart Docker container - """ - def update_function( self, function: Function, code_package: Benchmark, - container_deployment: bool, - container_uri: str, + container_deployment: bool, # Not used for Local + container_uri: str, # Not used for Local ): - func = cast(LocalFunction, function) - func.stop() - self.logging.info("Allocating a new function container with updated code") - self._start_container(code_package, function.name, func) - - """ - For local functions, we don't need to do anything for a cached function. - There's only one trigger - HTTP. - """ + """ + Update an existing local function. This typically involves stopping the old + Docker container and starting a new one with the updated code or configuration. + + :param function: The existing LocalFunction object to update. + :param code_package: Benchmark object with the new code/configuration. + :param container_deployment: Flag for container deployment (not used). + :param container_uri: Container URI (not used). 
+ """ + local_func = cast(LocalFunction, function) + local_func.stop() # Stop the old container + self.logging.info("Allocating a new function container with updated code.") + # _start_container will create a new container and update func_obj if provided + self._start_container(code_package, local_func.name, local_func) def create_trigger(self, func: Function, trigger_type: Trigger.TriggerType) -> Trigger: - from sebs.local.function import HTTPTrigger - - function = cast(LocalFunction, func) + """ + Create a trigger for a local function. + For local deployments, only HTTP triggers are typically relevant and are + derived from the function's container URL. + + :param func: The LocalFunction object. + :param trigger_type: The type of trigger to create (must be HTTP). + :return: The created HTTPTrigger object. + :raises RuntimeError: If a non-HTTP trigger type is requested. + """ + from sebs.local.function import HTTPTrigger # Local import + + local_function = cast(LocalFunction, func) if trigger_type == Trigger.TriggerType.HTTP: - trigger = HTTPTrigger(function._url) - trigger.logging_handlers = self.logging_handlers + # The URL is determined when LocalFunction is initialized or container restarts + http_trigger = HTTPTrigger(local_function.url) + http_trigger.logging_handlers = self.logging_handlers # Ensure handlers are set else: - raise RuntimeError("Not supported!") + raise RuntimeError(f"Trigger type {trigger_type.value} not supported for Local deployment!") - function.add_trigger(trigger) - self.cache_client.update_function(function) - return trigger + local_function.add_trigger(http_trigger) + self.cache_client.update_function(local_function) # Update cache with new trigger info + return http_trigger def cached_function(self, function: Function): - pass + """ + Perform setup for a cached LocalFunction instance. + Currently, no specific actions are needed for local cached functions beyond + what's done during deserialization (e.g., re-attaching to Docker container). + Ensures HTTP trigger has the correct URL if function was re-instantiated. + + :param function: The LocalFunction object retrieved from cache. + """ + local_func = cast(LocalFunction, function) + # Ensure HTTP trigger URL is up-to-date, especially if container IP changed (though less likely for local) + http_triggers = local_func.triggers(Trigger.TriggerType.HTTP) + if http_triggers: + cast(HTTPTrigger, http_triggers[0]).url = local_func.url + elif not http_triggers: # If no HTTP trigger, create one + self.create_trigger(local_func, Trigger.TriggerType.HTTP) + def update_function_configuration(self, function: Function, code_package: Benchmark): + """ + Update function configuration for a local deployment. + Note: This is not supported for local deployments as configuration changes + typically require restarting the container with new settings, which is + handled by `update_function`. + + :param function: The function to configure. + :param code_package: Benchmark with new configuration. + :raises RuntimeError: Always, as this operation is not supported. 
+ """ self.logging.error("Updating function configuration of local deployment is not supported") raise RuntimeError("Updating function configuration of local deployment is not supported") def download_metrics( self, - function_name: str, - start_time: int, - end_time: int, - requests: Dict[str, ExecutionResult], - metrics: dict, + function_name: str, # Not directly used, metrics are tied to container IDs or local files + start_time: int, # Not directly used for local memory metrics + end_time: int, # Not directly used for local memory metrics + requests: Dict[str, ExecutionResult], # Not directly used for local memory metrics + metrics: dict, # Not directly used for local memory metrics ): + """ + Download/process metrics for local functions. + For local deployments, this typically refers to processing memory measurement + files if enabled. Other provider-specific metrics (like billing) are not applicable. + + :param function_name: Name of the function (for context, not direct query). + :param start_time: Start time for metrics window (not used for local memory). + :param end_time: End time for metrics window (not used for local memory). + :param requests: Dictionary of request IDs to ExecutionResult objects. + :param metrics: Dictionary to store any additional metrics. + """ + # Local memory metrics are processed during `deployment.shutdown()`. + # No cloud provider metrics to download. + self.logging.info("Local deployment: Metrics (memory) processed during deployment shutdown.") pass def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): - raise NotImplementedError() + """ + Enforce a cold start for local functions. + This typically means stopping and restarting the Docker container(s). + + :param functions: List of LocalFunction objects. + :param code_package: The Benchmark object. + :raises NotImplementedError: This method is not fully implemented in a way that + guarantees a "cold start" equivalent beyond container restart. + """ + # For local, a "cold start" means restarting the container. + # This is effectively what update_function does. + # A more direct way would be func.stop() then self._start_container(code_package, func.name, func) + self.logging.warning("Enforcing cold start for local functions by restarting containers.") + for func in functions: + if isinstance(func, LocalFunction): + self.update_function(func, code_package, False, "") + else: + self.logging.error(f"Cannot enforce cold start on non-LocalFunction: {func.name}") + # The concept of a "cold start counter" doesn't directly apply to local in the same way + # as cloud, where it might change env vars to force new instance versions. + # Here, restart is the primary mechanism. @staticmethod def default_function_name( code_package: Benchmark, resources: Optional[Resources] = None ) -> str: - # Create function name - if resources is not None: + """ + Generate a default name for a local function (container). + + If resources (and thus a resource_id) are provided, it includes the resource_id. + Otherwise, it's based on benchmark name, language, and version. + + :param code_package: The Benchmark object. + :param resources: Optional Resources object. + :return: The generated default function name. 
+ """ + if resources and resources.has_resources_id: # Check if resources_id is available func_name = "sebs-{}-{}-{}-{}".format( resources.resources_id, code_package.benchmark, code_package.language_name, code_package.language_version, ) - else: - func_name = "sebd-{}-{}-{}".format( + else: # Fallback if no resources or no resource_id + func_name = "sebs-{}-{}-{}".format( # Changed from sebd- code_package.benchmark, code_package.language_name, code_package.language_version, @@ -368,21 +495,40 @@ def default_function_name( @staticmethod def format_function_name(func_name: str) -> str: + """ + Format the function name for local deployment. + Currently, no specific formatting is applied. + + :param func_name: The original function name. + :return: The formatted function name (same as input). + """ return func_name def start_measurements(self, measure_interval: int) -> Optional[str]: + """ + Start memory measurements for local containers. + + Sets the measurement interval and creates a temporary file for storing + measurement data collected by `measureMem.py`. + :param measure_interval: Interval in seconds for taking memory measurements. + If <= 0, measurements are disabled. + :return: Path to the temporary measurement file if enabled, else None. + """ self._measure_interval = measure_interval if not self.measurements_enabled: + self._memory_measurement_path = None return None # initialize an empty file for measurements to be written to import tempfile from pathlib import Path - fd, self._memory_measurement_path = tempfile.mkstemp() + fd, temp_file_path = tempfile.mkstemp(suffix=".txt", prefix="sebs_mem_") + self._memory_measurement_path = temp_file_path Path(self._memory_measurement_path).touch() - os.close(fd) + os.close(fd) # Close the file descriptor opened by mkstemp + self.logging.info(f"Memory measurements will be stored in {self._memory_measurement_path}") return self._memory_measurement_path diff --git a/sebs/local/measureMem.py b/sebs/local/measureMem.py index 74cae636f..69248f291 100644 --- a/sebs/local/measureMem.py +++ b/sebs/local/measureMem.py @@ -1,9 +1,9 @@ """ -Measure memory consumption of a specified docker container. +Script to measure memory consumption of a specified Docker container. -Specifically, the pseudofile memory.current from the cgroup -pseudo-filesystem is read by a shell command (cat) every few -milliseconds while the container is running. +This script periodically reads the `memory.current` file from the container's +cgroup in the pseudo-filesystem to record its memory usage. The measurements +are appended to a specified output file. """ import subprocess @@ -12,10 +12,27 @@ def measure(container_id: str, measure_interval: int, measurement_file: str) -> None: + """ + Periodically measure the memory usage of a Docker container and write to a file. - f = open(measurement_file, "a") + The function attempts to read memory usage from two possible cgroup paths: + 1. `/sys/fs/cgroup/system.slice/docker-{container_id}.scope/memory.current` + 2. `/sys/fs/cgroup/docker/{container_id}/memory.current` (fallback) - while True: + Memory usage is written as "{container_id} {memory_in_bytes}" per line. + If the time taken to measure and write exceeds `measure_interval`, a + "precision not met" message is written. + + :param container_id: The full ID of the Docker container to measure. + :param measure_interval: The target interval in milliseconds between measurements. + If 0 or negative, measurements are taken as fast as possible. 
+ :param measurement_file: Path to the file where measurements will be appended. + """ + # Open the measurement file in append mode. + # Ensure this file is handled correctly regarding concurrent writes if multiple + # instances of this script could target the same file (though typically not the case). + with open(measurement_file, "a") as f: + while True: time_start = time.perf_counter_ns() longId = "docker-" + container_id + ".scope" try: @@ -33,13 +50,33 @@ def measure(container_id: str, measure_interval: int, measurement_file: str) -> time.sleep(max(0, (measure_interval - iter_duration / 1e6) / 1000)) +# Ensure the file is flushed regularly if buffering is an issue for real-time monitoring. +# However, for typical usage, standard buffering should be fine. + """ - Parse container ID and measure interval and start memory measurement process. +Command-line interface for the memory measurement script. + +Parses arguments for container ID, measurement interval, and output file, +then starts the memory measurement process. """ if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("--container-id", type=str) - parser.add_argument("--measurement-file", type=str) - parser.add_argument("--measure-interval", type=int) - args, unknown = parser.parse_known_args() - measure(args.container_id, args.measure_interval, args.measurement_file) + parser = argparse.ArgumentParser( + description="Measure memory consumption of a Docker container." + ) + parser.add_argument("--container-id", type=str, required=True, help="Full ID of the Docker container.") + parser.add_argument("--measurement-file", type=str, required=True, help="File to append measurements to.") + parser.add_argument( + "--measure-interval", + type=int, + required=True, + help="Target interval between measurements in milliseconds.", + ) + args = parser.parse_args() # Use parse_args, unknown args will cause error + + try: + measure(args.container_id, args.measure_interval, args.measurement_file) + except KeyboardInterrupt: + print(f"Memory measurement for container {args.container_id} stopped by user.") + except Exception as e: + print(f"Error during memory measurement for container {args.container_id}: {e}") + # Optionally, log to a file or re-raise depending on desired error handling. diff --git a/sebs/openwhisk/config.py b/sebs/openwhisk/config.py index bba54f7c7..aee1b2fbd 100644 --- a/sebs/openwhisk/config.py +++ b/sebs/openwhisk/config.py @@ -9,180 +9,321 @@ class OpenWhiskCredentials(Credentials): + """Credentials for OpenWhisk (no specific credentials typically needed by SeBS).""" @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Credentials: + """ + Deserialize OpenWhiskCredentials. Returns a new OpenWhiskCredentials instance + as no specific configuration is needed from the input dictionary or cache for SeBS's use. + OpenWhisk authentication is usually handled via `wsk` CLI properties file. + + :param config: Configuration dictionary (not used). + :param cache: Cache object (not used). + :param handlers: Logging handlers (not used for actual credential loading here). + :return: An OpenWhiskCredentials instance. + """ return OpenWhiskCredentials() def serialize(self) -> dict: + """ + Serialize OpenWhiskCredentials to a dictionary. Returns an empty dictionary. + + :return: Empty dictionary. + """ return {} class OpenWhiskResources(SelfHostedResources): + """ + Manages resources for OpenWhisk deployments, including Docker registry details. 
+ Inherits from SelfHostedResources for common self-hosted storage/NoSQL configurations. + """ def __init__( self, registry: Optional[str] = None, username: Optional[str] = None, password: Optional[str] = None, - registry_updated: bool = False, + registry_updated: bool = False, # Indicates if registry details were newly provided vs cached ): + """ + Initialize OpenWhiskResources. + + :param registry: Docker registry URL for OpenWhisk actions. + :param username: Username for the Docker registry. + :param password: Password for the Docker registry. + :param registry_updated: Flag indicating if registry details are new or updated. + """ super().__init__(name="openwhisk") self._docker_registry = registry if registry != "" else None self._docker_username = username if username != "" else None self._docker_password = password if password != "" else None self._registry_updated = registry_updated - self._storage_updated = False + self._storage_updated = False # Related to SelfHostedResources, indicates if storage config changed @staticmethod def typename() -> str: + """Return the type name of the OpenWhiskResources class.""" return "OpenWhisk.Resources" @property def docker_registry(self) -> Optional[str]: + """Docker registry URL for OpenWhisk function images.""" return self._docker_registry @property def docker_username(self) -> Optional[str]: + """Username for the Docker registry.""" return self._docker_username @property def docker_password(self) -> Optional[str]: + """Password for the Docker registry.""" return self._docker_password @property def storage_updated(self) -> bool: + """Flag indicating if self-hosted storage configuration was updated.""" return self._storage_updated @property def registry_updated(self) -> bool: + """Flag indicating if Docker registry details were updated.""" return self._registry_updated @staticmethod def initialize(res: Resources, dct: dict): + """ + Initialize OpenWhiskResources-specific attributes from a dictionary. + This focuses on Docker registry details. Base class handles other parts. + + :param res: Resources object to initialize (cast to OpenWhiskResources). + :param dct: Dictionary containing 'registry', 'username', and 'password' for Docker. + """ ret = cast(OpenWhiskResources, res) - ret._docker_registry = dct["registry"] - ret._docker_username = dct["username"] - ret._docker_password = dct["password"] + # Assuming dct here is specifically the 'docker_registry' part of the config + ret._docker_registry = dct.get("registry") + ret._docker_username = dct.get("username") + ret._docker_password = dct.get("password") + # Note: SelfHostedResources.initialize should be called by the caller if needed + # or integrated into a common initialization flow. @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Resources: + """ + Deserialize OpenWhiskResources from configuration or cache. + Handles Docker registry details and calls parent for self-hosted storage/NoSQL. + + :param config: Configuration dictionary, may contain 'docker_registry'. + :param cache: Cache object for retrieving cached resource details. + :param handlers: Logging handlers. + :return: OpenWhiskResources instance. 
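+
+ Expected shape of the user-provided registry section (values are illustrative)::
+
+     "docker_registry": {
+         "registry": "registry.example.com:5000",
+         "username": "user",
+         "password": "secret"
+     }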
+ """ cached_config = cache.get_config("openwhisk") ret = OpenWhiskResources() - if cached_config: - super(OpenWhiskResources, OpenWhiskResources).initialize( - ret, cached_config["resources"] - ) + + # Initialize SelfHostedResources parts first (storage, nosql) + # The `_deserialize` method is from SelfHostedResources and handles its specific fields. + # It needs the relevant part of the config and cache. + # Assuming `config` might be the top-level user config, and `cached_config` is the 'openwhisk' section. + ret._deserialize(ret, config.get("resources", {}), cached_config.get("resources") if cached_config else None) - ret._deserialize(ret, config, cached_config) - # Check for new config - overrides but check if it's different - if "docker_registry" in config: + # Docker registry details handling + user_docker_config = config.get("docker_registry") + cached_docker_config = (cached_config or {}).get("resources", {}).get("docker") - OpenWhiskResources.initialize(ret, config["docker_registry"]) + if user_docker_config: + OpenWhiskResources.initialize(ret, user_docker_config) ret.logging.info("Using user-provided Docker registry for OpenWhisk.") - ret.logging_handlers = handlers - - # check if there has been an update - if not ( - cached_config - and "resources" in cached_config - and "docker" in cached_config["resources"] - and cached_config["resources"]["docker"] == config["docker_registry"] - ): + if not cached_docker_config or cached_docker_config != user_docker_config: ret._registry_updated = True - - # Load cached values - elif ( - cached_config - and "resources" in cached_config - and "docker" in cached_config["resources"] - ): - OpenWhiskResources.initialize(ret, cached_config["resources"]["docker"]) - ret.logging_handlers = handlers - ret.logging.info("Using cached Docker registry for OpenWhisk") + elif cached_docker_config: + OpenWhiskResources.initialize(ret, cached_docker_config) + ret.logging.info("Using cached Docker registry for OpenWhisk.") else: - ret = OpenWhiskResources() - ret.logging.info("Using default Docker registry for OpenWhisk.") - ret.logging_handlers = handlers - ret._registry_updated = True + # Defaults to None if no config provided and nothing in cache + ret.logging.info("Using default (None) Docker registry for OpenWhisk.") + ret._registry_updated = True # Considered "updated" as it's the first time or different from non-existence + ret.logging_handlers = handlers return ret def update_cache(self, cache: Cache): - super().update_cache(cache) - cache.update_config( - val=self.docker_registry, keys=["openwhisk", "resources", "docker", "registry"] - ) - cache.update_config( - val=self.docker_username, keys=["openwhisk", "resources", "docker", "username"] - ) - cache.update_config( - val=self.docker_password, keys=["openwhisk", "resources", "docker", "password"] - ) + """ + Update the cache with OpenWhisk resource details, including Docker registry. + + :param cache: Cache object. + """ + super().update_cache(cache) # Handles SelfHostedResources parts + docker_details = { + "registry": self.docker_registry, + "username": self.docker_username, + "password": self.docker_password, # Note: Storing passwords in cache might be a security concern. + } + cache.update_config_section(keys=["openwhisk", "resources", "docker"], section_dict=docker_details) + def serialize(self) -> dict: + """ + Serialize OpenWhiskResources to a dictionary. + + Includes Docker registry details and calls parent for self-hosted parts. 
+ + :return: Dictionary representation of OpenWhiskResources. + """ out: dict = { - **super().serialize(), - "docker_registry": self.docker_registry, - "docker_username": self.docker_username, - "docker_password": self.docker_password, + **super().serialize(), # Serializes SelfHostedResources parts + "docker": { # Nest docker details for better organization in cache + "registry": self.docker_registry, + "username": self.docker_username, + "password": self.docker_password, # Again, password in cache. + } } return out class OpenWhiskConfig(Config): - name: str - shutdownStorage: bool - cache: Cache + """ + Configuration for OpenWhisk deployments. - def __init__(self, config: dict, cache: Cache): + Includes settings for `wsk` CLI, experimental features, and management + of cluster and storage lifecycle. + """ + # Type hints for attributes specific to OpenWhiskConfig + shutdownStorage: bool + removeCluster: bool + wsk_exec: str + wsk_bypass_security: bool + experimentalManifest: bool + # cache is passed to __init__ but not stored as self.cache directly, used for cached_config in deserialize + # It's unusual for a config object to hold the cache client itself. + + def __init__(self, config_values: dict, cached_config_for_resources: Optional[dict] = None): + """ + Initialize OpenWhiskConfig. + + :param config_values: Dictionary of OpenWhisk specific configuration values. + :param cached_config_for_resources: Optional cached configuration for resources, + used if OpenWhiskResources needs it during init. + (Note: The original `cache` arg was unused in this method) + """ super().__init__(name="openwhisk") - self._credentials = OpenWhiskCredentials() + self._credentials = OpenWhiskCredentials() # OpenWhisk typically doesn't use SeBS-managed creds + # Resources are initialized here, potentially using parts of config_values or cached_config_for_resources + # This part is a bit complex due to how OpenWhiskResources.deserialize is structured. + # For simplicity, let's assume OpenWhiskResources can be default-initialized or needs specific dict. + # The deserialize method is the primary way resources get populated. self._resources = OpenWhiskResources() - self.shutdownStorage = config["shutdownStorage"] - self.removeCluster = config["removeCluster"] - self.wsk_exec = config["wskExec"] - self.wsk_bypass_security = config["wskBypassSecurity"] - self.experimentalManifest = config["experimentalManifest"] - self.cache = cache + + self.shutdownStorage = config_values.get("shutdownStorage", False) + self.removeCluster = config_values.get("removeCluster", False) + self.wsk_exec = config_values.get("wskExec", "wsk") # Default to 'wsk' + self.wsk_bypass_security = config_values.get("wskBypassSecurity", False) + self.experimentalManifest = config_values.get("experimentalManifest", False) + # self.cache = cache # Storing cache client in config is unusual. @property def credentials(self) -> OpenWhiskCredentials: + """Return the OpenWhiskCredentials instance.""" return self._credentials @property def resources(self) -> OpenWhiskResources: + """Return the OpenWhiskResources instance.""" return self._resources @staticmethod def initialize(cfg: Config, dct: dict): - pass + """ + Initialize OpenWhiskConfig attributes from a dictionary. + This method populates fields like `wsk_exec`, `shutdownStorage`, etc. + The base class `Config.initialize` handles `_region`, but OpenWhisk doesn't use region. + + :param cfg: Config object to initialize (cast to OpenWhiskConfig). 
+ :param dct: Dictionary containing OpenWhisk configuration values. + """ + ow_cfg = cast(OpenWhiskConfig, cfg) + # Call super to handle common parts like region, though OpenWhisk might not use it. + super(OpenWhiskConfig, OpenWhiskConfig).initialize(ow_cfg, dct if 'region' in dct else {'region': ''}) + + ow_cfg.shutdownStorage = dct.get("shutdownStorage", False) + ow_cfg.removeCluster = dct.get("removeCluster", False) + ow_cfg.wsk_exec = dct.get("wskExec", "wsk") + ow_cfg.wsk_bypass_security = dct.get("wskBypassSecurity", False) + ow_cfg.experimentalManifest = dct.get("experimentalManifest", False) + def serialize(self) -> dict: + """ + Serialize OpenWhiskConfig to a dictionary. + + :return: Dictionary representation of OpenWhiskConfig. + """ return { "name": "openwhisk", + "region": self._region, # Region is from base, may be empty "shutdownStorage": self.shutdownStorage, "removeCluster": self.removeCluster, "wskExec": self.wsk_exec, "wskBypassSecurity": self.wsk_bypass_security, "experimentalManifest": self.experimentalManifest, - "credentials": self._credentials.serialize(), + "credentials": self._credentials.serialize(), # Empty dict "resources": self._resources.serialize(), } @staticmethod def deserialize(config: dict, cache: Cache, handlers: LoggingHandlers) -> Config: - cached_config = cache.get_config("openwhisk") - resources = cast( - OpenWhiskResources, OpenWhiskResources.deserialize(config, cache, handlers) + """ + Deserialize an OpenWhiskConfig object. + + Populates settings from `config` (user input) and `cached_config` (if any). + Resources are deserialized separately. + + :param config: User-provided configuration dictionary for OpenWhisk. + :param cache: Cache client instance. + :param handlers: Logging handlers. + :return: An OpenWhiskConfig instance. + """ + cached_config = cache.get_config("openwhisk") # Entire 'openwhisk' section from cache + + # Create config object using user config first, then apply cache for some parts. + # If cached_config exists, it means some settings might have been persisted. + # We need to decide the override order: user_config > cached_config or vice-versa for some fields. + # Typically, user_config for settings like wskExec should override cache. + # Resources are handled by OpenWhiskResources.deserialize which has its own cache logic. + + # Use user 'config' for primary values, provide 'cached_config' for resource deserialization if needed. + config_to_init_with = {** (cached_config or {}), **config} + + + ow_config_obj = OpenWhiskConfig(config_to_init_with, cache) # Pass relevant dict + ow_config_obj.logging_handlers = handlers + + # Resources deserialization needs careful handling of what 'config' it gets. + # It should get the 'resources' part of the user config and the 'openwhisk' cache. + user_resources_config = config.get("resources", {}) + ow_config_obj._resources = cast( + OpenWhiskResources, OpenWhiskResources.deserialize(user_resources_config, cache, handlers) ) - - res = OpenWhiskConfig(config, cached_config) - res.logging_handlers = handlers - res._resources = resources - return res + + # Initialize other fields from combined config (user config takes precedence) + OpenWhiskConfig.initialize(ow_config_obj, config_to_init_with) + + return ow_config_obj def update_cache(self, cache: Cache): + """ + Update the cache with OpenWhiskConfig details. + + Saves settings like `wskExec`, `shutdownStorage`, etc., and calls + `resources.update_cache` for resource-specific details. + + :param cache: Cache object. 
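+
+ The resulting cache layout is roughly (illustrative, abbreviated)::
+
+     "openwhisk": {
+         "shutdownStorage": false,
+         "removeCluster": false,
+         "wskExec": "wsk",
+         "wskBypassSecurity": false,
+         "experimentalManifest": false,
+         "resources": {"docker": {"registry": ..., "username": ..., "password": ...}, ...}
+     }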
+ """ + # Base config like region (if used by OpenWhisk conceptually) + super().update_cache(cache) + cache.update_config(val=self.shutdownStorage, keys=["openwhisk", "shutdownStorage"]) cache.update_config(val=self.removeCluster, keys=["openwhisk", "removeCluster"]) cache.update_config(val=self.wsk_exec, keys=["openwhisk", "wskExec"]) @@ -190,4 +331,6 @@ def update_cache(self, cache: Cache): cache.update_config( val=self.experimentalManifest, keys=["openwhisk", "experimentalManifest"] ) + # Credentials for OpenWhisk are typically empty, so no specific cache update needed beyond base. + # self.credentials.update_cache(cache) # Would call empty update_cache if not overridden self.resources.update_cache(cache) diff --git a/sebs/openwhisk/container.py b/sebs/openwhisk/container.py index 2dd27717e..087c014c5 100644 --- a/sebs/openwhisk/container.py +++ b/sebs/openwhisk/container.py @@ -7,12 +7,20 @@ class OpenWhiskContainer(DockerContainer): + """ + Manages Docker container images for OpenWhisk actions. + + Extends the base DockerContainer class to provide OpenWhisk-specific + logic for determining registry and image names. + """ @staticmethod def name() -> str: + """Return the name of the FaaS platform (openwhisk).""" return "openwhisk" @staticmethod def typename() -> str: + """Return the type name of the OpenWhiskContainer class.""" return "OpenWhisk.Container" def __init__( @@ -22,25 +30,52 @@ def __init__( docker_client: docker.client, experimental_manifest: bool, ): + """ + Initialize OpenWhiskContainer. + + :param system_config: SeBS system configuration. + :param config: OpenWhisk-specific configuration. + :param docker_client: Docker client instance. + :param experimental_manifest: Flag to use experimental Docker manifest features. + """ super().__init__(system_config, docker_client, experimental_manifest) self.config = config def registry_name( self, benchmark: str, language_name: str, language_version: str, architecture: str ) -> Tuple[str, str, str, str]: + """ + Generate OpenWhisk-specific registry and image names. - registry_name = self.config.resources.docker_registry + Constructs the image URI, potentially re-tagging it if a custom Docker + registry is specified in the OpenWhisk configuration. - # We need to retag created images when pushing to registry other - # than default - repository_name = self.system_config.docker_repository() + :param benchmark: Name of the benchmark. + :param language_name: Name of the programming language. + :param language_version: Version of the programming language. + :param architecture: CPU architecture of the image. + :return: Tuple containing: + - registry_display_name (e.g., "Docker Hub" or custom registry URL) + - repository_name_for_image (e.g., {custom_registry}/{sebs_repository} or {sebs_repository}) + - image_tag (e.g., openwhisk-benchmark-python-3.8-x64) + - image_uri (fully qualified image URI for push/pull) + """ + registry_url = self.config.resources.docker_registry # Actual URL or None + + # `repository_name_on_registry` will be the full path on the registry if custom, + # otherwise it's just the SeBS default repository name (for Docker Hub). 
+ sebs_repository = self.system_config.docker_repository() image_tag = self.system_config.benchmark_image_tag( self.name(), benchmark, language_name, language_version, architecture ) - if registry_name is not None and registry_name != "": - repository_name = f"{registry_name}/{repository_name}" - else: - registry_name = "Docker Hub" - image_uri = f"{repository_name}:{image_tag}" - return registry_name, repository_name, image_tag, image_uri + if registry_url: # If a custom registry is specified + repository_name_on_registry = f"{registry_url}/{sebs_repository}" + registry_display_name = registry_url + else: # Default to Docker Hub + repository_name_on_registry = sebs_repository + registry_display_name = "Docker Hub" + + image_uri = f"{repository_name_on_registry}:{image_tag}" + + return registry_display_name, repository_name_on_registry, image_tag, image_uri diff --git a/sebs/openwhisk/function.py b/sebs/openwhisk/function.py index daf851ca6..f48f5a554 100644 --- a/sebs/openwhisk/function.py +++ b/sebs/openwhisk/function.py @@ -10,53 +10,136 @@ @dataclass class OpenWhiskFunctionConfig(FunctionConfig): + """ + Configuration specific to an OpenWhisk function. - # FIXME: merge with higher level abstraction for images + Extends the base FunctionConfig with OpenWhisk-specific attributes such as + Docker image name, namespace, and configurations for object and NoSQL storage + if they are self-hosted (e.g., Minio, ScyllaDB). + + Attributes: + docker_image: Name of the Docker image for the function. + namespace: OpenWhisk namespace (default is "_", the anonymous namespace). + object_storage: Optional MinioConfig if self-hosted Minio is used. + nosql_storage: Optional ScyllaDBConfig if self-hosted ScyllaDB is used. + """ + # FIXME: merge docker_image with higher level abstraction for images in FunctionConfig docker_image: str = "" - namespace: str = "_" + namespace: str = "_" # Default OpenWhisk namespace object_storage: Optional[MinioConfig] = None nosql_storage: Optional[ScyllaDBConfig] = None @staticmethod def deserialize(data: dict) -> OpenWhiskFunctionConfig: - keys = list(OpenWhiskFunctionConfig.__dataclass_fields__.keys()) - data = {k: v for k, v in data.items() if k in keys} - data["runtime"] = Runtime.deserialize(data["runtime"]) - data["object_storage"] = MinioConfig.deserialize(data["object_storage"]) - data["nosql_storage"] = ScyllaDBConfig.deserialize(data["nosql_storage"]) - return OpenWhiskFunctionConfig(**data) + """ + Deserialize an OpenWhiskFunctionConfig object from a dictionary. + + Handles deserialization of nested Runtime, MinioConfig, and ScyllaDBConfig objects. + + :param data: Dictionary containing OpenWhiskFunctionConfig data. + :return: An OpenWhiskFunctionConfig instance. 
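+
+ Example input (illustrative; base FunctionConfig fields such as memory and
+ timeout are elided)::
+
+     {
+         "runtime": {...},          # consumed by Runtime.deserialize()
+         "docker_image": "<registry>/<repository>:<tag>",
+         "namespace": "_",
+         "object_storage": {...},   # consumed by MinioConfig.deserialize()
+         "nosql_storage": {...}     # consumed by ScyllaDBConfig.deserialize()
+     }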
+ """ + # Filter for known fields to avoid errors with extra keys in data + known_keys = {field.name for field in OpenWhiskFunctionConfig.__dataclass_fields__.values()} + filtered_data = {k: v for k, v in data.items() if k in known_keys} + + filtered_data["runtime"] = Runtime.deserialize(filtered_data["runtime"]) + if "object_storage" in filtered_data and filtered_data["object_storage"] is not None: + filtered_data["object_storage"] = MinioConfig.deserialize(filtered_data["object_storage"]) + if "nosql_storage" in filtered_data and filtered_data["nosql_storage"] is not None: + filtered_data["nosql_storage"] = ScyllaDBConfig.deserialize(filtered_data["nosql_storage"]) + + return OpenWhiskFunctionConfig(**filtered_data) def serialize(self) -> dict: - return self.__dict__ + """ + Serialize the OpenWhiskFunctionConfig to a dictionary. + + Serializes nested MinioConfig and ScyllaDBConfig if they exist. + + :return: A dictionary representation of the OpenWhiskFunctionConfig. + """ + serialized_data = self.__dict__.copy() + if self.object_storage: + serialized_data["object_storage"] = self.object_storage.serialize() + if self.nosql_storage: + serialized_data["nosql_storage"] = self.nosql_storage.serialize() + # Runtime and Architecture are handled by FunctionConfig.serialize via super().serialize() in Function + return serialized_data @staticmethod def from_benchmark(benchmark: Benchmark) -> OpenWhiskFunctionConfig: - return super(OpenWhiskFunctionConfig, OpenWhiskFunctionConfig)._from_benchmark( - benchmark, OpenWhiskFunctionConfig - ) + """ + Create an OpenWhiskFunctionConfig instance from a Benchmark object. + + Uses the base class's `_from_benchmark` helper and casts to OpenWhiskFunctionConfig. + Docker image and namespace are typically set after this initial creation. + + :param benchmark: The Benchmark instance. + :return: An OpenWhiskFunctionConfig instance. + """ + # Call the base class's _from_benchmark using super() correctly + base_cfg = FunctionConfig._from_benchmark(benchmark, OpenWhiskFunctionConfig) + # Ensure all fields of OpenWhiskFunctionConfig are initialized, + # docker_image, namespace, object_storage, nosql_storage will have defaults from dataclass. + # Specific values for these would be set by the OpenWhisk deployment logic. + return base_cfg class OpenWhiskFunction(Function): + """ + Represents an OpenWhisk function (action). + + Extends the base Function class, using OpenWhiskFunctionConfig for its configuration. + """ def __init__( self, name: str, benchmark: str, code_package_hash: str, cfg: OpenWhiskFunctionConfig ): + """ + Initialize an OpenWhiskFunction instance. + + :param name: Name of the OpenWhisk action. + :param benchmark: Name of the benchmark this function belongs to. + :param code_package_hash: Hash of the deployed code package. + :param cfg: OpenWhiskFunctionConfig object. + """ super().__init__(benchmark, name, code_package_hash, cfg) @property def config(self) -> OpenWhiskFunctionConfig: + """The OpenWhisk-specific configuration for this function.""" return cast(OpenWhiskFunctionConfig, self._cfg) @staticmethod def typename() -> str: + """Return the type name of this function implementation.""" return "OpenWhisk.Function" def serialize(self) -> dict: - return {**super().serialize(), "config": self._cfg.serialize()} + """ + Serialize the OpenWhiskFunction instance to a dictionary. + + Ensures that the OpenWhisk-specific configuration is also serialized. + + :return: Dictionary representation of the OpenWhiskFunction. 
+ """ + # super().serialize() already includes self.config.serialize() + # No, Function.serialize() calls self.config.serialize(). + # If OpenWhiskFunctionConfig.serialize() is correctly implemented, this is fine. + return super().serialize() @staticmethod def deserialize(cached_config: dict) -> OpenWhiskFunction: - from sebs.faas.function import Trigger - from sebs.openwhisk.triggers import LibraryTrigger, HTTPTrigger + """ + Deserialize an OpenWhiskFunction instance from a dictionary. + + Typically used when loading function details from a cache. + + :param cached_config: Dictionary containing serialized OpenWhiskFunction data. + :return: A new OpenWhiskFunction instance. + """ + from sebs.faas.function import Trigger # Already imported at top-level + from sebs.openwhisk.triggers import LibraryTrigger, HTTPTrigger # Specific to OpenWhisk triggers cfg = OpenWhiskFunctionConfig.deserialize(cached_config["config"]) ret = OpenWhiskFunction( diff --git a/sebs/openwhisk/openwhisk.py b/sebs/openwhisk/openwhisk.py index 9c196fe25..3d25cf68c 100644 --- a/sebs/openwhisk/openwhisk.py +++ b/sebs/openwhisk/openwhisk.py @@ -13,14 +13,31 @@ from sebs.storage.resources import SelfHostedSystemResources from sebs.storage.minio import Minio from sebs.storage.scylladb import ScyllaDB +from sebs.gcp.function import GCPFunction # This import seems incorrect for OpenWhisk module from sebs.utils import LoggingHandlers from sebs.faas.config import Resources from .config import OpenWhiskConfig from .function import OpenWhiskFunction, OpenWhiskFunctionConfig -from ..config import SeBSConfig +from ..config import SeBSConfig # Relative import for SeBSConfig + + +""" +Apache OpenWhisk FaaS system implementation. + +This class provides the SeBS interface for interacting with an OpenWhisk deployment, +including action (function) deployment, invocation, resource management (primarily +self-hosted storage like Minio/ScyllaDB via SelfHostedSystemResources), and +interaction with the `wsk` CLI. +""" class OpenWhisk(System): + """ + Apache OpenWhisk FaaS system implementation. + + Manages actions (functions) and related resources on an OpenWhisk deployment. + Uses `wsk` CLI for deployment and management operations. + """ _config: OpenWhiskConfig def __init__( @@ -31,11 +48,24 @@ def __init__( docker_client: docker.client, logger_handlers: LoggingHandlers, ): + """ + Initialize OpenWhisk FaaS system. + + Sets up OpenWhisk-specific configurations, container client for Docker image + management (if custom images are used), and logs into the Docker registry + if credentials are provided. + + :param system_config: SeBS system configuration. + :param config: OpenWhisk-specific configuration. + :param cache_client: Function cache instance. + :param docker_client: Docker client instance. + :param logger_handlers: Logging handlers. 
+ """ super().__init__( system_config, cache_client, docker_client, - SelfHostedSystemResources( + SelfHostedSystemResources( # OpenWhisk uses self-hosted resources for storage/NoSQL "openwhisk", config, cache_client, docker_client, logger_handlers ), ) @@ -46,321 +76,428 @@ def __init__( self.system_config, self.config, self.docker_client, self.config.experimentalManifest ) - if self.config.resources.docker_username: - if self.config.resources.docker_registry: - docker_client.login( - username=self.config.resources.docker_username, - password=self.config.resources.docker_password, - registry=self.config.resources.docker_registry, - ) - else: - docker_client.login( - username=self.config.resources.docker_username, - password=self.config.resources.docker_password, - ) + # Login to Docker registry if credentials are configured + if self.config.resources.docker_username and self.config.resources.docker_password: + registry_url = self.config.resources.docker_registry + try: + if registry_url: + docker_client.login( + username=self.config.resources.docker_username, + password=self.config.resources.docker_password, + registry=registry_url, + ) + self.logging.info(f"Logged into Docker registry at {registry_url}") + else: # Default to Docker Hub + docker_client.login( + username=self.config.resources.docker_username, + password=self.config.resources.docker_password, + ) + self.logging.info("Logged into Docker Hub") + except docker.errors.APIError as e: + self.logging.error(f"Docker login failed: {e}") + # Depending on policy, might raise error or continue without push capability def initialize(self, config: Dict[str, str] = {}, resource_prefix: Optional[str] = None): + """ + Initialize OpenWhisk resources. + + Calls the base class method to initialize resources, which for OpenWhisk + primarily involves setting up self-hosted storage if configured. + + :param config: System-specific parameters (not used by OpenWhisk). + :param resource_prefix: Optional prefix for naming/selecting resources. + """ self.initialize_resources(select_prefix=resource_prefix) @property def config(self) -> OpenWhiskConfig: + """Return the OpenWhisk-specific configuration.""" return self._config def shutdown(self) -> None: - if hasattr(self, "storage") and self.config.shutdownStorage: - self.storage.stop() + """ + Shut down the OpenWhisk system interface. + + Stops self-hosted storage (Minio) if `shutdownStorage` is configured. + Optionally removes the OpenWhisk cluster if `removeCluster` is configured + (uses external tools). Updates the cache. 
+ """ + # Check if storage attribute exists and if shutdownStorage is true + if hasattr(self._system_resources, "get_storage"): # Check if storage system is initialized + storage_instance = self._system_resources.get_storage() + if isinstance(storage_instance, Minio) and self.config.shutdownStorage: + self.logging.info("Stopping Minio storage for OpenWhisk.") + storage_instance.stop() + # Similar check for NoSQL if OpenWhisk uses it and has a stop method + # if hasattr(self._system_resources, "get_nosql_storage"): + # nosql_instance = self._system_resources.get_nosql_storage() + # if isinstance(nosql_instance, ScyllaDB) and self.config.shutdownNoSQLStorage: # Hypothetical + # nosql_instance.stop() + if self.config.removeCluster: + self.logging.info("Attempting to remove OpenWhisk cluster.") from tools.openwhisk_preparation import delete_cluster # type: ignore - - delete_cluster() + try: + delete_cluster() + self.logging.info("OpenWhisk cluster removal process initiated.") + except Exception as e: + self.logging.error(f"Error during OpenWhisk cluster removal: {e}") super().shutdown() @staticmethod def name() -> str: + """Return the name of the cloud provider (openwhisk).""" return "openwhisk" @staticmethod - def typename(): + def typename() -> str: # Corrected from just typename() + """Return the type name of the cloud provider (OpenWhisk).""" return "OpenWhisk" @staticmethod def function_type() -> "Type[Function]": + """Return the type of the function implementation for OpenWhisk.""" return OpenWhiskFunction def get_wsk_cmd(self) -> List[str]: + """ + Construct the base command list for `wsk` CLI interactions. + + Includes the path to `wsk` executable and bypass security flag if configured. + + :return: List of command arguments for `wsk`. + """ cmd = [self.config.wsk_exec] if self.config.wsk_bypass_security: - cmd.append("-i") + cmd.append("-i") # Bypass certificate checking return cmd def package_code( self, directory: str, language_name: str, - language_version: str, - architecture: str, + language_version: str, # Not directly used by OpenWhisk packaging itself, but by image naming + architecture: str, # Used for Docker image naming benchmark: str, - is_cached: bool, - container_deployment: bool, + is_cached: bool, # Used for Docker image building logic + container_deployment: bool, # OpenWhisk primarily uses container deployments ) -> Tuple[str, int, str]: - - # Regardless of Docker image status, we need to create .zip file - # to allow registration of function with OpenWhisk + """ + Package benchmark code for OpenWhisk. + + Builds a Docker base image for the function if not already cached and available. + Then, creates a zip file containing only the main handler (`__main__.py` or `index.js`) + as required by OpenWhisk for action creation when using custom Docker images. + The Docker image URI is returned, which will be used when creating the action. + + :param directory: Path to the benchmark code directory. + :param language_name: Programming language name. + :param language_version: Programming language version. + :param architecture: Target CPU architecture for the Docker image. + :param benchmark: Benchmark name. + :param is_cached: Whether the Docker image is considered cached. + :param container_deployment: Must be True for OpenWhisk custom runtimes. + :return: Tuple containing: + - Path to the created zip file (containing only the handler). + - Size of the zip file in bytes. + - Docker image URI for the function. 
+ :raises ValueError: If container_deployment is False (not typical for SeBS OpenWhisk). + """ + if not container_deployment: + # OpenWhisk with SeBS typically relies on custom Docker images for runtimes. + # While OpenWhisk supports non-Docker actions, SeBS is geared towards Docker for consistency. + self.logging.warning("Non-container deployment requested for OpenWhisk, this is unusual for SeBS.") + # Proceeding, but the action creation might need a different --kind parameter. + + # Build or ensure Docker image for the action's runtime _, image_uri = self.container_client.build_base_image( directory, language_name, language_version, architecture, benchmark, is_cached ) - # We deploy Minio config in code package since this depends on local - # deployment - it cannnot be a part of Docker image - CONFIG_FILES = { - "python": ["__main__.py"], - "nodejs": ["index.js"], + # OpenWhisk requires a zip file for the action, even if using a custom Docker image. + # This zip should contain the main executable file. + HANDLER_FILES = { + "python": "__main__.py", # OpenWhisk Python convention + "nodejs": "index.js", # OpenWhisk Node.js convention } - package_config = CONFIG_FILES[language_name] + handler_file = HANDLER_FILES[language_name] + + # Create a zip containing only the handler file. + # The actual benchmark code and dependencies are in the Docker image. + benchmark_archive_path = os.path.join(directory, f"{benchmark}_action.zip") + with zipfile.ZipFile(benchmark_archive_path, "w") as zf: + handler_path_in_benchmark_dir = os.path.join(directory, handler_file) + if os.path.exists(handler_path_in_benchmark_dir): + zf.write(handler_path_in_benchmark_dir, arcname=handler_file) + else: + # This case should not happen if benchmark template is correct. + # Create an empty file if handler is missing, though action would fail. + self.logging.warning(f"Handler file {handler_file} not found in {directory}. Creating empty zip entry.") + zf.writestr(handler_file, "") + + + self.logging.info(f"Created action zip {benchmark_archive_path}") + bytes_size = os.path.getsize(benchmark_archive_path) + self.logging.info(f"Action zip archive size {bytes_size / 1024.0 / 1024.0:.2f} MB") + + return benchmark_archive_path, bytes_size, image_uri - benchmark_archive = os.path.join(directory, f"{benchmark}.zip") - subprocess.run( - ["zip", benchmark_archive] + package_config, stdout=subprocess.DEVNULL, cwd=directory - ) - self.logging.info(f"Created {benchmark_archive} archive") - bytes_size = os.path.getsize(benchmark_archive) - self.logging.info("Zip archive size {:2f} MB".format(bytes_size / 1024.0 / 1024.0)) - return benchmark_archive, bytes_size, image_uri def storage_arguments(self, code_package: Benchmark) -> List[str]: - envs = [] + """ + Generate `wsk action create/update` parameters for self-hosted storage. - if self.config.resources.storage_config: + Constructs a list of `-p KEY VALUE` arguments for Minio and ScyllaDB + connection details if they are configured and used by the benchmark. + :param code_package: The Benchmark object. + :return: List of string arguments for `wsk` CLI. 
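+
+ Example output (illustrative values)::
+
+     ["-p", "MINIO_STORAGE_SECRET_KEY", "<secret>",
+      "-p", "MINIO_STORAGE_ACCESS_KEY", "<access-key>",
+      "-p", "MINIO_STORAGE_CONNECTION_URL", "<minio-host>:9000",
+      "-p", "NOSQL_STORAGE_TABLE_<logical-name>", "<actual-table-name>"]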
+ """ + params = [] # Changed name from envs to params for clarity, as these are -p args + + if self.config.resources.storage_config: storage_envs = self.config.resources.storage_config.envs() - envs = [ - "-p", - "MINIO_STORAGE_SECRET_KEY", - storage_envs["MINIO_SECRET_KEY"], - "-p", - "MINIO_STORAGE_ACCESS_KEY", - storage_envs["MINIO_ACCESS_KEY"], - "-p", - "MINIO_STORAGE_CONNECTION_URL", - storage_envs["MINIO_ADDRESS"], - ] + params.extend([ + "-p", "MINIO_STORAGE_SECRET_KEY", storage_envs["MINIO_SECRET_KEY"], + "-p", "MINIO_STORAGE_ACCESS_KEY", storage_envs["MINIO_ACCESS_KEY"], + "-p", "MINIO_STORAGE_CONNECTION_URL", storage_envs["MINIO_ADDRESS"], + ]) if code_package.uses_nosql: - nosql_storage = self.system_resources.get_nosql_storage() for key, value in nosql_storage.envs().items(): - envs.append("-p") - envs.append(key) - envs.append(value) - - for original_name, actual_name in nosql_storage.get_tables( - code_package.benchmark - ).items(): - envs.append("-p") - envs.append(f"NOSQL_STORAGE_TABLE_{original_name}") - envs.append(actual_name) - - return envs + params.extend(["-p", key, value]) + for original_name, actual_name in nosql_storage.get_tables(code_package.benchmark).items(): + params.extend(["-p", f"NOSQL_STORAGE_TABLE_{original_name}", actual_name]) + return params def create_function( self, code_package: Benchmark, func_name: str, - container_deployment: bool, - container_uri: str, + container_deployment: bool, # Should be True for OpenWhisk with SeBS + container_uri: str, # Docker image URI from package_code ) -> "OpenWhiskFunction": - self.logging.info("Creating function as an action in OpenWhisk.") + """ + Create or update an OpenWhisk action. + + Checks if an action with the given name already exists. If so, it updates it. + Otherwise, a new action is created using the provided Docker image URI and + other benchmark configurations (memory, timeout, storage parameters). + + :param code_package: Benchmark object with code and configuration. + :param func_name: Desired name for the action. + :param container_deployment: Flag for container deployment (expected to be True). + :param container_uri: Docker image URI for the action's runtime. + :return: OpenWhiskFunction object. + :raises RuntimeError: If `wsk` CLI command fails or is not found. 
+ """ + self.logging.info(f"Creating OpenWhisk action {func_name} using image {container_uri}.") try: - actions = subprocess.run( - [*self.get_wsk_cmd(), "action", "list"], - stderr=subprocess.DEVNULL, - stdout=subprocess.PIPE, - ) - - function_found = False - docker_image = "" - for line in actions.stdout.decode().split("\n"): - if line and func_name in line.split()[0]: - function_found = True - break - - function_cfg = OpenWhiskFunctionConfig.from_benchmark(code_package) - function_cfg.object_storage = cast(Minio, self.system_resources.get_storage()).config - function_cfg.nosql_storage = cast( - ScyllaDB, self.system_resources.get_nosql_storage() - ).config + # Check if action already exists + list_cmd = [*self.get_wsk_cmd(), "action", "list"] + actions_list_process = subprocess.run(list_cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, text=True) + if actions_list_process.returncode != 0: + self.logging.error(f"Failed to list actions: {actions_list_process.stderr}") + raise RuntimeError("wsk action list command failed.") + + function_found = any(func_name in line.split(None, 1)[0] for line in actions_list_process.stdout.splitlines() if line) + + function_config = OpenWhiskFunctionConfig.from_benchmark(code_package) + if self.config.resources.storage_config: # If Minio is configured + function_config.object_storage = cast(Minio, self.system_resources.get_storage()).config + if code_package.uses_nosql and self.config.resources.nosql_storage_config: # If ScyllaDB is configured + function_config.nosql_storage = cast(ScyllaDB, self.system_resources.get_nosql_storage()).config + function_config.docker_image = container_uri # Store the image used + + openwhisk_func = OpenWhiskFunction(func_name, code_package.benchmark, code_package.hash, function_config) + if function_found: - # docker image is overwritten by the update - res = OpenWhiskFunction( - func_name, code_package.benchmark, code_package.hash, function_cfg - ) - # Update function - we don't know what version is stored - self.logging.info(f"Retrieved existing OpenWhisk action {func_name}.") - self.update_function(res, code_package, container_deployment, container_uri) + self.logging.info(f"Action {func_name} already exists, updating it.") + self.update_function(openwhisk_func, code_package, container_deployment, container_uri) else: + self.logging.info(f"Creating new OpenWhisk action {func_name}.") + action_cmd = [ + *self.get_wsk_cmd(), "action", "create", func_name, + "--web", "true", # Make it a web action for HTTP trigger + "--docker", container_uri, + "--memory", str(code_package.benchmark_config.memory), + "--timeout", str(code_package.benchmark_config.timeout * 1000), # OpenWhisk timeout is in ms + *self.storage_arguments(code_package), + code_package.code_location, # Path to the small zip file + ] try: - self.logging.info(f"Creating new OpenWhisk action {func_name}") - docker_image = self.system_config.benchmark_image_name( - self.name(), - code_package.benchmark, - code_package.language_name, - code_package.language_version, - code_package.architecture, - ) - subprocess.run( - [ - *self.get_wsk_cmd(), - "action", - "create", - func_name, - "--web", - "true", - "--docker", - docker_image, - "--memory", - str(code_package.benchmark_config.memory), - "--timeout", - str(code_package.benchmark_config.timeout * 1000), - *self.storage_arguments(code_package), - code_package.code_location, - ], - stderr=subprocess.PIPE, - stdout=subprocess.PIPE, - check=True, - ) - function_cfg.docker_image = docker_image - res = OpenWhiskFunction( 
- func_name, code_package.benchmark, code_package.hash, function_cfg - ) + subprocess.run(action_cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, check=True, text=True) except subprocess.CalledProcessError as e: - self.logging.error(f"Cannot create action {func_name}.") - self.logging.error(f"Output: {e.stderr.decode('utf-8')}") + self.logging.error(f"Cannot create action {func_name}: {e.stderr}") raise RuntimeError(e) + + except FileNotFoundError: # wsk executable not found + self.logging.error(f"wsk CLI not found at {self.config.wsk_exec}. Please ensure it's installed and in PATH or configured correctly.") + raise RuntimeError("wsk CLI not found.") - except FileNotFoundError: - self.logging.error("Could not retrieve OpenWhisk functions - is path to wsk correct?") - raise RuntimeError("Failed to access wsk binary") + # Add default LibraryTrigger + library_trigger = LibraryTrigger(func_name, self.get_wsk_cmd()) + library_trigger.logging_handlers = self.logging_handlers + openwhisk_func.add_trigger(library_trigger) + + # HTTP trigger is created by --web true, now associate it in SeBS + self.create_trigger(openwhisk_func, Trigger.TriggerType.HTTP) - # Add LibraryTrigger to a new function - trigger = LibraryTrigger(func_name, self.get_wsk_cmd()) - trigger.logging_handlers = self.logging_handlers - res.add_trigger(trigger) - - return res + return openwhisk_func def update_function( self, function: Function, code_package: Benchmark, - container_deployment: bool, - container_uri: str, + container_deployment: bool, # Expected to be True + container_uri: str, # New Docker image URI ): - self.logging.info(f"Update an existing OpenWhisk action {function.name}.") - function = cast(OpenWhiskFunction, function) - docker_image = self.system_config.benchmark_image_name( - self.name(), - code_package.benchmark, - code_package.language_name, - code_package.language_version, - code_package.architecture, - ) + """ + Update an existing OpenWhisk action. + + Uses `wsk action update` with the new Docker image URI, code package (zip), + and configuration parameters. + + :param function: The OpenWhiskFunction object to update. + :param code_package: Benchmark object with new code and configuration. + :param container_deployment: Flag for container deployment. + :param container_uri: New Docker image URI. + :raises RuntimeError: If `wsk` CLI command fails or is not found. 
+ """ + self.logging.info(f"Updating existing OpenWhisk action {function.name} with image {container_uri}.") + openwhisk_func = cast(OpenWhiskFunction, function) + + # Update function configuration from benchmark, as it might have changed + new_config = OpenWhiskFunctionConfig.from_benchmark(code_package) + if self.config.resources.storage_config: + new_config.object_storage = cast(Minio, self.system_resources.get_storage()).config + if code_package.uses_nosql and self.config.resources.nosql_storage_config: + new_config.nosql_storage = cast(ScyllaDB, self.system_resources.get_nosql_storage()).config + new_config.docker_image = container_uri + openwhisk_func._cfg = new_config # Update the function's internal config + + action_cmd = [ + *self.get_wsk_cmd(), "action", "update", function.name, + "--web", "true", + "--docker", container_uri, + "--memory", str(code_package.benchmark_config.memory), + "--timeout", str(code_package.benchmark_config.timeout * 1000), + *self.storage_arguments(code_package), + code_package.code_location, # Path to the action's zip file + ] try: - subprocess.run( - [ - *self.get_wsk_cmd(), - "action", - "update", - function.name, - "--web", - "true", - "--docker", - docker_image, - "--memory", - str(code_package.benchmark_config.memory), - "--timeout", - str(code_package.benchmark_config.timeout * 1000), - *self.storage_arguments(code_package), - code_package.code_location, - ], - stderr=subprocess.PIPE, - stdout=subprocess.PIPE, - check=True, - ) - function.config.docker_image = docker_image - + subprocess.run(action_cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, check=True, text=True) + openwhisk_func.config.docker_image = container_uri # Ensure config reflects the new image except FileNotFoundError as e: - self.logging.error("Could not update OpenWhisk function - is path to wsk correct?") + self.logging.error(f"wsk CLI not found at {self.config.wsk_exec} during update.") raise RuntimeError(e) except subprocess.CalledProcessError as e: - self.logging.error(f"Unknown error when running function update: {e}!") - self.logging.error("Make sure to remove SeBS cache after restarting OpenWhisk!") - self.logging.error(f"Output: {e.stderr.decode('utf-8')}") + self.logging.error(f"Error updating action {function.name}: {e.stderr}") + self.logging.error("Consider removing SeBS cache (.sebs.cache) if issues persist after OpenWhisk restart.") raise RuntimeError(e) def update_function_configuration(self, function: Function, code_package: Benchmark): - self.logging.info(f"Update configuration of an existing OpenWhisk action {function.name}.") + """ + Update the configuration (memory, timeout, parameters) of an existing OpenWhisk action. + This does not update the action's code or Docker image. + + :param function: The OpenWhiskFunction object whose configuration is to be updated. + :param code_package: Benchmark object providing the new configuration values. + :raises RuntimeError: If `wsk` CLI command fails or is not found. 
+ """ + self.logging.info(f"Updating configuration of OpenWhisk action {function.name}.") + # Update the function's internal config object first + function_cfg = cast(OpenWhiskFunctionConfig, function.config) + function_cfg.memory = code_package.benchmark_config.memory + function_cfg.timeout = code_package.benchmark_config.timeout + # Re-evaluate storage arguments as they might depend on benchmark config + storage_args = self.storage_arguments(code_package) + + action_cmd = [ + *self.get_wsk_cmd(), "action", "update", function.name, + "--memory", str(function_cfg.memory), + "--timeout", str(function_cfg.timeout * 1000), + *storage_args + ] try: - subprocess.run( - [ - *self.get_wsk_cmd(), - "action", - "update", - function.name, - "--memory", - str(code_package.benchmark_config.memory), - "--timeout", - str(code_package.benchmark_config.timeout * 1000), - *self.storage_arguments(code_package), - ], - stderr=subprocess.PIPE, - stdout=subprocess.PIPE, - check=True, - ) + subprocess.run(action_cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, check=True, text=True) except FileNotFoundError as e: - self.logging.error("Could not update OpenWhisk function - is path to wsk correct?") + self.logging.error(f"wsk CLI not found at {self.config.wsk_exec} during config update.") raise RuntimeError(e) except subprocess.CalledProcessError as e: - self.logging.error(f"Unknown error when running function update: {e}!") - self.logging.error("Make sure to remove SeBS cache after restarting OpenWhisk!") - self.logging.error(f"Output: {e.stderr.decode('utf-8')}") + self.logging.error(f"Error updating action configuration for {function.name}: {e.stderr}") raise RuntimeError(e) def is_configuration_changed(self, cached_function: Function, benchmark: Benchmark) -> bool: + """ + Check if the function's configuration has changed compared to the benchmark. + + Compares memory, timeout, and storage configurations. + + :param cached_function: The cached OpenWhiskFunction object. + :param benchmark: The Benchmark object with current settings. + :return: True if configuration has changed, False otherwise. + """ changed = super().is_configuration_changed(cached_function, benchmark) + openwhisk_func = cast(OpenWhiskFunction, cached_function) - storage = cast(Minio, self.system_resources.get_storage()) - function = cast(OpenWhiskFunction, cached_function) - # check if now we're using a new storage - if function.config.object_storage != storage.config: - self.logging.info( - "Updating function configuration due to changed storage configuration." - ) - changed = True - function.config.object_storage = storage.config - - nosql_storage = cast(ScyllaDB, self.system_resources.get_nosql_storage()) - function = cast(OpenWhiskFunction, cached_function) - # check if now we're using a new storage - if function.config.nosql_storage != nosql_storage.config: - self.logging.info( - "Updating function configuration due to changed NoSQL storage configuration." 
- ) + # Check object storage config + if self.config.resources.storage_config: + current_minio_config = cast(Minio, self.system_resources.get_storage()).config + if openwhisk_func.config.object_storage != current_minio_config: + self.logging.info("Object storage configuration changed.") + changed = True + openwhisk_func.config.object_storage = current_minio_config + elif openwhisk_func.config.object_storage is not None: # Was configured, now it's not + self.logging.info("Object storage configuration removed.") changed = True - function.config.nosql_storage = nosql_storage.config - + openwhisk_func.config.object_storage = None + + # Check NoSQL storage config + if benchmark.uses_nosql and self.config.resources.nosql_storage_config: + current_nosql_config = cast(ScyllaDB, self.system_resources.get_nosql_storage()).config + if openwhisk_func.config.nosql_storage != current_nosql_config: + self.logging.info("NoSQL storage configuration changed.") + changed = True + openwhisk_func.config.nosql_storage = current_nosql_config + elif openwhisk_func.config.nosql_storage is not None: # Was configured, now it's not (or benchmark no longer uses nosql) + self.logging.info("NoSQL storage configuration removed or benchmark no longer uses NoSQL.") + changed = True + openwhisk_func.config.nosql_storage = None return changed def default_function_name( self, code_package: Benchmark, resources: Optional[Resources] = None ) -> str: - resource_id = resources.resources_id if resources else self.config.resources.resources_id + """ + Generate a default name for an OpenWhisk action. + + The name includes SeBS prefix, resource ID, benchmark name, language, and version. + + :param code_package: The Benchmark object. + :param resources: Optional Resources object (uses self.config.resources if None). + :return: The generated default action name. + """ + # Use self.config.resources if resources parameter is None + current_resources = resources if resources else self.config.resources return ( - f"sebs-{resource_id}-{code_package.benchmark}-" + f"sebs-{current_resources.resources_id}-{code_package.benchmark}-" f"{code_package.language_name}-{code_package.language_version}" ) def enforce_cold_start(self, functions: List[Function], code_package: Benchmark): - raise NotImplementedError() + """ + Enforce a cold start for OpenWhisk actions. + Note: True cold start enforcement is challenging in OpenWhisk without + administrative control over the cluster or specific runtime behaviors. + This method is currently not implemented. + + :param functions: List of functions. + :param code_package: Benchmark object. + :raises NotImplementedError: This feature is not implemented. + """ + raise NotImplementedError("Cold start enforcement is not implemented for OpenWhisk.") def download_metrics( self, @@ -370,40 +507,109 @@ def download_metrics( requests: Dict[str, ExecutionResult], metrics: dict, ): + """ + Download metrics for OpenWhisk actions. + OpenWhisk standardly provides some metrics in activation records. + This method could be extended to parse `wsk activation logs` or use + OpenWhisk monitoring APIs if available and configured. + Currently, it's a placeholder. + + :param function_name: Name of the action. + :param start_time: Start timestamp for querying metrics. + :param end_time: End timestamp for querying metrics. + :param requests: Dictionary of request IDs to ExecutionResult objects. + :param metrics: Dictionary to store additional metrics. 
+ """ + # Metrics like execution time, init time are often part of activation record. + # SeBS's OpenWhisk LibraryTrigger already parses some of this from invocation result. + # This method could be used for more detailed/batch metric collection if needed. + self.logging.info(f"Metrics download for OpenWhisk function {function_name} requested but not fully implemented beyond activation record parsing.") pass def create_trigger(self, function: Function, trigger_type: Trigger.TriggerType) -> Trigger: + """ + Create a trigger for an OpenWhisk action. + + Supports Library triggers (default, created with function) and HTTP triggers + (retrieves the web action URL). + + :param function: The OpenWhiskFunction object. + :param trigger_type: The type of trigger to create. + :return: The created Trigger object. + :raises RuntimeError: If `wsk` CLI fails or an unsupported trigger type is requested. + """ if trigger_type == Trigger.TriggerType.LIBRARY: - return function.triggers(Trigger.TriggerType.LIBRARY)[0] + # Library triggers are usually created and associated during function creation. + # Return existing one if found, otherwise log warning. + existing_triggers = function.triggers(Trigger.TriggerType.LIBRARY) + if existing_triggers: + return existing_triggers[0] + else: + self.logging.warning(f"LibraryTrigger requested for {function.name} but not found. One should be added during function creation.") + # Fallback: attempt to create and add one, though this might indicate an issue in the creation flow. + lib_trigger = LibraryTrigger(function.name, self.get_wsk_cmd()) + lib_trigger.logging_handlers = self.logging_handlers + function.add_trigger(lib_trigger) + self.cache_client.update_function(function) + return lib_trigger + elif trigger_type == Trigger.TriggerType.HTTP: try: - response = subprocess.run( - [*self.get_wsk_cmd(), "action", "get", function.name, "--url"], - stdout=subprocess.PIPE, - stderr=subprocess.DEVNULL, - check=True, - ) - except FileNotFoundError as e: - self.logging.error( - "Could not retrieve OpenWhisk configuration - is path to wsk correct?" - ) - raise RuntimeError(e) - stdout = response.stdout.decode("utf-8") - url = stdout.strip().split("\n")[-1] + ".json" - trigger = HTTPTrigger(function.name, url) - trigger.logging_handlers = self.logging_handlers - function.add_trigger(trigger) - self.cache_client.update_function(function) - return trigger + action_get_cmd = [*self.get_wsk_cmd(), "action", "get", function.name, "--url"] + response = subprocess.run(action_get_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, text=True) + except FileNotFoundError: + self.logging.error(f"wsk CLI not found at {self.config.wsk_exec} when creating HTTP trigger.") + raise RuntimeError("wsk CLI not found.") + except subprocess.CalledProcessError as e: + self.logging.error(f"Failed to get URL for action {function.name}: {e.stderr}") + raise RuntimeError(f"Failed to get action URL: {e.stderr}") + + # Output of `wsk action get --url` is typically "ok: got action X, URL: https://..." + # Need to parse the URL carefully. + url_line = response.stdout.strip().split("\n")[-1] # Get the last line which should contain the URL + if "https://" in url_line: + # A common format is "ok: got action function_name, web action via https://..." + # Or directly "https://..." + url = url_line.split("https://")[-1] + if not url.startswith("https://"): + url = "https://" + url + # OpenWhisk web actions often append .json or similar for content type negotiation, + # but the base URL is what's needed. 
SeBS HTTP client handles adding .json if required by endpoint. + # However, `wsk action get --url` usually gives the direct invokable URL. + # The original code added ".json", which might be specific to how their actions were written + # or how they expected to call them. For generic web actions, this might not be needed. + # Let's keep it for now to match old behavior. + if not url.endswith(".json"): + url += ".json" + http_trigger = HTTPTrigger(function.name, url) # HTTPTrigger constructor might need adjustment if it takes name + http_trigger.logging_handlers = self.logging_handlers + function.add_trigger(http_trigger) + self.cache_client.update_function(function) + return http_trigger + else: + raise RuntimeError(f"Could not parse HTTP trigger URL from wsk output: {response.stdout}") else: - raise RuntimeError("Not supported!") + raise RuntimeError(f"Unsupported trigger type {trigger_type.value} for OpenWhisk.") + def cached_function(self, function: Function): + """ + Configure a cached OpenWhiskFunction instance. + + Sets up logging handlers for its library and HTTP triggers and ensures + the `wsk` command is set for library triggers. + + :param function: The OpenWhiskFunction object retrieved from cache. + """ for trigger in function.triggers(Trigger.TriggerType.LIBRARY): trigger.logging_handlers = self.logging_handlers cast(LibraryTrigger, trigger).wsk_cmd = self.get_wsk_cmd() for trigger in function.triggers(Trigger.TriggerType.HTTP): trigger.logging_handlers = self.logging_handlers + # HTTPTrigger URL should be correct from deserialization if it was stored. + # If not, it might need re-fetching if function was just deserialized without full context. + # However, create_trigger is usually called to establish it. def disable_rich_output(self): + """Disable rich progress bar output for the container client (Docker operations).""" self.container_client.disable_rich_output = True diff --git a/sebs/regression.py b/sebs/regression.py index 579760a1c..2c1c359ff 100644 --- a/sebs/regression.py +++ b/sebs/regression.py @@ -12,6 +12,19 @@ if TYPE_CHECKING: from sebs import SeBS + from sebs.faas.system import System as FaaSSystem # For type hinting get_deployment return + + +""" +This module defines test sequences for regression testing SeBS on various +cloud providers (AWS, Azure, GCP, OpenWhisk). It uses a metaclass +`TestSequenceMeta` to dynamically generate test methods for different benchmarks, +programming languages, architectures, and deployment types (package/container). + +The main entry point is `regression_suite`, which constructs a test suite +based on user configuration and runs it using `testtools.ConcurrentStreamTestSuite` +for parallel execution of tests. +""" benchmarks_python = [ "110.dynamic-html", @@ -52,28 +65,70 @@ def __init__( attrs, benchmarks, architectures, - deployments, - deployment_name, - triggers, + deployments: List[str], + deployment_name: str, + triggers: List[Trigger.TriggerType], ): + """ + Initialize the TestSequenceMeta metaclass. + + Stores deployment name and trigger types as class attributes, which are + then accessible by the generated test methods and other class methods. + + :param name: Name of the class being created. + :param bases: Base classes of the class being created. + :param attrs: Attributes of the class being created. + :param benchmarks: List of benchmark names to generate tests for. + :param architectures: List of architectures to test. + :param deployments: List of deployment types (e.g., "package", "container"). 
+        :param deployment_name: Name of the cloud deployment (e.g., "aws").
+        :param triggers: List of Trigger.TriggerType enums to test.
+        """
         type.__init__(cls, name, bases, attrs)
         cls.deployment_name = deployment_name
         cls.triggers = triggers
 
     def __new__(
         mcs,
-        name,
-        bases,
-        dict,
-        benchmarks,
-        architectures,
-        deployments,
-        deployment_name,
-        triggers,
+        name: str,
+        bases: tuple,
+        attrs: dict,  # Renamed from dict to attrs to avoid shadowing the builtin
+        benchmarks: List[str],
+        architectures: List[str],
+        deployments: List[str],
+        deployment_name: str,
+        triggers: List[Trigger.TriggerType],
     ):
-        def gen_test(benchmark_name, architecture, deployment_type):
-            def test(self):
-                log_name = f"Regression-{deployment_name}-{benchmark_name}-{deployment_type}"
+        """
+        Dynamically create test methods for each combination of benchmark,
+        architecture, and deployment type.
+
+        Each generated test method (e.g., `test_aws_010.sleep_x64_package`)
+        performs a regression test for that specific configuration by:
+        1. Setting up a logger and experiment configuration.
+        2. Obtaining and initializing a deployment client.
+        3. Preparing the benchmark and its input.
+        4. Invoking the function using the specified trigger types.
+        5. Reporting success or failure.
+
+        A class-level lock (`cls.lock`) and a configuration placeholder (`cls.cfg`)
+        are also added to the new class, intended for managing shared resources
+        like a common Azure CLI instance across tests in a sequence.
+
+        :param name: Name of the class to be created.
+        :param bases: Base classes.
+        :param attrs: Class attributes dictionary to which test methods will be added.
+        :param benchmarks: List of benchmark names.
+        :param architectures: List of architectures.
+        :param deployments: List of deployment types.
+        :param deployment_name: Name of the cloud deployment.
+        :param triggers: List of trigger types to test.
+        :return: A new class with dynamically generated test methods.
+        """
+        def gen_test(benchmark_name_arg: str, architecture_arg: str, deployment_type_arg: str):
+            # Inner test function, forms the body of each generated test method
+            def test(self: unittest.TestCase):  # `self` is an instance of the generated test class
+                log_name = f"Regression-{deployment_name}-{benchmark_name_arg}-{architecture_arg}-{deployment_type_arg}"
                 logger = logging.getLogger(log_name)
                 logger.setLevel(logging.INFO)
                 logging_wrapper = ColoredWrapper(log_name, logger)
@@ -147,9 +202,9 @@ def test(self):
             test_name += f"_{architecture}_{deployment_type}"
-            dict[test_name] = gen_test(benchmark, architecture, deployment_type)
+            attrs[test_name] = gen_test(benchmark, architecture, deployment_type)
 
-        dict["lock"] = threading.Lock()
-        dict["cfg"] = None
-        return type.__new__(mcs, name, bases, dict)
+        attrs["lock"] = threading.Lock()  # Class-level lock for shared resources
+        attrs["cfg"] = None  # Placeholder for shared deployment config (e.g. Azure CLI)
+        return type.__new__(mcs, name, bases, attrs)
 
 
 class AWSTestSequencePython(
@@ -161,15 +216,30 @@ class AWSTestSequencePython(
     deployment_name="aws",
     triggers=[Trigger.TriggerType.LIBRARY, Trigger.TriggerType.HTTP],
 ):
+    """
+    Test sequence for Python benchmarks on AWS.
+    Dynamically generates test methods for combinations of Python benchmarks,
+    AWS architectures (x64, arm64), and deployment types (package, container).
+    Tests both Library and HTTP triggers.
+    """
 
     @property
     def typename(self) -> str:
+        """Return a type name for this test sequence, used for identification."""
         return "AWSTestPython"
 
-    def get_deployment(self, benchmark_name, architecture, deployment_type):
-        deployment_name = "aws"
-        assert cloud_config
+    def get_deployment(self, benchmark_name: str, architecture: str, deployment_type: str) -> "FaaSSystem":
+        """
+        Get and initialize a deployment client for AWS for a specific test case.
+
+        :param benchmark_name: Name of the benchmark being tested.
+        :param architecture: CPU architecture for the test.
+        :param deployment_type: Deployment type ("package" or "container").
+        :return: Initialized AWS deployment client (an instance of `sebs.aws.AWS`).
+        """
+        deployment_name = "aws"
+        assert cloud_config is not None, "Global cloud_config not set for regression tests"
 
-        f = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log"
+        log_file_name = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log"
         deployment_client = self.client.get_deployment(
             cloud_config,
-            logging_filename=os.path.join(self.client.output_dir, f),
+            logging_filename=os.path.join(self.client.output_dir, log_file_name),
@@ -189,10 +259,23 @@ class AWSTestSequenceNodejs(
     deployment_name="aws",
     triggers=[Trigger.TriggerType.LIBRARY, Trigger.TriggerType.HTTP],
 ):
-    def get_deployment(self, benchmark_name, architecture, deployment_type):
+    """
+    Test sequence for Node.js benchmarks on AWS.
+    Dynamically generates test methods for combinations of Node.js benchmarks,
+    AWS architectures, and deployment types. Tests both Library and HTTP triggers.
+    """
+    def get_deployment(self, benchmark_name: str, architecture: str, deployment_type: str) -> "FaaSSystem":
+        """
+        Get and initialize a deployment client for AWS for a specific test case.
+
+        :param benchmark_name: Name of the benchmark being tested.
+        :param architecture: CPU architecture for the test.
+        :param deployment_type: Deployment type ("package" or "container").
+        :return: Initialized AWS deployment client.
+        """
         deployment_name = "aws"
-        assert cloud_config
-        f = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log"
+        assert cloud_config is not None, "Global cloud_config not set for regression tests"
+        log_file_name = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log"
         deployment_client = self.client.get_deployment(
             cloud_config,
-            logging_filename=os.path.join(self.client.output_dir, f),
+            logging_filename=os.path.join(self.client.output_dir, log_file_name),
@@ -211,26 +294,43 @@ class AzureTestSequencePython(
     deployment_name="azure",
     triggers=[Trigger.TriggerType.HTTP],
 ):
-    def get_deployment(self, benchmark_name, architecture, deployment_type):
+    """
+    Test sequence for Python benchmarks on Azure.
+    Dynamically generates test methods for Python benchmarks on Azure.
+    Manages a shared AzureCLI instance to optimize Azure operations across tests.
+    Tests HTTP triggers.
+    """
+    def get_deployment(self, benchmark_name: str, architecture: str, deployment_type: str) -> "FaaSSystem":
+        """
+        Get and initialize a deployment client for Azure.
+        Manages a shared AzureCLI instance for tests within this sequence.
+
+        :param benchmark_name: Name of the benchmark.
+        :param architecture: CPU architecture.
+        :param deployment_type: Deployment type ("package" or "container").
+        :return: Initialized Azure deployment client.
+        """
         deployment_name = "azure"
-        assert cloud_config
-        with AzureTestSequencePython.lock:
-            if not AzureTestSequencePython.cfg:
-                AzureTestSequencePython.cfg = self.client.get_deployment_config(
-                    cloud_config["deployment"],
+        assert cloud_config is not None, "Global cloud_config not set for regression tests"
+        with AzureTestSequencePython.lock:  # type: ignore
+            if not AzureTestSequencePython.cfg:  # type: ignore
+                AzureTestSequencePython.cfg = self.client.get_deployment_config(  # type: ignore
+                    cloud_config["deployment"],  # full deployment section, as expected by Config.deserialize
                     logging_filename=os.path.join(
-                        self.client.output_dir,
-                        f"regression_{deployment_name}_{benchmark_name}_{architecture}.log",
+                        self.client.output_dir,  # type: ignore
+                        f"regression_{deployment_name}_shared_cli_config.log",  # Log for shared components
                     ),
                 )
-            if not hasattr(AzureTestSequencePython, "cli"):
-                AzureTestSequencePython.cli = AzureCLI(
-                    self.client.config, self.client.docker_client
+            if not hasattr(AzureTestSequencePython, "cli"):  # type: ignore
+                # Ensure system_config is passed to AzureCLI if it expects SeBSConfig
+                azure_system_config = self.client.config  # type: ignore
+                AzureTestSequencePython.cli = AzureCLI(  # type: ignore
+                    azure_system_config, self.client.docker_client  # type: ignore
                 )
 
-        f = f"regression_{deployment_name}_{benchmark_name}_"
-        f += f"{architecture}_{deployment_type}.log"
+        log_file_name = f"regression_{deployment_name}_{benchmark_name}_"
+        log_file_name += f"{architecture}_{deployment_type}.log"
         deployment_client = self.client.get_deployment(
             cloud_config,
-            logging_filename=os.path.join(self.client.output_dir, f),
+            logging_filename=os.path.join(self.client.output_dir, log_file_name),
@@ -252,23 +352,38 @@ class AzureTestSequenceNodejs(
     deployment_name="azure",
     triggers=[Trigger.TriggerType.HTTP],
 ):
-    def get_deployment(self, benchmark_name, architecture, deployment_type):
+    """
+    Test sequence for Node.js benchmarks on Azure.
+    Dynamically generates test methods for Node.js benchmarks on Azure.
+    Manages a shared AzureCLI instance. Tests HTTP triggers.
+    """
+    def get_deployment(self, benchmark_name: str, architecture: str, deployment_type: str) -> "FaaSSystem":
+        """
+        Get and initialize a deployment client for Azure.
+        Manages a shared AzureCLI instance for tests within this sequence.
+
+        :param benchmark_name: Name of the benchmark.
+        :param architecture: CPU architecture.
+        :param deployment_type: Deployment type ("package" or "container").
+        :return: Initialized Azure deployment client.
+        """
         deployment_name = "azure"
-        assert cloud_config
-        with AzureTestSequenceNodejs.lock:
-            if not AzureTestSequenceNodejs.cfg:
-                AzureTestSequenceNodejs.cfg = self.client.get_deployment_config(
-                    cloud_config["deployment"],
-                    logging_filename=f"regression_{deployment_name}_{benchmark_name}.log",
+        assert cloud_config is not None, "Global cloud_config not set for regression tests"
+        with AzureTestSequenceNodejs.lock:  # type: ignore
+            if not AzureTestSequenceNodejs.cfg:  # type: ignore
+                AzureTestSequenceNodejs.cfg = self.client.get_deployment_config(  # type: ignore
+                    cloud_config["deployment"],  # full deployment section, as expected by Config.deserialize
+                    logging_filename=f"regression_{deployment_name}_shared_cli_config.log",
                 )
-            if not hasattr(AzureTestSequenceNodejs, "cli"):
-                AzureTestSequenceNodejs.cli = AzureCLI(
-                    self.client.config, self.client.docker_client
+            if not hasattr(AzureTestSequenceNodejs, "cli"):  # type: ignore
+                azure_system_config = self.client.config  # type: ignore
+                AzureTestSequenceNodejs.cli = AzureCLI(  # type: ignore
+                    azure_system_config, self.client.docker_client  # type: ignore
                 )
 
-        f = f"regression_{deployment_name}_{benchmark_name}_"
-        f += f"{architecture}_{deployment_type}.log"
+        log_file_name = f"regression_{deployment_name}_{benchmark_name}_"
+        log_file_name += f"{architecture}_{deployment_type}.log"
         deployment_client = self.client.get_deployment(
             cloud_config,
-            logging_filename=os.path.join(self.client.output_dir, f),
+            logging_filename=os.path.join(self.client.output_dir, log_file_name),
@@ -288,10 +403,23 @@ class GCPTestSequencePython(
     deployment_name="gcp",
     triggers=[Trigger.TriggerType.HTTP],
 ):
-    def get_deployment(self, benchmark_name, architecture, deployment_type):
+    """
+    Test sequence for Python benchmarks on GCP.
+    Dynamically generates test methods for Python benchmarks on GCP.
+    Tests HTTP triggers.
+    """
+    def get_deployment(self, benchmark_name: str, architecture: str, deployment_type: str) -> "FaaSSystem":
+        """
+        Get and initialize a deployment client for GCP.
+
+        :param benchmark_name: Name of the benchmark.
+        :param architecture: CPU architecture.
+        :param deployment_type: Deployment type ("package" or "container").
+        :return: Initialized GCP deployment client.
+        """
         deployment_name = "gcp"
-        assert cloud_config
-        f = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log"
+        assert cloud_config is not None, "Global cloud_config not set for regression tests"
+        log_file_name = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log"
         deployment_client = self.client.get_deployment(
             cloud_config,
-            logging_filename=os.path.join(self.client.output_dir, f),
+            logging_filename=os.path.join(self.client.output_dir, log_file_name),
@@ -310,10 +438,23 @@ class GCPTestSequenceNodejs(
     deployment_name="gcp",
     triggers=[Trigger.TriggerType.HTTP],
 ):
-    def get_deployment(self, benchmark_name, architecture, deployment_type):
+    """
+    Test sequence for Node.js benchmarks on GCP.
+    Dynamically generates test methods for Node.js benchmarks on GCP.
+    Tests HTTP triggers.
+    """
+    def get_deployment(self, benchmark_name: str, architecture: str, deployment_type: str) -> "FaaSSystem":
+        """
+        Get and initialize a deployment client for GCP.
+
+        :param benchmark_name: Name of the benchmark.
+        :param architecture: CPU architecture.
+        :param deployment_type: Deployment type ("package" or "container").
+        :return: Initialized GCP deployment client.
+        """
         deployment_name = "gcp"
-        assert cloud_config
-        f = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log"
+        assert cloud_config is not None, "Global cloud_config not set for regression tests"
+        log_file_name = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log"
         deployment_client = self.client.get_deployment(
             cloud_config,
-            logging_filename=os.path.join(self.client.output_dir, f),
+            logging_filename=os.path.join(self.client.output_dir, log_file_name),
@@ -332,15 +473,33 @@ class OpenWhiskTestSequencePython(
     deployment_name="openwhisk",
     triggers=[Trigger.TriggerType.HTTP],
 ):
-    def get_deployment(self, benchmark_name, architecture, deployment_type):
+    """
+    Test sequence for Python benchmarks on OpenWhisk.
+    Dynamically generates test methods for Python benchmarks on OpenWhisk.
+    Tests HTTP triggers.
+    """
+    def get_deployment(self, benchmark_name: str, architecture: str, deployment_type: str) -> "FaaSSystem":
+        """
+        Get and initialize a deployment client for OpenWhisk.
+
+        Modifies a copy of the global cloud configuration to set architecture
+        and container deployment type for this specific test.
+
+        :param benchmark_name: Name of the benchmark.
+        :param architecture: CPU architecture.
+        :param deployment_type: Deployment type ("package" or "container").
+        :return: Initialized OpenWhisk deployment client.
+        """
         deployment_name = "openwhisk"
-        assert cloud_config
+        assert cloud_config is not None, "Global cloud_config not set for regression tests"
 
-        config_copy = cloud_config.copy()
+        # Create a deep copy to avoid modifying the global config for other tests
+        config_copy = json.loads(json.dumps(cloud_config))
         config_copy["experiments"]["architecture"] = architecture
-        config_copy["experiments"]["container_deployment"] = deployment_type == "container"
+        # OpenWhisk in SeBS typically uses container deployment
+        config_copy["experiments"]["container_deployment"] = (deployment_type == "container")
 
-        f = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log"
+        log_file_name = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log"
         deployment_client = self.client.get_deployment(
             config_copy,
-            logging_filename=os.path.join(self.client.output_dir, f),
+            logging_filename=os.path.join(self.client.output_dir, log_file_name),
@@ -359,15 +518,31 @@ class OpenWhiskTestSequenceNodejs(
     deployment_name="openwhisk",
     triggers=[Trigger.TriggerType.HTTP],
 ):
-    def get_deployment(self, benchmark_name, architecture, deployment_type):
+    """
+    Test sequence for Node.js benchmarks on OpenWhisk.
+    Dynamically generates test methods for Node.js benchmarks on OpenWhisk.
+    Tests HTTP triggers.
+    """
+    def get_deployment(self, benchmark_name: str, architecture: str, deployment_type: str) -> "FaaSSystem":
+        """
+        Get and initialize a deployment client for OpenWhisk.
+
+        Modifies a copy of the global cloud configuration to set architecture
+        and container deployment type for this specific test.
+
+        :param benchmark_name: Name of the benchmark.
+        :param architecture: CPU architecture.
+        :param deployment_type: Deployment type ("package" or "container").
+        :return: Initialized OpenWhisk deployment client.
+        """
         deployment_name = "openwhisk"
-        assert cloud_config
+        assert cloud_config is not None, "Global cloud_config not set for regression tests"
 
-        config_copy = cloud_config.copy()
+        config_copy = json.loads(json.dumps(cloud_config))
         config_copy["experiments"]["architecture"] = architecture
-        config_copy["experiments"]["container_deployment"] = deployment_type == "container"
+        config_copy["experiments"]["container_deployment"] = (deployment_type == "container")
 
-        f = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log"
+        log_file_name = f"regression_{deployment_name}_{benchmark_name}_{architecture}_{deployment_type}.log"
         deployment_client = self.client.get_deployment(
             config_copy,
-            logging_filename=os.path.join(self.client.output_dir, f),
+            logging_filename=os.path.join(self.client.output_dir, log_file_name),
@@ -380,143 +555,223 @@ def get_deployment(self, benchmark_name, architecture, deployment_type):
 
 
 # https://stackoverflow.com/questions/22484805/a-simple-working-example-for-testtools-concurrentstreamtestsuite
 class TracingStreamResult(testtools.StreamResult):
     all_correct: bool
-    output: Dict[str, bytes] = {}
+    output: Dict[str, bytes]  # Stores output bytes for failed tests, keyed by test_id
 
     def __init__(self):
+        """Initialize TracingStreamResult, setting all_correct to True and preparing sets for results."""
+        super().__init__()  # Ensure parent StreamResult is initialized
         self.all_correct = True
-        self.success = set()
-        self.failures = set()
+        self.success: Set[str] = set()
+        self.failures: Set[str] = set()
+        self.output: Dict[str, bytes] = {}
+        # no way to directly access test instance from here
 
     def status(self, *args, **kwargs):
-        self.all_correct = self.all_correct and (kwargs["test_status"] in ["inprogress", "success"])
+        """
+        Process the status of a test execution.
+
+        Updates `all_correct` flag, records successes and failures, and captures
+        output for failed tests.
+
+        :param args: Positional arguments passed by testtools.
+        :param kwargs: Keyword arguments including 'test_id', 'test_status', 'file_bytes'.
+ """ + super().status(*args, **kwargs) # Call parent status method + + current_test_status = kwargs.get("test_status") + test_id = kwargs.get("test_id", "unknown_test") + + self.all_correct = self.all_correct and (current_test_status in [None, "inprogress", "success"]) + + # Extract a more readable test name if possible (e.g., from test_id) + try: + # Assuming test_id format like test_deployment_benchmark_arch_deploytype + parts = test_id.split('_') + if len(parts) >= 5: # test_method_deployment_benchmark_arch_deploytype + # Example: benchmark_name from parts[-4], arch from parts[-2], deploy_type from parts[-1] + # This parsing is fragile and depends heavily on test_name format from TestSequenceMeta + test_name_short = f"{parts[-3]}, {parts[-2]}, {parts[-1]}" + else: + test_name_short = test_id + except Exception: + test_name_short = test_id - bench, arch, deployment_type = kwargs["test_id"].split("_")[-3:None] - test_name = f"{bench}, {arch}, {deployment_type}" - if not kwargs["test_status"]: - test_id = kwargs["test_id"] + if current_test_status is None: # File bytes are being streamed if test_id not in self.output: self.output[test_id] = b"" - self.output[test_id] += kwargs["file_bytes"] - elif kwargs["test_status"] == "fail": + self.output[test_id] += kwargs.get("file_bytes", b"") + elif current_test_status == "fail": print("\n-------------\n") - print("{0[test_id]}: {0[test_status]}".format(kwargs)) - print("{0[test_id]}: {1}".format(kwargs, self.output[kwargs["test_id"]].decode())) + print(f"{test_id}: {current_test_status}") + # Ensure output for this test_id is decoded if it exists + failure_output = self.output.get(test_id, b"").decode(errors='replace') + print(f"{test_id}: {failure_output}") print("\n-------------\n") - self.failures.add(test_name) - elif kwargs["test_status"] == "success": - self.success.add(test_name) + self.failures.add(test_name_short) + elif current_test_status == "success": + self.success.add(test_name_short) + # Clean up output for successful tests to save memory + if test_id in self.output: + del self.output[test_id] def filter_out_benchmarks( - benchmark: str, + benchmark_name_in_test_id: str, # The full test_id string, e.g. test_aws_010.sleep_x64_package deployment_name: str, language: str, language_version: str, architecture: str, ) -> bool: + """ + Filter out benchmarks that are known to be unsupported or problematic + for specific deployment configurations. + + :param benchmark_name_in_test_id: The full name of the test method, which includes benchmark name. + :param deployment_name: Name of the FaaS deployment. + :param language: Programming language. + :param language_version: Language runtime version. + :param architecture: CPU architecture. + :return: True if the benchmark should be run, False if it should be filtered out. + """ # fmt: off + # Example: Filter out 411.image-recognition for newer Python versions on AWS + # The benchmark_name_in_test_id needs to be parsed to get the actual benchmark identifier + # For simplicity, assuming benchmark_name_in_test_id contains the benchmark string directly. + # A more robust way would be to pass the benchmark identifier itself. 
if (deployment_name == "aws" and language == "python" - and language_version in ["3.9", "3.10", "3.11"]): - return "411.image-recognition" not in benchmark + and language_version in ["3.9", "3.10", "3.11", "3.12"]): # Added 3.12 as example + if "411.image-recognition" in benchmark_name_in_test_id: + return False if (deployment_name == "aws" and architecture == "arm64"): - return "411.image-recognition" not in benchmark + if "411.image-recognition" in benchmark_name_in_test_id: # Example filter for ARM + return False if (deployment_name == "gcp" and language == "python" and language_version in ["3.8", "3.9", "3.10", "3.11", "3.12"]): - return "411.image-recognition" not in benchmark + if "411.image-recognition" in benchmark_name_in_test_id: + return False # fmt: on - return True + return True # Default to run if no filter matches def regression_suite( sebs_client: "SeBS", - experiment_config: dict, - providers: Set[str], - deployment_config: dict, - benchmark_name: Optional[str] = None, -): + experiment_config: dict, # This is likely ExperimentConfig instance or dict representation + providers_to_test: Set[str], # Renamed from providers + deployment_user_config: dict, # Renamed from deployment_config + target_benchmark_name: Optional[str] = None, # Renamed from benchmark_name +) -> bool: + """ + Construct and run a regression test suite. + + Dynamically creates test cases for specified providers, languages, benchmarks, etc., + based on the loaded SeBS and experiment configurations. Uses `testtools` for + concurrent test execution and collects results. + + :param sebs_client: The main SeBS client instance. + :param experiment_config: Dictionary representing the experiment configuration. + :param providers_to_test: Set of FaaS provider names to include in the test suite. + :param deployment_user_config: Dictionary with deployment-specific configurations from user. + :param target_benchmark_name: Optional name of a single benchmark to run. If None, runs all. + :return: True if any test failed, False if all tests passed. 
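+
+    Illustrative call (assuming ``config`` is the parsed user configuration
+    dict with ``deployment`` and ``experiments`` sections)::
+
+        failed = regression_suite(
+            sebs_client, config["experiments"], {"aws"}, config, "110.dynamic-html"
+        )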
+ """ suite = unittest.TestSuite() - global cloud_config - cloud_config = deployment_config - - language = experiment_config["runtime"]["language"] - language_version = experiment_config["runtime"]["version"] - architecture = experiment_config["architecture"] - - if "aws" in providers: - assert "aws" in cloud_config["deployment"] - if language == "python": + global cloud_config # Store the user's deployment config globally for test cases to access + cloud_config = deployment_user_config + + # Extract common parameters from experiment_config + runtime_lang = experiment_config.get("runtime", {}).get("language") + runtime_version = experiment_config.get("runtime", {}).get("version") + target_architecture = experiment_config.get("architecture") + + if not all([runtime_lang, runtime_version, target_architecture]): + logging.error("Missing runtime language, version, or architecture in experiment_config.") + return True # Indicate failure due to bad config + + # Add test sequences for selected providers and languages + if "aws" in providers_to_test: + assert "aws" in deployment_user_config["deployment"], "AWS config missing in deployment section" + if runtime_lang == "python": suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(AWSTestSequencePython)) - elif language == "nodejs": + elif runtime_lang == "nodejs": suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(AWSTestSequenceNodejs)) - if "gcp" in providers: - assert "gcp" in cloud_config["deployment"] - if language == "python": + + if "gcp" in providers_to_test: + assert "gcp" in deployment_user_config["deployment"], "GCP config missing in deployment section" + if runtime_lang == "python": suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(GCPTestSequencePython)) - elif language == "nodejs": + elif runtime_lang == "nodejs": suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(GCPTestSequenceNodejs)) - if "azure" in providers: - assert "azure" in cloud_config["deployment"] - if language == "python": + + if "azure" in providers_to_test: + assert "azure" in deployment_user_config["deployment"], "Azure config missing in deployment section" + if runtime_lang == "python": suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(AzureTestSequencePython)) - elif language == "nodejs": + elif runtime_lang == "nodejs": suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(AzureTestSequenceNodejs)) - if "openwhisk" in providers: - assert "openwhisk" in cloud_config["deployment"] - if language == "python": - suite.addTest( - unittest.defaultTestLoader.loadTestsFromTestCase(OpenWhiskTestSequencePython) - ) - elif language == "nodejs": - suite.addTest( - unittest.defaultTestLoader.loadTestsFromTestCase(OpenWhiskTestSequenceNodejs) - ) - - tests = [] - # mypy is confused here - for case in suite: - for test in case: # type: ignore - # skip - test_name = cast(unittest.TestCase, test)._testMethodName + + if "openwhisk" in providers_to_test: + assert "openwhisk" in deployment_user_config["deployment"], "OpenWhisk config missing in deployment section" + if runtime_lang == "python": + suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(OpenWhiskTestSequencePython)) + elif runtime_lang == "nodejs": + suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(OpenWhiskTestSequenceNodejs)) + + # Filter tests based on benchmark_name and unsupported configurations + filtered_tests = [] + for test_case_class_instance in suite: # Iterating through TestSuite gives TestCase instances + for 
individual_test_method in test_case_class_instance: # Iterating through TestCase gives test methods + test_method_name = cast(unittest.TestCase, individual_test_method)._testMethodName + + # Get deployment_name from the test class itself (set by metaclass) + current_test_deployment_name = getattr(individual_test_method.__class__, 'deployment_name', 'unknown') - # Remove unsupported benchmarks if not filter_out_benchmarks( - test_name, - test.deployment_name, # type: ignore - language, # type: ignore - language_version, - architecture, # type: ignore + test_method_name, # Contains benchmark identifier + current_test_deployment_name, + runtime_lang, + runtime_version, + target_architecture, ): - print(f"Skip test {test_name} - not supported.") + print(f"Skipping test {test_method_name} - filtered out as unsupported/problematic.") continue - # Use only a selected benchmark - if not benchmark_name or (benchmark_name and benchmark_name in test_name): - test.client = sebs_client # type: ignore - test.experiment_config = experiment_config.copy() # type: ignore - tests.append(test) + if not target_benchmark_name or (target_benchmark_name and target_benchmark_name in test_method_name): + # Inject SeBS client and experiment config into each test instance + setattr(individual_test_method, 'client', sebs_client) + setattr(individual_test_method, 'experiment_config', experiment_config.copy()) + filtered_tests.append(individual_test_method) else: - print(f"Skip test {test_name}") - - concurrent_suite = testtools.ConcurrentStreamTestSuite(lambda: ((test, None) for test in tests)) - result = TracingStreamResult() - result.startTestRun() - concurrent_suite.run(result) - result.stopTestRun() - print(f"Succesfully executed {len(result.success)} out of {len(tests)} functions") - for suc in result.success: - print(f"- {suc}") - if len(result.failures): - print(f"Failures when executing {len(result.failures)} out of {len(tests)} functions") - for failure in result.failures: - print(f"- {failure}") - - if hasattr(AzureTestSequenceNodejs, "cli"): - AzureTestSequenceNodejs.cli.shutdown() - if hasattr(AzureTestSequencePython, "cli"): - AzureTestSequencePython.cli.shutdown() - - return not result.all_correct + print(f"Skipping test {test_method_name} - does not match target benchmark '{target_benchmark_name}'.") + + if not filtered_tests: + logging.warning("No tests selected to run after filtering. 
Check configuration and benchmark name.") + return False # No failures if no tests run + + # Run the filtered tests concurrently + concurrent_suite = testtools.ConcurrentStreamTestSuite(lambda: ((test, None) for test in filtered_tests)) + stream_result_collector = TracingStreamResult() + stream_result_collector.startTestRun() + concurrent_suite.run(stream_result_collector) + stream_result_collector.stopTestRun() + + print(f"\nSuccessfully executed {len(stream_result_collector.success)} out of {len(filtered_tests)} tests:") + for success_info in sorted(list(stream_result_collector.success)): + print(f" - PASSED: {success_info}") + + if stream_result_collector.failures: + print(f"\nFailures in {len(stream_result_collector.failures)} out of {len(filtered_tests)} tests:") + for failure_info in sorted(list(stream_result_collector.failures)): + print(f" - FAILED: {failure_info}") + # Detailed output for failures is already printed by TracingStreamResult.status + + # Shutdown shared resources like AzureCLI if they were initialized by test sequences + if hasattr(AzureTestSequenceNodejs, "cli") and AzureTestSequenceNodejs.cli: # type: ignore + AzureTestSequenceNodejs.cli.shutdown() # type: ignore + if hasattr(AzureTestSequencePython, "cli") and AzureTestSequencePython.cli: # type: ignore + AzureTestSequencePython.cli.shutdown() # type: ignore + + return not stream_result_collector.all_correct diff --git a/sebs/sebs.py b/sebs/sebs.py index 309c0b253..9983b5068 100644 --- a/sebs/sebs.py +++ b/sebs/sebs.py @@ -20,37 +20,60 @@ class SeBS(LoggingBase): + """ + The main SeBS client class. + + This class orchestrates benchmark execution, FaaS deployment interactions, + experiment management, and result processing. It initializes and holds + clients for Docker, caching, and system-wide configurations. + """ @property def cache_client(self) -> Cache: + """The SeBS cache client instance.""" return self._cache_client @property - def docker_client(self) -> docker.client: + def docker_client(self) -> docker.client.DockerClient: # More specific type + """The Docker client instance.""" return self._docker_client @property def output_dir(self) -> str: + """The base directory for SeBS outputs and results.""" return self._output_dir @property def verbose(self) -> bool: + """Flag indicating if verbose logging is enabled.""" return self._verbose @property def logging_filename(self) -> Optional[str]: + """The filename for logging, if configured.""" return self._logging_filename @property def config(self) -> SeBSConfig: + """The global SeBS system configuration instance.""" return self._config def generate_logging_handlers(self, logging_filename: Optional[str] = None) -> LoggingHandlers: - filename = logging_filename if logging_filename else self.logging_filename - if filename in self._handlers: - return self._handlers[filename] + """ + Generate or retrieve logging handlers for a given filename. + + Caches handlers by filename to avoid redundant creation. + + :param logging_filename: Optional filename for the log. If None, uses the default. + :return: LoggingHandlers instance. + """ + filename_key = logging_filename if logging_filename else self.logging_filename + # Use a different key for None to avoid issues if filename could be "" + # For simplicity, assuming logging_filename being None means default log. 
+ if filename_key in self._handlers: + return self._handlers[filename_key] else: - handlers = LoggingHandlers(verbose=self.verbose, filename=filename) - self._handlers[filename] = handlers + handlers = LoggingHandlers(verbose=self.verbose, filename=filename_key) + self._handlers[filename_key] = handlers return handlers def __init__( @@ -60,177 +83,272 @@ def __init__( verbose: bool = False, logging_filename: Optional[str] = None, ): + """ + Initialize the SeBS client. + + Sets up Docker client, cache client, system configuration, output directory, + and logging handlers. + + :param cache_dir: Path to the directory for SeBS cache. + :param output_dir: Path to the base directory for SeBS outputs. + :param verbose: Enable verbose logging if True. + :param logging_filename: Optional filename for logging output. + """ super().__init__() - self._docker_client = docker.from_env() + self._docker_client: docker.client.DockerClient = docker.from_env() self._cache_client = Cache(cache_dir, self._docker_client) self._config = SeBSConfig() - self._output_dir = output_dir + self._output_dir = os.path.abspath(output_dir) # Ensure absolute path self._verbose = verbose self._logging_filename = logging_filename - self._handlers: Dict[Optional[str], LoggingHandlers] = {} - self.logging_handlers = self.generate_logging_handlers() + self._handlers: Dict[Optional[str], LoggingHandlers] = {} # Cache for logging handlers + self.logging_handlers = self.generate_logging_handlers() # Initialize default handlers os.makedirs(self.output_dir, exist_ok=True) def ignore_cache(self): """ - The cache will only store code packages, - and won't update new functions and storage. + Configure the cache client to ignore (not update) function and storage details. + Code packages might still be stored or updated based on internal cache logic. """ self._cache_client.ignore_storage = True self._cache_client.ignore_functions = True def get_deployment( self, - config: dict, + user_config: dict, # Renamed from config for clarity logging_filename: Optional[str] = None, - deployment_config: Optional[Config] = None, + # Allow passing an already deserialized deployment_config to avoid re-parsing + deployment_faas_config: Optional[Config] = None, # Renamed ) -> FaaSSystem: - dep_config = config["deployment"] - name = dep_config["name"] - implementations: Dict[str, Type[FaaSSystem]] = {"local": Local} - + """ + Get and initialize a FaaS deployment client based on the provided configuration. + + Dynamically loads the appropriate FaaS system implementation (e.g., AWS, Azure) + and initializes it with configurations. + + :param user_config: Dictionary containing user-provided configuration, + expected to have a "deployment" key for FaaS system details + and an "experiments" key for experiment-related settings. + :param logging_filename: Optional filename for logs specific to this deployment. + :param deployment_faas_config: Optional pre-deserialized FaaS Config object. + :return: An initialized FaaSSystem instance for the specified provider. + :raises RuntimeError: If the deployment name is not supported or if configuration is invalid. 
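+
+        Illustrative usage (assuming ``sebs_client`` is an initialized ``SeBS``
+        instance and ``user_config`` is a parsed configuration dict with
+        ``deployment`` and ``experiments`` sections)::
+
+            deployment = sebs_client.get_deployment(user_config)
+            deployment.initialize()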
+ """ + deployment_settings = user_config["deployment"] + deployment_name = deployment_settings["name"] + + # Map deployment names to their respective System classes + faas_implementations: Dict[str, Type[FaaSSystem]] = {"local": Local} if has_platform("aws"): from sebs.aws import AWS - - implementations["aws"] = AWS + faas_implementations["aws"] = AWS if has_platform("azure"): from sebs.azure.azure import Azure - - implementations["azure"] = Azure + faas_implementations["azure"] = Azure if has_platform("gcp"): from sebs.gcp import GCP - - implementations["gcp"] = GCP + faas_implementations["gcp"] = GCP if has_platform("openwhisk"): from sebs.openwhisk import OpenWhisk - - implementations["openwhisk"] = OpenWhisk - - if name not in implementations: - raise RuntimeError("Deployment {name} not supported!".format(name=name)) - - if config["experiments"]["architecture"] not in self._config.supported_architecture(name): - raise RuntimeError( - "{architecture} is not supported in {name}".format( - architecture=config["experiments"]["architecture"], name=name - ) - ) - - if config["experiments"][ - "container_deployment" - ] and not self._config.supported_container_deployment(name): - raise RuntimeError(f"Container deployment is not supported in {name}.") - - if not config["experiments"][ - "container_deployment" - ] and not self._config.supported_package_deployment(name): - raise RuntimeError(f"Code package deployment is not supported in {name}.") - - # FIXME: future annotations, requires Python 3.7+ - handlers = self.generate_logging_handlers(logging_filename) - if not deployment_config: - deployment_config = Config.deserialize(dep_config, self.cache_client, handlers) - - deployment_client = implementations[name]( - self._config, - deployment_config, # type: ignore + faas_implementations["openwhisk"] = OpenWhisk + + if deployment_name not in faas_implementations: + raise RuntimeError(f"Deployment {deployment_name} not supported!") + + exp_config_dict = user_config.get("experiments", {}) + target_architecture = exp_config_dict.get("architecture") + is_container_deployment = exp_config_dict.get("container_deployment", False) + + if not target_architecture or target_architecture not in self._config.supported_architecture(deployment_name): + raise RuntimeError(f"Architecture {target_architecture} is not supported in {deployment_name}") + + if is_container_deployment and not self._config.supported_container_deployment(deployment_name): + raise RuntimeError(f"Container deployment is not supported in {deployment_name}.") + if not is_container_deployment and not self._config.supported_package_deployment(deployment_name): + raise RuntimeError(f"Code package deployment is not supported in {deployment_name}.") + + current_logging_handlers = self.generate_logging_handlers(logging_filename) + + # Deserialize FaaS specific config if not already provided + if not deployment_faas_config: + # Config.deserialize expects the full config dict for the specific deployment + # e.g., if deployment_settings = {"name": "aws", "region": "us-east-1", ...}, + # it needs this dict. 
+ deployment_faas_config = Config.deserialize(deployment_settings, self.cache_client, current_logging_handlers) + + deployment_client_instance = faas_implementations[deployment_name]( + self._config, # Global SeBSConfig + deployment_faas_config, # Provider-specific Config self.cache_client, self.docker_client, - handlers, + current_logging_handlers, ) - return deployment_client + return deployment_client_instance def get_deployment_config( self, - config: dict, + deployment_settings: dict, # Renamed from config for clarity logging_filename: Optional[str] = None, ) -> Config: - handlers = self.generate_logging_handlers(logging_filename) - return Config.deserialize(config, self.cache_client, handlers) + """ + Deserialize a FaaS deployment configuration. + + :param deployment_settings: Dictionary containing the deployment-specific configuration. + :param logging_filename: Optional filename for logs. + :return: A deserialized Config object for the FaaS provider. + """ + current_logging_handlers = self.generate_logging_handlers(logging_filename) + return Config.deserialize(deployment_settings, self.cache_client, current_logging_handlers) + + def get_experiment_config(self, full_user_config: dict) -> ExperimentConfig: + """ + Deserialize the experiment-specific part of the user configuration. + + :param full_user_config: The complete user-provided configuration dictionary. + Expected to have an "experiments" key. + :return: An ExperimentConfig instance. + """ + # ExperimentConfig.deserialize expects the content of the "experiments" key + return ExperimentConfig.deserialize(full_user_config.get("experiments", {})) - def get_experiment_config(self, config: dict) -> ExperimentConfig: - return ExperimentConfig.deserialize(config) def get_experiment( - self, experiment_type: str, config: dict, logging_filename: Optional[str] = None + self, experiment_name: str, user_config: dict, logging_filename: Optional[str] = None ) -> Experiment: - from sebs.experiments import ( - Experiment, - PerfCost, - NetworkPingPong, - InvocationOverhead, - EvictionModel, + """ + Get an instance of a specified experiment type. + + :param experiment_name: The name of the experiment to get (e.g., "perf-cost"). + :param user_config: User-provided configuration dictionary. + :param logging_filename: Optional filename for logs specific to this experiment. + :return: An initialized Experiment instance. + :raises RuntimeError: If the experiment type is not supported. 
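+
+        Illustrative usage (assuming ``sebs_client`` is an initialized ``SeBS``
+        instance and ``user_config`` contains an ``experiments`` section)::
+
+            experiment = sebs_client.get_experiment("perf-cost", user_config)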
+ """ + from sebs.experiments import ( # Local import to avoid circular dependencies at module level + PerfCost, NetworkPingPong, InvocationOverhead, EvictionModel ) - implementations: Dict[str, Type[Experiment]] = { + experiment_implementations: Dict[str, Type[Experiment]] = { "perf-cost": PerfCost, "network-ping-pong": NetworkPingPong, "invocation-overhead": InvocationOverhead, "eviction-model": EvictionModel, } - if experiment_type not in implementations: - raise RuntimeError(f"Experiment {experiment_type} not supported!") - experiment = implementations[experiment_type](self.get_experiment_config(config)) - experiment.logging_handlers = self.generate_logging_handlers( - logging_filename=logging_filename - ) - return experiment + if experiment_name not in experiment_implementations: + raise RuntimeError(f"Experiment {experiment_name} not supported!") + + # Experiment constructor expects ExperimentConfig instance + experiment_settings_obj = self.get_experiment_config(user_config) + experiment_instance = experiment_implementations[experiment_name](experiment_settings_obj) + experiment_instance.logging_handlers = self.generate_logging_handlers(logging_filename) + return experiment_instance def get_benchmark( self, - name: str, - deployment: FaaSSystem, - config: ExperimentConfig, + benchmark_name: str, # Renamed from name for clarity + deployment_client: FaaSSystem, # Renamed from deployment + experiment_cfg: ExperimentConfig, # Renamed from config logging_filename: Optional[str] = None, ) -> Benchmark: - benchmark = Benchmark( - name, - deployment.name(), - config, - self._config, + """ + Get a Benchmark instance for a given name, deployment, and configuration. + + Initializes a Benchmark object, which involves finding benchmark code, + loading its configuration, and interacting with the cache. + + :param benchmark_name: Name of the benchmark. + :param deployment_client: Initialized FaaS deployment client. + :param experiment_cfg: The active experiment's configuration. + :param logging_filename: Optional filename for logs specific to this benchmark. + :return: An initialized Benchmark instance. + """ + benchmark_instance = Benchmark( + benchmark_name, + deployment_client.name(), # Get deployment name from the client + experiment_cfg, + self._config, # Global SeBSConfig self._output_dir, self.cache_client, self.docker_client, ) - benchmark.logging_handlers = self.generate_logging_handlers( - logging_filename=logging_filename - ) - return benchmark + benchmark_instance.logging_handlers = self.generate_logging_handlers(logging_filename) + return benchmark_instance @staticmethod def get_storage_implementation(storage_type: types.Storage) -> Type[PersistentStorage]: - _storage_implementations = {types.Storage.MINIO: minio.Minio} - impl = _storage_implementations.get(storage_type) - assert impl - return impl + """ + Get the class for a given self-hosted persistent storage type. + + :param storage_type: A `sebs.types.Storage` enum member. + :return: The class of the storage implementation (e.g., Minio). + :raises AssertionError: If the storage type is unknown. 
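+
+        Illustrative usage::
+
+            storage_cls = SeBS.get_storage_implementation(types.Storage.MINIO)  # -> minio.Minio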
+ """ + # Maps storage type enum to its implementation class + _storage_map = {types.Storage.MINIO: minio.Minio} + impl_class = _storage_map.get(storage_type) + assert impl_class is not None, f"Unknown self-hosted storage type: {storage_type}" + return impl_class @staticmethod def get_nosql_implementation(storage_type: types.NoSQLStorage) -> Type[NoSQLStorage]: - _storage_implementations = {types.NoSQLStorage.SCYLLADB: scylladb.ScyllaDB} - impl = _storage_implementations.get(storage_type) - assert impl - return impl + """ + Get the class for a given self-hosted NoSQL storage type. + + :param storage_type: A `sebs.types.NoSQLStorage` enum member. + :return: The class of the NoSQL storage implementation (e.g., ScyllaDB). + :raises AssertionError: If the NoSQL storage type is unknown. + """ + _nosql_map = {types.NoSQLStorage.SCYLLADB: scylladb.ScyllaDB} + impl_class = _nosql_map.get(storage_type) + assert impl_class is not None, f"Unknown self-hosted NoSQL storage type: {storage_type}" + return impl_class @staticmethod - def get_storage_config_implementation(storage_type: types.Storage): - _storage_implementations = {types.Storage.MINIO: config.MinioConfig} - impl = _storage_implementations.get(storage_type) - assert impl - return impl + def get_storage_config_implementation(storage_type: types.Storage) -> Type[config.PersistentStorageConfig]: + """ + Get the configuration class for a given self-hosted persistent storage type. + + :param storage_type: A `sebs.types.Storage` enum member. + :return: The configuration class (e.g., MinioConfig). + :raises AssertionError: If the storage type is unknown. + """ + _storage_config_map = {types.Storage.MINIO: config.MinioConfig} + impl_class = _storage_config_map.get(storage_type) + assert impl_class is not None, f"Unknown self-hosted storage config type: {storage_type}" + return impl_class @staticmethod - def get_nosql_config_implementation(storage_type: types.NoSQLStorage): - _storage_implementations = {types.NoSQLStorage.SCYLLADB: config.ScyllaDBConfig} - impl = _storage_implementations.get(storage_type) - assert impl - return impl + def get_nosql_config_implementation(storage_type: types.NoSQLStorage) -> Type[config.NoSQLStorageConfig]: + """ + Get the configuration class for a given self-hosted NoSQL storage type. + + :param storage_type: A `sebs.types.NoSQLStorage` enum member. + :return: The configuration class (e.g., ScyllaDBConfig). + :raises AssertionError: If the NoSQL storage type is unknown. + """ + _nosql_config_map = {types.NoSQLStorage.SCYLLADB: config.ScyllaDBConfig} + impl_class = _nosql_config_map.get(storage_type) + assert impl_class is not None, f"Unknown self-hosted NoSQL storage config type: {storage_type}" + return impl_class def shutdown(self): + """ + Shut down the SeBS client, primarily by shutting down the cache client + which handles saving any updated configurations. + """ self.cache_client.shutdown() def __enter__(self): + """Enter the runtime context related to this object (for `with` statement).""" return self - def __exit__(self): + def __exit__(self, exc_type, exc_val, exc_tb): + """ + Exit the runtime context, ensuring shutdown is called. + + :param exc_type: Exception type if an exception occurred in the `with` block. + :param exc_val: Exception value. + :param exc_tb: Traceback object. 
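+
+        Example (illustrative; constructor arguments elided):
+
+            with SeBS(...) as sebs_client:
+                cfg = sebs_client.get_deployment_config(deployment_settings)
+            # shutdown() runs automatically when the block exits, persisting the cache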
+ """ self.shutdown() diff --git a/sebs/statistics.py b/sebs/statistics.py index 8d00b8552..16d00d2ab 100644 --- a/sebs/statistics.py +++ b/sebs/statistics.py @@ -6,31 +6,121 @@ import scipy.stats as st BasicStats = namedtuple("BasicStats", "mean median std cv") +"""A named tuple to store basic statistics: mean, median, standard deviation (std), and coefficient of variation (cv).""" def basic_stats(times: List[float]) -> BasicStats: - mean = np.mean(times) - median = np.median(times) - std = np.std(times) - cv = std / mean * 100 - return BasicStats(mean, median, std, cv) + """ + Calculate basic statistics for a list of time measurements. + + Computes mean, median, standard deviation, and coefficient of variation. + + :param times: A list of floating-point time measurements. + :return: A BasicStats named tuple containing (mean, median, std, cv). + Returns NaNs for std and cv if mean is zero or times list is empty/has one element. + """ + if not times: + return BasicStats(np.nan, np.nan, np.nan, np.nan) + + mean_val = np.mean(times) + median_val = np.median(times) + std_val = np.std(times) + + if mean_val == 0: + cv_val = np.nan + else: + cv_val = (std_val / mean_val) * 100 + + return BasicStats(mean_val, median_val, std_val, cv_val) def ci_tstudents(alpha: float, times: List[float]) -> Tuple[float, float]: - mean = np.mean(times) - return st.t.interval(alpha, len(times) - 1, loc=mean, scale=st.sem(times)) + """ + Calculate the confidence interval using Student's t-distribution. + + Assumes the data is approximately normally distributed. + + :param alpha: The confidence level (e.g., 0.95 for 95% CI). + :param times: A list of floating-point time measurements. + :return: A tuple (lower_bound, upper_bound) of the confidence interval. + Returns (nan, nan) if the list has fewer than 2 samples. + """ + if len(times) < 2: + return (np.nan, np.nan) + mean_val = np.mean(times) + return st.t.interval(alpha, len(times) - 1, loc=mean_val, scale=st.sem(times)) def ci_le_boudec(alpha: float, times: List[float]) -> Tuple[float, float]: + """ + Calculate a non-parametric confidence interval based on Le Boudec's method. + + This method uses order statistics and is suitable for distributions that may + not be normal. It requires a sufficient number of samples (related to z_value calculation). + Reference: "Performance Evaluation of Computer and Communication Systems" by Le Boudec. + + :param alpha: The confidence level (e.g., 0.95 for 95% CI). + :param times: A list of floating-point time measurements. + :return: A tuple (lower_bound, upper_bound) of the confidence interval. + Returns (nan, nan) if the number of samples is too small for the calculation. + :raises AssertionError: If alpha is not one of the supported values (0.95, 0.99). + """ + if not times: + return (np.nan, np.nan) + sorted_times = sorted(times) n = len(times) - # z(alfa/2) - z_value = {0.95: 1.96, 0.99: 2.576}.get(alpha) - assert z_value + # z(alpha/2) - critical value from standard normal distribution + # For a two-sided interval with confidence `alpha`, we need z_{1 - (1-alpha)/2} = z_{(1+alpha)/2} + # However, the formula used by Le Boudec for indices is n/2 +- z * sqrt(n)/2 + # The z_value here corresponds to z_{1 - (1-alpha)/2} + z_critical_value = {0.95: 1.96, 0.99: 2.576}.get(alpha) + assert z_critical_value is not None, f"Unsupported alpha value: {alpha}. Supported values are 0.95, 0.99." 
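+
+    # Worked example: n = 100, alpha = 0.95 => z = 1.96, z * sqrt(n) = 19.6;
+    # the interval is bounded by the order statistics at positions
+    # floor((100 - 19.6) / 2) = 40 and ceil(1 + (100 + 19.6) / 2) - 1 = 60.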
+
+    # Ranks of the order statistics that bound the CI of the median (Le Boudec):
+    #   low_pos  = floor((n - z * sqrt(n)) / 2)       -> 0-based index
+    #   high_pos = ceil(1 + (n + z * sqrt(n)) / 2)    -> 1-based rank, hence the -1 below
+    sqrt_n = math.sqrt(n)
+    low_pos_calculated = math.floor((n - z_critical_value * sqrt_n) / 2)
+    high_pos_calculated = math.ceil(1 + (n + z_critical_value * sqrt_n) / 2)
+
+    # Clamp both indices to the valid range [0, n-1]; the formula can produce
+    # out-of-range positions when n is small.
+    final_low_idx = max(0, low_pos_calculated)
+    final_high_idx = min(n - 1, high_pos_calculated - 1)

-    low_pos = math.floor((n - z_value * math.sqrt(n)) / 2)
-    high_pos = math.ceil(1 + (n + z_value * math.sqrt(n)) / 2)
+    if final_low_idx > final_high_idx:
+        # Too few samples for the requested confidence level.
+        return (np.nan, np.nan)

-    return (sorted_times[low_pos], sorted_times[high_pos])
+    return (sorted_times[final_low_idx], sorted_times[final_high_idx])
diff --git a/sebs/storage/config.py b/sebs/storage/config.py
index cd47df391..5fab97152 100644
--- a/sebs/storage/config.py
+++ b/sebs/storage/config.py
@@ -9,51 +9,115 @@


 @dataclass
 class PersistentStorageConfig(ABC):
+    """
+    Abstract base class for persistent storage configurations.
+
+    Defines the interface for serializing the configuration and providing
+    environment variables necessary for functions to access the storage.
+    """

     @abstractmethod
     def serialize(self) -> dict:
+        """
+        Serialize the storage configuration to a dictionary.
+
+        :return: A dictionary representation of the configuration.
+        """
         pass

     @abstractmethod
     def envs(self) -> dict:
+        """
+        Return a dictionary of environment variables required by functions
+        to connect to and use this persistent storage.
+
+        :return: Dictionary of environment variables.
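+
+        Example shape of the returned mapping for the Minio implementation
+        (keys taken from MinioConfig.envs; values are placeholders):
+
+            {"MINIO_ADDRESS": "<host:port>", "MINIO_ACCESS_KEY": "<key>", "MINIO_SECRET_KEY": "<secret>"}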
+ """ pass @dataclass class MinioConfig(PersistentStorageConfig): + """ + Configuration for a self-hosted Minio S3-compatible object storage. + + Attributes: + address: Network address of the Minio server. + mapped_port: Host port mapped to the Minio container's port. + access_key: Access key for Minio. + secret_key: Secret key for Minio. + instance_id: Docker container ID of the running Minio instance. + output_buckets: List of output bucket names. + input_buckets: List of input bucket names. + version: Version of the Minio Docker image. + data_volume: Name of the Docker volume used for Minio data persistence. + type: Identifier for this storage type, defaults to "minio". + """ address: str = "" mapped_port: int = -1 access_key: str = "" secret_key: str = "" instance_id: str = "" output_buckets: List[str] = field(default_factory=list) - input_buckets: List[str] = field(default_factory=lambda: []) + input_buckets: List[str] = field(default_factory=lambda: []) # Ensure default_factory is callable version: str = "" data_volume: str = "" - type: str = "minio" + type: str = "minio" # Type identifier for deserialization or type checking def update_cache(self, path: List[str], cache: Cache): + """ + Update the SeBS cache with the Minio configuration details. - for key in MinioConfig.__dataclass_fields__.keys(): - if key == "resources": + Iterates over dataclass fields and updates them in the cache under the given path. + + :param path: List of keys defining the path in the cache structure. + :param cache: The Cache client instance. + """ + for key_name in self.__dataclass_fields__.keys(): + # Avoid trying to cache complex objects or fields not meant for direct caching + if key_name == "resources": # Example of a field to skip if it existed continue - cache.update_config(val=getattr(self, key), keys=[*path, key]) + cache.update_config(val=getattr(self, key_name), keys=[*path, key_name]) + # If self.resources (from a potential parent or mixed-in class) needed caching: # self.resources.update_cache(cache) @staticmethod def deserialize(data: dict) -> "MinioConfig": - keys = list(MinioConfig.__dataclass_fields__.keys()) - data = {k: v for k, v in data.items() if k in keys} - - cfg = MinioConfig(**data) - - return cfg + """ + Deserialize a MinioConfig object from a dictionary. + + Filters the input dictionary to include only known fields of MinioConfig. + + :param data: Dictionary containing MinioConfig data. + :return: A MinioConfig instance. + """ + known_keys = list(MinioConfig.__dataclass_fields__.keys()) + filtered_data = {k: v for k, v in data.items() if k in known_keys} + # Ensure list fields are correctly initialized if missing in filtered_data + if 'output_buckets' not in filtered_data: + filtered_data['output_buckets'] = [] + if 'input_buckets' not in filtered_data: + filtered_data['input_buckets'] = [] + return MinioConfig(**filtered_data) def serialize(self) -> dict: - return self.__dict__ + """ + Serialize the MinioConfig to a dictionary. + + :return: A dictionary representation of the MinioConfig. + """ + # Using self.__dict__ directly for dataclasses is generally fine, + # but ensure all fields are serializable (e.g., no complex objects + # that aren't handled by the JSON serializer later). + return self.__dict__.copy() # Return a copy def envs(self) -> dict: + """ + Return environment variables for functions to connect to this Minio instance. + + :return: Dictionary of Minio-related environment variables. 
+ """ return { - "MINIO_ADDRESS": self.address, + "MINIO_ADDRESS": f"{self.address}:{self.mapped_port}", # Include port in address "MINIO_ACCESS_KEY": self.access_key, "MINIO_SECRET_KEY": self.secret_key, } @@ -61,38 +125,81 @@ def envs(self) -> dict: @dataclass class NoSQLStorageConfig(ABC): + """ + Abstract base class for NoSQL storage configurations. + + Defines the interface for serializing the configuration. + """ @abstractmethod def serialize(self) -> dict: + """ + Serialize the NoSQL storage configuration to a dictionary. + + :return: A dictionary representation of the configuration. + """ pass @dataclass class ScyllaDBConfig(NoSQLStorageConfig): + """ + Configuration for a self-hosted ScyllaDB NoSQL database. + + Attributes: + address: Network address of the ScyllaDB server. + mapped_port: Host port mapped to the ScyllaDB container's CQL port. + alternator_port: Host port mapped to ScyllaDB's Alternator (DynamoDB compatible) port. + access_key: Access key (typically "None" for ScyllaDB unless auth is configured). + secret_key: Secret key (typically "None" for ScyllaDB). + instance_id: Docker container ID of the running ScyllaDB instance. + region: Region (typically "None" for self-hosted ScyllaDB). + cpus: Number of CPUs allocated to the ScyllaDB container. + memory: Memory allocated to the ScyllaDB container (in MB or similar unit). + version: Version of the ScyllaDB Docker image. + data_volume: Name of the Docker volume used for ScyllaDB data persistence. + """ address: str = "" mapped_port: int = -1 - alternator_port: int = 8000 + alternator_port: int = 8000 # Default ScyllaDB Alternator port in container access_key: str = "None" secret_key: str = "None" instance_id: str = "" - region: str = "None" + region: str = "None" # ScyllaDB is self-hosted, region might not be applicable like in cloud cpus: int = -1 - memory: int = -1 + memory: int = -1 # e.g. in MB version: str = "" data_volume: str = "" def update_cache(self, path: List[str], cache: Cache): + """ + Update the SeBS cache with the ScyllaDB configuration details. + + Iterates over dataclass fields and updates them in the cache under the given path. - for key in ScyllaDBConfig.__dataclass_fields__.keys(): - cache.update_config(val=getattr(self, key), keys=[*path, key]) + :param path: List of keys defining the path in the cache structure. + :param cache: The Cache client instance. + """ + for key_name in self.__dataclass_fields__.keys(): + cache.update_config(val=getattr(self, key_name), keys=[*path, key_name]) @staticmethod def deserialize(data: dict) -> "ScyllaDBConfig": - keys = list(ScyllaDBConfig.__dataclass_fields__.keys()) - data = {k: v for k, v in data.items() if k in keys} + """ + Deserialize a ScyllaDBConfig object from a dictionary. - cfg = ScyllaDBConfig(**data) + Filters the input dictionary to include only known fields of ScyllaDBConfig. - return cfg + :param data: Dictionary containing ScyllaDBConfig data. + :return: A ScyllaDBConfig instance. + """ + known_keys = list(ScyllaDBConfig.__dataclass_fields__.keys()) + filtered_data = {k: v for k, v in data.items() if k in known_keys} + return ScyllaDBConfig(**filtered_data) def serialize(self) -> dict: - return self.__dict__ + """ + Serialize the ScyllaDBConfig to a dictionary. + + :return: A dictionary representation of the ScyllaDBConfig. 
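+
+        Example (illustrative round trip; field values are placeholders):
+
+            cfg = ScyllaDBConfig(address="127.0.0.1", mapped_port=9042, alternator_port=8000)
+            assert ScyllaDBConfig.deserialize(cfg.serialize()) == cfg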
+ """ + return self.__dict__.copy() # Return a copy diff --git a/sebs/storage/minio.py b/sebs/storage/minio.py index bb9112a22..47bfd34ac 100644 --- a/sebs/storage/minio.py +++ b/sebs/storage/minio.py @@ -17,42 +17,62 @@ class Minio(PersistentStorage): + """ + Persistent storage implementation using a self-hosted Minio S3-compatible server + running in a Docker container. + """ @staticmethod def typename() -> str: + """Return the type name of this storage implementation.""" return f"{Minio.deployment_name()}.Minio" @staticmethod def deployment_name() -> str: + """Return the deployment name, which is 'minio' for this storage type.""" return "minio" - # the location does not matter + # Minio is S3-compatible; a default region is often needed for S3 SDKs. MINIO_REGION = "us-east-1" def __init__( self, docker_client: docker.client, cache_client: Cache, - resources: Resources, + resources: Resources, # Should be SelfHostedResources or similar if it holds MinioConfig replace_existing: bool, ): + """ + Initialize the Minio storage client. + + :param docker_client: Docker client instance for managing the Minio container. + :param cache_client: Cache client instance. + :param resources: Cloud/system resources configuration object. + :param replace_existing: Flag to control overwriting existing files. + """ super().__init__(self.MINIO_REGION, cache_client, resources, replace_existing) self._docker_client = docker_client - self._storage_container: Optional[docker.container] = None - self._cfg = MinioConfig() + self._storage_container: Optional[docker.models.containers.Container] = None # Type hint for container + self._cfg = MinioConfig() # Default config, can be updated via property @property def config(self) -> MinioConfig: + """The Minio specific configuration for this storage instance.""" return self._cfg @config.setter def config(self, config: MinioConfig): + """Set the Minio specific configuration.""" self._cfg = config @staticmethod - def _define_http_client(): + def _define_http_client(): # No type hint for urllib3.PoolManager as it's an import """ - Minio does not allow another way of configuring timeout for connection. - The rest of configuration is copied from source code of Minio. + Define a custom urllib3 HTTP client with specific timeout settings for Minio. + + This is used because the default Minio client might not offer sufficient + timeout configuration directly. The settings are based on Minio's own client source. + + :return: A configured urllib3.PoolManager instance. """ import urllib3 from datetime import timedelta @@ -68,9 +88,20 @@ def _define_http_client(): ) def start(self): + """ + Start the Minio Docker container. + + Configures a data volume for persistence, sets access/secret keys, + and maps the container's port 9000 to a host port specified in `self._cfg.mapped_port`. + The container ID and connection details are stored in `self._cfg`. - if self._cfg.data_volume == "": - minio_volume = os.path.join(project_absolute_path(), "minio-volume") + :raises RuntimeError: If starting the Minio container fails. 
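+
+        Example (illustrative; assumes a running Docker daemon plus existing
+        cache and resources objects; the host port is a placeholder):
+
+            storage = Minio(docker.from_env(), cache_client, resources, replace_existing=False)
+            storage.config.mapped_port = 9011
+            storage.start()
+            storage.configure_connection()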
+        """
+        if not self._cfg.data_volume:  # use the default volume path when none is configured
+            minio_volume = os.path.join(project_absolute_path(), "minio-volume")
         else:
             minio_volume = self._cfg.data_volume
             minio_volume = os.path.abspath(minio_volume)
@@ -115,178 +146,425 @@ def start(self):
             raise RuntimeError("Starting Minio storage unsuccesful")

     def configure_connection(self):
-        # who knows why? otherwise attributes are not loaded
-        if self._cfg.address == "":
+        """
+        Configure the connection details (address) for the Minio client.
+
+        If the address is not already set in the config, it determines the
+        Minio server address based on the Docker container's network settings.
+        On Linux, it uses the container's bridge IP. On other systems (like
+        Docker Desktop for Mac/Windows), it uses localhost with the mapped port.
+        Initializes `self.connection` with a Minio client instance.
+
+        :raises RuntimeError: If the Minio container is not running or the IP address cannot be detected.
+        """
+        if not self._cfg.address:
             if self._storage_container is None:
                 raise RuntimeError(
-                    "Minio container is not available! Make sure that you deployed "
-                    "the Minio storage and provided configuration!"
-                )
-
-            self._storage_container.reload()
-
-            # Check if the system is Linux and that it's not WSL
-            if is_linux():
-                networks = self._storage_container.attrs["NetworkSettings"]["Networks"]
-                self._cfg.address = "{IPAddress}:{Port}".format(
-                    IPAddress=networks["bridge"]["IPAddress"], Port=9000
+                    "Minio container is not available! Ensure Minio is started and configured."
                 )
-            else:
-                # System is either WSL, Windows, or Mac
+            # Container attributes are populated only after an explicit reload().
+            self._storage_container.reload()
+
+            if is_linux():  # native Linux Docker: use the container's bridge IP
+                networks = self._storage_container.attrs.get("NetworkSettings", {}).get("Networks", {})
+                bridge_network = networks.get("bridge", {})
+                # Fall back to the gateway address if the IP is not reported.
+                ip_address = bridge_network.get("IPAddress") or bridge_network.get("Gateway")
+                if not ip_address:
+                    self.logging.error(
+                        "Could not determine Minio container IP address from bridge network. Attributes: %s",
+                        json.dumps(self._storage_container.attrs, indent=2),
+                    )
+                    raise RuntimeError(
+                        f"Failed to detect IP address for Minio container {self._storage_container.id}"
+                    )
+                self._cfg.address = f"{ip_address}:9000"  # Minio listens on port 9000 inside the container
+            else:  # Docker Desktop (Windows, macOS) or WSL: use localhost with the mapped host port
+                if self._cfg.mapped_port == -1:
+                    raise RuntimeError("Minio host port not mapped or invalid for non-Linux Docker.")
                 self._cfg.address = f"localhost:{self._cfg.mapped_port}"
-
-            if not self._cfg.address:
-                self.logging.error(
-                    f"Couldn't read the IP address of container from attributes "
-                    f"{json.dumps(self._instance.attrs, indent=2)}"
-                )
-                raise RuntimeError(
-                    f"Incorrect detection of IP address for container with id {self._instance_id}"
-                )
-            self.logging.info("Starting minio instance at {}".format(self._cfg.address))
+
+        self.logging.info(f"Minio instance configured at {self._cfg.address}")
+
         self.connection = self.get_connection()

     def stop(self):
+        """
+        Stop the Minio Docker container if it is running.
+ """ if self._storage_container is not None: - self.logging.info(f"Stopping minio container at {self._cfg.address}.") - self._storage_container.stop() - self.logging.info(f"Stopped minio container at {self._cfg.address}.") + try: + self.logging.info(f"Stopping Minio container {self._storage_container.id} at {self._cfg.address}.") + self._storage_container.stop() + self.logging.info(f"Stopped Minio container {self._storage_container.id}.") + except docker.errors.NotFound: + self.logging.warning(f"Minio container {self._storage_container.id} already removed or not found.") + except docker.errors.APIError as e: + self.logging.error(f"Error stopping Minio container {self._storage_container.id}: {e}") else: - self.logging.error("Stopping minio was not succesful, storage container not known!") + self.logging.warning("Attempted to stop Minio, but storage container instance is not known.") - def get_connection(self): + def get_connection(self) -> minio.Minio: + """ + Create and return a Minio client connection instance. + + Uses connection details from `self._cfg` (address, access_key, secret_key). + Configures a custom HTTP client with timeouts. + + :return: A `minio.Minio` client instance. + """ return minio.Minio( self._cfg.address, - access_key=self._cfg.access_key, + access_key=self._cfg.access_key, # Should be self._cfg.access_key secret_key=self._cfg.secret_key, secure=False, http_client=Minio._define_http_client(), ) - def _create_bucket(self, name: str, buckets: List[str] = [], randomize_name: bool = False): - for bucket_name in buckets: - if name in bucket_name: + def _create_bucket( + self, name: str, buckets: List[str] = [], randomize_name: bool = False + ) -> str: + """ + Create a Minio bucket. + + Checks if a bucket with a similar name prefix already exists in the `buckets` list. + If `randomize_name` is True, appends a random string to the bucket name. + Minio bucket names have a limit (often related to DNS compatibility, though Minio + itself might be more flexible locally than S3). The original code mentioned a + 16-character limit for the random part, which implies overall length constraints. + + :param name: Desired base name for the bucket. + :param buckets: List of existing bucket names to check against (prefix match). + :param randomize_name: If True, append a random string to the bucket name. + :return: Name of the created or existing bucket. + :raises minio.error.S3Error: If bucket creation fails for other S3-compatible reasons. + """ + # Check if a bucket with `name` as a prefix already exists + for existing_bucket_name in buckets: + if existing_bucket_name.startswith(name): self.logging.info( - "Bucket {} for {} already exists, skipping.".format(bucket_name, name) + f"Bucket {existing_bucket_name} (similar to {name}) already exists, skipping." ) - return bucket_name - # minio has limit of bucket name to 16 characters + return existing_bucket_name + + bucket_to_create = name if randomize_name: - bucket_name = "{}-{}".format(name, str(uuid.uuid4())[0:16]) - else: - bucket_name = name + # Minio bucket names are flexible but often adhere to S3/DNS for broader compatibility. + # Using hyphen as separator and keeping it relatively short. + random_suffix = str(uuid.uuid4())[0:8] # Shorter random part than original + bucket_to_create = f"{name}-{random_suffix}" + + # Ensure name is valid for Minio (e.g. length, characters) + # Minio itself is quite flexible, but S3 compatibility is often desired. 
+ # For simplicity, not adding complex validation here beyond what Minio client enforces. + try: - self.connection.make_bucket(bucket_name, location=self.MINIO_REGION) - self.logging.info("Created bucket {}".format(bucket_name)) - return bucket_name - except ( - minio.error.BucketAlreadyOwnedByYou, - minio.error.BucketAlreadyExists, - minio.error.ResponseError, - ) as err: - self.logging.error("Bucket creation failed!") - # rethrow - raise err - - def uploader_func(self, path_idx, file, filepath): + if not self.connection.bucket_exists(bucket_to_create): + self.connection.make_bucket(bucket_to_create, location=self.MINIO_REGION) + self.logging.info(f"Created Minio bucket {bucket_to_create}") + else: + self.logging.info(f"Minio bucket {bucket_to_create} already exists.") + return bucket_to_create + except (minio.error.S3Error) as err: # Catching general S3Error + self.logging.error(f"Minio bucket creation/check for {bucket_to_create} failed: {err}") + raise # Re-throw the Minio/S3 error + + def uploader_func(self, path_idx: int, file_key: str, local_filepath: str): + """ + Upload a file to a Minio input bucket, used as a callback for multiprocessing. + + Constructs the object key using input prefixes. Skips upload if using cached + buckets and not replacing existing files (though current check is basic). + + :param path_idx: Index of the input prefix from `self.input_prefixes`. + :param file_key: Object key (filename) to use within the bucket, relative to prefix. + :param local_filepath: Local path to the file to upload. + :raises minio.error.S3Error: If upload fails. + """ + # Note: Original did not check self.replace_existing or existing files here. + # Adding a basic check, but proper cache handling would be more complex. + if self.cached and not self.replace_existing: + # A more robust check would be to list objects and see if this one exists. + # For simplicity, this uploader assumes it should upload if called, + # unless a more sophisticated check is added. 
+ self.logging.info(f"Skipping upload of {local_filepath} due to cache and no-replace policy (basic check).") + return + + full_object_key = os.path.join(self.input_prefixes[path_idx], file_key) + target_bucket_name = self.get_bucket(Resources.StorageBucketType.BENCHMARKS) try: - key = os.path.join(self.input_prefixes[path_idx], file) - bucket_name = self.get_bucket(Resources.StorageBucketType.BENCHMARKS) - self.connection.fput_object(bucket_name, key, filepath) - except minio.error.ResponseError as err: - self.logging.error("Upload failed!") - raise (err) + self.logging.info(f"Uploading {local_filepath} to Minio bucket {target_bucket_name} as {full_object_key}") + self.connection.fput_object(target_bucket_name, full_object_key, local_filepath) + except minio.error.S3Error as err: + self.logging.error(f"Minio upload of {local_filepath} failed: {err}") + raise # Re-throw def clean(self): - for bucket in self.output_buckets: - objects = self.connection.list_objects_v2(bucket) - objects = [obj.object_name for obj in objects] - for err in self.connection.remove_objects(bucket, objects): - self.logging.error("Deletion Error: {}".format(err)) - - def download_results(self, result_dir): - result_dir = os.path.join(result_dir, "storage_output") - for bucket in self.output_buckets: - objects = self.connection.list_objects_v2(bucket) - objects = [obj.object_name for obj in objects] - for obj in objects: - self.connection.fget_object(bucket, obj, os.path.join(result_dir, obj)) - - def clean_bucket(self, bucket: str): - delete_object_list = map( - lambda x: minio.DeleteObject(x.object_name), - self.connection.list_objects(bucket_name=bucket), - ) - errors = self.connection.remove_objects(bucket, delete_object_list) - for error in errors: - self.logging.error(f"Error when deleting object from bucket {bucket}: {error}!") + """ + Clean all output buckets associated with this Minio instance. + + Iterates through `self.output_prefixes` (which are path prefixes, not bucket names) + and attempts to delete objects matching these prefixes from the EXPERIMENTS bucket. + Note: This logic might need refinement if `output_prefixes` are not direct paths + or if multiple output buckets are used per benchmark. + """ + # Output prefixes are paths within the EXPERIMENTS bucket. + experiments_bucket = self.get_bucket(Resources.StorageBucketType.EXPERIMENTS) + if experiments_bucket: + for prefix in self.output_prefixes: + self.logging.info(f"Cleaning objects with prefix '{prefix}' from Minio bucket {experiments_bucket}") + try: + objects_to_delete = self.connection.list_objects(experiments_bucket, prefix=prefix, recursive=True) + # minio.delete_objects needs a list of DeleteObject instances or just names + delete_obj_list = [minio.deleteobjects.DeleteObject(obj.object_name) for obj in objects_to_delete] + if delete_obj_list: + errors = self.connection.remove_objects(experiments_bucket, delete_obj_list) + for error in errors: + self.logging.error(f"Error deleting object from {experiments_bucket}: {error}") + else: + self.logging.info(f"No objects found with prefix '{prefix}' in {experiments_bucket} to clean.") + except minio.error.S3Error as e: + self.logging.error(f"Error listing/cleaning objects in {experiments_bucket} with prefix {prefix}: {e}") + else: + self.logging.warning("No EXPERIMENTS bucket found to clean.") + + + def download_results(self, result_dir_base: str): # Renamed arg for clarity + """ + Download all objects from configured output prefixes to a local directory. 
+ + Each output prefix (path in the EXPERIMENTS bucket) will correspond to a + subdirectory within `result_dir_base/storage_output/`. + + :param result_dir_base: The base local directory to download results into. + A 'storage_output' subdirectory will be created here. + """ + # Output prefixes are paths within the EXPERIMENTS bucket + experiments_bucket = self.get_bucket(Resources.StorageBucketType.EXPERIMENTS) + if not experiments_bucket: + self.logging.warning("No EXPERIMENTS bucket found to download results from.") + return + + storage_output_dir = os.path.join(result_dir_base, "storage_output") + os.makedirs(storage_output_dir, exist_ok=True) + + for prefix in self.output_prefixes: + self.logging.info(f"Downloading objects with prefix '{prefix}' from Minio bucket {experiments_bucket}") + try: + objects = self.connection.list_objects(experiments_bucket, prefix=prefix, recursive=True) + for obj in objects: + # Create local path that mirrors the object's path relative to the prefix + relative_path = os.path.relpath(obj.object_name, prefix) + local_file_path = os.path.join(storage_output_dir, prefix, relative_path) + os.makedirs(os.path.dirname(local_file_path), exist_ok=True) + self.logging.debug(f"Downloading {obj.object_name} to {local_file_path}") + self.connection.fget_object(experiments_bucket, obj.object_name, local_file_path) + except minio.error.S3Error as e: + self.logging.error(f"Error downloading results from {experiments_bucket} with prefix {prefix}: {e}") + + + def clean_bucket(self, bucket_name: str): + """ + Delete all objects within a specified Minio bucket. + + :param bucket_name: Name of the Minio bucket to clean. + """ + try: + self.logging.info(f"Cleaning Minio bucket {bucket_name}") + delete_object_list = [ + minio.deleteobjects.DeleteObject(obj.object_name) + for obj in self.connection.list_objects(bucket_name, recursive=True) + ] + if delete_object_list: + errors = self.connection.remove_objects(bucket_name, delete_object_list) + for error in errors: + self.logging.error(f"Error deleting object from Minio bucket {bucket_name}: {error}") + else: + self.logging.info(f"Minio bucket {bucket_name} is already empty or has no objects to clean.") + except minio.error.S3Error as e: + self.logging.error(f"Error cleaning Minio bucket {bucket_name}: {e}") + + + def remove_bucket(self, bucket_name: str): # Renamed arg for consistency + """ + Delete a Minio bucket. The bucket must typically be empty. + + :param bucket_name: Name of the Minio bucket to delete. + """ + try: + self.logging.info(f"Removing Minio bucket {bucket_name}") + self.connection.remove_bucket(bucket_name) # Minio remove_bucket expects bucket_name kwarg + self.logging.info(f"Minio bucket {bucket_name} removed.") + except minio.error.S3Error as e: + self.logging.error(f"Error removing Minio bucket {bucket_name}: {e}") - def remove_bucket(self, bucket: str): - self.connection.remove_bucket(Bucket=bucket) def correct_name(self, name: str) -> str: + """ + Return the corrected bucket name (Minio is generally flexible, but S3 + compatibility rules might be desired). Currently returns name as is. + + :param name: Original bucket name. + :return: Corrected bucket name. + """ + # Minio bucket names are quite flexible. If strict S3 compatibility is needed, + # more rules (lowercase, no underscores, 3-63 chars, etc.) would apply. + # For local Minio, this is often not strictly enforced. 
return name - def download(self, bucket_name: str, key: str, filepath: str): - raise NotImplementedError() + def download(self, bucket_name: str, key: str, filepath: str): # Mark as -> None as per parent + """ + Download an object from Minio. (Implementation provided by parent, this is override) + + :param bucket_name: Name of the bucket. + :param key: Object key. + :param filepath: Local path to save the file. + :raises NotImplementedError: If not overridden by a concrete implementation (but it is). + """ + # This method overrides the abstract one from PersistentStorage. + # The actual implementation for fget_object is needed here if parent is truly abstract. + # However, the original code structure seems to imply this *is* the implementation. + # For clarity, if this is the direct Minio implementation: + self.logging.info(f"Downloading {key} from Minio bucket {bucket_name} to {filepath}") + try: + self.connection.fget_object(bucket_name, key, filepath) + except minio.error.S3Error as e: + self.logging.error(f"Failed to download {key} from {bucket_name}: {e}") + raise # Re-throw to indicate failure + def exists_bucket(self, bucket_name: str) -> bool: + """ + Check if a Minio bucket exists. + + :param bucket_name: Name of the bucket. + :return: True if the bucket exists, False otherwise. + """ return self.connection.bucket_exists(bucket_name) def list_bucket(self, bucket_name: str, prefix: str = "") -> List[str]: + """ + List objects in a Minio bucket, optionally filtered by prefix. + + :param bucket_name: Name of the bucket. + :param prefix: Optional prefix to filter objects. + :return: List of object names. + :raises RuntimeError: If the bucket does not exist. + """ try: - objects_list = self.connection.list_objects(bucket_name) - return [obj.object_name for obj in objects_list if prefix in obj.object_name] - except minio.error.NoSuchBucket: - raise RuntimeError(f"Attempting to access a non-existing bucket {bucket_name}!") + # list_objects is recursive by default if prefix is used effectively. + # To match S3-like behavior of non-recursive listing unless specified, + # one might need to adjust or check Minio client specifics. + # Assuming list_objects with a prefix gives objects *under* that prefix. + objects_iterator = self.connection.list_objects(bucket_name, prefix=prefix, recursive=True) + return [obj.object_name for obj in objects_iterator] + except minio.error.S3Error as e: # Catching S3Error, which includes NoSuchBucket + if "NoSuchBucket" in str(e): # More specific check if needed, though S3Error often suffices + raise RuntimeError(f"Attempting to list a non-existing Minio bucket: {bucket_name}") from e + self.logging.error(f"Error listing Minio bucket {bucket_name}: {e}") + raise # Re-throw other S3 errors + + def list_buckets(self, bucket_name_filter: Optional[str] = None) -> List[str]: # Renamed arg + """ + List all Minio buckets, or filter by a partial name. - def list_buckets(self, bucket_name: Optional[str] = None) -> List[str]: + :param bucket_name_filter: Optional string to filter bucket names (contains match). + :return: List of bucket names. 
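+
+        Example (illustrative filter string):
+
+            all_buckets = storage.list_buckets()
+            benchmark_buckets = storage.list_buckets(bucket_name_filter="benchmarks")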
+ """ buckets = self.connection.list_buckets() - if bucket_name is not None: - return [bucket.name for bucket in buckets if bucket_name in bucket.name] + if bucket_name_filter is not None: + return [bucket.name for bucket in buckets if bucket_name_filter in bucket.name] else: return [bucket.name for bucket in buckets] - def upload(self, bucket_name: str, filepath: str, key: str): - raise NotImplementedError() + def upload(self, bucket_name: str, filepath: str, key: str): # Mark as -> None as per parent + """ + Upload a file to Minio. (Implementation provided by parent, this is override) - def serialize(self) -> dict: - return self._cfg.serialize() + :param bucket_name: Name of the bucket. + :param filepath: Local path of the file to upload. + :param key: Object key for storage. + :raises NotImplementedError: If not overridden (but it is). + """ + # This method overrides the abstract one from PersistentStorage. + self.logging.info(f"Uploading {filepath} to Minio bucket {bucket_name} as {key}") + try: + self.connection.fput_object(bucket_name, key, filepath) + except minio.error.S3Error as e: + self.logging.error(f"Failed to upload {filepath} to {bucket_name} as {key}: {e}") + raise - """ - This implementation supports overriding this class. - The main Minio class is used to start/stop deployments. - When overriding the implementation in Local/OpenWhisk/..., - we call the _deserialize and provide an alternative implementation. - """ + def serialize(self) -> dict: + """ + Serialize the Minio storage configuration. + + :return: Dictionary representation of the MinioConfig. + """ + return self._cfg.serialize() - T = TypeVar("T", bound="Minio") + T = TypeVar("T", bound="Minio") # For type hinting the return of _deserialize @staticmethod def _deserialize( cached_config: MinioConfig, cache_client: Cache, - resources: Resources, - obj_type: Type[T], + resources: Resources, # Should be SelfHostedResources or similar + obj_type: Type[T], # The concrete class type (Minio or subclass) ) -> T: + """ + Internal helper to deserialize a Minio (or subclass) instance. + + Restores configuration and re-attaches to an existing Docker container if specified. + This method supports creating instances of Minio or its subclasses, which is + useful if Local/OpenWhisk storage types inherit from Minio but have their own class. + + :param cached_config: The MinioConfig object from cache/config. + :param cache_client: Cache client instance. + :param resources: The Resources object (expected to be SelfHostedResources compatible). + :param obj_type: The actual class type to instantiate (Minio or a subclass). + :return: An instance of `obj_type`. + :raises RuntimeError: If a cached Docker container ID is provided but the container is not found. 
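+
+        Example (illustrative; ``OpenWhiskMinio`` is a hypothetical subclass):
+
+            storage = Minio._deserialize(minio_cfg, cache_client, resources, OpenWhiskMinio)
+            # storage is an OpenWhiskMinio instance re-attached to the cached container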
+ """ docker_client = docker.from_env() - obj = obj_type(docker_client, cache_client, resources, False) - obj._cfg = cached_config - if cached_config.instance_id: - instance_id = cached_config.instance_id + # Create instance of the correct type (Minio or a subclass like LocalMinioStorage) + obj = obj_type(docker_client, cache_client, resources, False) # False for replace_existing typically + obj._cfg = cached_config # Apply the full MinioConfig + + if cached_config.instance_id: # If a container ID was cached try: - obj._storage_container = docker_client.containers.get(instance_id) + obj._storage_container = docker_client.containers.get(cached_config.instance_id) + obj.logging.info(f"Re-attached to existing Minio container {cached_config.instance_id}") except docker.errors.NotFound: - raise RuntimeError(f"Storage container {instance_id} does not exist!") + obj.logging.error(f"Cached Minio container {cached_config.instance_id} not found!") + # Decide on behavior: raise error, or try to start a new one? + # Current SeBS logic might expect this to fail if container is gone. + raise RuntimeError(f"Minio storage container {cached_config.instance_id} does not exist!") + except docker.errors.APIError as e: + obj.logging.error(f"API error attaching to Minio container {cached_config.instance_id}: {e}") + raise else: - obj._storage_container = None - obj._input_prefixes = copy.copy(cached_config.input_buckets) - obj._output_prefixes = copy.copy(cached_config.output_buckets) - obj.configure_connection() + obj._storage_container = None # No cached container ID + + # Restore prefixes from config, as they are part of MinioConfig now + obj._input_prefixes = copy.copy(cached_config.input_buckets) # Assuming input_buckets are prefixes + obj._output_prefixes = copy.copy(cached_config.output_buckets) # Assuming output_buckets are prefixes + + if obj._storage_container or obj._cfg.address : # If we have a container or a pre-configured address + obj.configure_connection() # Setup Minio client connection + return obj @staticmethod def deserialize(cached_config: MinioConfig, cache_client: Cache, res: Resources) -> "Minio": + """ + Deserialize a Minio instance from a MinioConfig object. + + This is the primary public deserialization method for Minio. + + :param cached_config: The MinioConfig object (e.g., from a top-level config's resources). + :param cache_client: Cache client instance. + :param res: The Resources object. + :return: A Minio instance. + """ return Minio._deserialize(cached_config, cache_client, res, Minio) diff --git a/sebs/storage/resources.py b/sebs/storage/resources.py index a85e725e1..5f1fd4dba 100644 --- a/sebs/storage/resources.py +++ b/sebs/storage/resources.py @@ -19,178 +19,259 @@ class SelfHostedResources(Resources): + """ + Manages resources for self-hosted FaaS deployments, such as local Minio + for object storage and ScyllaDB for NoSQL storage. + """ def __init__( self, name: str, storage_cfg: Optional[PersistentStorageConfig] = None, nosql_storage_cfg: Optional[NoSQLStorageConfig] = None, ): + """ + Initialize SelfHostedResources. + + :param name: Name of the self-hosted platform (e.g., "local", "openwhisk"). + :param storage_cfg: Optional configuration for persistent object storage (e.g., MinioConfig). + :param nosql_storage_cfg: Optional configuration for NoSQL storage (e.g., ScyllaDBConfig). 
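+
+        Example (illustrative values):
+
+            resources = SelfHostedResources(
+                "local",
+                storage_cfg=MinioConfig(address="127.0.0.1:9000"),
+                nosql_storage_cfg=ScyllaDBConfig(address="127.0.0.1"),
+            )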
+ """ super().__init__(name=name) - self._object_storage = storage_cfg - self._nosql_storage = nosql_storage_cfg + self._object_storage: Optional[PersistentStorageConfig] = storage_cfg + self._nosql_storage: Optional[NoSQLStorageConfig] = nosql_storage_cfg @property def storage_config(self) -> Optional[PersistentStorageConfig]: + """Configuration for the self-hosted object storage (e.g., Minio).""" return self._object_storage @property def nosql_storage_config(self) -> Optional[NoSQLStorageConfig]: + """Configuration for the self-hosted NoSQL storage (e.g., ScyllaDB).""" return self._nosql_storage def serialize(self) -> dict: + """ + Serialize the self-hosted resource configurations to a dictionary. + + Includes configurations for object storage and NoSQL storage if they are set. + + :return: Dictionary representation of the self-hosted resources. + """ out: dict = {} + # Call super().serialize() to include base Resource fields like buckets, resource_id + out = {**super().serialize(), **out} + if self._object_storage is not None: - out = {**out, "storage": self._object_storage.serialize()} + out["object_storage"] = self._object_storage.serialize() # Changed key from "storage" if self._nosql_storage is not None: - out = {**out, "nosql": self._nosql_storage.serialize()} + out["nosql_storage"] = self._nosql_storage.serialize() # Changed key from "nosql" return out def update_cache(self, cache: Cache): + """ + Update the SeBS cache with the configurations of self-hosted resources. + + Saves object storage (Minio) and NoSQL storage (ScyllaDB) configurations. + + :param cache: The Cache client instance. + """ super().update_cache(cache) - if self._object_storage is not None: - cast(MinioConfig, self._object_storage).update_cache( - [self._name, "resources", "storage"], cache + if self._object_storage is not None and isinstance(self._object_storage, MinioConfig): + self._object_storage.update_cache( # MinioConfig has its own update_cache + [self._name, "resources", "object_storage", "minio"], cache # More specific path ) - if self._nosql_storage is not None: - cast(ScyllaDBConfig, self._nosql_storage).update_cache( - [self._name, "resources", "nosql"], cache + if self._nosql_storage is not None and isinstance(self._nosql_storage, ScyllaDBConfig): + self._nosql_storage.update_cache( # ScyllaDBConfig has its own update_cache + [self._name, "resources", "nosql_storage", "scylladb"], cache # More specific path ) - def _deserialize_storage( - self, config: dict, cached_config: Optional[dict], storage_type: str - ) -> Tuple[str, dict]: - storage_impl = "" - storage_config = {} - - # Check for new config - if "storage" in config and storage_type in config["storage"]: - storage_impl = config["storage"][storage_type]["type"] - storage_config = config["storage"][storage_type][storage_impl] - self.logging.info( - "Using user-provided configuration of storage " - f"type: {storage_type} for {self._name} containers." - ) + def _deserialize_storage_config( # Renamed for clarity, takes specific storage_type_key + self, + user_config: dict, # User-provided config for the entire 'resources' section + cached_resource_config: Optional[dict], # Cached 'resources' section + storage_type_key: str, # "object_storage" or "nosql_storage" + default_type_map: Dict[str, Type[PersistentStorageConfig] | Type[NoSQLStorageConfig]] + ) -> Optional[PersistentStorageConfig | NoSQLStorageConfig]: + """ + Helper to deserialize a specific type of storage configuration (object or NoSQL). 
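+
+        Assumed layout of each entry, matching how this helper reads it
+        (keys and values are illustrative):
+
+            "object_storage": {"type": "minio", "address": "...", "access_key": "...", "secret_key": "..."}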
- # Load cached values - elif ( - cached_config is not None - and "resources" in cached_config - and "storage" in cached_config["resources"] - and "object" in cached_config["resources"]["storage"] - ): - storage_impl = cached_config["storage"]["object"]["type"] - storage_config = cached_config["storage"]["object"][storage_impl] - self.logging.info( - f"Using cached configuration of storage type: " - f"{storage_type} for {self._name} container." - ) + It checks user config first, then cached config. - return storage_impl, storage_config + :param user_config: The 'resources' part of the user-provided configuration. + :param cached_resource_config: The 'resources' part of the cached configuration. + :param storage_type_key: The key for this storage type (e.g., "object_storage"). + :param default_type_map: Maps type strings (e.g., "minio") to config classes. + :return: Deserialized storage configuration object or None. + """ + storage_details_user = user_config.get(storage_type_key) + storage_details_cached = (cached_resource_config or {}).get(storage_type_key) - @staticmethod - def _deserialize(ret: "SelfHostedResources", config: dict, cached_config: dict): - obj_storage_impl, obj_storage_cfg = ret._deserialize_storage( - config, cached_config, "object" - ) + final_storage_details = None + source_msg = "" + + if storage_details_user: + final_storage_details = storage_details_user + source_msg = "user-provided" + elif storage_details_cached: + final_storage_details = storage_details_cached + source_msg = "cached" - if obj_storage_impl == "minio": - ret._object_storage = MinioConfig.deserialize(obj_storage_cfg) - ret.logging.info("Deserializing access data to Minio storage") - elif obj_storage_impl != "": - ret.logging.warning(f"Unknown object storage type: {obj_storage_impl}") + if final_storage_details: + storage_impl_type_str = final_storage_details.get("type") + if storage_impl_type_str and storage_impl_type_str in default_type_map: + config_class = default_type_map[storage_impl_type_str] + # Pass the actual config dict for that type (e.g., content of "minio" key) + # The structure is assumed: storage_type_key: {"type": "minio", "minio": {...actual_config...}} + # Or, if simpler: storage_type_key: {"type": "minio", ...actual_config_kv_pairs...} + # Assuming the latter simpler structure for now based on MinioConfig.deserialize + self.logging.info( + f"Using {source_msg} configuration for {storage_type_key} (type: {storage_impl_type_str})." 
+ ) + return config_class.deserialize(final_storage_details) + elif storage_impl_type_str: + self.logging.warning(f"Unknown {storage_type_key} type: {storage_impl_type_str}") + else: + self.logging.info(f"No 'type' specified for {storage_type_key} in {source_msg} config.") else: - ret.logging.info("No object storage available") + self.logging.info(f"No {storage_type_key} configuration provided or found in cache.") + return None - nosql_storage_impl, nosql_storage_cfg = ret._deserialize_storage( - config, cached_config, "nosql" - ) - if nosql_storage_impl == "scylladb": - ret._nosql_storage = ScyllaDBConfig.deserialize(nosql_storage_cfg) - ret.logging.info("Deserializing access data to ScylladB NoSQL storage") - elif nosql_storage_impl != "": - ret.logging.warning(f"Unknown NoSQL storage type: {nosql_storage_impl}") - else: - ret.logging.info("No NoSQL storage available") + @staticmethod + def _deserialize( + ret: "SelfHostedResources", + user_resources_config: dict, # The 'resources' part of user config + cached_resources_config: Optional[dict] # The 'resources' part of cached config + ): + """ + Deserialize self-hosted storage configurations (object and NoSQL). + + Populates `_object_storage` and `_nosql_storage` attributes of the `ret` instance. + + :param ret: The SelfHostedResources instance to populate. + :param user_resources_config: The 'resources' section from user-provided configuration. + :param cached_resources_config: The 'resources' section from cached configuration, if any. + """ + # Define type maps for deserialization + object_storage_type_map = {"minio": MinioConfig} + nosql_storage_type_map = {"scylladb": ScyllaDBConfig} + + ret._object_storage = ret._deserialize_storage_config( + user_resources_config, cached_resources_config, "object_storage", object_storage_type_map + ) + ret._nosql_storage = ret._deserialize_storage_config( + user_resources_config, cached_resources_config, "nosql_storage", nosql_storage_type_map + ) class SelfHostedSystemResources(SystemResources): + """ + Manages system resources for self-hosted FaaS deployments. + + This class provides access to self-hosted persistent storage (Minio) and + NoSQL storage (ScyllaDB) based on the provided configuration. + """ def __init__( self, - name: str, - config: Config, + name: str, # Name of the self-hosted platform, e.g., "local", "openwhisk" + config: Config, # The top-level platform Config (e.g., LocalConfig, OpenWhiskConfig) cache_client: Cache, docker_client: docker.client, logger_handlers: LoggingHandlers, ): - super().__init__(config, cache_client, docker_client) + """ + Initialize SelfHostedSystemResources. - self._name = name + :param name: Name of the self-hosted platform. + :param config: The top-level configuration for the platform. + :param cache_client: Cache client instance. + :param docker_client: Docker client instance. + :param logger_handlers: Logging handlers. + """ + super().__init__(config, cache_client, docker_client) + self._name = name # Platform name, e.g. "local" or "openwhisk" self._logging_handlers = logger_handlers self._storage: Optional[PersistentStorage] = None self._nosql_storage: Optional[NoSQLStorage] = None - """ - Create wrapper object for minio storage and fill buckets. - Starts minio as a Docker instance, using always fresh buckets. + def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStorage: + """ + Get or initialize the self-hosted persistent storage client (Minio). 
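+
+        Example (illustrative names):
+
+            storage = system_resources.get_storage(replace_existing=True)
+            storage.upload("experiments-bucket", "/tmp/result.json", "run-0/result.json")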
- :param benchmark: - :param buckets: number of input and output buckets - :param replace_existing: not used. - :return: Azure storage instance - """ + If the client hasn't been initialized, it deserializes the MinioConfig + from the system configuration and creates a Minio client instance. - def get_storage(self, replace_existing: Optional[bool] = None) -> PersistentStorage: + :param replace_existing: If True, replace existing files in input buckets. + Defaults to False if None. + :return: Minio persistent storage client. + :raises RuntimeError: If Minio configuration is missing or invalid. + """ if self._storage is None: - storage_config = cast(SelfHostedResources, self._config.resources).storage_config + # self._config.resources should be SelfHostedResources instance + sh_resources = cast(SelfHostedResources, self._config.resources) + storage_config = sh_resources.storage_config if storage_config is None: self.logging.error( f"The {self._name} deployment is missing the " - "configuration of pre-allocated storage!" + "configuration of self-hosted object storage (e.g., Minio)!" ) - raise RuntimeError(f"Cannot run {self._name} deployment without any object storage") + raise RuntimeError(f"Cannot run {self._name} deployment without object storage config.") if isinstance(storage_config, MinioConfig): + # Minio.deserialize expects the MinioConfig itself, cache, and parent Resources self._storage = Minio.deserialize( storage_config, self._cache_client, - self._config.resources, + self._config.resources, # Pass the parent Resources object ) self._storage.logging_handlers = self._logging_handlers + if replace_existing is not None: # Apply replace_existing if provided now + self._storage.replace_existing = replace_existing else: self.logging.error( f"The {self._name} deployment does not support " f"the object storage config type: {type(storage_config)}!" ) - raise RuntimeError("Cannot work with the provided object storage!") + raise RuntimeError("Cannot work with the provided object storage config type!") - elif replace_existing is not None: + elif replace_existing is not None: # If storage already exists, just update replace_existing self._storage.replace_existing = replace_existing return self._storage def get_nosql_storage(self) -> NoSQLStorage: + """ + Get or initialize the self-hosted NoSQL storage client (ScyllaDB). + + If the client hasn't been initialized, it deserializes the ScyllaDBConfig + from the system configuration and creates a ScyllaDB client instance. + + :return: ScyllaDB NoSQL storage client. + :raises RuntimeError: If ScyllaDB configuration is missing or invalid. + """ if self._nosql_storage is None: - storage_config = cast(SelfHostedResources, self._config.resources).nosql_storage_config - if storage_config is None: + sh_resources = cast(SelfHostedResources, self._config.resources) + nosql_config = sh_resources.nosql_storage_config + if nosql_config is None: self.logging.error( f"The {self._name} deployment is missing the configuration " - "of pre-allocated NoSQL storage!" + "of self-hosted NoSQL storage (e.g., ScyllaDB)!" 
) - raise RuntimeError("Cannot allocate NoSQL storage!") + raise RuntimeError(f"Cannot run {self._name} deployment without NoSQL storage config.") - if isinstance(storage_config, ScyllaDBConfig): + if isinstance(nosql_config, ScyllaDBConfig): + # ScyllaDB.deserialize expects ScyllaDBConfig, cache, and parent Resources self._nosql_storage = ScyllaDB.deserialize( - storage_config, self._cache_client, self._config.resources + nosql_config, self._cache_client, self._config.resources ) self._nosql_storage.logging_handlers = self._logging_handlers else: self.logging.error( f"The {self._name} deployment does not support " - f"the NoSQL storage config type: {type(storage_config)}!" + f"the NoSQL storage config type: {type(nosql_config)}!" ) - raise RuntimeError("Cannot work with the provided NoSQL storage!") - + raise RuntimeError("Cannot work with the provided NoSQL storage config type!") return self._nosql_storage diff --git a/sebs/storage/scylladb.py b/sebs/storage/scylladb.py index aae97815d..4152cb256 100644 --- a/sebs/storage/scylladb.py +++ b/sebs/storage/scylladb.py @@ -18,19 +18,27 @@ class ScyllaDB(NoSQLStorage): + """ + NoSQL storage implementation using a self-hosted ScyllaDB server + running in a Docker container. ScyllaDB is accessed via its DynamoDB-compatible + API (Alternator). + """ @staticmethod def typename() -> str: + """Return the type name of this storage implementation.""" return f"{ScyllaDB.deployment_name()}.ScyllaDB" @staticmethod def deployment_name() -> str: + """Return the deployment name, which is 'scylladb' for this storage type.""" return "scylladb" @property def config(self) -> ScyllaDBConfig: + """The ScyllaDB specific configuration for this storage instance.""" return self._cfg - # the location does not matter + # ScyllaDB is self-hosted, so a fixed region is used, though not strictly applicable. SCYLLADB_REGION = "None" def __init__( @@ -38,31 +46,55 @@ def __init__( docker_client: docker.client, cache_client: Cache, config: ScyllaDBConfig, - resources: Optional[Resources] = None, + resources: Optional[Resources] = None, # Should be SelfHostedResources for consistency ): - - super().__init__(self.SCYLLADB_REGION, cache_client, resources) # type: ignore + """ + Initialize the ScyllaDB storage client. + + Sets up connection parameters and initializes a boto3 DynamoDB client + configured to connect to ScyllaDB's Alternator endpoint if an address is provided. + + :param docker_client: Docker client instance for managing the ScyllaDB container. + :param cache_client: Cache client instance. + :param config: ScyllaDBConfig object with connection and deployment details. + :param resources: Cloud/system resources configuration object. + """ + super().__init__(self.SCYLLADB_REGION, cache_client, resources) # type: ignore self._docker_client = docker_client - self._storage_container: Optional[docker.container] = None + self._storage_container: Optional[docker.models.containers.Container] = None # Type hint self._cfg = config - # Map benchmark -> orig_name -> table_name + # Map benchmark -> original_table_name -> actual_table_name (used by parent, might be direct here) self._tables: Dict[str, Dict[str, str]] = defaultdict(dict) - self._serializer = TypeSerializer() + self._serializer = TypeSerializer() # For DynamoDB data types - if config.address != "": - self.client = boto3.client( + # Initialize boto3 client if address is already known (e.g. 
from cache)
+        if config.address:  # Check if address is not empty
+            self.client = boto3.client(  # This is a DynamoDB client pointed at Alternator
                 "dynamodb",
-                region_name="None",
-                aws_access_key_id="None",
-                aws_secret_access_key="None",
-                endpoint_url=f"http://{config.address}",
+                region_name=self.SCYLLADB_REGION,  # ScyllaDB region is 'None'
+                aws_access_key_id=config.access_key,
+                aws_secret_access_key=config.secret_key,
+                endpoint_url=f"http://{config.address}",  # address already includes the Alternator port
             )
+        else:
+            self.client = None  # Will be initialized after container start and IP detection

     def start(self):
+        """
+        Start the ScyllaDB Docker container.
+
+        Configures a data volume, CPU/memory resources, and ScyllaDB arguments
+        (including enabling Alternator). Waits for the node to become operational.
+        The container ID and connection details are stored in `self._cfg`.

-        if self._cfg.data_volume == "":
-            scylladb_volume = os.path.join(project_absolute_path(), "scylladb-volume")
+        :raises RuntimeError: If starting the ScyllaDB container fails or the node doesn't boot.
+        """
+        if not self._cfg.data_volume:  # Check if data_volume is empty or None
+            scylladb_volume = os.path.join(project_absolute_path(), "scylladb-volume")
         else:
             scylladb_volume = self._cfg.data_volume
             scylladb_volume = os.path.abspath(scylladb_volume)
@@ -127,192 +159,335 @@ def start(self):
             self.logging.error("Starting ScyllaDB storage failed! Unknown error: {}".format(e))
             raise RuntimeError("Starting ScyllaDB storage unsuccesful")

-    # FIXME: refactor this - duplicated code from minio
     def configure_connection(self):
-        # who knows why? otherwise attributes are not loaded
-        if self._cfg.address == "":
-
+        """
+        Configure the connection details (address) for the ScyllaDB client (Alternator endpoint).
+
+        If the address is not already set in the config, it determines the
+        ScyllaDB server address based on the Docker container's network settings.
+        On Linux, it uses the container's bridge IP and the Alternator port.
+        On other systems, it uses localhost with the host-mapped Alternator port.
+        Initializes `self.client` with a boto3 DynamoDB client instance.
+
+        :raises RuntimeError: If the ScyllaDB container is not running or its IP address cannot be detected.
+        """
+        # Container attributes (e.g., network settings) are only populated after reload().
+        if not self._cfg.address:  # Check if address is empty or None
             if self._storage_container is None:
                 raise RuntimeError(
-                    "ScyllaDB container is not available! Make sure that you deployed "
-                    "the ScyllaDB storage and provided configuration!"
-                )
-
-            self._storage_container.reload()
-
-            # Check if the system is Linux and that it's not WSL
-            if platform.system() == "Linux" and "microsoft" not in platform.release().lower():
-                networks = self._storage_container.attrs["NetworkSettings"]["Networks"]
-                self._cfg.address = "{IPAddress}:{Port}".format(
-                    IPAddress=networks["bridge"]["IPAddress"], Port=self._cfg.alternator_port
+                    "ScyllaDB container is not available! Ensure ScyllaDB is started and configured."
                 )
-            else:
-                # System is either WSL, Windows, or Mac
+            self._storage_container.reload()  # Refresh container attributes
+
+            if platform.system() == "Linux" and "microsoft" not in platform.release().lower():  # Native Linux
+                networks = self._storage_container.attrs.get("NetworkSettings", {}).get("Networks", {})
+                bridge_network = networks.get("bridge", {})
+                ip_address = bridge_network.get("IPAddress")
+                if not ip_address:  # Fallback for some Docker versions or network modes
+                    ip_address = bridge_network.get("Gateway")
+                if not ip_address:
+                    self.logging.error(
+                        "Could not determine ScyllaDB container IP address from bridge network. "
+                        f"Attributes: {json.dumps(self._storage_container.attrs, indent=2)}"
+                    )
+                    raise RuntimeError(f"Failed to detect IP address for ScyllaDB container {self._storage_container.id}")
+                # Use the internal alternator port for connection from host if bridge IP is used
+                self._cfg.address = f"{ip_address}:{self._cfg.alternator_port}"
+            else:  # Docker Desktop (Windows, macOS), WSL
+                # Use the host-mapped port for Alternator
+                if self._cfg.mapped_port == -1:
+                    raise RuntimeError("ScyllaDB Alternator host port not mapped or invalid for non-Linux Docker.")
                 self._cfg.address = f"localhost:{self._cfg.mapped_port}"
+
+            self.logging.info(f"ScyllaDB (Alternator) instance configured at http://{self._cfg.address}")
+
+        # Initialize the boto3 client that talks to ScyllaDB's DynamoDB-compatible API (Alternator),
+        # once the full address (IP and Alternator port) is known.
+        self.client = boto3.client(
+            "dynamodb",
+            region_name=self.SCYLLADB_REGION,
+            aws_access_key_id=self._cfg.access_key,
+            aws_secret_access_key=self._cfg.secret_key,
+            endpoint_url=f"http://{self._cfg.address}",
+        )

-            if not self._cfg.address:
-                self.logging.error(
-                    f"Couldn't read the IP address of container from attributes "
-                    f"{json.dumps(self._instance.attrs, indent=2)}"
-                )
-                raise RuntimeError(
-                    f"Incorrect detection of IP address for container with id {self._instance_id}"
-                )
-
-        self.logging.info("Starting ScyllaDB instance at {}".format(self._cfg.address))

     def stop(self):
+        """
+        Stop the ScyllaDB Docker container if it's running.
+        """
         if self._storage_container is not None:
-            self.logging.info(f"Stopping ScyllaDB container at {self._cfg.address}.")
-            self._storage_container.stop()
-            self.logging.info(f"Stopped ScyllaDB container at {self._cfg.address}.")
+            try:
+                self.logging.info(f"Stopping ScyllaDB container {self._storage_container.id} at {self._cfg.address}.")
+                self._storage_container.stop()
+                self.logging.info(f"Stopped ScyllaDB container {self._storage_container.id}.")
+            except docker.errors.NotFound:
+                self.logging.warning(f"ScyllaDB container {self._storage_container.id} already removed or not found.")
+            except docker.errors.APIError as e:
+                self.logging.error(f"Error stopping ScyllaDB container {self._storage_container.id}: {e}")
         else:
-            self.logging.error("Stopping ScyllaDB was not succesful, storage container not known!")
+            self.logging.warning("Attempted to stop ScyllaDB, but the storage container is not known.")

     def envs(self) -> dict:
-        return {"NOSQL_STORAGE_TYPE": "scylladb", "NOSQL_STORAGE_ENDPOINT": self._cfg.address}
+        """
+        Return environment variables for functions to connect to this ScyllaDB instance
+        via its Alternator (DynamoDB-compatible) endpoint.
+
+        :return: Dictionary of ScyllaDB Alternator related environment variables.
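+
+        Illustrative return value (the address, port, and credentials below are placeholders):
+
+            {
+                "NOSQL_STORAGE_TYPE": "scylladb",
+                "NOSQL_STORAGE_ENDPOINT": "http://172.17.0.2:8000",
+                "AWS_ACCESS_KEY_ID": "None",
+                "AWS_SECRET_ACCESS_KEY": "None",
+                "AWS_DEFAULT_REGION": "None",
+            }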
+ """ + # Ensure address includes the Alternator port if not already part of it + # self.configure_connection() might be needed if address wasn't set before envs() is called + if not self._cfg.address: + self.configure_connection() # Ensure address is resolved + + return { + "NOSQL_STORAGE_TYPE": "scylladb", # Or "dynamodb" if functions use generic DynamoDB SDK + "NOSQL_STORAGE_ENDPOINT": f"http://{self._cfg.address}", # Full endpoint URL + # ScyllaDB's Alternator might not require AWS_ACCESS_KEY_ID/SECRET if auth is off + # If needed, they would be self._cfg.access_key and self._cfg.secret_key + "AWS_ACCESS_KEY_ID": self._cfg.access_key, + "AWS_SECRET_ACCESS_KEY": self._cfg.secret_key, + "AWS_DEFAULT_REGION": self.SCYLLADB_REGION, # Needs a region for AWS SDK + } def serialize(self) -> Tuple[StorageType, dict]: - return StorageType.SCYLLADB, self._cfg.serialize() + """ + Serialize the ScyllaDB storage configuration. - """ - This implementation supports overriding this class. - The main ScyllaDB class is used to start/stop deployments. - - When overriding the implementation in Local/OpenWhisk/..., - we call the _deserialize and provide an alternative implementation. - """ + :return: Tuple containing the storage type (StorageType.SCYLLADB) and + the serialized ScyllaDBConfig dictionary. + """ + return StorageType.SCYLLADB, self._cfg.serialize() - T = TypeVar("T", bound="ScyllaDB") + T = TypeVar("T", bound="ScyllaDB") # For type hinting the return of _deserialize @staticmethod def _deserialize( - cached_config: ScyllaDBConfig, cache_client: Cache, resources: Resources, obj_type: Type[T] + cached_config: ScyllaDBConfig, + cache_client: Cache, + resources: Resources, # Should be SelfHostedResources + obj_type: Type[T], # The concrete class type (ScyllaDB or subclass) ) -> T: + """ + Internal helper to deserialize a ScyllaDB (or subclass) instance. + + Restores configuration and re-attaches to an existing Docker container if specified. + + :param cached_config: The ScyllaDBConfig object from cache/config. + :param cache_client: Cache client instance. + :param resources: The Resources object. + :param obj_type: The actual class type to instantiate. + :return: An instance of `obj_type`. + :raises RuntimeError: If a cached Docker container ID is provided but the container is not found. 
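+
+        Illustrative override pattern (a sketch; ``PlatformScyllaDB`` is a hypothetical
+        platform-specific subclass, not an existing class):
+
+            class PlatformScyllaDB(ScyllaDB):
+                @staticmethod
+                def deserialize(cached_config, cache_client, resources) -> "PlatformScyllaDB":
+                    return ScyllaDB._deserialize(
+                        cached_config, cache_client, resources, PlatformScyllaDB
+                    )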
+ """ docker_client = docker.from_env() + # Create instance of the correct type, passing all necessary args obj = obj_type(docker_client, cache_client, cached_config, resources) + # obj._cfg is already set by __init__ - if cached_config.instance_id: - instance_id = cached_config.instance_id + if cached_config.instance_id: # If a container ID was cached try: - obj._storage_container = docker_client.containers.get(instance_id) + obj._storage_container = docker_client.containers.get(cached_config.instance_id) + obj.logging.info(f"Re-attached to existing ScyllaDB container {cached_config.instance_id}") except docker.errors.NotFound: - raise RuntimeError(f"Storage container {instance_id} does not exist!") + obj.logging.error(f"Cached ScyllaDB container {cached_config.instance_id} not found!") + raise RuntimeError(f"ScyllaDB storage container {cached_config.instance_id} does not exist!") + except docker.errors.APIError as e: + obj.logging.error(f"API error attaching to ScyllaDB container {cached_config.instance_id}: {e}") + raise else: - obj._storage_container = None + obj._storage_container = None # No cached container ID + + # Configure connection if address is known (either from config or after container reload) + if obj._cfg.address or obj._storage_container: + obj.configure_connection() + return obj @staticmethod def deserialize( cached_config: ScyllaDBConfig, cache_client: Cache, resources: Resources ) -> "ScyllaDB": + """ + Deserialize a ScyllaDB instance from a ScyllaDBConfig object. + + :param cached_config: The ScyllaDBConfig object. + :param cache_client: Cache client instance. + :param resources: The Resources object. + :return: A ScyllaDB instance. + """ return ScyllaDB._deserialize(cached_config, cache_client, resources, ScyllaDB) def retrieve_cache(self, benchmark: str) -> bool: + """ + Retrieve ScyllaDB table configurations for a benchmark from the cache. + Populates internal table mappings if cached data is found. + + :param benchmark: The name of the benchmark. + :return: True if cached data was successfully retrieved, False otherwise. + """ if benchmark in self._tables: return True - - cached_storage = self.cache_client.get_nosql_config(self.deployment_name(), benchmark) - if cached_storage is not None: - self._tables[benchmark] = cached_storage["tables"] + cached_storage_info = self.cache_client.get_nosql_config(self.deployment_name(), benchmark) + if cached_storage_info and "tables" in cached_storage_info: + self._tables[benchmark] = cached_storage_info["tables"] return True - return False def update_cache(self, benchmark: str): + """ + Update the cache with the current ScyllaDB table configurations for a benchmark. + :param benchmark: The name of the benchmark. + """ self._cache_client.update_nosql( self.deployment_name(), benchmark, - { - "tables": self._tables[benchmark], - }, + {"tables": self._tables[benchmark]}, # Store the table mappings ) def get_tables(self, benchmark: str) -> Dict[str, str]: - return self._tables[benchmark] - - def _get_table_name(self, benchmark: str, table: str) -> Optional[str]: + """ + Get the mapping of benchmark-defined table names to actual ScyllaDB/DynamoDB table names. - if benchmark not in self._tables: - return None + :param benchmark: Name of the benchmark. + :return: Dictionary mapping logical table names to actual table names. 
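+
+        Illustrative return value (hypothetical benchmark "my-benchmark", alias "results",
+        and resource id "abc123"):
+
+            {"results": "sebs-benchmarks-abc123-my-benchmark-results"}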
+ """ + return self._tables[benchmark] - if table not in self._tables[benchmark]: - return None + def _get_table_name(self, benchmark: str, table_alias: str) -> Optional[str]: # Renamed arg + """ + Get the actual cloud provider table name for a given benchmark and logical table alias. - return self._tables[benchmark][table] + :param benchmark: The name of the benchmark. + :param table_alias: The logical name of the table within the benchmark. + :return: The actual table name, or None if not found. + """ + benchmark_tables = self._tables.get(benchmark) + if benchmark_tables: + return benchmark_tables.get(table_alias) + return None def write_to_table( self, benchmark: str, - table: str, + table_alias: str, # Renamed from table to table_alias data: dict, - primary_key: Tuple[str, str], - secondary_key: Optional[Tuple[str, str]] = None, + primary_key: Tuple[str, str], # (key_name, key_value) + secondary_key: Optional[Tuple[str, str]] = None, # (key_name, key_value) ): + """ + Write an item to the specified ScyllaDB/DynamoDB table. + + Data is serialized using DynamoDB TypeSerializer. + + :param benchmark: The name of the benchmark. + :param table_alias: The logical name of the table. + :param data: The data to write (as a dictionary). + :param primary_key: Tuple (key_name, key_value) for the primary/partition key. + :param secondary_key: Optional tuple for the secondary/sort key. + """ + actual_table_name = self._get_table_name(benchmark, table_alias) + assert actual_table_name is not None, f"Table alias {table_alias} not found for benchmark {benchmark}" + + item_to_put = data.copy() # Avoid modifying original data dict + # Add primary and secondary keys to the item itself + item_to_put[primary_key[0]] = primary_key[1] + if secondary_key: + item_to_put[secondary_key[0]] = secondary_key[1] + + serialized_item = {k: self._serializer.serialize(v) for k, v in item_to_put.items()} + if not self.client: + self.configure_connection() # Ensure client is initialized + self.client.put_item(TableName=actual_table_name, Item=serialized_item) # type: ignore - table_name = self._get_table_name(benchmark, table) - assert table_name is not None - - for key in (primary_key, secondary_key): - if key is not None: - data[key[0]] = key[1] - - serialized_data = {k: self._serializer.serialize(v) for k, v in data.items()} - self.client.put_item(TableName=table_name, Item=serialized_data) - - """ - AWS: create a DynamoDB Table - - In contrast to the hierarchy of database objects in Azure (account -> database -> container) - and GCP (database per benchmark), we need to create unique table names here. - """ def create_table( - self, benchmark: str, name: str, primary_key: str, secondary_key: Optional[str] = None + self, benchmark: str, name_alias: str, primary_key_name: str, secondary_key_name: Optional[str] = None ) -> str: - - table_name = f"sebs-benchmarks-{self._cloud_resources.resources_id}-{benchmark}-{name}" + """ + Create a ScyllaDB/DynamoDB table for a benchmark. + + Table names are constructed using a standard SeBS pattern: + `sebs-benchmarks-{resource_id}-{benchmark-name}-{table_alias}`. + Uses PAY_PER_REQUEST billing mode (DynamoDB specific, ScyllaDB ignores). + + :param benchmark: Name of the benchmark. + :param name_alias: Logical name for the table within the benchmark. + :param primary_key_name: Name of the primary/partition key attribute. + :param secondary_key_name: Optional name of the secondary/sort key attribute. + :return: Actual name of the created or existing table. 
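+
+        Illustrative call (hypothetical alias "results" and resource id "abc123"):
+
+            create_table("my-benchmark", "results", "request_id")
+            # -> "sebs-benchmarks-abc123-my-benchmark-results"
+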
+ :raises RuntimeError: If table creation fails for an unknown reason. + """ + # Construct the full table name + actual_table_name = f"sebs-benchmarks-{self._cloud_resources.resources_id}-{benchmark}-{name_alias}" try: - - definitions = [{"AttributeName": primary_key, "AttributeType": "S"}] - key_schema = [{"AttributeName": primary_key, "KeyType": "HASH"}] - - if secondary_key is not None: - definitions.append({"AttributeName": secondary_key, "AttributeType": "S"}) - key_schema.append({"AttributeName": secondary_key, "KeyType": "RANGE"}) - - ret = self.client.create_table( - TableName=table_name, - BillingMode="PAY_PER_REQUEST", - AttributeDefinitions=definitions, # type: ignore - KeySchema=key_schema, # type: ignore + attribute_definitions = [{"AttributeName": primary_key_name, "AttributeType": "S"}] # Assume string keys + key_schema = [{"AttributeName": primary_key_name, "KeyType": "HASH"}] # HASH for partition key + + if secondary_key_name: + attribute_definitions.append({"AttributeName": secondary_key_name, "AttributeType": "S"}) + key_schema.append({"AttributeName": secondary_key_name, "KeyType": "RANGE"}) # RANGE for sort key + + if not self.client: + self.configure_connection() # Ensure client is initialized + + response = self.client.create_table( # type: ignore + TableName=actual_table_name, + AttributeDefinitions=attribute_definitions, + KeySchema=key_schema, + BillingMode="PAY_PER_REQUEST" # For DynamoDB compatibility, ScyllaDB ignores ) + + # Wait for table to become active (mainly for DynamoDB, ScyllaDB might be faster) + # ScyllaDB might not have CREATING status or waiter, handle this gracefully. + if response.get("TableDescription", {}).get("TableStatus") == "CREATING": + self.logging.info(f"Waiting for creation of table {actual_table_name}") + try: + waiter = self.client.get_waiter("table_exists") # type: ignore + waiter.wait(TableName=actual_table_name) + except Exception as e: # Broad exception if waiter not supported or fails + self.logging.warning(f"Waiter for table {actual_table_name} failed or not supported (ScyllaDB?): {e}. Assuming table will be available.") + time.sleep(5) # Generic wait for ScyllaDB + + self.logging.info(f"Created/Verified table {actual_table_name} for benchmark {benchmark}") + self._tables[benchmark][name_alias] = actual_table_name + return actual_table_name + + except self.client.exceptions.ResourceInUseException: # type: ignore + # Table already exists + self.logging.info( + f"Using existing table {actual_table_name} for benchmark {benchmark}, alias {name_alias}" + ) + self._tables[benchmark][name_alias] = actual_table_name + return actual_table_name + except Exception as e: # Catch other potential errors + self.logging.error(f"Creating table {actual_table_name} failed: {e}") + raise RuntimeError(f"Creating table failed: {e}") - if ret["TableDescription"]["TableStatus"] == "CREATING": - self.logging.info(f"Waiting for creation of DynamoDB table {name}") - waiter = self.client.get_waiter("table_exists") - waiter.wait(TableName=name) - - self.logging.info(f"Created DynamoDB table {name} for benchmark {benchmark}") - self._tables[benchmark][name] = table_name - return ret["TableDescription"]["TableName"] + def clear_table(self, name: str) -> str: + """ + Clear all items from a ScyllaDB/DynamoDB table. - except self.client.exceptions.ResourceInUseException as e: + Note: This method is not implemented. Efficiently clearing a table + often involves deleting and recreating it, or scanning and batch deleting items. 
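+
+        One possible approach, sketched here for illustration only (not implemented;
+        ``key_attributes`` is a hypothetical list of the table's key attribute names,
+        and scan pagination is omitted):
+
+            for item in self.client.scan(TableName=name)["Items"]:
+                self.client.delete_item(
+                    TableName=name, Key={k: item[k] for k in key_attributes}
+                )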
-            if "already exists" in e.response["Error"]["Message"]:
-                self.logging.info(
-                    f"Using existing DynamoDB table {table_name} for benchmark {benchmark}"
-                )
-                self._tables[benchmark][name] = table_name
-                return name
+        :param name: Actual name of the table in the cloud/DB.
+        :raises NotImplementedError: This method is not yet implemented.
+        """
+        raise NotImplementedError("Clearing a ScyllaDB/DynamoDB table is not implemented yet.")

-            raise RuntimeError(f"Creating DynamoDB failed, unknown reason! Error: {e}")
+    def remove_table(self, name: str) -> str:
+        """
+        Remove/delete a ScyllaDB/DynamoDB table completely.

-    def clear_table(self, name: str) -> str:
-        raise NotImplementedError()
+        Note: This method is not implemented.

-    def remove_table(self, name: str) -> str:
-        raise NotImplementedError()
+        :param name: Actual name of the table in the cloud/DB.
+        :raises NotImplementedError: This method is not yet implemented.
+        """
+        raise NotImplementedError("Removing a ScyllaDB/DynamoDB table is not implemented yet.")
diff --git a/sebs/types.py b/sebs/types.py
index b87516fba..d437b53c0 100644
--- a/sebs/types.py
+++ b/sebs/types.py
@@ -2,27 +2,40 @@


 class BenchmarkModule(str, Enum):
-    STORAGE = "storage"
-    NOSQL = "nosql"
+    """
+    Enumeration of SeBS modules that benchmarks can utilize.
+    These modules typically provide access to specific types of cloud resources.
+    """
+    STORAGE = "storage"  #: Represents object storage services (e.g., S3, Minio).
+    NOSQL = "nosql"  #: Represents NoSQL database services (e.g., DynamoDB, ScyllaDB).


 class Platforms(str, Enum):
-    AWS = "aws"
-    AZURE = "azure"
-    GCP = "gcp"
-    LOCAL = "local"
-    OPENWHISK = "openwhisk"
+    """
+    Enumeration of supported FaaS platforms in SeBS.
+    """
+    AWS = "aws"  #: Amazon Web Services.
+    AZURE = "azure"  #: Microsoft Azure.
+    GCP = "gcp"  #: Google Cloud Platform.
+    LOCAL = "local"  #: Local Docker-based deployment for testing.
+    OPENWHISK = "openwhisk"  #: Apache OpenWhisk.


 class Storage(str, Enum):
-    AWS_S3 = "aws-s3"
-    AZURE_BLOB_STORAGE = "azure-blob-storage"
-    GCP_STORAGE = "google-cloud-storage"
-    MINIO = "minio"
+    """
+    Enumeration of persistent object storage service types supported by SeBS.
+    """
+    AWS_S3 = "aws-s3"  #: AWS Simple Storage Service (S3).
+    AZURE_BLOB_STORAGE = "azure-blob-storage"  #: Azure Blob Storage.
+    GCP_STORAGE = "google-cloud-storage"  #: Google Cloud Storage.
+    MINIO = "minio"  #: Self-hosted Minio S3-compatible storage.


 class NoSQLStorage(str, Enum):
-    AWS_DYNAMODB = "aws-dynamodb"
-    AZURE_COSMOSDB = "azure-cosmosdb"
-    GCP_DATASTORE = "google-cloud-datastore"
-    SCYLLADB = "scylladb"
+    """
+    Enumeration of NoSQL database service types supported by SeBS.
+    """
+    AWS_DYNAMODB = "aws-dynamodb"  #: AWS DynamoDB.
+    AZURE_COSMOSDB = "azure-cosmosdb"  #: Azure Cosmos DB.
+    GCP_DATASTORE = "google-cloud-datastore"  #: Google Cloud Datastore (Firestore in Datastore mode).
+    SCYLLADB = "scylladb"  #: Self-hosted ScyllaDB (DynamoDB-compatible via Alternator).
diff --git a/sebs/utils.py b/sebs/utils.py
index e7ab43f63..372efdff6 100644
--- a/sebs/utils.py
+++ b/sebs/utils.py
@@ -11,190 +11,298 @@
-from typing import List, Optional
+from typing import Any, List, Optional

 PROJECT_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)
+"""Absolute path to the SeBS project root directory."""
 DOCKER_DIR = os.path.join(PROJECT_DIR, "dockerfiles")
-PACK_CODE_APP = "pack_code_{}.sh"
+"""Absolute path to the directory containing Dockerfiles."""
+PACK_CODE_APP = "pack_code_{}.sh"
+"""Template string for packaging script names, potentially unused.""" -def project_absolute_path(*paths: str): +def project_absolute_path(*paths: str) -> str: + """ + Construct an absolute path relative to the SeBS project root directory. + + :param paths: Variable number of path components to join with the project root. + :return: Absolute path string. + """ return os.path.join(PROJECT_DIR, *paths) class JSONSerializer(json.JSONEncoder): - def default(self, o): + """ + Custom JSON encoder for SeBS objects. + + Handles objects that have a `serialize()` method, dictionaries, + and attempts to convert other objects using `vars()` or `str()`. + """ + def default(self, o: Any) -> Any: + """ + Override the default JSON encoding behavior. + + :param o: The object to encode. + :return: A serializable representation of the object. + """ if hasattr(o, "serialize"): return o.serialize() - elif isinstance(o, dict): - return str(o) + # elif isinstance(o, dict): # This condition is problematic, as dicts are usually handled by default. + # return str(o) # Converting dict to str is generally not desired for JSON. + # # If the intent was to handle specific non-serializable dicts, it needs refinement. + # # For now, commenting out as it might break standard dict serialization. else: try: - return vars(o) + return vars(o) # For simple objects, their __dict__ might be serializable except TypeError: - return str(o) + return str(o) # Fallback to string representation + +def serialize(obj: Any) -> str: + """ + Serialize an object to a JSON string using the custom `JSONSerializer`. -def serialize(obj) -> str: + If the object has a `serialize()` method, it's called first. + + :param obj: The object to serialize. + :return: A JSON string representation of the object, pretty-printed with indent 2. + """ if hasattr(obj, "serialize"): + # Assumes obj.serialize() returns a JSON-serializable dictionary or list return json.dumps(obj.serialize(), sort_keys=True, indent=2) else: return json.dumps(obj, cls=JSONSerializer, sort_keys=True, indent=2) -# Executing with shell provides options such as wildcard expansion -def execute(cmd, shell=False, cwd=None): +def execute(cmd: str, shell: bool = False, cwd: Optional[str] = None) -> str: + """ + Execute a shell command. + + Can run the command directly or through the system's shell. + Captures stdout and stderr, raising a RuntimeError if the command fails. + + :param cmd: The command string to execute. + :param shell: If True, execute the command through the shell (allows shell features + like wildcards, but can be a security risk if `cmd` is from untrusted input). + Defaults to False. + :param cwd: Optional current working directory for the command execution. + :return: The decoded stdout of the executed command. + :raises RuntimeError: If the command returns a non-zero exit code. 
+ """ if not shell: - cmd = cmd.split() - ret = subprocess.run( - cmd, shell=shell, cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + command_list = cmd.split() + else: + command_list = cmd # If shell=True, cmd is passed as a string + + process_result = subprocess.run( + command_list, shell=shell, cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT ) - if ret.returncode: + if process_result.returncode != 0: raise RuntimeError( - "Running {} failed!\n Output: {}".format(cmd, ret.stdout.decode("utf-8")) + f"Running command '{cmd}' failed with exit code {process_result.returncode}!\n" + f"Output: {process_result.stdout.decode('utf-8', errors='replace')}" ) - return ret.stdout.decode("utf-8") + return process_result.stdout.decode("utf-8", errors='replace') + +def update_nested_dict(cfg: dict, keys: List[str], value: Optional[Any]): # Value type changed to Any + """ + Update a value in a nested dictionary at a path specified by `keys`. -def update_nested_dict(cfg: dict, keys: List[str], value: Optional[str]): + If `value` is None, the key at the end of the path is not set or modified. + Parent dictionaries in the path are created if they don't exist. + + :param cfg: The dictionary to update. + :param keys: A list of strings representing the path to the value. + :param value: The new value to set. If None, no update is performed for the final key. + """ if value is not None: - # make sure parent keys exist - for key in keys[:-1]: - cfg = cfg.setdefault(key, {}) - cfg[keys[-1]] = value + current_level = cfg + for key_part in keys[:-1]: # Iterate through keys to navigate/create path + current_level = current_level.setdefault(key_part, {}) + current_level[keys[-1]] = value + + +def append_nested_dict(cfg: dict, keys: List[str], value_dict: Optional[dict]): # Renamed value to value_dict + """ + Append/merge a dictionary `value_dict` into a nested dictionary `cfg` at `keys`. + + If `value_dict` is provided, its key-value pairs are merged into the dictionary + found at the specified path. Existing keys at the target location will be + overwritten by values from `value_dict`. + + :param cfg: The dictionary to update. + :param keys: A list of strings representing the path to the target dictionary. + :param value_dict: The dictionary whose items will be merged into the target. + If None, no update is performed. + """ + if value_dict: + current_level = cfg + for key_part in keys[:-1]: + current_level = current_level.setdefault(key_part, {}) + # Ensure the target key exists and is a dictionary before merging + target_dict = current_level.setdefault(keys[-1], {}) + if isinstance(target_dict, dict): + target_dict.update(value_dict) + else: + # Handle case where target is not a dict (e.g., overwrite or log error) + current_level[keys[-1]] = value_dict # Overwrites if not a dict -def append_nested_dict(cfg: dict, keys: List[str], value: Optional[dict]): - if value: - # make sure parent keys exist - for key in keys[:-1]: - cfg = cfg.setdefault(key, {}) - cfg[keys[-1]] = {**cfg[keys[-1]], **value} +def find(name: str, search_path: str) -> Optional[str]: # Renamed path to search_path + """ + Find a directory by name within a given search path. + Performs a recursive walk starting from `search_path`. -def find(name, path): - for root, dirs, files in os.walk(path): + :param name: The name of the directory to find. + :param search_path: The root path to start the search from. + :return: The absolute path to the found directory, or None if not found. 
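+
+    Illustrative usage (hypothetical directory name):
+
+        benchmark_dir = find("my-benchmark", project_absolute_path("benchmarks"))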
+ """ + for root, dirs, _ in os.walk(search_path): # files variable is not used if name in dirs: return os.path.join(root, name) return None -def create_output(directory, preserve_dir, verbose): - output_dir = os.path.abspath(directory) - if os.path.exists(output_dir) and not preserve_dir: - shutil.rmtree(output_dir) - if not os.path.exists(output_dir): - os.makedirs(output_dir, exist_ok=True) +def create_output(directory: str, preserve_dir: bool, verbose: bool) -> str: # verbose seems unused here + """ + Create or clean an output directory and configure logging. + + If `preserve_dir` is False and the directory exists, it's removed first. + The directory is then created if it doesn't exist. + Calls `configure_logging()` (which currently mutes library loggers). + + :param directory: Path to the output directory. + :param preserve_dir: If True, do not remove the directory if it exists. + :param verbose: Verbosity flag (passed to logging config, though current configure_logging is simple). + :return: The absolute path to the created/ensured output directory. + """ + abs_output_dir = os.path.abspath(directory) + if os.path.exists(abs_output_dir) and not preserve_dir: + logging.info(f"Removing existing output directory: {abs_output_dir}") + shutil.rmtree(abs_output_dir) + if not os.path.exists(abs_output_dir): + os.makedirs(abs_output_dir, exist_ok=True) + + # configure_logging is currently very simple. If it were to use `verbose` or `output_dir`, + # those would be passed here. configure_logging() - return output_dir + return abs_output_dir def configure_logging(): - - # disable information from libraries logging to decrease output noise - loggers = ["urrlib3", "docker", "botocore"] - for name in logging.root.manager.loggerDict: - for logger in loggers: - if name.startswith(logger): + """ + Configure global logging settings for SeBS. + + Currently, this function disables verbose logging from common libraries + (urllib3, docker, botocore) by setting their log levels to ERROR, + to reduce output noise during SeBS execution. + The commented-out section shows a more elaborate potential logging setup. 
+ """ + # Disable verbose logging from common libraries + noisy_loggers = ["urllib3", "docker", "botocore"] + for logger_name_prefix in noisy_loggers: + # Iterate over all existing loggers to find matches by prefix + for name in list(logging.root.manager.loggerDict): # Iterate over a copy of keys + if name.startswith(logger_name_prefix): logging.getLogger(name).setLevel(logging.ERROR) -# def configure_logging(verbose: bool = False, output_dir: Optional[str] = None): -# logging_format = "%(asctime)s,%(msecs)d %(levelname)s %(name)s: %(message)s" -# logging_date_format = "%H:%M:%S" -# -# # default file log -# options = { -# "format": logging_format, -# "datefmt": logging_date_format, -# "level": logging.DEBUG if verbose else logging.INFO, -# } -# if output_dir: -# options = { -# **options, -# "filename": os.path.join(output_dir, "out.log"), -# "filemode": "w", -# } -# logging.basicConfig(**options) -# # Add stdout output -# if output_dir: -# stdout = logging.StreamHandler(sys.stdout) -# formatter = logging.Formatter(logging_format, logging_date_format) -# stdout.setFormatter(formatter) -# stdout.setLevel(logging.DEBUG if verbose else logging.INFO) -# logging.getLogger().addHandler(stdout) -# # disable information from libraries logging to decrease output noise -# for name in logging.root.manager.loggerDict: -# if ( -# name.startswith("urllib3") -# or name.startswith("docker") -# or name.startswith("botocore") -# ): -# logging.getLogger(name).setLevel(logging.ERROR) - - -""" - Locate directory corresponding to a benchmark in benchmarks - or benchmarks-data directory. - - :param benchmark: Benchmark name. - :param path: Path for lookup, relative to repository. - :return: relative path to directory corresponding to benchmark -""" - - -def find_benchmark(benchmark: str, path: str): - benchmarks_dir = os.path.join(PROJECT_DIR, path) - benchmark_path = find(benchmark, benchmarks_dir) - return benchmark_path +def find_benchmark(benchmark_name: str, benchmarks_root_path: str) -> Optional[str]: + """ + Locate a benchmark's directory within a given root path. + + Searches for a directory named `benchmark_name` under `benchmarks_root_path`. + + :param benchmark_name: Name of the benchmark directory to find. + :param benchmarks_root_path: The root path for benchmark lookup (e.g., "benchmarks" or "benchmarks-data"), + relative to the SeBS project directory. + :return: Absolute path to the benchmark directory if found, otherwise None. + """ + # Construct absolute path for searching + search_dir = project_absolute_path(benchmarks_root_path) + return find(benchmark_name, search_dir) # Use the general `find` utility def global_logging(): + """ + Set up basic global logging configuration for the SeBS application. + + Configures a default format, date format, and sets the logging level to INFO. + This is typically called once at the start of the application. + """ logging_format = "%(asctime)s,%(msecs)d %(levelname)s %(name)s: %(message)s" logging_date_format = "%H:%M:%S" logging.basicConfig(format=logging_format, datefmt=logging_date_format, level=logging.INFO) class ColoredWrapper: - SUCCESS = "\033[92m" - STATUS = "\033[94m" - WARNING = "\033[93m" - ERROR = "\033[91m" - BOLD = "\033[1m" - END = "\033[0m" - - def __init__(self, prefix, logger, verbose=True, propagte=False): + """ + A wrapper around a standard Python logger to provide colored console output using Click. + + Allows logging messages with different colors based on severity (DEBUG, INFO, + WARNING, ERROR, CRITICAL). 
Can also propagate messages to the underlying logger. + """ + SUCCESS = "\033[92m" #: Green color for success messages. + STATUS = "\033[94m" #: Blue color for status/debug messages. + WARNING = "\033[93m" #: Yellow color for warning messages. + ERROR = "\033[91m" #: Red color for error/critical messages. + BOLD = "\033[1m" #: Bold text. + END = "\033[0m" #: Reset text formatting. + + def __init__(self, prefix: str, logger: logging.Logger, verbose: bool = True, propagate: bool = False): # Renamed propagte + """ + Initialize the ColoredWrapper. + + :param prefix: A prefix string to prepend to log messages (e.g., class name). + :param logger: The underlying `logging.Logger` instance. + :param verbose: If True, DEBUG messages are printed to console. Defaults to True. + :param propagate: If True, messages are also passed to the underlying logger's handlers. + Defaults to False. + """ self.verbose = verbose - self.propagte = propagte + self.propagate = propagate # Renamed from propagte self.prefix = prefix self._logging = logger - def debug(self, message): + def debug(self, message: str): + """Log a DEBUG message. Printed to console if verbose is True.""" if self.verbose: self._print(message, ColoredWrapper.STATUS) - if self.propagte: - self._logging.debug(message) + if self.propagate: + self._logging.debug(message) - def info(self, message): + def info(self, message: str): + """Log an INFO message.""" self._print(message, ColoredWrapper.SUCCESS) - if self.propagte: + if self.propagate: self._logging.info(message) - def warning(self, message): + def warning(self, message: str): + """Log a WARNING message.""" self._print(message, ColoredWrapper.WARNING) - if self.propagte: + if self.propagate: self._logging.warning(message) - def error(self, message): + def error(self, message: str): + """Log an ERROR message.""" self._print(message, ColoredWrapper.ERROR) - if self.propagte: + if self.propagate: self._logging.error(message) - def critical(self, message): - self._print(message, ColoredWrapper.ERROR) - if self.propagte: + def critical(self, message: str): + """Log a CRITICAL message.""" + self._print(message, ColoredWrapper.ERROR) # Uses ERROR color for critical + if self.propagate: self._logging.critical(message) - def _print(self, message, color): - timestamp = datetime.datetime.now().strftime("%H:%M:%S.%f") + def _print(self, message: str, color: str): + """ + Internal method to print a colored and formatted message to the console using Click. + + :param message: The message string. + :param color: The ANSI color code string. + """ + timestamp = datetime.datetime.now().strftime("%H:%M:%S.%f")[:-3] # Milliseconds click.echo( f"{color}{ColoredWrapper.BOLD}[{timestamp}]{ColoredWrapper.END} " f"{ColoredWrapper.BOLD}{self.prefix}{ColoredWrapper.END} {message}" @@ -202,78 +310,155 @@ def _print(self, message, color): class LoggingHandlers: + """ + Manages logging handlers, specifically a file handler if a filename is provided. + + Attributes: + verbosity: Boolean indicating if verbose logging is enabled for console. + handler: Optional `logging.FileHandler` instance if file logging is active. + """ def __init__(self, verbose: bool = False, filename: Optional[str] = None): + """ + Initialize LoggingHandlers. + + Sets up a file handler if `filename` is provided. + + :param verbose: If True, sets DEBUG level for file log, otherwise INFO. + Also stored for use by `ColoredWrapper`. + :param filename: Optional path to a log file. If provided, a file handler + is created and configured. 
+ """ logging_format = "%(asctime)s,%(msecs)d %(levelname)s %(name)s: %(message)s" logging_date_format = "%H:%M:%S" formatter = logging.Formatter(logging_format, logging_date_format) self.handler: Optional[logging.FileHandler] = None - # Remember verbosity for colored wrapper - self.verbosity = verbose + self.verbosity = verbose # Store verbosity for ColoredWrapper - # Add file output if needed if filename: - file_out = logging.FileHandler(filename=filename, mode="w") - file_out.setFormatter(formatter) - file_out.setLevel(logging.DEBUG if verbose else logging.INFO) - self.handler = file_out + try: + file_out_handler = logging.FileHandler(filename=filename, mode="w") + file_out_handler.setFormatter(formatter) + file_out_handler.setLevel(logging.DEBUG if verbose else logging.INFO) + self.handler = file_out_handler + except Exception as e: + # Fallback to console if file handler fails, but log the error + print(f"Error setting up file logger for {filename}: {e}. Logging to console only for this handler context.") + self.handler = None class LoggingBase: + """ + Base class providing standardized logging capabilities for SeBS components. + + Initializes a logger with a unique name (class name + UUID4 prefix) and + a `ColoredWrapper` for console output. Allows attaching `LoggingHandlers` + to enable file logging. + """ def __init__(self): - uuid_name = str(uuid.uuid4())[0:4] - if hasattr(self, "typename"): - self.log_name = f"{self.typename()}-{uuid_name}" - else: - self.log_name = f"{self.__class__.__name__}-{uuid_name}" + """ + Initialize LoggingBase. Sets up a logger and a default ColoredWrapper. + `logging_handlers` should be set after initialization to enable file logging. + """ + uuid_prefix = str(uuid.uuid4())[0:4] # Renamed from uuid_name + class_name = getattr(self, "typename", lambda: self.__class__.__name__)() + self.log_name = f"{class_name}-{uuid_prefix}" self._logging = logging.getLogger(self.log_name) - self._logging.setLevel(logging.INFO) + # Default level, can be overridden by handlers + self._logging.setLevel(logging.DEBUG) # Set logger to DEBUG, handlers control output level + + # Default wrapper, might be updated when handlers are set self.wrapper = ColoredWrapper(self.log_name, self._logging) + self._logging_handlers: Optional[LoggingHandlers] = None + @property def logging(self) -> ColoredWrapper: - # This would always print log with color. And only if - # filename in LoggingHandlers is set, it would log to file. + """ + Access the `ColoredWrapper` instance for colored console logging. + The wrapper's verbosity and propagation depend on the configured `logging_handlers`. + """ return self.wrapper @property - def logging_handlers(self) -> LoggingHandlers: + def logging_handlers(self) -> Optional[LoggingHandlers]: # Can be None if not set + """The `LoggingHandlers` instance associated with this logger.""" return self._logging_handlers @logging_handlers.setter - def logging_handlers(self, handlers: LoggingHandlers): + def logging_handlers(self, handlers: Optional[LoggingHandlers]): # Allow setting to None + """ + Set the `LoggingHandlers` for this logger. + + This configures the underlying logger to use the file handler from `handlers` + (if any) and updates the `ColoredWrapper` verbosity and propagation settings. + + :param handlers: The LoggingHandlers instance, or None to clear handlers. 
+ """ + # Remove old handler if it exists and is different or new one is None + if self._logging_handlers and self._logging_handlers.handler: + if not handlers or self._logging_handlers.handler != handlers.handler: + self._logging.removeHandler(self._logging_handlers.handler) + self._logging_handlers = handlers + + if handlers: + self.wrapper = ColoredWrapper( + self.log_name, + self._logging, + verbose=handlers.verbosity, + propagate=handlers.handler is not None, # Propagate if file handler exists + ) + if handlers.handler: + self._logging.addHandler(handlers.handler) + self._logging.propagate = False # Avoid duplicate messages from root logger if file handler is specific + else: + # If no file handler, let messages propagate to root/console handlers if any configured there. + # However, ColoredWrapper handles console output directly via click.echo. + # So, for console, propagation might not be desired if root also has console. + # Defaulting to False to rely on ColoredWrapper for console. + self._logging.propagate = False + else: + # Reset to a default wrapper if handlers are removed + self.wrapper = ColoredWrapper(self.log_name, self._logging) + self._logging.propagate = True # Allow propagation if no specific handlers - self._logging.propagate = False - self.wrapper = ColoredWrapper( - self.log_name, - self._logging, - verbose=handlers.verbosity, - propagte=handlers.handler is not None, - ) - if self._logging_handlers.handler is not None: - self._logging.addHandler(self._logging_handlers.handler) +def has_platform(name: str) -> bool: + """ + Check if a specific FaaS platform is enabled via environment variables. + Looks for an environment variable `SEBS_WITH_{NAME_UPPERCASE}` and checks + if its value is "true" (case-insensitive). -def has_platform(name: str) -> bool: + :param name: The short name of the platform (e.g., "aws", "azure"). + :return: True if the platform is enabled, False otherwise. + """ return os.environ.get(f"SEBS_WITH_{name.upper()}", "False").lower() == "true" -# Check if the system is Linux and that it's not WSL def is_linux() -> bool: + """ + Check if the current operating system is Linux and not Windows Subsystem for Linux (WSL). + + :return: True if native Linux, False otherwise (e.g., Windows, macOS, WSL). + """ return platform.system() == "Linux" and "microsoft" not in platform.release().lower() def catch_interrupt(): - + """ + Set up a signal handler to catch KeyboardInterrupt (Ctrl+C) and print a stack trace + before exiting. Useful for debugging hangs or long operations. + """ import signal import sys import traceback - def handler(x, y): - traceback.print_stack() - sys.exit(signal.SIGINT) + def custom_interrupt_handler(signum, frame): # Corrected signature + print("\nKeyboardInterrupt caught!") + traceback.print_stack(frame) + sys.exit(1) # Exit with a non-zero code - signal.signal(signal.SIGINT, handler) + signal.signal(signal.SIGINT, custom_interrupt_handler)