diff --git a/components/clp-package-utils/clp_package_utils/controller.py b/components/clp-package-utils/clp_package_utils/controller.py index 740767b26f..e47839ab76 100644 --- a/components/clp-package-utils/clp_package_utils/controller.py +++ b/components/clp-package-utils/clp_package_utils/controller.py @@ -8,6 +8,7 @@ import subprocess import uuid from abc import ABC, abstractmethod +from enum import auto from typing import Any, Optional from clp_py_utils.clp_config import ( @@ -17,7 +18,6 @@ COMPRESSION_SCHEDULER_COMPONENT_NAME, COMPRESSION_WORKER_COMPONENT_NAME, DB_COMPONENT_NAME, - DeploymentType, GARBAGE_COLLECTOR_COMPONENT_NAME, MCP_SERVER_COMPONENT_NAME, QUERY_JOBS_TABLE_NAME, @@ -37,11 +37,13 @@ get_datasets_table_name, get_files_table_name, ) +from strenum import KebabCaseStrEnum from clp_package_utils.general import ( check_docker_dependencies, CONTAINER_CLP_HOME, DockerComposeProjectNotRunningError, + DockerComposeProjectAlreadyRunningError, DockerDependencyError, dump_shared_container_config, generate_docker_compose_container_config, @@ -74,6 +76,16 @@ def __ior__(self, other: "EnvVarsDict") -> "EnvVarsDict": return self +class DeploymentTarget(KebabCaseStrEnum): + ALL = auto() + CONTROLLER = auto() + COMPRESSION_WORKER = auto() + QUERY_WORKER = auto() + REDUCER = auto() + WEBUI = auto() + MCP = auto() + + class BaseController(ABC): """ Base controller for orchestrating CLP components. Derived classes should implement any @@ -638,8 +650,74 @@ class DockerComposeController(BaseController): Controller for orchestrating CLP components using Docker Compose. 
""" - def __init__(self, clp_config: CLPConfig, instance_id: str) -> None: + def __init__( + self, + clp_config: CLPConfig, + instance_id: str, + target: DeploymentTarget, + num_workers: Optional[int] + ) -> None: self._project_name = f"clp-package-{instance_id}" + + self._target = target + self._is_existing_project_allowed = target != DeploymentTarget.ALL + self._num_workers = num_workers or self._get_num_workers() + + self._is_mcp_enabled = clp_config.mcp_server is not None + self._is_legacy_search_enabled = clp_config.package.query_engine != QueryEngine.PRESTO + + if target in ( + DeploymentTarget.QUERY_WORKER, + DeploymentTarget.REDUCER, + ) and not self._is_legacy_search_enabled: + raise ValueError( + "Legacy search components (query-worker/reducer) cannot be launched when the " + "query engine is set to Presto." + ) + if target == DeploymentTarget.MCP and not self._is_mcp_enabled: + raise ValueError( + "The MCP server is not configured in the CLP package configuration, so it cannot " + "be launched." 
+ ) + + # Controllers + self._launch_database = target in (DeploymentTarget.ALL, DeploymentTarget.CONTROLLER) + self._launch_redis = target in (DeploymentTarget.ALL, DeploymentTarget.CONTROLLER) + self._launch_queue = target in (DeploymentTarget.ALL, DeploymentTarget.CONTROLLER) + self._launch_results_cache = target in (DeploymentTarget.ALL, DeploymentTarget.CONTROLLER) + self._launch_compression_scheduler = target in (DeploymentTarget.ALL, DeploymentTarget.CONTROLLER) + self._launch_query_scheduler = target in (DeploymentTarget.ALL, DeploymentTarget.CONTROLLER) + self._launch_garbage_collector = target in (DeploymentTarget.ALL, + DeploymentTarget.CONTROLLER) and is_retention_period_configured(clp_config) + + # Workers + self._launch_compression_worker = target in (DeploymentTarget.ALL, DeploymentTarget.COMPRESSION_WORKER) + self._launch_query_worker = target in (DeploymentTarget.ALL, DeploymentTarget.QUERY_WORKER) + self._launch_reducer = target in (DeploymentTarget.ALL, DeploymentTarget.REDUCER) + + # Clients + self._launch_webui = target in (DeploymentTarget.ALL, DeploymentTarget.WEBUI) + self._launch_mcp = (target in (DeploymentTarget.ALL,DeploymentTarget.MCP) and self._is_mcp_enabled) + + self._compose_profile: Optional[str] = None + self._launch_only_service: Optional[str] = None + if target == DeploymentTarget.CONTROLLER: + self._compose_profile = "controller" + elif target == DeploymentTarget.WEBUI: + self._launch_only_service = "webui" + elif target == DeploymentTarget.MCP: + self._launch_only_service = "mcp-server" + elif target == DeploymentTarget.COMPRESSION_WORKER: + self._launch_only_service = "compression-worker" + elif target == DeploymentTarget.QUERY_WORKER: + self._launch_only_service = "query-worker" + elif target == DeploymentTarget.REDUCER: + self._launch_only_service = "reducer" + + self._compose_file_override = ( + "docker-compose.base.yaml" if target == DeploymentTarget.CONTROLLER else None + ) + super().__init__(clp_config) def start(self) 
-> None: @@ -649,20 +727,32 @@ def start(self) -> None: :raise: Propagates `check_docker_dependencies`'s exceptions. :raise: Propagates `subprocess.run`'s exceptions. """ - check_docker_dependencies( - should_compose_project_be_running=False, project_name=self._project_name - ) + try: + check_docker_dependencies( + should_compose_project_be_running=False, project_name=self._project_name + ) + except DockerComposeProjectAlreadyRunningError: + if self._is_existing_project_allowed: + logger.info( + "Docker Compose project '%s' is already running. Adding requested services.", + self._project_name, + ) + else: + raise self._set_up_env() - deployment_type = self._clp_config.get_deployment_type() - logger.info(f"Starting CLP using Docker Compose ({deployment_type} deployment)...") - + logger.info( + "Starting CLP using Docker Compose (target=%s)...", + self._target, + ) cmd = ["docker", "compose", "--project-name", self._project_name] - if deployment_type == DeploymentType.BASE: - cmd += ["--file", "docker-compose.base.yaml"] - if self._clp_config.mcp_server is not None: - cmd += ["--profile", "mcp"] + if self._compose_file_override is not None: + cmd += ["--file", self._compose_file_override] + if self._compose_profile is not None: + cmd += ["--profile", self._compose_profile] cmd += ["up", "--detach", "--wait"] + if self._launch_only_service is not None: + cmd += ["--no-deps", self._launch_only_service] subprocess.run( cmd, cwd=self._clp_home, @@ -713,7 +803,6 @@ def _get_num_workers() -> int: def _set_up_env(self) -> None: # Generate container-specific config. 
container_clp_config = generate_docker_compose_container_config(self._clp_config) - num_workers = self._get_num_workers() dump_shared_container_config(container_clp_config, self._clp_config) env_vars = EnvVarsDict() @@ -782,12 +871,16 @@ def _set_up_env(self) -> None: env_vars |= self._set_up_env_for_results_cache() env_vars |= self._set_up_env_for_compression_scheduler() env_vars |= self._set_up_env_for_query_scheduler() - env_vars |= self._set_up_env_for_compression_worker(num_workers) - env_vars |= self._set_up_env_for_query_worker(num_workers) - env_vars |= self._set_up_env_for_reducer(num_workers) + env_vars |= self._set_up_env_for_garbage_collector() + env_vars |= self._set_up_env_for_compression_worker(self._num_workers) + env_vars |= self._set_up_env_for_query_worker(self._num_workers) + env_vars |= self._set_up_env_for_reducer(self._num_workers) env_vars |= self._set_up_env_for_webui(container_clp_config) env_vars |= self._set_up_env_for_mcp_server() - env_vars |= self._set_up_env_for_garbage_collector() + + if not self._launch_garbage_collector: + env_vars["CLP_GARBAGE_COLLECTOR_ENABLED"] = "0" + env_vars["CLP_LEGACY_SEARCH_ENABLED"] = "1" if self._is_legacy_search_enabled else "0" # Write the environment variables to the `.env` file. 
with open(f"{self._clp_home}/.env", "w") as env_file: diff --git a/components/clp-package-utils/clp_package_utils/scripts/start_clp.py b/components/clp-package-utils/clp_package_utils/scripts/start_clp.py index 89ddf2f56b..1661240211 100755 --- a/components/clp-package-utils/clp_package_utils/scripts/start_clp.py +++ b/components/clp-package-utils/clp_package_utils/scripts/start_clp.py @@ -5,7 +5,11 @@ from clp_py_utils.clp_config import CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH -from clp_package_utils.controller import DockerComposeController, get_or_create_instance_id +from clp_package_utils.controller import ( + DeploymentTarget, + DockerComposeController, + get_or_create_instance_id, +) from clp_package_utils.general import ( get_clp_home, load_config_file, @@ -38,12 +42,28 @@ def main(argv): help="Enable debug logging.", ) - parsed_args = args_parser.parse_args(argv[1:]) + subparsers = args_parser.add_subparsers(dest="target", help="Deployment target to start.") + for target in DeploymentTarget: + sub = subparsers.add_parser(target.value) + + if target in { + DeploymentTarget.COMPRESSION_WORKER, + DeploymentTarget.QUERY_WORKER, + DeploymentTarget.REDUCER, + }: + sub.add_argument( + "--num-workers", + type=int, + help="Set worker concurrency for this target.", + ) + parsed_args = args_parser.parse_args(argv[1:]) if parsed_args.verbose: logger.setLevel(logging.DEBUG) else: logger.setLevel(logging.INFO) + target = parsed_args.target or DeploymentTarget.ALL + num_workers = getattr(parsed_args, "num_workers", None) try: # Validate and load config file. 
@@ -78,7 +98,12 @@ def main(argv): try: instance_id = get_or_create_instance_id(clp_config) - controller = DockerComposeController(clp_config, instance_id) + controller = DockerComposeController( + clp_config, + instance_id, + target, + num_workers, + ) controller.start() except Exception as ex: if type(ex) == ValueError: diff --git a/components/clp-py-utils/clp_py_utils/clp_config.py b/components/clp-py-utils/clp_py_utils/clp_config.py index 8c07cc98a1..bfa58705c0 100644 --- a/components/clp-py-utils/clp_py_utils/clp_config.py +++ b/components/clp-py-utils/clp_py_utils/clp_config.py @@ -86,11 +86,6 @@ ZstdCompressionLevel = Annotated[int, Field(ge=1, le=19)] -class DeploymentType(KebabCaseStrEnum): - BASE = auto() - FULL = auto() - - class StorageEngine(KebabCaseStrEnum): CLP = auto() CLP_S = auto() @@ -767,12 +762,6 @@ def load_container_image_ref(self): def get_shared_config_file_path(self) -> pathlib.Path: return self.logs_directory / CLP_SHARED_CONFIG_FILENAME - def get_deployment_type(self) -> DeploymentType: - if QueryEngine.PRESTO == self.package.query_engine: - return DeploymentType.BASE - else: - return DeploymentType.FULL - def dump_to_primitive_dict(self): custom_serialized_fields = { "database", diff --git a/docs/src/dev-docs/design-deployment-orchestration.md b/docs/src/dev-docs/design-deployment-orchestration.md index 039bb5e5ea..32c63f9299 100755 --- a/docs/src/dev-docs/design-deployment-orchestration.md +++ b/docs/src/dev-docs/design-deployment-orchestration.md @@ -171,8 +171,8 @@ This section explains how we use Docker Compose to orchestrate the CLP package a the following subsections: * [Setting up the Docker Compose project's environment](#setting-up-the-environment) -* [Starting and stoping the Docker Compose project](#starting-and-stopping-the-project) -* [Deployment types](#deployment-types) +* [Starting and stopping the Docker Compose project](#starting-and-stopping-the-project) +* [Targets and profiles](#targets-and-profiles) * 
[Implementation details](#implementation-details) * [Troubleshooting](#troubleshooting) @@ -199,19 +199,41 @@ as environment variables or command line arguments, as necessary. ### Starting and stopping the project -To start and stop the project, `DockerComposeController` simply invokes `docker compose up` or +To start and stop the project, `DockerComposeController` invokes `docker compose up` or `docker compose down` as appropriate. However, to allow multiple CLP packages to be run on the same host, we explicitly specify a project name for the project, where the name is based on the package's instance ID. -### Deployment Types - -CLP supports two deployment types determined by the `package.query_engine` configuration setting. - -1. **BASE**: For deployments using [Presto][presto-integration] as the query engine. This deployment - only uses `docker-compose.base.yaml`. -2. **FULL**: For deployments using one of CLP's native query engines. This uses both - `docker-compose.base.yaml` and `docker-compose.yaml`. +### Targets and profiles + +`start-clp.py` exposes a `--target` flag that determines which services are launched on the current +host. If no target is specified, the controller starts the full stack (`docker compose up` using +`docker-compose.yaml`) and, when MCP is configured, enables the `mcp` profile so the MCP server is +included. + +For targeted launches, the controller switches to `docker-compose.base.yaml`, activates the relevant +profile, and, for single-service targets, specifies the service name with `--no-deps`. 
This allows +operators to run CLP across multiple hosts without bringing down existing containers: + +| Target | Profile(s) | Services started | +|----------------------|------------|----------------------------------------------------| +| `controller` | `controller` | Core databases, schedulers, garbage collector | +| `ui` | `ui` | `webui` (no dependencies) | +| `mcp` | `mcp` | `mcp-server` (no dependencies) | +| `compression-worker` | `worker` | `compression-worker` (no dependencies) | +| `query-worker` | `worker` | `query-worker` (no dependencies) | +| `reducer` | `worker` | `reducer` (no dependencies) | + +`--num-workers` can be supplied with the worker and reducer targets to override the default process +count inside those containers. The CLI can be invoked repeatedly to scale the deployment: each call +adds the requested services to the existing Compose project without interrupting running containers. +`stop-clp.py` continues to tear down the entire project regardless of the target that was used to +start individual services. + +When the package's query engine is set to [Presto][presto-integration], legacy search components are +disabled automatically via the `CLP_ENABLE_LEGACY_SEARCH` environment variable. This prevents the +query scheduler, query workers, and reducer from starting unless they are explicitly requested when +legacy search is enabled. ### Implementation details diff --git a/docs/src/user-docs/guides-multi-node.md b/docs/src/user-docs/guides-multi-node.md index 697fbf396e..0b2af015be 100644 --- a/docs/src/user-docs/guides-multi-node.md +++ b/docs/src/user-docs/guides-multi-node.md @@ -1,10 +1,72 @@ # Multi-node deployment -A multi-node deployment allows you to run CLP across a distributed set of hosts. +A multi-node deployment allows you to run CLP across a distributed set of hosts. 
The packaged +`start-clp.sh` wrapper (which calls `start_clp.py`) now exposes a `--target` flag so each host can +launch only the services it should run, while sharing the same Compose project. -:::{warning} -CLP now uses [Docker Compose][docker-compose] for orchestration and support for multi-node -deployments has been removed temporarily. -::: +## Roles and targets -[docker-compose]: https://docs.docker.com/compose/ +| Target | Responsibilities | Notes | +|----------------------|------------------|-------| +| `controller` | Databases, schedulers, garbage collector | Run this first so dependencies are available. | +| `ui` | Web UI | Starts only the `webui` service with no local dependencies. | +| `compression-worker` | Compression workers | Accepts `--num-workers` to override process count. | +| `query-worker` | Query workers | Available only when CLP's legacy query engine is enabled. | +| `reducer` | Reducer processes | Available only when CLP's legacy query engine is enabled. | +| `mcp` | MCP server | Optional; requires an `mcp_server` block in `clp-config.yml`. | + +When no target is specified, `start-clp.sh` launches the full stack on a single machine (the +previous behaviour). The controller inspects `package.query_engine` and sets +`CLP_LEGACY_SEARCH_ENABLED=0` automatically when Presto is selected, preventing the query scheduler, +query workers, and reducer from starting on any host. + +## Preparation + +1. Install the CLP package on each host that will participate in the deployment. +2. Copy the same `clp-config.yml` (and any credentials files) to every host. Ensure network + addresses in the config are reachable from all nodes. +3. If you plan to store data or logs on shared storage, mount those paths before launching CLP. +4. Confirm Docker Engine and the Compose plugin meet the versions listed in the installation guide. + +## Launch procedure + +1. 
On the controller host, run: + + ```bash + ./sbin/start-clp.sh --target controller + ``` + + This creates the Compose project, primes configuration, and launches the core services. + +2. On additional hosts, start the desired roles. Examples: + + ```bash + # Web UI host + ./sbin/start-clp.sh --target ui + + # Dedicated compression workers with four processes + ./sbin/start-clp.sh --target compression-worker --num-workers 4 + + # Legacy search workers (only when CLP query engine is in use) + ./sbin/start-clp.sh --target query-worker --num-workers 8 + ./sbin/start-clp.sh --target reducer --num-workers 2 + + # MCP server host + ./sbin/start-clp.sh --target mcp + ``` + + Each invocation adds services to the existing Compose project without interrupting containers + launched on other nodes. + +## Stopping the deployment + +Run `./sbin/stop-clp.sh` from any host that has the package installed. It shuts down the entire +Compose project regardless of which targets were used to start individual services. + +## Troubleshooting tips + +* Use `docker compose --project-name clp-package-$(cat var/log/instance-id) ps` to verify which + services are up. +* If a worker fails to connect to core services, confirm that the controller host's addresses in + `clp-config.yml` are reachable from the worker host, and rerun `start-clp.sh` for that worker after + updating the configuration. diff --git a/tools/deployment/package/docker-compose.base.yaml b/tools/deployment/package/docker-compose.base.yaml index ac2b422159..cdc5e0f1cc 100644 --- a/tools/deployment/package/docker-compose.base.yaml +++ b/tools/deployment/package/docker-compose.base.yaml @@ -1,5 +1,7 @@ name: "clp-package-base" +include: ["docker-compose.volumes.yaml"] + # Common service defaults. 
x-service-defaults: &service_defaults image: "${CLP_PACKAGE_CONTAINER_IMAGE_REF:-clp-package}" @@ -40,17 +42,10 @@ x-volume-definitions: source: "${CLP_LOGS_DIR_HOST:-./var/log}" target: "/var/log" -volumes: - # Dummy volume to use when a bind mount is not desired. - empty: - driver_opts: - device: "tmpfs" - type: "tmpfs" - size: 0 - services: database: <<: *service_defaults + profiles: ["controller"] image: "${CLP_DB_CONTAINER_IMAGE_REF:-mariadb:10-jammy}" hostname: "database" user: "${CLP_THIRD_PARTY_SERVICE_UID_GID:-1000:1000}" @@ -87,6 +82,7 @@ services: db-table-creator: <<: *service_defaults + profiles: ["controller"] hostname: "db_table_creator" environment: CLP_DB_PASS: "${CLP_DB_PASS:?Please set a value.}" @@ -107,6 +103,7 @@ services: queue: <<: *service_defaults + profiles: ["controller"] image: "rabbitmq:3.9.8" hostname: "queue" user: "${CLP_THIRD_PARTY_SERVICE_UID_GID:-1000:1000}" @@ -131,6 +128,7 @@ services: redis: <<: *service_defaults + profiles: ["controller"] image: "redis:7.2.4" hostname: "redis" user: "${CLP_THIRD_PARTY_SERVICE_UID_GID:-1000:1000}" @@ -167,6 +165,7 @@ services: results-cache: <<: *service_defaults + profiles: ["controller"] image: "mongo:7.0.1" hostname: "results_cache" user: "${CLP_THIRD_PARTY_SERVICE_UID_GID:-1000:1000}" @@ -198,6 +197,7 @@ services: results-cache-indices-creator: <<: *service_defaults + profiles: ["controller"] hostname: "results_cache_indices_creator" environment: PYTHONPATH: "/opt/clp/lib/python3/site-packages" @@ -214,6 +214,7 @@ services: compression-scheduler: <<: *service_defaults + profiles: ["controller"] hostname: "compression_scheduler" stop_grace_period: "300s" environment: @@ -245,8 +246,51 @@ services: "--config", "/etc/clp-config.yml" ] + query-scheduler: + <<: *service_defaults + profiles: ["controller"] + hostname: "query_scheduler" + stop_grace_period: "10s" + deploy: + replicas: "${CLP_LEGACY_SEARCH_ENABLED:-1}" + environment: + BROKER_URL: "amqp://${CLP_QUEUE_USER:?Please set a value.}\ + 
:${CLP_QUEUE_PASS:?Please set a value.}@queue:5672" + CLP_DB_PASS: "${CLP_DB_PASS:?Please set a value.}" + CLP_DB_USER: "${CLP_DB_USER:?Please set a value.}" + CLP_LOGGING_LEVEL: "${CLP_QUERY_SCHEDULER_LOGGING_LEVEL:-INFO}" + CLP_LOGS_DIR: "/var/log/query_scheduler" + PYTHONPATH: "/opt/clp/lib/python3/site-packages" + RESULT_BACKEND: "redis://default:${CLP_REDIS_PASS:?Please set a value.}@redis:6379\ + /${CLP_REDIS_BACKEND_DB_QUERY:-0}" + volumes: + - *volume_clp_config_readonly + - *volume_clp_logs + depends_on: + db-table-creator: + condition: "service_completed_successfully" + queue: + condition: "service_healthy" + redis: + condition: "service_healthy" + command: [ + "python3", + "-u", + "-m", "job_orchestration.scheduler.query.query_scheduler", + "--config", "/etc/clp-config.yml" + ] + healthcheck: + <<: *healthcheck_defaults + test: [ + "CMD", + "bash", + "-c", + "< /dev/tcp/query_scheduler/7000" + ] + compression-worker: <<: *service_defaults + profiles: ["worker"] hostname: "compression_worker" environment: BROKER_URL: "amqp://${CLP_QUEUE_USER:?Please set a value.}\ @@ -282,8 +326,74 @@ services: "-n", "compression-worker" ] + query-worker: + <<: *service_defaults + profiles: ["worker"] + hostname: "query_worker" + deploy: + replicas: "${CLP_LEGACY_SEARCH_ENABLED:-1}" + environment: + BROKER_URL: "amqp://${CLP_QUEUE_USER:?Please set a value.}\ + :${CLP_QUEUE_PASS:?Please set a value.}@queue:5672" + CLP_CONFIG_PATH: "/etc/clp-config.yml" + CLP_HOME: "/opt/clp" + CLP_LOGGING_LEVEL: "${CLP_QUERY_WORKER_LOGGING_LEVEL:-INFO}" + CLP_LOGS_DIR: "/var/log/query_worker" + CLP_WORKER_LOG_PATH: "/var/log/query_worker/worker.log" + PYTHONPATH: "/opt/clp/lib/python3/site-packages" + RESULT_BACKEND: "redis://default:${CLP_REDIS_PASS:?Please set a value.}@redis:6379\ + /${CLP_REDIS_BACKEND_DB_QUERY:-0}" + volumes: + - *volume_clp_config_readonly + - *volume_clp_logs + - "${CLP_ARCHIVE_OUTPUT_DIR_HOST:-empty}:/var/data/archives" + - 
"${CLP_AWS_CONFIG_DIR_HOST:-empty}:/.aws:ro" + - "${CLP_STAGED_STREAM_OUTPUT_DIR_HOST:-empty}:/var/data/staged-streams" + - "${CLP_STREAM_OUTPUT_DIR_HOST:-empty}:/var/data/streams" + command: [ + "python3", + "-u", + "/opt/clp/lib/python3/site-packages/bin/celery", + "-A", "job_orchestration.executor.query", + "worker", + "--concurrency", "${CLP_QUERY_WORKER_CONCURRENCY:-1}", + "--loglevel", "WARNING", + "-f", "/var/log/query_worker/worker.log", + "-Q", "query", + "-n", "query-worker" + ] + + reducer: + <<: *service_defaults + profiles: ["worker"] + hostname: "reducer" + stop_grace_period: "10s" + deploy: + replicas: "${CLP_LEGACY_SEARCH_ENABLED:-1}" + environment: + CLP_HOME: "/opt/clp" + CLP_LOGGING_LEVEL: "${CLP_REDUCER_LOGGING_LEVEL:-INFO}" + CLP_LOGS_DIR: "/var/log/reducer" + PYTHONPATH: "/opt/clp/lib/python3/site-packages" + volumes: + - *volume_clp_config_readonly + - *volume_clp_logs + depends_on: + query-scheduler: + condition: "service_healthy" + results-cache-indices-creator: + condition: "service_completed_successfully" + command: [ + "python3", "-u", + "-m", "job_orchestration.reducer.reducer", + "--config", "/etc/clp-config.yml", + "--concurrency", "${CLP_REDUCER_CONCURRENCY:-1}", + "--upsert-interval", "${CLP_REDUCER_UPSERT_INTERVAL:-100}" + ] + webui: <<: *service_defaults + profiles: ["ui"] hostname: "webui" environment: AWS_ACCESS_KEY_ID: "${CLP_STREAM_OUTPUT_AWS_ACCESS_KEY_ID:-}" @@ -328,8 +438,47 @@ services: "< /dev/tcp/webui/4000" ] + mcp-server: + <<: *service_defaults + hostname: "mcp_server" + profiles: ["mcp"] + environment: + CLP_LOGGING_LEVEL: "${CLP_MCP_LOGGING_LEVEL:-INFO}" + CLP_LOGS_DIR: "/var/log/mcp_server" + CLP_DB_USER: "${CLP_DB_USER}" + CLP_DB_PASS: "${CLP_DB_PASS}" + PYTHONPATH: "/opt/clp/lib/python3/site-packages" + ports: + - host_ip: "${CLP_MCP_HOST:-127.0.0.1}" + published: "${CLP_MCP_PORT:-8000}" + target: 8000 + volumes: + - *volume_clp_config_readonly + - *volume_clp_logs + depends_on: + db-table-creator: + condition: 
"service_completed_successfully" + results-cache-indices-creator: + condition: "service_completed_successfully" + command: [ + "python3", "-u", + "-m", "clp_mcp_server.clp_mcp_server", + "--host", "mcp_server", + "--port", "8000", + "--config-path", "/etc/clp-config.yml", + ] + healthcheck: + <<: *healthcheck_defaults + test: [ + "CMD", + "curl", + "-f", + "http://mcp_server:8000/health" + ] + garbage-collector: <<: *service_defaults + profiles: ["controller"] hostname: "garbage_collector" stop_grace_period: "10s" deploy: diff --git a/tools/deployment/package/docker-compose.volumes.yaml b/tools/deployment/package/docker-compose.volumes.yaml new file mode 100644 index 0000000000..57350e4876 --- /dev/null +++ b/tools/deployment/package/docker-compose.volumes.yaml @@ -0,0 +1,7 @@ +volumes: + # Dummy volume to use when a bind mount is not desired. + empty: + driver_opts: + device: "tmpfs" + type: "tmpfs" + size: 0 diff --git a/tools/deployment/package/docker-compose.yaml b/tools/deployment/package/docker-compose.yaml index e05c49792e..416cf71a6e 100644 --- a/tools/deployment/package/docker-compose.yaml +++ b/tools/deployment/package/docker-compose.yaml @@ -1,165 +1,88 @@ name: "clp-package" -include: ["docker-compose.base.yaml"] - -# The `x-*` definitions below are duplicated from docker-compose.base.yaml. Refer to that file for -# documentation. 
-x-service-defaults: &service_defaults - image: "${CLP_PACKAGE_CONTAINER_IMAGE_REF:-clp-package}" - logging: - driver: "local" - stop_grace_period: "60s" - user: "${CLP_FIRST_PARTY_SERVICE_UID_GID:-1000:1000}" -x-healthcheck-defaults: &healthcheck_defaults - interval: "30s" - retries: 3 - start_interval: "2s" - start_period: "60s" - timeout: "2s" -x-volume-definitions: - clp-config-readonly: &volume_clp_config_readonly - type: "bind" - source: "${CLP_LOGS_DIR_HOST:-./var/log}/.clp-config.yml" - target: "/etc/clp-config.yml" - read_only: true - clp-logs: &volume_clp_logs - type: "bind" - source: "${CLP_LOGS_DIR_HOST:-./var/log}" - target: "/var/log" +include: ["docker-compose.volumes.yaml"] services: + database: + profiles: !reset [] + extends: + service: "database" + file: "docker-compose.base.yaml" + + db-table-creator: + profiles: !reset [] + extends: + service: "db-table-creator" + file: "docker-compose.base.yaml" + + queue: + profiles: !reset [] + extends: + service: "queue" + file: "docker-compose.base.yaml" + + redis: + profiles: !reset [] + extends: + service: "redis" + file: "docker-compose.base.yaml" + + results-cache: + profiles: !reset [] + extends: + service: "results-cache" + file: "docker-compose.base.yaml" + + results-cache-indices-creator: + profiles: !reset [] + extends: + service: "results-cache-indices-creator" + file: "docker-compose.base.yaml" + + compression-scheduler: + profiles: !reset [] + extends: + service: "compression-scheduler" + file: "docker-compose.base.yaml" + + compression-worker: + profiles: !reset [] + extends: + service: "compression-worker" + file: "docker-compose.base.yaml" + query-scheduler: - <<: *service_defaults - hostname: "query_scheduler" - stop_grace_period: "10s" - environment: - BROKER_URL: "amqp://${CLP_QUEUE_USER:?Please set a value.}\ - :${CLP_QUEUE_PASS:?Please set a value.}@queue:5672" - CLP_DB_PASS: "${CLP_DB_PASS:?Please set a value.}" - CLP_DB_USER: "${CLP_DB_USER:?Please set a value.}" - CLP_LOGGING_LEVEL: 
"${CLP_QUERY_SCHEDULER_LOGGING_LEVEL:-INFO}" - CLP_LOGS_DIR: "/var/log/query_scheduler" - PYTHONPATH: "/opt/clp/lib/python3/site-packages" - RESULT_BACKEND: "redis://default:${CLP_REDIS_PASS:?Please set a value.}@redis:6379\ - /${CLP_REDIS_BACKEND_DB_QUERY:-0}" - volumes: - - *volume_clp_config_readonly - - *volume_clp_logs - depends_on: - db-table-creator: - condition: "service_completed_successfully" - queue: - condition: "service_healthy" - redis: - condition: "service_healthy" - command: [ - "python3", - "-u", - "-m", "job_orchestration.scheduler.query.query_scheduler", - "--config", "/etc/clp-config.yml" - ] - healthcheck: - <<: *healthcheck_defaults - test: [ - "CMD", - "bash", - "-c", - "< /dev/tcp/query_scheduler/7000" - ] + profiles: !reset [] + extends: + service: "query-scheduler" + file: "docker-compose.base.yaml" query-worker: - <<: *service_defaults - hostname: "query_worker" - environment: - BROKER_URL: "amqp://${CLP_QUEUE_USER:?Please set a value.}\ - :${CLP_QUEUE_PASS:?Please set a value.}@queue:5672" - CLP_CONFIG_PATH: "/etc/clp-config.yml" - CLP_HOME: "/opt/clp" - CLP_LOGGING_LEVEL: "${CLP_QUERY_WORKER_LOGGING_LEVEL:-INFO}" - CLP_LOGS_DIR: "/var/log/query_worker" - CLP_WORKER_LOG_PATH: "/var/log/query_worker/worker.log" - PYTHONPATH: "/opt/clp/lib/python3/site-packages" - RESULT_BACKEND: "redis://default:${CLP_REDIS_PASS:?Please set a value.}@redis:6379\ - /${CLP_REDIS_BACKEND_DB_QUERY:-0}" - volumes: - - *volume_clp_config_readonly - - *volume_clp_logs - - "${CLP_ARCHIVE_OUTPUT_DIR_HOST:-empty}:/var/data/archives" - - "${CLP_AWS_CONFIG_DIR_HOST:-empty}:/.aws:ro" - - "${CLP_STAGED_STREAM_OUTPUT_DIR_HOST:-empty}:/var/data/staged-streams" - - "${CLP_STREAM_OUTPUT_DIR_HOST:-empty}:/var/data/streams" - command: [ - "python3", - "-u", - "/opt/clp/lib/python3/site-packages/bin/celery", - "-A", "job_orchestration.executor.query", - "worker", - "--concurrency", "${CLP_QUERY_WORKER_CONCURRENCY:-1}", - "--loglevel", "WARNING", - "-f", 
"/var/log/query_worker/worker.log", - "-Q", "query", - "-n", "query-worker" - ] + profiles: !reset [] + extends: + service: "query-worker" + file: "docker-compose.base.yaml" reducer: - <<: *service_defaults - hostname: "reducer" - stop_grace_period: "10s" - environment: - CLP_HOME: "/opt/clp" - CLP_LOGGING_LEVEL: "${CLP_REDUCER_LOGGING_LEVEL:-INFO}" - CLP_LOGS_DIR: "/var/log/reducer" - PYTHONPATH: "/opt/clp/lib/python3/site-packages" - volumes: - - *volume_clp_config_readonly - - *volume_clp_logs - depends_on: - query-scheduler: - condition: "service_healthy" - results-cache-indices-creator: - condition: "service_completed_successfully" - command: [ - "python3", "-u", - "-m", "job_orchestration.reducer.reducer", - "--config", "/etc/clp-config.yml", - "--concurrency", "${CLP_REDUCER_CONCURRENCY:-1}", - "--upsert-interval", "${CLP_REDUCER_UPSERT_INTERVAL:-100}" - ] + profiles: !reset [] + extends: + service: "reducer" + file: "docker-compose.base.yaml" + + webui: + profiles: !reset [] + extends: + service: "webui" + file: "docker-compose.base.yaml" mcp-server: - <<: *service_defaults - hostname: "mcp_server" - profiles: ["mcp"] - environment: - CLP_LOGGING_LEVEL: "${CLP_MCP_LOGGING_LEVEL:-INFO}" - CLP_LOGS_DIR: "/var/log/mcp_server" - CLP_DB_USER: "${CLP_DB_USER}" - CLP_DB_PASS: "${CLP_DB_PASS}" - PYTHONPATH: "/opt/clp/lib/python3/site-packages" - ports: - - host_ip: "${CLP_MCP_HOST:-127.0.0.1}" - published: "${CLP_MCP_PORT:-8000}" - target: 8000 - volumes: - - *volume_clp_config_readonly - - *volume_clp_logs - depends_on: - db-table-creator: - condition: "service_completed_successfully" - results-cache-indices-creator: - condition: "service_completed_successfully" - command: [ - "python3", "-u", - "-m", "clp_mcp_server.clp_mcp_server", - "--host", "mcp_server", - "--port", "8000", - "--config-path", "/etc/clp-config.yml", - ] - healthcheck: - <<: *healthcheck_defaults - test: [ - "CMD", - "curl", - "-f", - "http://mcp_server:8000/health" - ] + # Inherit the 
`profiles` defined in `docker-compose.base.yaml`. + extends: + service: "mcp-server" + file: "docker-compose.base.yaml" + + garbage-collector: + profiles: !reset [] + extends: + service: "garbage-collector" + file: "docker-compose.base.yaml"