diff --git a/Cargo.lock b/Cargo.lock index 971fb16337..65e6b011be 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -122,6 +122,7 @@ dependencies = [ "mongodb", "num_enum", "rmp-serde", + "secrecy", "serde", "serde_json", "sqlx", diff --git a/components/api-server/Cargo.toml b/components/api-server/Cargo.toml index a8017f77c2..dc57d8bccd 100644 --- a/components/api-server/Cargo.toml +++ b/components/api-server/Cargo.toml @@ -20,6 +20,7 @@ futures = "0.3.31" mongodb = "3.3.0" num_enum = "0.7.5" rmp-serde = "1.3.0" +secrecy = "0.10.3" serde = { version = "1.0.228", features = ["derive"] } serde_json = "1.0.145" sqlx = { version = "0.8.6", features = ["runtime-tokio", "mysql"] } @@ -27,4 +28,4 @@ thiserror = "2.0.17" tokio = { version = "1.48.0", features = ["full"] } tracing = "0.1.41" tracing-appender = "0.2.3" -tracing-subscriber = { version = "0.3.20", features = ["json", "env-filter"] } +tracing-subscriber = { version = "0.3.20", features = ["json", "env-filter", "fmt", "std"] } diff --git a/components/api-server/src/bin/api_server.rs b/components/api-server/src/bin/api_server.rs index fafd76ea89..e2adf4412d 100644 --- a/components/api-server/src/bin/api_server.rs +++ b/components/api-server/src/bin/api_server.rs @@ -16,44 +16,59 @@ use clp_rust_utils::{clp_config::package, serde::yaml}; use futures::{Stream, StreamExt}; use thiserror::Error; use tracing_appender::rolling::{RollingFileAppender, Rotation}; -use tracing_subscriber::{self}; +use tracing_subscriber::{self, fmt::writer::Tee}; #[derive(Parser)] #[command(version, about = "API Server for CLP.")] -struct Args {} +struct Args { + #[arg(long)] + config: String, + + #[arg(long)] + host: Option, + + #[arg(long)] + port: Option, +} #[tokio::main] async fn main() -> anyhow::Result<()> { - let _ = Args::parse(); - let home = std::env::var("CLP_HOME").context("Expect `CLP_HOME` env variable")?; - let home = std::path::Path::new(&home); + let args = Args::parse(); - let config_path = home.join(package::DEFAULT_CONFIG_FILE_PATH); - let config: package::config::Config = yaml::from_path(&config_path).context(format!( + let config_path = std::path::Path::new(args.config.as_str()); + let config: package::config::Config = yaml::from_path(config_path).context(format!( "Config file {} does not exist", config_path.display() ))?; - let file_appender = RollingFileAppender::new( - Rotation::HOURLY, - home.join(&config.logs_directory).join("api_server"), - "api_server.log", - ); + let logs_directory = + std::env::var("CLP_LOGS_DIR").context("Expect `CLP_LOGS_DIR` environment variable.")?; + let logs_directory = std::path::Path::new(logs_directory.as_str()); + let file_appender = + RollingFileAppender::new(Rotation::HOURLY, logs_directory, "api_server.log"); let (non_blocking_writer, _guard) = tracing_appender::non_blocking(file_appender); tracing_subscriber::fmt() .with_env_filter(tracing_subscriber::EnvFilter::from_default_env()) .with_ansi(false) - .with_writer(non_blocking_writer) + .with_writer(Tee::new(std::io::stdout, non_blocking_writer)) .init(); - let credentials_path = home.join(package::DEFAULT_CREDENTIALS_FILE_PATH); - let credentials: package::credentials::Credentials = yaml::from_path(&credentials_path) - .context(format!( - "Credentials file {} does not exist", - credentials_path.display() - ))?; - - let addr = format!("{}:{}", &config.api_server.host, &config.api_server.port); + let credentials = package::credentials::Credentials { + database: package::credentials::Database { + password: secrecy::SecretString::new( + std::env::var("CLP_DB_PASS") + .context("Expect `CLP_DB_PASS` env variable")? + .into_boxed_str(), + ), + user: std::env::var("CLP_DB_USER").context("Expect `CLP_DB_USER` env variable")?, + }, + }; + + let addr = format!( + "{}:{}", + args.host.unwrap_or_else(|| config.api_server.host.clone()), + args.port.unwrap_or(config.api_server.port) + ); let listener = tokio::net::TcpListener::bind(&addr) .await .context(format!("Cannot listen to {addr}"))?; @@ -72,9 +87,15 @@ async fn main() -> anyhow::Result<()> { tracing::info!("Server started at {addr}"); axum::serve(listener, app) .with_graceful_shutdown(async { - tokio::signal::ctrl_c() - .await - .expect("failed to listen for event"); + let mut sigterm = + tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate()) + .expect("failed to listen for SIGTERM"); + tokio::select! { + _ = sigterm.recv() => { + } + _ = tokio::signal::ctrl_c()=> { + } + } }) .await?; Ok(()) diff --git a/components/clp-package-utils/clp_package_utils/controller.py b/components/clp-package-utils/clp_package_utils/controller.py index 86b243a411..52b3865431 100644 --- a/components/clp-package-utils/clp_package_utils/controller.py +++ b/components/clp-package-utils/clp_package_utils/controller.py @@ -11,6 +11,7 @@ from typing import Any, Optional from clp_py_utils.clp_config import ( + API_SERVER_COMPONENT_NAME, AwsAuthType, CLPConfig, COMPRESSION_JOBS_TABLE_NAME, @@ -619,6 +620,30 @@ def _set_up_env_for_garbage_collector(self) -> EnvVarsDict: return env_vars + def _set_up_env_for_api_server(self) -> EnvVarsDict: + """ + Sets up environment variables and directories for the API server component. + + :return: Dictionary of environment variables necessary to launch the component. + """ + component_name = API_SERVER_COMPONENT_NAME + + logger.info(f"Setting up environment for {component_name}...") + + logs_dir = self._clp_config.logs_directory / component_name + resolved_logs_dir = resolve_host_path_in_container(logs_dir) + resolved_logs_dir.mkdir(parents=True, exist_ok=True) + + env_vars = EnvVarsDict() + + # Connection config + env_vars |= { + "CLP_API_SERVER_HOST": _get_ip_from_hostname(self._clp_config.api_server.host), + "CLP_API_SERVER_PORT": str(self._clp_config.api_server.port), + } + + return env_vars + def _read_and_update_settings_json( self, settings_file_path: pathlib.Path, updates: dict[str, Any] ) -> dict[str, Any]: @@ -747,6 +772,7 @@ def set_up_env(self) -> None: env_vars |= self._set_up_env_for_webui(container_clp_config) env_vars |= self._set_up_env_for_mcp_server() env_vars |= self._set_up_env_for_garbage_collector() + env_vars |= self._set_up_env_for_api_server() # Write the environment variables to the `.env` file. with open(f"{self._clp_home}/.env", "w") as env_file: diff --git a/components/clp-py-utils/clp_py_utils/clp_config.py b/components/clp-py-utils/clp_py_utils/clp_config.py index cc383a7f2f..aca0882fe9 100644 --- a/components/clp-py-utils/clp_py_utils/clp_config.py +++ b/components/clp-py-utils/clp_py_utils/clp_config.py @@ -39,6 +39,7 @@ WEBUI_COMPONENT_NAME = "webui" MCP_SERVER_COMPONENT_NAME = "mcp_server" GARBAGE_COLLECTOR_COMPONENT_NAME = "garbage_collector" +API_SERVER_COMPONENT_NAME = "api_server" # Action names ARCHIVE_MANAGER_ACTION_NAME = "archive_manager" @@ -592,6 +593,19 @@ class GarbageCollector(BaseModel): sweep_interval: SweepInterval = SweepInterval() +class QueryJobPollingConfig(BaseModel): + initial_backoff_ms: int = Field(default=100, alias="initial_backoff") + + max_backoff_ms: int = Field(default=5000, alias="max_backoff") + + +class ApiServer(BaseModel): + host: str = "localhost" + port: int = 3001 + query_job_polling: QueryJobPollingConfig = QueryJobPollingConfig() + default_max_num_query_results: int = 1000 + + class Presto(BaseModel): DEFAULT_PORT: ClassVar[int] = 8080 @@ -627,6 +641,7 @@ class CLPConfig(BaseModel): query_worker: QueryWorker = QueryWorker() webui: WebUi = WebUi() garbage_collector: GarbageCollector = GarbageCollector() + api_server: ApiServer = ApiServer() credentials_file_path: SerializablePath = CLP_DEFAULT_CREDENTIALS_FILE_PATH mcp_server: Optional[McpServer] = None diff --git a/components/clp-rust-utils/src/clp_config/package/config.rs b/components/clp-rust-utils/src/clp_config/package/config.rs index b5876918f3..6355a23903 100644 --- a/components/clp-rust-utils/src/clp_config/package/config.rs +++ b/components/clp-rust-utils/src/clp_config/package/config.rs @@ -75,6 +75,7 @@ impl Default for ApiServer { } #[derive(Clone, Debug, Deserialize, Eq, PartialEq)] +#[serde(default)] pub struct QueryJobPollingConfig { #[serde(rename = "initial_backoff")] pub initial_backoff_ms: u64, diff --git a/components/package-template/src/etc/clp-config.yml b/components/package-template/src/etc/clp-config.yml index 91322a86c6..23b3472363 100644 --- a/components/package-template/src/etc/clp-config.yml +++ b/components/package-template/src/etc/clp-config.yml @@ -118,6 +118,15 @@ # archive: 60 # search_result: 30 # +## API server config +#api_server: +# host: "localhost" +# port: 3001 +# default_max_num_query_results: 1000 +# query_job_polling: +# initial_backoff_ms: 100 +# max_backoff_ms: 5000 +# ## Presto client config #presto: null # diff --git a/taskfile.yaml b/taskfile.yaml index 293a6b1298..2b68163ff9 100644 --- a/taskfile.yaml +++ b/taskfile.yaml @@ -30,6 +30,7 @@ vars: G_PYTHON_LIBS_DIR: "{{.G_BUILD_DIR}}/python-libs" G_WEBUI_BUILD_DIR: "{{.G_BUILD_DIR}}/webui" G_SPIDER_BUILD_DIR: "{{.G_BUILD_DIR}}/spider" + G_RUST_BUILD_DIR: "{{.G_BUILD_DIR}}/rust" # Taskfile paths G_UTILS_TASKFILE: "{{.ROOT_DIR}}/tools/yscope-dev-utils/exports/taskfiles/utils/utils.yaml" @@ -73,6 +74,7 @@ tasks: vars: COMPONENT: "job-orchestration" - task: "clean-webui" + - task: "clean-rust" - task: "tests:integration:cache-clear" clean-core: @@ -103,6 +105,10 @@ tasks: - "rm -rf '{{.G_WEBUI_SRC_DIR}}/server/node_modules'" - "rm -rf '{{.G_WEBUI_SRC_DIR}}/yscope-log-viewer/node_modules'" + clean-rust: + cmds: + - "rm -rf '{{.G_RUST_BUILD_DIR}}'" + package: vars: CHECKSUM_FILE: "{{.G_PACKAGE_CHECKSUM_FILE}}" @@ -191,6 +197,17 @@ tasks: JOBS: "{{.G_CPP_MAX_PARALLELISM_PER_BUILD_TASK}}" TARGETS: ["clg", "clo", "clp", "clp-s", "indexer", "log-converter", "reducer-server"] + rust: + deps: ["toolchains:rust"] + dir: "{{.ROOT_DIR}}" + cmd: |- + echo $HOME + echo $HOME/.cargo + type cargo + type rustup + ls $HOME + cargo build --release --bins --target-dir {{.G_RUST_BUILD_DIR}} + clp-mcp-server: - task: "uv-component" vars: @@ -218,6 +235,7 @@ tasks: - "init" - "python-libs" - "webui" + - "rust" webui: env: diff --git a/tools/deployment/package/docker-compose-all.yaml b/tools/deployment/package/docker-compose-all.yaml index f037911192..b5dd4512bd 100644 --- a/tools/deployment/package/docker-compose-all.yaml +++ b/tools/deployment/package/docker-compose-all.yaml @@ -486,3 +486,30 @@ services: "-f", "http://mcp_server:8000/health" ] + + api-server: + <<: *service_defaults + hostname: "api_server" + environment: + CLP_LOGS_DIR: "/var/log/api_server" + CLP_DB_PASS: "${CLP_DB_PASS:?Please set a value.}" + CLP_DB_USER: "${CLP_DB_USER:?Please set a value.}" + RUST_LOG: "TRACE" + ports: + - host_ip: "${CLP_API_SERVER_HOST:-127.0.0.1}" + published: "${CLP_API_SERVER_PORT:-3001}" + target: 3001 + volumes: + - *volume_clp_config_readonly + - *volume_clp_logs + depends_on: + db-table-creator: + condition: "service_completed_successfully" + results-cache-indices-creator: + condition: "service_completed_successfully" + command: [ + "/opt/clp/bin/api_server", + "--host", "0.0.0.0", + "--port", "3001", + "--config", "/etc/clp-config.yml" + ] diff --git a/tools/deployment/package/docker-compose-base.yaml b/tools/deployment/package/docker-compose-base.yaml index fba182a839..7f0ee5b788 100644 --- a/tools/deployment/package/docker-compose-base.yaml +++ b/tools/deployment/package/docker-compose-base.yaml @@ -53,3 +53,8 @@ services: extends: file: "docker-compose-all.yaml" service: "garbage-collector" + + api-server: + extends: + file: "docker-compose-all.yaml" + service: "api-server" diff --git a/tools/deployment/package/docker-compose.yaml b/tools/deployment/package/docker-compose.yaml index 9ec4ea439f..a23c4a6a8a 100644 --- a/tools/deployment/package/docker-compose.yaml +++ b/tools/deployment/package/docker-compose.yaml @@ -73,3 +73,8 @@ services: extends: file: "docker-compose-all.yaml" service: "mcp-server" + + api-server: + extends: + file: "docker-compose-all.yaml" + service: "api-server" diff --git a/tools/docker-images/clp-package/Dockerfile b/tools/docker-images/clp-package/Dockerfile index 6dc00016d1..16deff4939 100644 --- a/tools/docker-images/clp-package/Dockerfile +++ b/tools/docker-images/clp-package/Dockerfile @@ -42,3 +42,4 @@ COPY --link --chown=${UID} ./build/spider/spider-build/src/spider/spider_worker COPY --link --chown=${UID} ./build/nodejs-22/bin/node bin/node-22 COPY --link --chown=${UID} ./build/python-libs/ lib/python3/site-packages/ COPY --link --chown=${UID} ./build/webui/ var/www/webui/ +COPY --link --chown=${UID} ./build/rust/release/api_server bin/