Skip to content

Commit 53df39a

Browse files
authored
Merge branch 'y-scope:main' into 2024-anonymous-usage-telemetry-framework-for-clp
2 parents de34b9d + dbc1799 commit 53df39a

File tree

260 files changed

+7888
-3535
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

260 files changed

+7888
-3535
lines changed

Cargo.lock

Lines changed: 23 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

components/api-server/Cargo.toml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,5 @@ thiserror = "2.0.18"
3838
tokio = { version = "1.49.0", features = ["full"] }
3939
tower-http = { version = "0.6.8", features = ["cors"] }
4040
tracing = "0.1.44"
41-
tracing-appender = "0.2.4"
42-
tracing-subscriber = { version = "0.3.22", features = ["json", "env-filter", "fmt", "std"] }
4341
utoipa = { version = "5.4.0", features = ["axum_extras"] }
4442
utoipa-axum = "0.2.0"

components/api-server/src/bin/api_server.rs

Lines changed: 1 addition & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,6 @@
11
use anyhow::Context;
22
use clap::Parser;
33
use clp_rust_utils::{clp_config::package, serde::yaml};
4-
use tracing_appender::{
5-
non_blocking::WorkerGuard,
6-
rolling::{RollingFileAppender, Rotation},
7-
};
8-
use tracing_subscriber::{self, fmt::writer::MakeWriterExt};
94

105
#[derive(Parser)]
116
#[command(version, about = "API Server for CLP.")]
@@ -40,29 +35,6 @@ fn read_config_and_credentials(
4035
Ok((config, credentials))
4136
}
4237

43-
fn set_up_logging() -> anyhow::Result<WorkerGuard> {
44-
let logs_directory =
45-
std::env::var("CLP_LOGS_DIR").context("Expect `CLP_LOGS_DIR` environment variable.")?;
46-
let logs_directory = std::path::Path::new(logs_directory.as_str());
47-
let file_appender =
48-
RollingFileAppender::new(Rotation::HOURLY, logs_directory, "api_server.log");
49-
let (non_blocking_writer, guard) = tracing_appender::non_blocking(file_appender);
50-
tracing_subscriber::fmt()
51-
.event_format(
52-
tracing_subscriber::fmt::format()
53-
.with_level(true)
54-
.with_target(false)
55-
.with_file(true)
56-
.with_line_number(true)
57-
.json(),
58-
)
59-
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
60-
.with_ansi(false)
61-
.with_writer(std::io::stdout.and(non_blocking_writer))
62-
.init();
63-
Ok(guard)
64-
}
65-
6638
async fn shutdown_signal() {
6739
let mut sigterm = tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())
6840
.expect("failed to listen for SIGTERM");
@@ -79,7 +51,7 @@ async fn main() -> anyhow::Result<()> {
7951
let args = Args::parse();
8052

8153
let (config, credentials) = read_config_and_credentials(&args)?;
82-
let _guard = set_up_logging()?;
54+
let _guard = clp_rust_utils::logging::set_up_logging("api_server.log");
8355

8456
let api_server_config = config
8557
.api_server

components/api-server/src/client.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@ pub struct QueryConfig {
2828
/// The search query as a KQL string.
2929
pub query_string: String,
3030

31-
/// The dataset to search within. If not provided, only `default` dataset will be searched.
31+
/// The datasets to search within. If not provided, only `default` dataset will be searched.
3232
#[serde(default)]
33-
pub dataset: Option<String>,
33+
pub datasets: Option<Vec<String>>,
3434

3535
/// The maximum number of results to return. Set to `0` for no limit.
3636
#[serde(default)]
@@ -58,7 +58,7 @@ pub struct QueryConfig {
5858
impl From<QueryConfig> for SearchJobConfig {
5959
fn from(value: QueryConfig) -> Self {
6060
Self {
61-
dataset: value.dataset,
61+
datasets: value.datasets,
6262
query_string: value.query_string,
6363
max_num_results: value.max_num_results,
6464
begin_timestamp: value.time_range_begin_millisecs,
@@ -128,10 +128,10 @@ impl Client {
128128
/// * Forwards [`sqlx::query::Query::execute`]'s return values on failure.
129129
pub async fn submit_query(&self, query_config: QueryConfig) -> Result<u64, ClientError> {
130130
let mut search_job_config: SearchJobConfig = query_config.into();
131-
if search_job_config.dataset.is_none() {
132-
search_job_config.dataset = match self.config.package.storage_engine {
131+
if search_job_config.datasets.is_none() {
132+
search_job_config.datasets = match self.config.package.storage_engine {
133133
StorageEngine::Clp => None,
134-
StorageEngine::ClpS => Some("default".to_owned()),
134+
StorageEngine::ClpS => Some(vec!["default".to_owned()]),
135135
}
136136
}
137137
if search_job_config.max_num_results == 0 {

components/api-server/src/routes.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ async fn health() -> String {
8585
content= QueryConfig,
8686
example = json!({
8787
"query_string": "*",
88-
"dataset": "default",
88+
"datasets": ["default"],
8989
"time_range_begin_millisecs": 0,
9090
"time_range_end_millisecs": 17_356_896,
9191
"ignore_case": true,

components/clp-mcp-server/clp_mcp_server/clp_connector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ async def submit_query(
6060
job_config = msgpack.packb(
6161
{
6262
"begin_timestamp": begin_ts,
63-
"dataset": CLP_DEFAULT_DATASET_NAME,
63+
"datasets": [CLP_DEFAULT_DATASET_NAME],
6464
"end_timestamp": end_ts,
6565
"ignore_case": True,
6666
"max_num_results": SEARCH_MAX_NUM_RESULTS,

components/clp-mcp-server/clp_mcp_server/clp_mcp_server.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,13 @@
11
"""CLP MCP Server entry point."""
22

33
import ipaddress
4-
import logging
5-
import os
64
import socket
75
import sys
86
from pathlib import Path
97

108
import click
119
from clp_py_utils.clp_config import ClpConfig, MCP_SERVER_COMPONENT_NAME
12-
from clp_py_utils.clp_logging import get_logger, get_logging_formatter, set_logging_level
10+
from clp_py_utils.clp_logging import configure_logging, get_logger
1311
from clp_py_utils.core import read_yaml_config_file
1412
from pydantic import ValidationError
1513

@@ -38,12 +36,8 @@ def main(host: str, port: int, config_path: Path) -> int:
3836
:param config_path: The path to server's configuration file.
3937
:return: Exit code (0 for success, non-zero for failure).
4038
"""
41-
# Setup logging to file
42-
log_file_path = Path(os.getenv("CLP_LOGS_DIR")) / "mcp_server.log"
43-
logging_file_handler = logging.FileHandler(filename=log_file_path, encoding="utf-8")
44-
logging_file_handler.setFormatter(get_logging_formatter())
45-
logger.addHandler(logging_file_handler)
46-
set_logging_level(logger, os.getenv("CLP_LOGGING_LEVEL"))
39+
# Setup optional file logging and logging level.
40+
configure_logging(logger, "mcp_server")
4741

4842
exit_code = 0
4943

components/clp-package-utils/clp_package_utils/controller.py

Lines changed: 70 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import ipaddress
12
import json
23
import logging
34
import multiprocessing
@@ -188,13 +189,25 @@ def _set_up_env_for_database(self) -> EnvVarsDict:
188189

189190
# Connection config
190191
env_vars |= {
191-
"CLP_DB_HOST": _get_ip_from_hostname(self._clp_config.database.host),
192192
"CLP_DB_NAME": self._clp_config.database.names[ClpDbNameType.CLP],
193-
"CLP_DB_PORT": str(self._clp_config.database.port),
194193
}
195194
if self._clp_config.compression_scheduler.type == OrchestrationType.SPIDER:
196195
env_vars["SPIDER_DB_NAME"] = self._clp_config.database.names[ClpDbNameType.SPIDER]
197196

197+
if BundledService.DATABASE not in self._clp_config.bundled:
198+
env_vars |= {
199+
"CLP_DB_CONNECT_PORT": str(self._clp_config.database.port),
200+
"CLP_EXTRA_HOST_DATABASE_NAME": DB_COMPONENT_NAME,
201+
"CLP_EXTRA_HOST_DATABASE_ADDR": _resolve_external_host(
202+
self._clp_config.database.host
203+
),
204+
}
205+
else:
206+
env_vars |= {
207+
"CLP_DB_HOST": _get_ip_from_hostname(self._clp_config.database.host),
208+
"CLP_DB_PORT": str(self._clp_config.database.port),
209+
}
210+
198211
# Credentials
199212
credentials = self._clp_config.database.credentials
200213
env_vars |= {
@@ -261,10 +274,17 @@ def _set_up_env_for_queue(self) -> EnvVarsDict:
261274
env_vars = EnvVarsDict()
262275

263276
# Connection config
264-
env_vars |= {
265-
"CLP_QUEUE_HOST": _get_ip_from_hostname(self._clp_config.queue.host),
266-
"CLP_QUEUE_PORT": str(self._clp_config.queue.port),
267-
}
277+
if BundledService.QUEUE not in self._clp_config.bundled:
278+
env_vars |= {
279+
"CLP_QUEUE_CONNECT_PORT": str(self._clp_config.queue.port),
280+
"CLP_EXTRA_HOST_QUEUE_NAME": QUEUE_COMPONENT_NAME,
281+
"CLP_EXTRA_HOST_QUEUE_ADDR": _resolve_external_host(self._clp_config.queue.host),
282+
}
283+
else:
284+
env_vars |= {
285+
"CLP_QUEUE_HOST": _get_ip_from_hostname(self._clp_config.queue.host),
286+
"CLP_QUEUE_PORT": str(self._clp_config.queue.port),
287+
}
268288

269289
# Credentials
270290
env_vars |= {
@@ -343,10 +363,17 @@ def _set_up_env_for_redis(self) -> EnvVarsDict:
343363
env_vars = EnvVarsDict()
344364

345365
# Connection config
346-
env_vars |= {
347-
"CLP_REDIS_HOST": _get_ip_from_hostname(self._clp_config.redis.host),
348-
"CLP_REDIS_PORT": str(self._clp_config.redis.port),
349-
}
366+
if BundledService.REDIS not in self._clp_config.bundled:
367+
env_vars |= {
368+
"CLP_REDIS_CONNECT_PORT": str(self._clp_config.redis.port),
369+
"CLP_EXTRA_HOST_REDIS_NAME": REDIS_COMPONENT_NAME,
370+
"CLP_EXTRA_HOST_REDIS_ADDR": _resolve_external_host(self._clp_config.redis.host),
371+
}
372+
else:
373+
env_vars |= {
374+
"CLP_REDIS_HOST": _get_ip_from_hostname(self._clp_config.redis.host),
375+
"CLP_REDIS_PORT": str(self._clp_config.redis.port),
376+
}
350377

351378
# Credentials
352379
env_vars |= {
@@ -442,9 +469,22 @@ def _set_up_env_for_results_cache(self) -> EnvVarsDict:
442469
# Connection config
443470
env_vars |= {
444471
"CLP_RESULTS_CACHE_DB_NAME": self._clp_config.results_cache.db_name,
445-
"CLP_RESULTS_CACHE_HOST": _get_ip_from_hostname(self._clp_config.results_cache.host),
446-
"CLP_RESULTS_CACHE_PORT": str(self._clp_config.results_cache.port),
447472
}
473+
if BundledService.RESULTS_CACHE not in self._clp_config.bundled:
474+
env_vars |= {
475+
"CLP_RESULTS_CACHE_CONNECT_PORT": str(self._clp_config.results_cache.port),
476+
"CLP_EXTRA_HOST_RESULTS_CACHE_NAME": RESULTS_CACHE_COMPONENT_NAME,
477+
"CLP_EXTRA_HOST_RESULTS_CACHE_ADDR": _resolve_external_host(
478+
self._clp_config.results_cache.host
479+
),
480+
}
481+
else:
482+
env_vars |= {
483+
"CLP_RESULTS_CACHE_HOST": _get_ip_from_hostname(
484+
self._clp_config.results_cache.host
485+
),
486+
"CLP_RESULTS_CACHE_PORT": str(self._clp_config.results_cache.port),
487+
}
448488

449489
return env_vars
450490

@@ -683,6 +723,7 @@ def _set_up_env_for_webui(self, container_clp_config: ClpConfig) -> EnvVarsDict:
683723
"ClpStorageEngine": self._clp_config.package.storage_engine,
684724
"ClpQueryEngine": self._clp_config.package.query_engine,
685725
"LogsInputType": self._clp_config.logs_input.type,
726+
"MaxDatasetsPerQuery": self._clp_config.query_scheduler.max_datasets_per_query,
686727
"MongoDbSearchResultsMetadataCollectionName": (
687728
self._clp_config.webui.results_metadata_collection_name
688729
),
@@ -1160,3 +1201,20 @@ def _get_ip_from_hostname(hostname: str) -> str:
11601201
:return: The resolved IP address.
11611202
"""
11621203
return socket.gethostbyname(hostname)
1204+
1205+
1206+
def _resolve_external_host(hostname: str) -> str:
1207+
"""
1208+
Resolves a hostname to an address suitable for Docker's ``extra_hosts``.
1209+
1210+
When the hostname resolves to a loopback address, returns Docker's ``host-gateway`` token so
1211+
that containers can reach services running on the Docker host. For any other hostname, falls
1212+
back to standard DNS resolution.
1213+
1214+
:param hostname:
1215+
:return: The resolved address.
1216+
"""
1217+
resolved_ip = _get_ip_from_hostname(hostname)
1218+
if ipaddress.ip_address(resolved_ip).is_loopback:
1219+
return "host-gateway"
1220+
return resolved_ip

components/clp-package-utils/clp_package_utils/scripts/native/archive_manager.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
END_TS_ARG,
2929
FIND_COMMAND,
3030
)
31-
from clp_package_utils.scripts.native.utils import validate_dataset_exists
31+
from clp_package_utils.scripts.native.utils import validate_datasets_exist
3232

3333
logger: logging.Logger = logging.getLogger(__file__)
3434

@@ -200,7 +200,7 @@ def main(argv: list[str]) -> int:
200200
dataset = parsed_args.dataset
201201
if dataset is not None:
202202
try:
203-
validate_dataset_exists(database_config, dataset)
203+
validate_datasets_exist(database_config, [dataset])
204204
except Exception as e:
205205
logger.error(e)
206206
return -1

components/clp-package-utils/clp_package_utils/scripts/native/decompress.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
CLP_DB_PASS_ENV_VAR_NAME,
1313
CLP_DB_USER_ENV_VAR_NAME,
1414
CLP_DEFAULT_CONFIG_FILE_RELATIVE_PATH,
15+
CLP_DEFAULT_DATASET_NAME,
1516
ClpConfig,
1617
ClpDbNameType,
1718
ClpDbUserType,
@@ -36,7 +37,7 @@
3637
from clp_package_utils.scripts.native.utils import (
3738
run_function_in_process,
3839
submit_query_job,
39-
validate_dataset_exists,
40+
validate_datasets_exist,
4041
wait_for_query_job,
4142
)
4243

@@ -144,11 +145,9 @@ def handle_extract_stream_cmd(
144145
)
145146
elif EXTRACT_JSON_CMD == command:
146147
dataset = parsed_args.dataset
147-
if dataset is None:
148-
logger.error(f"Dataset unspecified, but must be specified for command `{command}'.")
149-
return -1
148+
dataset = CLP_DEFAULT_DATASET_NAME if dataset is None else dataset
150149
try:
151-
validate_dataset_exists(clp_config.database, dataset)
150+
validate_datasets_exist(clp_config.database, [dataset])
152151
except Exception as e:
153152
logger.error(e)
154153
return -1

0 commit comments

Comments
 (0)