Skip to content

Commit 7c028f1

Browse files
committed
Merge branch 'main' into bundle-mariadb
# Conflicts:
#	tools/docker-images/clp-package/Dockerfile
2 parents b345b02 + 2ac456e commit 7c028f1

File tree

91 files changed

+4401
-2407
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

91 files changed

+4401
-2407
lines changed

.github/workflows/clp-rust-checks.yaml

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,17 @@ name: "clp-rust-checks"
22

33
on:
44
pull_request:
5+
paths: &monitored_paths
6+
- ".cargo/**"
7+
- "Cargo.lock"
8+
- "Cargo.toml"
9+
- ".github/workflows/clp-rust-checks.yaml"
10+
- "components/clp-rust-utils/**"
11+
- "components/log-ingestor/**"
12+
- "taskfile.yaml"
13+
- "taskfiles/**"
514
push:
15+
paths: *monitored_paths
616
schedule:
717
# Run daily at 00:15 UTC (the 15 is to avoid periods of high load)
818
- cron: "15 0 * * *"
@@ -31,6 +41,10 @@ jobs:
3141
shell: "bash"
3242
run: "npm install -g @go-task/cli@<version>"
3343

44+
- name: "Validate lock files"
45+
shell: "bash"
46+
run: "task deps:lock:check-rust"
47+
3448
- name: "Lint"
3549
shell: "bash"
3650
run: "task lint:check-rust"

Cargo.lock

Lines changed: 7 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

components/clp-mcp-server/clp_mcp_server/clp_connector.py

Lines changed: 28 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55

66
import aiomysql
77
import msgpack
8+
from clp_py_utils.clp_config import CLP_DEFAULT_DATASET_NAME
89
from pymongo import AsyncMongoClient
910

1011
from .constants import (
@@ -37,27 +38,32 @@ def __init__(self, clp_config: Any) -> None:
3738
"db": clp_config.database.name,
3839
}
3940

40-
async def submit_query(self, query: str, begin_ts: int, end_ts: int) -> str:
41+
self._webui_addr = f"http://{clp_config.webui.host}:{clp_config.webui.port}"
42+
43+
async def submit_query(
44+
self, query: str, begin_ts: int | None = None, end_ts: int | None = None
45+
) -> str:
4146
"""
4247
Submits a query to the CLP database and returns the ID of the query.
4348
4449
:param query: The query string.
4550
:param begin_ts: The beginning timestamp of the query range.
4651
:param end_ts: The end timestamp of the query range.
4752
:return: The ID assigned to the query.
48-
:raise ValueError: If ``end_ts`` is smaller than ``begin_ts``.
49-
:raise aiomysql.Error: If there is an error connecting to or querying MariaDB.
50-
:raise pymongo.errors.PyMongoError: If there is an error interacting with MongoDB.
51-
:raise Exception: For any other unexpected errors.
53+
:raise: ValueError if `end_ts` is smaller than `begin_ts`.
54+
:raise: RuntimeError if it fails to retrieve the ID of the submitted query.
55+
:raise: aiomysql.Error if there is an error connecting to or querying MariaDB.
56+
:raise: pymongo.errors.PyMongoError if there is an error interacting with MongoDB.
57+
:raise: Exception for any other unexpected errors.
5258
"""
53-
if end_ts < begin_ts:
59+
if begin_ts is not None and end_ts is not None and end_ts < begin_ts:
5460
err_msg = f"end_ts {end_ts} is smaller than begin_ts {begin_ts}."
5561
raise ValueError(err_msg)
5662

5763
job_config = msgpack.packb(
5864
{
5965
"begin_timestamp": begin_ts,
60-
"dataset": None,
66+
"dataset": CLP_DEFAULT_DATASET_NAME,
6167
"end_timestamp": end_ts,
6268
"ignore_case": True,
6369
"max_num_results": SEARCH_MAX_NUM_RESULTS,
@@ -118,9 +124,10 @@ async def wait_query_completion(self, query_id: str, timeout: float | None = Non
118124
119125
:param query_id: The ID of the query.
120126
:param timeout: Maximum time to wait in seconds, or None for no timeout.
121-
:raise aiomysql.Error: If there is an error connecting to or querying MariaDB.
122-
:raise ValueError: When the query is not found.
123-
:raise RuntimeError: When the query fails or is cancelled.
127+
:raise: aiomysql.Error if there is an error connecting to or querying MariaDB.
128+
:raise: ValueError if the query is not found.
129+
:raise: RuntimeError if the query fails or is cancelled.
130+
:raise: TimeoutError if the timeout is reached before the query completes.
124131
"""
125132
waiting_states = {QueryJobStatus.PENDING, QueryJobStatus.RUNNING, QueryJobStatus.CANCELLING}
126133
error_states = {QueryJobStatus.FAILED, QueryJobStatus.CANCELLED, QueryJobStatus.KILLED}
@@ -132,7 +139,10 @@ async def wait_query_completion(self, query_id: str, timeout: float | None = Non
132139
if status == QueryJobStatus.SUCCEEDED:
133140
break
134141
if status in error_states:
135-
err_msg = f"Query job with ID {query_id} ended in status {status.name}."
142+
err_msg = (
143+
f"Query job with ID {query_id} ended in "
144+
f"status {QueryJobStatus(status).name}."
145+
)
136146
raise RuntimeError(err_msg)
137147
if status not in waiting_states:
138148
err_msg = f"Query job with ID {query_id} has unknown status {status}."
@@ -155,6 +165,13 @@ async def read_results(self, query_id: str) -> list[dict]:
155165
results = []
156166

157167
async for doc in collection.find({}, limit=SEARCH_MAX_NUM_RESULTS):
168+
doc["link"] = (
169+
f"{self._webui_addr}/streamFile?type=json"
170+
f'&streamId={doc["archive_id"]}'
171+
f"&dataset={CLP_DEFAULT_DATASET_NAME}"
172+
f'&logEventIdx={doc["log_event_ix"]}'
173+
)
174+
doc["_id"] = None
158175
results.append(doc)
159176

160177
return results

components/clp-mcp-server/clp_mcp_server/clp_mcp_server.py

Lines changed: 42 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2,35 +2,55 @@
22

33
import ipaddress
44
import logging
5+
import os
56
import socket
67
import sys
8+
from pathlib import Path
79

810
import click
11+
from clp_py_utils.clp_config import CLPConfig, MCP_SERVER_COMPONENT_NAME
12+
from clp_py_utils.clp_logging import get_logger, get_logging_formatter, set_logging_level
13+
from clp_py_utils.core import read_yaml_config_file
14+
from pydantic import ValidationError
915

1016
from .server import create_mcp_server
1117

18+
logger = get_logger(MCP_SERVER_COMPONENT_NAME)
19+
1220

1321
@click.command()
1422
@click.option(
1523
"--host", type=str, default="127.0.0.1", help="The server's host address (default: 127.0.0.1)."
1624
)
1725
@click.option("--port", type=int, default=8000, help="The server's port number (default: 8000).")
18-
def main(host: str, port: int) -> None:
26+
@click.option(
27+
"--config-path",
28+
type=click.Path(exists=True),
29+
default="/etc/clp-config.yml",
30+
help="The path to server's configuration file (default: /etc/clp-config.yml).",
31+
)
32+
def main(host: str, port: int, config_path: Path) -> int:
1933
"""
2034
Runs the CLP MCP server with HTTP transport.
2135
2236
:param host: The server's host address (IP address or hostname).
2337
:param port: The server's port number (1-65535).
38+
:param config_path: The path to server's configuration file.
39+
:return: Exit code (0 for success, non-zero for failure).
2440
"""
25-
logging.basicConfig(
26-
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
27-
)
28-
logger = logging.getLogger(__name__)
41+
# Setup logging to file
42+
log_file_path = Path(os.getenv("CLP_LOGS_DIR")) / "mcp_server.log"
43+
logging_file_handler = logging.FileHandler(filename=log_file_path, encoding="utf-8")
44+
logging_file_handler.setFormatter(get_logging_formatter())
45+
logger.addHandler(logging_file_handler)
46+
set_logging_level(logger, os.getenv("CLP_LOGGING_LEVEL"))
47+
48+
exit_code = 0
2949

3050
# Validate host and port
3151
if len(host.strip()) == 0:
3252
logger.error("Host cannot be empty.")
33-
sys.exit(1)
53+
exit_code = 1
3454

3555
# Validate host format (IP address or resolvable hostname)
3656
try:
@@ -44,21 +64,32 @@ def main(host: str, port: int) -> None:
4464
"Host validation failed: '%s' is not a valid IP address and DNS resolution failed.",
4565
host,
4666
)
47-
sys.exit(1)
67+
exit_code = 1
4868

4969
max_port = 65535
5070
if port <= 0 or port > max_port:
5171
logger.error("Port must be between 1 and %d, got: %d.", max_port, port)
52-
sys.exit(1)
72+
exit_code = 1
5373

5474
try:
55-
mcp = create_mcp_server()
75+
clp_config = CLPConfig.model_validate(read_yaml_config_file(config_path))
76+
except ValidationError:
77+
logger.exception("Configuration validation failed.")
78+
exit_code = 1
79+
except Exception:
80+
logger.exception("Failed to load configuration.")
81+
exit_code = 1
82+
83+
try:
84+
mcp = create_mcp_server(clp_config)
5685
logger.info("Starting CLP MCP Server on %s:%d.", host, port)
5786
mcp.run(transport="streamable-http", host=host, port=port)
5887
except Exception:
5988
logger.exception("Failed to start MCP server.")
60-
sys.exit(1)
89+
exit_code = 1
90+
91+
return exit_code
6192

6293

6394
if __name__ == "__main__":
64-
main()
95+
sys.exit(main())

components/clp-mcp-server/clp_mcp_server/server/constants.py

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,20 +6,25 @@
66
# 10 minutes
77
SESSION_TTL_SECONDS = 600
88

9+
TIMESTAMP_NOT_AVAILABLE = "N/A"
10+
911
SERVER_NAME = "clp-mcp-server"
1012

1113
# fmt: off
1214
SYSTEM_PROMPT = (
1315
"You are an AI assistant that helps users query a log database using KQL (Kibana Query Language)."
1416
" You should generate a KQL query that accurately expresses the user's intent. The generated KQL"
15-
" query should be as specific as possible to minimize the number of log messages returned.\n\n"
17+
" query should be as specific as possible to minimize the number of log messages returned. When "
18+
"displaying log messages, wrap them in hyperlinks with the `link` field from the search result.\n\n"
1619
"You should consider the following guidelines to generate KQL queries efficiently:\n"
1720
"- Use specific field names and values to narrow down the search.\n"
1821
"- Avoid using wildcards (`*`) unless absolutely necessary, as they can lead to large result"
1922
" sets.\n"
2023
"- Use logical operators (`AND`, `OR`, `NOT`) to combine one or more key-value searches.\n"
21-
"- Consider the time range of the logs you are searching. If the user specifies a time range,"
22-
" include it in the KQL query.\n"
24+
"- Consider specifying a time range to narrow down the search. Use"
25+
" `search_by_kql_with_timestamp_range` with your KQL query and explicit start and end timestamps."
26+
" Timestamps must follow the ISO 8601 UTC format (`YYYY-MM-DDTHH:mm:ss.fffZ`), where the trailing"
27+
" `Z` indicates UTC.\n"
2328
"- If the user query is ambiguous or lacks detail, ask clarifying questions to better understand"
2429
" their intent before generating the KQL query.\n"
2530
"- Always ensure that the generated KQL query is syntactically correct and can be executed without"

0 commit comments

Comments
 (0)