diff --git a/CHANGES.md b/CHANGES.md index 1343bea6d..f2e289b93 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -13,6 +13,11 @@ `GroupTitle` and `Tags` . This feature has been added in order to support grouping and filtering of datasets in UIs, see https://github.com/xcube-dev/xcube-viewer/issues/385. + +* xcube server can now be configured to produce structured data logs. + If `data_logging` is `true` server requests are logged in form of + JSON data records, that can be machine-detected and -parsed by lines + starting with a `{` character. (#1060) * Added server endpoint `GET /statistics/{varName}` with query parameters `lon`, `lat`, `time` which is used to extract single point data. diff --git a/test/server/test_server.py b/test/server/test_server.py index 6ad5f6786..d2af695ea 100644 --- a/test/server/test_server.py +++ b/test/server/test_server.py @@ -201,6 +201,12 @@ def test_config_schema_effectively_merged(self): "additionalProperties": False, }, }, + "data_logging": { + "type": "boolean", + "title": ( + "Log JSON data records instead of human-readable message logs." + ), + }, "trace_perf": { "type": "boolean", "title": "Output performance measures", diff --git a/test/server/webservers/test_tornado.py b/test/server/webservers/test_tornado.py index 6a6cc000a..867afeaee 100644 --- a/test/server/webservers/test_tornado.py +++ b/test/server/webservers/test_tornado.py @@ -8,9 +8,6 @@ Callable, Any, Union, - Tuple, - Type, - Dict, ) from collections.abc import Sequence, Awaitable @@ -84,6 +81,38 @@ def test_start_and_update_and_stop(self): self.assertEqual(1, self.io_loop.start_count) self.assertEqual(1, self.io_loop.stop_count) + def test_data_logging_disabled_by_default(self): + server = mock_server(framework=self.framework, config={}) + self.framework.start(server.ctx) + self.assertNotIn("log_function", self.application.settings) + self.framework.stop(server.ctx) + + def test_enable_data_logging(self): + server = mock_server(framework=self.framework, config={"data_logging": True}) + self.framework.start(server.ctx) + self.assertIn("log_function", self.application.settings) + log_function = self.application.settings["log_function"] + self.assertTrue(callable(log_function)) + + class DatasetsHandler(tornado.web.RequestHandler): + def get(self): + return {} + + # noinspection PyTypeChecker + handler = DatasetsHandler( + self.application, + tornado.httputil.HTTPServerRequest( + method="GET", uri="/datasets", connection=MockConnection() + ), + ) + + # For time being, smoke test only, logging is side effect. + # Check if we can return data record from log_function(), + # and verify fields are as expected. + log_function(handler) + + self.framework.stop(server.ctx) + def test_async_exec(self): def my_func(a, b): return a + b @@ -158,13 +187,15 @@ def test_path_to_pattern_fails(self): with self.assertRaises(ValueError) as cm: p2p("/datasets/{dataset_id") self.assertEqual( - 'missing closing "}" in "/datasets/{dataset_id"', f"{cm.exception}", + 'missing closing "}" in "/datasets/{dataset_id"', + f"{cm.exception}", ) with self.assertRaises(ValueError) as cm: p2p("/datasets/dataset_id}/bbox") self.assertEqual( - 'missing opening "{" in "/datasets/dataset_id}/bbox"', f"{cm.exception}", + 'missing opening "{" in "/datasets/dataset_id}/bbox"', + f"{cm.exception}", ) with self.assertRaises(ValueError) as cm: @@ -421,6 +452,9 @@ def run_in_executor(self, *args, **kwargs): class MockApplication: def __init__(self): self.handlers = [] + self.settings = {} + self.ui_modules = {} + self.ui_methods = {} self.listen_count = 0 # noinspection PyUnusedLocal diff --git a/xcube/server/config.py b/xcube/server/config.py index 937231936..9981a7f86 100644 --- a/xcube/server/config.py +++ b/xcube/server/config.py @@ -34,6 +34,9 @@ " returned by server responses." " Can be an absolute URL or a relative URL path.", ), + data_logging=JsonBooleanSchema( + title="Log JSON data records instead of human-readable message logs." + ), trace_perf=JsonBooleanSchema( title="Output performance measures", ), diff --git a/xcube/server/webservers/tornado.py b/xcube/server/webservers/tornado.py index 9f143e9e0..a38c70ac7 100644 --- a/xcube/server/webservers/tornado.py +++ b/xcube/server/webservers/tornado.py @@ -4,11 +4,13 @@ import asyncio import concurrent.futures +import datetime import functools +import json import logging import traceback import urllib.parse -from typing import Any, Optional, Union, Callable, Type +from typing import Any, Optional, Union, Callable from collections.abc import Sequence, Awaitable, Mapping import tornado.escape @@ -124,16 +126,18 @@ def add_routes(self, api_routes: Sequence[ApiRoute], url_prefix: str): handlers = [] for api_route in api_routes: - handlers.append(( - url_prefix + self.path_to_pattern(api_route.path) - + ("/?" if api_route.slash else ""), - TornadoRequestHandler, - {"api_route": api_route}, - )) + handlers.append( + ( + url_prefix + + self.path_to_pattern(api_route.path) + + ("/?" if api_route.slash else ""), + TornadoRequestHandler, + {"api_route": api_route}, + ) + ) LOG.log( LOG_LEVEL_DETAIL, - f"Added route {api_route.path!r}" - f" from API {api_route.api_name!r}", + f"Added route {api_route.path!r}" f" from API {api_route.api_name!r}", ) if handlers: @@ -150,6 +154,14 @@ def start(self, ctx: Context): url_prefix = get_url_prefix(config) tornado_settings = config.get("tornado", {}) + if config.get("data_logging"): + # Reset formatters to just output the message (= JSON data record) + for h in logging.getLogger().handlers: + if isinstance(h, logging.StreamHandler): + h.setFormatter(logging.Formatter("%(message)s")) + + self.application.settings["log_function"] = _get_data_log_function(ctx) + self.application.listen(port, address=address, **tornado_settings) address_ = "127.0.0.1" if address == "0.0.0.0" else address @@ -474,3 +486,63 @@ def finish( ): self.write(data, content_type) return self._handler.finish() + + +# noinspection PyUnusedLocal +def _get_data_log_function(ctx: Context): + """Get a logging function that outputs JSON records. + + Args: + ctx: Application context. + + Returns: + A Tornado log function. + """ + + # Closure here, because we may want to include + # auth info from context later + def log_function(handler: tornado.web.RequestHandler): + request = handler.request + status = handler.get_status() + + if status < 400: + level = "INFO" + elif status < 500: + level = "WARNING" + else: + level = "ERROR" + + duration = round(1000.0 * request.request_time()) + + # will not be set yet + user_id = ( + f"{handler.current_user}" if handler.current_user is not None else None + ) + + data = { + "timestamp": str(datetime.datetime.utcnow()), + "level": level, + "status": status, + "user_id": user_id, + "duration": duration, # milliseconds + "request": { + "method": request.method, + "uri": request.uri, + "host": request.host, + "protocol": request.protocol, + "remote_ip": request.remote_ip, + "user_agent": request.headers.get("User-Agent"), + }, + } + + LOG.log(getattr(logging, level), DataLogMessage(data)) + + return log_function + + +class DataLogMessage: + def __init__(self, data: dict[str, Any]): + self.data = data + + def __str__(self): + return json.dumps(self.data)