diff --git a/python/pyproject.toml b/python/pyproject.toml
index e2c7afc..9c7f59c 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -29,6 +29,7 @@ dependencies = [
     "opentelemetry-exporter-otlp~=1.21",
     "opentelemetry-instrumentation-fastapi~=0.46b0",
     "opentelemetry-sdk~=1.21",
+    "prometheus-client~=0.24",
     "jsontas>=1.4.1,<2.0.0",
     "packageurl-python~=0.11",
     "cryptography>=42.0.4,<43.0.0",
diff --git a/python/src/etos_api/library/metrics.py b/python/src/etos_api/library/metrics.py
new file mode 100644
index 0000000..1792986
--- /dev/null
+++ b/python/src/etos_api/library/metrics.py
@@ -0,0 +1,68 @@
+# Copyright Axis Communications AB.
+#
+# For a full list of individual contributors, please see the commit history.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""ETOS API metrics."""
+
+from enum import Enum
+from functools import wraps
+from logging import Logger
+from typing import Callable
+
+from fastapi import HTTPException
+from prometheus_client import Counter, Histogram
+
+OPERATIONS = Enum(
+    "OPERATIONS",
+    [
+        "start_testrun",
+        "get_subsuite",
+        "stop_testrun",
+    ],
+)
+
+REQUEST_TIME = Histogram(
+    "http_request_duration_seconds",
+    "Time spent processing request",
+    ["endpoint", "operation"],
+)
+REQUESTS_TOTAL = Counter(
+    "http_requests_total",
+    "Total number of requests",
+    ["endpoint", "operation", "status"],
+)
+
+
+# All operations in this file are intentionally upper-case, hence the decorator's name.
+def COUNT_REQUESTS(labels: dict, logger: Logger):  # pylint:disable=invalid-name
+    """Count number of requests to server using the REQUESTS_TOTAL counter."""
+
+    def decorator(func: Callable):
+        @wraps(func)
+        async def wrapper(*args, **kwargs):
+            try:
+                response = await func(*args, **kwargs)
+                REQUESTS_TOTAL.labels(**labels, status=200).inc()
+                return response
+            except HTTPException as http_exception:
+                REQUESTS_TOTAL.labels(**labels, status=http_exception.status_code).inc()
+                raise
+            except Exception:  # pylint:disable=bare-except
+                logger.exception("Unhandled exception occurred, setting status to 500")
+                REQUESTS_TOTAL.labels(**labels, status=500).inc()
+                raise
+
+        return wrapper
+
+    return decorator
diff --git a/python/src/etos_api/main.py b/python/src/etos_api/main.py
index a54c364..209226f 100644
--- a/python/src/etos_api/main.py
+++ b/python/src/etos_api/main.py
@@ -16,6 +16,7 @@
 """ETOS API."""
 
 from fastapi import FastAPI
+from prometheus_client import make_asgi_app
 
 from etos_api.routers.v0 import ETOSV0
 from etos_api.routers.v1alpha import ETOSV1ALPHA
@@ -26,3 +27,4 @@
 APP.mount("/api/v1alpha", ETOSV1ALPHA, "ETOS V1 Alpha")
 APP.mount("/api/v0", ETOSV0, "ETOS V0")
 APP.mount("/api", DEFAULT_VERSION, "ETOS V0")
+APP.mount("/metrics", make_asgi_app(), "Metrics")
diff --git a/python/src/etos_api/routers/v0/router.py b/python/src/etos_api/routers/v0/router.py
index d86707a..41b0225 100644
--- a/python/src/etos_api/routers/v0/router.py
+++ b/python/src/etos_api/routers/v0/router.py
@@ -32,6 +32,7 @@
 from starlette.responses import RedirectResponse, Response
 
 from etos_api.library.environment import Configuration, configure_testrun
+from etos_api.library.metrics import COUNT_REQUESTS, OPERATIONS, REQUEST_TIME
 from etos_api.library.opentelemetry import context
 from etos_api.library.utilities import sync_to_async
 
@@ -45,12 +46,22 @@
     root_path_in_servers=False,
     dependencies=[Depends(context)],
 )
+
+API = f"/api/{ETOSV0.version}/etos"
+START_LABELS = {"endpoint": API, "operation": OPERATIONS.start_testrun.name}
+# The key {suite_id} is supposed to indicate that this is a path parameter, but
+# we don't want to set the actual value in the metrics label since that would create
+# a high cardinality metric. Therefore we use the literal string "{suite_id}".
+STOP_LABELS = {"endpoint": f"{API}/{{suite_id}}", "operation": OPERATIONS.stop_testrun.name}
+
 TRACER = trace.get_tracer("etos_api.routers.etos.router")
 LOGGER = logging.getLogger(__name__)
 logging.getLogger("pika").setLevel(logging.WARNING)
 
 
 # pylint:disable=too-many-locals,too-many-statements
 @ETOSV0.post("/etos", tags=["etos"], response_model=StartEtosResponse)
+@COUNT_REQUESTS(START_LABELS, LOGGER)
+@REQUEST_TIME.labels(**START_LABELS).time()
 async def start_etos(
     etos: StartEtosRequest,
@@ -69,6 +80,8 @@ async def start_etos(
         return await _start(etos, span, otel_context.get_current())
 
 
 @ETOSV0.delete("/etos/{suite_id}", tags=["etos"], response_model=AbortEtosResponse)
+@COUNT_REQUESTS(STOP_LABELS, LOGGER)
+@REQUEST_TIME.labels(**STOP_LABELS).time()
 async def abort_etos(suite_id: str, ctx: Annotated[otel_context.Context, Depends(context)]) -> dict:
     """Abort ETOS execution on delete.
diff --git a/python/src/etos_api/routers/v1alpha/router.py b/python/src/etos_api/routers/v1alpha/router.py
index cda55ce..c913515 100644
--- a/python/src/etos_api/routers/v1alpha/router.py
+++ b/python/src/etos_api/routers/v1alpha/router.py
@@ -33,6 +33,7 @@
 from opentelemetry.trace import Span
 from starlette.responses import Response
 
+from etos_api.library.metrics import COUNT_REQUESTS, OPERATIONS, REQUEST_TIME
 from etos_api.library.opentelemetry import context
 
 from .schemas import AbortTestrunResponse, StartTestrunRequest, StartTestrunResponse
@@ -51,12 +52,26 @@
     root_path_in_servers=False,
     dependencies=[Depends(context)],
 )
+
+API = f"/api/{ETOSV1ALPHA.version}/testrun"
+START_LABELS = {"endpoint": API, "operation": OPERATIONS.start_testrun.name}
+# The key {suite_id} is supposed to indicate that this is a path parameter, but
+# we don't want to set the actual value in the metrics label since that would create
+# a high cardinality metric. Therefore we use the literal string "{suite_id}".
+STOP_LABELS = {"endpoint": f"{API}/{{suite_id}}", "operation": OPERATIONS.stop_testrun.name}
+SUBSUITE_LABELS = {
+    "endpoint": f"{API}/{{suite_id}}",
+    "operation": OPERATIONS.get_subsuite.name,
+}
+
 TRACER = trace.get_tracer("etos_api.routers.testrun.router")
 LOGGER = logging.getLogger(__name__)
 logging.getLogger("pika").setLevel(logging.WARNING)
 
 
 # pylint:disable=too-many-locals,too-many-statements
 @ETOSV1ALPHA.post("/testrun", tags=["etos"], response_model=StartTestrunResponse)
+@COUNT_REQUESTS(START_LABELS, LOGGER)
+@REQUEST_TIME.labels(**START_LABELS).time()
 async def start_testrun(
     etos: StartTestrunRequest, ctx: Annotated[otel_context.Context, Depends(context)]
@@ -74,6 +89,8 @@ async def start_testrun(
         return await _create_testrun(etos, span, otel_context.get_current())
 
 
 @ETOSV1ALPHA.delete("/testrun/{suite_id}", tags=["etos"], response_model=AbortTestrunResponse)
+@COUNT_REQUESTS(STOP_LABELS, LOGGER)
+@REQUEST_TIME.labels(**STOP_LABELS).time()
 async def abort_testrun(
     suite_id: str, ctx: Annotated[otel_context.Context, Depends(context)]
@@ -91,6 +108,11 @@ async def abort_testrun(
         return await _abort(suite_id)
 
 
+# The key {suite_id} is supposed to indicate that this is a path parameter, but
+# we don't want to set the actual value in the metrics label since that would create
+# a high cardinality metric. Therefore we use the literal string "{suite_id}".
 @ETOSV1ALPHA.get("/testrun/{sub_suite_id}", tags=["etos"])
+@COUNT_REQUESTS(SUBSUITE_LABELS, LOGGER)
+@REQUEST_TIME.labels(**SUBSUITE_LABELS).time()
 async def get_subsuite(sub_suite_id: str) -> dict:
     """Get sub suite returns the sub suite definition for the ETOS test runner.