Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ dependencies = [
"opentelemetry-exporter-otlp~=1.21",
"opentelemetry-instrumentation-fastapi~=0.46b0",
"opentelemetry-sdk~=1.21",
"prometheus-client~=0.24",
"jsontas>=1.4.1,<2.0.0",
"packageurl-python~=0.11",
"cryptography>=42.0.4,<43.0.0",
Expand Down
68 changes: 68 additions & 0 deletions python/src/etos_api/library/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Copyright Axis Communications AB.
#
# For a full list of individual contributors, please see the commit history.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""ETOS API metrics."""

from enum import Enum
from functools import wraps
from logging import Logger
from typing import Callable

from fastapi import HTTPException, Response
from prometheus_client import Counter, Histogram

# Closed set of operation names; members are numbered 1..3 in declaration order.
OPERATIONS = Enum(
    "OPERATIONS",
    ("start_testrun", "get_subsuite", "stop_testrun"),
)

# Histogram of request processing time, labelled by endpoint and operation.
REQUEST_TIME = Histogram(
    name="http_request_duration_seconds",
    documentation="Time spent processing request",
    labelnames=["endpoint", "operation"],
)
# Counter of requests, labelled by endpoint, operation and status.
REQUESTS_TOTAL = Counter(
    name="http_requests_total",
    documentation="Total number of requests",
    labelnames=["endpoint", "operation", "status"],
)


# All metric helpers in this file are intentionally upper-case, this decorator included.
def COUNT_REQUESTS(labels: dict, logger: Logger):  # pylint:disable=invalid-name
    """Count calls to an async endpoint using the REQUESTS_TOTAL counter.

    The returned decorator wraps an async callable and increments REQUESTS_TOTAL
    once per call, adding a `status` label next to the supplied `labels`:

    - the `status_code` of the returned object when it is a `Response`,
      otherwise 200,
    - the `status_code` of a raised `HTTPException`,
    - 500 for any other `Exception`, which is also logged through `logger`.

    Exceptions are always re-raised after they have been counted.

    NOTE(review): the routers in this change stack this decorator above the
    FastAPI route decorator. If the route decorator registers the function it
    receives and returns it unchanged, the registered endpoint is the
    undecorated function and nothing is counted - confirm, and apply this
    decorator beneath the route decorator if so.

    :param labels: Label values for REQUESTS_TOTAL, excluding `status`.
    :param logger: Logger used to record unexpected exceptions.
    :return: A decorator for async callables.
    """

    def decorator(func: Callable):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            try:
                response = await func(*args, **kwargs)
            except HTTPException as http_exception:
                REQUESTS_TOTAL.labels(**labels, status=http_exception.status_code).inc()
                raise
            except Exception:  # pylint:disable=broad-except
                logger.exception("Unhandled exception occurred, setting status to 500")
                REQUESTS_TOTAL.labels(**labels, status=500).inc()
                raise
            # Counted outside the try block so that a failure while updating the
            # metric is not reported as a failure of the wrapped endpoint.
            # A Response returned by the endpoint carries its own status code;
            # anything else is counted with the default 200.
            status = response.status_code if isinstance(response, Response) else 200
            REQUESTS_TOTAL.labels(**labels, status=status).inc()
            return response

        return wrapper

    return decorator
2 changes: 2 additions & 0 deletions python/src/etos_api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"""ETOS API."""

from fastapi import FastAPI
from prometheus_client import make_asgi_app

from etos_api.routers.v0 import ETOSV0
from etos_api.routers.v1alpha import ETOSV1ALPHA
Expand All @@ -26,3 +27,4 @@
# Versioned ETOS APIs; the unversioned "/api" prefix serves DEFAULT_VERSION.
APP.mount("/api/v1alpha", ETOSV1ALPHA, name="ETOS V1 Alpha")
APP.mount("/api/v0", ETOSV0, name="ETOS V0")
APP.mount("/api", DEFAULT_VERSION, name="ETOS V0")
# ASGI application created by prometheus_client, mounted under "/metrics".
APP.mount("/metrics", make_asgi_app(), name="Metrics")
13 changes: 13 additions & 0 deletions python/src/etos_api/routers/v0/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from starlette.responses import RedirectResponse, Response

from etos_api.library.environment import Configuration, configure_testrun
from etos_api.library.metrics import COUNT_REQUESTS, OPERATIONS, REQUEST_TIME
from etos_api.library.opentelemetry import context
from etos_api.library.utilities import sync_to_async

Expand All @@ -45,12 +46,22 @@
root_path_in_servers=False,
dependencies=[Depends(context)],
)

API = f"/api/{ETOSV0.version}/etos"
START_LABELS = {
    "endpoint": API,
    "operation": OPERATIONS.start_testrun.name,
}
# "{suite_id}" is kept as a literal placeholder to show where the path parameter
# goes. Using the real suite ID as a label value would create a high cardinality
# metric.
STOP_LABELS = {
    "endpoint": f"{API}/{{suite_id}}",
    "operation": OPERATIONS.stop_testrun.name,
}

TRACER = trace.get_tracer("etos_api.routers.etos.router")
LOGGER = logging.getLogger(__name__)
# Only let WARNING and above through from the "pika" logger.
logging.getLogger("pika").setLevel(logging.WARNING)
# pylint:disable=too-many-locals,too-many-statements


@REQUEST_TIME.labels(**START_LABELS).time()
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will likely include decorator overhead too? Intended?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since it is supposed to count request time it should be included. Not that the time we spend in decorators is enough to even notice.

@COUNT_REQUESTS(START_LABELS, LOGGER)
@ETOSV0.post("/etos", tags=["etos"], response_model=StartEtosResponse)
async def start_etos(
etos: StartEtosRequest,
Expand All @@ -69,6 +80,8 @@ async def start_etos(
return await _start(etos, span, otel_context.get_current())


@REQUEST_TIME.labels(**STOP_LABELS).time()
@COUNT_REQUESTS(STOP_LABELS, LOGGER)
@ETOSV0.delete("/etos/{suite_id}", tags=["etos"], response_model=AbortEtosResponse)
async def abort_etos(suite_id: str, ctx: Annotated[otel_context.Context, Depends(context)]) -> dict:
"""Abort ETOS execution on delete.
Expand Down
22 changes: 22 additions & 0 deletions python/src/etos_api/routers/v1alpha/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from opentelemetry.trace import Span
from starlette.responses import Response

from etos_api.library.metrics import COUNT_REQUESTS, OPERATIONS, REQUEST_TIME
from etos_api.library.opentelemetry import context

from .schemas import AbortTestrunResponse, StartTestrunRequest, StartTestrunResponse
Expand All @@ -51,12 +52,26 @@
root_path_in_servers=False,
dependencies=[Depends(context)],
)

API = f"/api/{ETOSV1ALPHA.version}/testrun"
START_LABELS = {
    "endpoint": API,
    "operation": OPERATIONS.start_testrun.name,
}
# "{suite_id}" is kept as a literal placeholder to show where the path parameter
# goes. Using the real suite ID as a label value would create a high cardinality
# metric.
STOP_LABELS = {
    "endpoint": f"{API}/{{suite_id}}",
    "operation": OPERATIONS.stop_testrun.name,
}
# NOTE(review): the GET route names its path parameter "sub_suite_id" while this
# label keeps the "{suite_id}" placeholder - confirm that this is intended.
SUBSUITE_LABELS = {
    "endpoint": f"{API}/{{suite_id}}",
    "operation": OPERATIONS.get_subsuite.name,
}

TRACER = trace.get_tracer("etos_api.routers.testrun.router")
LOGGER = logging.getLogger(__name__)
# Only let WARNING and above through from the "pika" logger.
logging.getLogger("pika").setLevel(logging.WARNING)
# pylint:disable=too-many-locals,too-many-statements


@REQUEST_TIME.labels(**START_LABELS).time()
@COUNT_REQUESTS(START_LABELS, LOGGER)
@ETOSV1ALPHA.post("/testrun", tags=["etos"], response_model=StartTestrunResponse)
async def start_testrun(
etos: StartTestrunRequest, ctx: Annotated[otel_context.Context, Depends(context)]
Expand All @@ -74,6 +89,8 @@ async def start_testrun(
return await _create_testrun(etos, span, otel_context.get_current())


@REQUEST_TIME.labels(**STOP_LABELS).time()
@COUNT_REQUESTS(STOP_LABELS, LOGGER)
@ETOSV1ALPHA.delete("/testrun/{suite_id}", tags=["etos"], response_model=AbortTestrunResponse)
async def abort_testrun(
suite_id: str, ctx: Annotated[otel_context.Context, Depends(context)]
Expand All @@ -91,6 +108,11 @@ async def abort_testrun(
return await _abort(suite_id)


# The metric labels for this route use the literal placeholder "{suite_id}" instead
# of the actual sub suite ID from the path, since putting the real value in a label
# would create a high cardinality metric.
@REQUEST_TIME.labels(**SUBSUITE_LABELS).time()
@COUNT_REQUESTS(SUBSUITE_LABELS, LOGGER)
@ETOSV1ALPHA.get("/testrun/{sub_suite_id}", tags=["etos"])
async def get_subsuite(sub_suite_id: str) -> dict:
"""Get sub suite returns the sub suite definition for the ETOS test runner.
Expand Down
Loading