Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ dependencies = [
"opentelemetry-exporter-otlp~=1.21",
"opentelemetry-instrumentation-fastapi~=0.46b0",
"opentelemetry-sdk~=1.21",
"prometheus-client~=0.24",
"jsontas>=1.4.1,<2.0.0",
"packageurl-python~=0.11",
"cryptography>=42.0.4,<43.0.0",
Expand Down
40 changes: 40 additions & 0 deletions python/src/etos_api/library/metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Copyright Axis Communications AB.
#
# For a full list of individual contributors, please see the commit history.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""ETOS API metrics."""

from enum import Enum

from prometheus_client import Counter, Histogram

# Closed set of operations the API reports metrics for. The member names are
# used as the value of the "operation" metric label.
OPERATIONS = Enum(
    "OPERATIONS",
    (
        "start_testrun",
        "get_subsuite",
        "stop_testrun",
    ),
)

# Duration of handled requests, partitioned by endpoint and operation.
REQUEST_TIME = Histogram(
    "http_request_duration_seconds",
    "Time spent processing request",
    labelnames=("endpoint", "operation"),
)
# Number of handled requests, partitioned by endpoint, operation and the
# status code that was reported for the request.
REQUESTS_TOTAL = Counter(
    "http_requests_total",
    "Total number of requests",
    labelnames=("endpoint", "operation", "status"),
)
3 changes: 3 additions & 0 deletions python/src/etos_api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
"""ETOS API."""

from fastapi import FastAPI
from prometheus_client import make_asgi_app

from etos_api.routers.v0 import ETOSv0
from etos_api.routers.v1alpha import ETOSv1Alpha

Expand All @@ -25,3 +27,4 @@
APP.mount("/api/v1alpha", ETOSv1Alpha, "ETOS V1 Alpha")
APP.mount("/api/v0", ETOSv0, "ETOS V0")
APP.mount("/api", DEFAULT_VERSION, "ETOS V0")
APP.mount("/metrics", make_asgi_app(), "Metrics")
43 changes: 37 additions & 6 deletions python/src/etos_api/routers/v0/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,28 +23,38 @@
from etos_lib import ETOS
from etos_lib.kubernetes import Kubernetes
from fastapi import FastAPI, HTTPException
from starlette.responses import RedirectResponse, Response
from kubernetes import client
from opentelemetry import trace
from opentelemetry.trace import Span
from starlette.responses import RedirectResponse, Response

from etos_api.library.environment import Configuration, configure_testrun
from etos_api.library.metrics import OPERATIONS, REQUEST_TIME, REQUESTS_TOTAL
from etos_api.library.utilities import sync_to_async

from .schemas import AbortEtosResponse, StartEtosRequest, StartEtosResponse
from .utilities import wait_for_artifact_created, validate_suite
from .utilities import validate_suite, wait_for_artifact_created

# FastAPI application holding the v0 endpoints.
ETOSv0 = FastAPI(
    title="ETOS",
    version="v0",
    summary="API endpoints for ETOS v0 - I.e. the version before versions",
    root_path_in_servers=False,
)

# Static metric labels for the endpoints in this router.
API = f"/api/{ETOSv0.version}/etos"
START_LABELS = {
    "endpoint": API,
    "operation": OPERATIONS.start_testrun.name,
}
# "{suite_id}" is kept as a literal placeholder to show that the endpoint has a
# path parameter. The actual value is deliberately left out of the label since
# it would create a high cardinality metric.
STOP_LABELS = {
    "endpoint": API + "/{suite_id}",
    "operation": OPERATIONS.stop_testrun.name,
}

TRACER = trace.get_tracer("etos_api.routers.etos.router")
LOGGER = logging.getLogger(__name__)
# Only let warnings and above through from the "pika" logger.
logging.getLogger("pika").setLevel(logging.WARNING)


@REQUEST_TIME.labels(**START_LABELS).time()
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will likely include decorator overhead too? Intended?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since it is supposed to count request time it should be included. Not that the time we spend in decorators is enough to even notice.

@ETOSv0.post("/etos", tags=["etos"], response_model=StartEtosResponse)
async def start_etos(etos: StartEtosRequest):
    """Start ETOS execution on post.

    The request duration is observed in REQUEST_TIME and the outcome is counted
    in REQUESTS_TOTAL, labelled with the resulting status code.

    :param etos: The request body describing the execution to start.
    :type etos: :obj:`StartEtosRequest`
    :raises HTTPException: Re-raised unchanged after being counted.
    :return: JSON dictionary with response.
    :rtype: dict
    """
    # FastAPI registers the function handed to the route decorator and returns
    # it unchanged, so a timer decorator stacked above the route decorator is
    # never run for HTTP requests. Time the request here instead, where the
    # timer also spans the awaited work.
    with REQUEST_TIME.labels(**START_LABELS).time():
        try:
            with TRACER.start_as_current_span("start-etos") as span:
                response = await _start(etos, span)
            REQUESTS_TOTAL.labels(**START_LABELS, status=200).inc()
            return response
        except HTTPException as http_exception:
            REQUESTS_TOTAL.labels(**START_LABELS, status=http_exception.status_code).inc()
            raise
        except Exception:  # pylint:disable=broad-except
            # Anything that is not an HTTPException is counted as a 500.
            LOGGER.exception("Unhandled exception occurred")
            REQUESTS_TOTAL.labels(**START_LABELS, status=500).inc()
            raise


@ETOSv0.delete("/etos/{suite_id}", tags=["etos"], response_model=AbortEtosResponse)
async def abort_etos(suite_id: str):
    """Abort ETOS execution on delete.

    The request duration is observed in REQUEST_TIME and the outcome is counted
    in REQUESTS_TOTAL, labelled with the resulting status code.

    :param suite_id: The suite ID from the request path, passed on to the abort routine.
    :type suite_id: str
    :raises HTTPException: Re-raised unchanged after being counted.
    :return: JSON dictionary with response.
    :rtype: dict
    """
    # FastAPI registers the function handed to the route decorator and returns
    # it unchanged, so a timer decorator stacked above the route decorator is
    # never run for HTTP requests. Time the request here instead, where the
    # timer also spans the awaited work.
    with REQUEST_TIME.labels(**STOP_LABELS).time():
        try:
            with TRACER.start_as_current_span("abort-etos"):
                response = await _abort(suite_id)
            REQUESTS_TOTAL.labels(**STOP_LABELS, status=200).inc()
            return response
        except HTTPException as http_exception:
            REQUESTS_TOTAL.labels(**STOP_LABELS, status=http_exception.status_code).inc()
            raise
        except Exception:  # pylint:disable=broad-except
            # Anything that is not an HTTPException is counted as a 500.
            LOGGER.exception("Unhandled exception occurred")
            REQUESTS_TOTAL.labels(**STOP_LABELS, status=500).inc()
            raise


@ETOSv0.get("/ping", tags=["etos"], status_code=204)
Expand Down
104 changes: 73 additions & 31 deletions python/src/etos_api/routers/v1alpha/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,30 +20,25 @@
from uuid import uuid4

from etos_lib import ETOS
from etos_lib.kubernetes.schemas.testrun import (
TestRun as TestRunSchema,
TestRunSpec,
Providers,
Image,
Metadata,
Retention,
TestRunner,
)
from etos_lib.kubernetes import TestRun, Environment, Kubernetes
from etos_lib.kubernetes import Environment, Kubernetes, TestRun
from etos_lib.kubernetes.schemas.testrun import Image, Metadata, Providers, Retention
from etos_lib.kubernetes.schemas.testrun import TestRun as TestRunSchema
from etos_lib.kubernetes.schemas.testrun import TestRunner, TestRunSpec
from fastapi import FastAPI, HTTPException
from starlette.responses import Response
from opentelemetry import trace, context
from opentelemetry import context, trace
from opentelemetry.propagate import inject
from opentelemetry.trace import Span
from starlette.responses import Response

from etos_api.library.metrics import OPERATIONS, REQUEST_TIME, REQUESTS_TOTAL

from .schemas import AbortTestrunResponse, StartTestrunRequest, StartTestrunResponse
from .utilities import (
wait_for_artifact_created,
download_suite,
validate_suite,
convert_to_rfc1123,
download_suite,
recipes_from_tests,
validate_suite,
wait_for_artifact_created,
)

ETOSv1Alpha = FastAPI(
Expand All @@ -52,11 +47,24 @@
summary="API endpoints for ETOS v1 Alpha",
root_path_in_servers=False,
)

# Static metric labels for the endpoints in this router.
API = f"/api/{ETOSv1Alpha.version}/testrun"
START_LABELS = {"endpoint": API, "operation": OPERATIONS.start_testrun.name}
# The path parameter placeholders (e.g. "{suite_id}") are kept as literal
# strings to show that the endpoint has a path parameter. The actual value is
# deliberately left out of the label since it would create a high cardinality
# metric.
STOP_LABELS = {"endpoint": f"{API}/{{suite_id}}", "operation": OPERATIONS.stop_testrun.name}
SUBSUITE_LABELS = {
    # The placeholder mirrors the path parameter name of the get_subsuite route.
    "endpoint": f"{API}/{{sub_suite_id}}",
    "operation": OPERATIONS.get_subsuite.name,
}

TRACER = trace.get_tracer("etos_api.routers.testrun.router")
LOGGER = logging.getLogger(__name__)
# Only let warnings and above through from the "pika" logger.
logging.getLogger("pika").setLevel(logging.WARNING)


@ETOSv1Alpha.post("/testrun", tags=["etos"], response_model=StartTestrunResponse)
async def start_testrun(etos: StartTestrunRequest):
    """Start ETOS testrun on post.

    The request duration is observed in REQUEST_TIME and the outcome is counted
    in REQUESTS_TOTAL, labelled with the resulting status code.

    :param etos: The request body describing the testrun to start.
    :type etos: :obj:`StartTestrunRequest`
    :raises HTTPException: Re-raised unchanged after being counted.
    :return: JSON dictionary with response.
    :rtype: dict
    """
    # FastAPI registers the function handed to the route decorator and returns
    # it unchanged, so a timer decorator stacked above the route decorator is
    # never run for HTTP requests. Time the request here instead, where the
    # timer also spans the awaited work.
    with REQUEST_TIME.labels(**START_LABELS).time():
        try:
            with TRACER.start_as_current_span("start-etos") as span:
                response = await _create_testrun(etos, span)
            REQUESTS_TOTAL.labels(**START_LABELS, status=200).inc()
            return response
        except HTTPException as http_exception:
            REQUESTS_TOTAL.labels(**START_LABELS, status=http_exception.status_code).inc()
            raise
        except Exception:  # pylint:disable=broad-except
            # Anything that is not an HTTPException is counted as a 500.
            LOGGER.exception("Unhandled exception occurred")
            REQUESTS_TOTAL.labels(**START_LABELS, status=500).inc()
            raise


@ETOSv1Alpha.delete("/testrun/{suite_id}", tags=["etos"], response_model=AbortTestrunResponse)
async def abort_testrun(suite_id: str):
    """Abort ETOS testrun on delete.

    The request duration is observed in REQUEST_TIME and the outcome is counted
    in REQUESTS_TOTAL, labelled with the resulting status code.

    :param suite_id: The suite ID from the request path, passed on to the abort routine.
    :type suite_id: str
    :raises HTTPException: Re-raised unchanged after being counted.
    :return: JSON dictionary with response.
    :rtype: dict
    """
    # FastAPI registers the function handed to the route decorator and returns
    # it unchanged, so a timer decorator stacked above the route decorator is
    # never run for HTTP requests. Time the request here instead, where the
    # timer also spans the awaited work.
    with REQUEST_TIME.labels(**STOP_LABELS).time():
        try:
            with TRACER.start_as_current_span("abort-etos"):
                response = await _abort(suite_id)
            REQUESTS_TOTAL.labels(**STOP_LABELS, status=200).inc()
            return response
        except HTTPException as http_exception:
            REQUESTS_TOTAL.labels(**STOP_LABELS, status=http_exception.status_code).inc()
            raise
        except Exception:  # pylint:disable=broad-except
            # Anything that is not an HTTPException is counted as a 500.
            LOGGER.exception("Unhandled exception occurred")
            REQUESTS_TOTAL.labels(**STOP_LABELS, status=500).inc()
            raise


@ETOSv1Alpha.get("/testrun/{sub_suite_id}", tags=["etos"])
async def get_subsuite(sub_suite_id: str) -> dict:
    """Get sub suite returns the sub suite definition for the ETOS test runner.

    The request duration is observed in REQUEST_TIME and the outcome is counted
    in REQUESTS_TOTAL. Both use the static SUBSUITE_LABELS, so the actual sub
    suite ID is never used as a metric label value (that would create a high
    cardinality metric).

    :param sub_suite_id: The name of the Environment kubernetes resource.
    :raises HTTPException: 404 if the Environment resource could not be retrieved.
    :return: JSON dictionary with the Environment spec. Formatted to TERCC format.
    """
    # FastAPI registers the function handed to the route decorator and returns
    # it unchanged, so a timer decorator stacked above the route decorator is
    # never run for HTTP requests. Time the request here instead, where the
    # timer also spans the awaited work.
    with REQUEST_TIME.labels(**SUBSUITE_LABELS).time():
        try:
            environment_client = Environment(Kubernetes())
            environment_resource = environment_client.get(sub_suite_id)
            if not environment_resource:
                raise HTTPException(404, "Failed to get environment")
            environment_spec = environment_resource.to_dict().get("spec", {})
            recipes = await recipes_from_tests(environment_spec["recipes"])
            environment_spec["recipes"] = recipes
            REQUESTS_TOTAL.labels(**SUBSUITE_LABELS, status=200).inc()
            return environment_spec
        except HTTPException as http_exception:
            # Includes the 404 raised above.
            REQUESTS_TOTAL.labels(**SUBSUITE_LABELS, status=http_exception.status_code).inc()
            raise
        except Exception:  # pylint:disable=broad-except
            # Anything that is not an HTTPException is counted as a 500.
            LOGGER.exception("Unhandled exception occurred")
            REQUESTS_TOTAL.labels(**SUBSUITE_LABELS, status=500).inc()
            raise


@ETOSv1Alpha.get("/ping", tags=["etos"], status_code=204)
Expand Down
Loading