diff --git a/main.py b/main.py index 6e2a97c..6987302 100644 --- a/main.py +++ b/main.py @@ -11,6 +11,7 @@ if telemetry.ENABLE_TELEMETRY: print("WARNING: Running telemetry.", flush=True) + telemetry.setting_app_name(app_name) telemetry.setting_otlp(app, app_name=app_name, endpoint=OTLP_GRPC_ENDPOINT) app.add_middleware(telemetry.PrometheusMiddleware, app_name=app_name) app.add_route("/metrics", telemetry.metrics) diff --git a/start b/start index 062d9ce..34d94ef 100755 --- a/start +++ b/start @@ -33,8 +33,6 @@ docker run -d --rm \ -p $DEBUG_PORT:$DEBUG_PORT \ -e ENABLE_TELEMETRY=$ENABLE_TELEMETRY \ --network dev-setup_default \ ---log-driver=loki \ ---log-opt loki-url="http://$HOST_IP:3100/loki/api/v1/push" \ refinery-authorizer-dev $CMD > /dev/null 2>&1 echo -ne '\t\t\t [done]\n' diff --git a/telemetry.py b/telemetry.py index 4164283..d8ac2b9 100644 --- a/telemetry.py +++ b/telemetry.py @@ -22,6 +22,8 @@ from starlette.status import HTTP_500_INTERNAL_SERVER_ERROR from starlette.types import ASGIApp + +APP_NAME = os.getenv("APP_NAME") ENABLE_TELEMETRY = os.getenv("ENABLE_TELEMETRY", "false") == "true" INFO = Gauge("fastapi_app_info", "FastAPI application information.", ["app_name"]) @@ -50,6 +52,41 @@ "Gauge of requests by method and path currently being processed", ["method", "path", "app_name"], ) +TASKS_IN_PROGRESS = Gauge( + "cognition_tasks_in_progress", + "Indicates if the task master thread is running (1) or not (0)", + ["task_name", "app_name"], +) +TASKS_PROCESSED = Counter( + "cognition_task_processed_total", + "Total items processed by the task", + ["task_name", "app_name"], +) +TASKS_ERRORS = Counter( + "cognition_task_errors_total", + "Total errors encountered by the task", + ["task_name", "app_name"], +) +WEBSOCKET_EXTERNAL_SUCCESS = Counter( + "cognition_websocket_external_success_total", + "Total successful external websocket connections", + ["app_name", "org_id", "project_id"], +) +WEBSOCKET_EXTERNAL_FAILURE = Counter( + "cognition_websocket_external_failure_total", + "Total failed external websocket connections", + ["app_name", "org_id", "project_id"], +) +WEBSOCKET_INTERNAL_SUCCESS = Counter( + "cognition_websocket_internal_success_total", + "Total successful internal websocket connections", + ["app_name", "org_id", "project_id"], +) +WEBSOCKET_INTERNAL_FAILURE = Counter( + "cognition_websocket_internal_failure_total", + "Total failed internal websocket connections", + ["app_name", "org_id", "project_id"], +) class PrometheusMiddleware(BaseHTTPMiddleware): @@ -122,12 +159,20 @@ def metrics(request: Request) -> Response: ) +def setting_app_name(app_name: str) -> None: + global APP_NAME + if APP_NAME is None: + APP_NAME = app_name + + def setting_otlp( app: ASGIApp, app_name: str, endpoint: str, log_correlation: bool = True ) -> None: # Setting OpenTelemetry # set the service name to show in traces - resource = Resource.create(attributes={"service.name": app_name}) + resource = Resource.create( + attributes={"service.name": app_name, "compose_service": app_name} + ) # set the tracer provider tracer = TracerProvider(resource=resource)