Skip to content

Commit d2e6e28

Browse files
authored
Add prometheus metrics (#75)
* Add setup + dependencies and barebones metrics.py * Update config-default.yaml * Add api count metrics, add `get_user_id` method * Add run.sh and update _common_setup setup_prometheus shell scripts * Add metrics unit test * update `pyproject.toml` and poetry lock * Separate setting up database to be able to use it only for tests
1 parent 8de8a62 commit d2e6e28

File tree

17 files changed

+862
-549
lines changed

17 files changed

+862
-549
lines changed

Dockerfile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,6 @@ USER gen3
3636

3737
WORKDIR /${appname}
3838

39-
CMD ["poetry", "run", "gunicorn", "gen3workflow.app:app", "-k", "uvicorn.workers.UvicornWorker", "-c", "gunicorn.conf.py"]
39+
RUN chmod 755 bin/run.sh
40+
41+
CMD ["bash", "bin/run.sh"]

bin/_common_setup.sh

Lines changed: 8 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -6,45 +6,16 @@ set -e
66

77
CURRENT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
88

9+
if [ -f "/src/gen3-workflow-config.yaml" ]; then
10+
# For multi-worker Gunicorn setups; requires PROMETHEUS_MULTIPROC_DIR to be set before startup,
11+
# here we assume the config file is mounted at /src via cloud-automation.
12+
PROMETHEUS_MULTIPROC_DIR=$(grep 'PROMETHEUS_MULTIPROC_DIR:' /src/gen3-workflow-config.yaml | awk -F': ' '{print $2}' | tr -d '"')
13+
else
14+
PROMETHEUS_MULTIPROC_DIR=""
15+
fi
916
# Source the environment variables from the metrics setup script
10-
# source "${CURRENT_DIR}/setup_prometheus"
17+
source "${CURRENT_DIR}/setup_prometheus.sh" $PROMETHEUS_MULTIPROC_DIR
1118

1219
echo "installing dependencies with 'poetry install -vv'..."
1320
poetry install -vv
1421
poetry env info
15-
echo "ensuring db exists"
16-
17-
# Get the username, password, host, port, and database name
18-
db_settings=$(poetry run python $CURRENT_DIR/../gen3workflow/config.py | tail -1)
19-
if [ -z "${db_settings}" ]; then
20-
echo "'gen3workflow/config.py' did not return DB settings"
21-
exit 1
22-
fi
23-
db_settings_array=($db_settings)
24-
HOST=${db_settings_array[0]}
25-
PORT=${db_settings_array[1]}
26-
USER=${db_settings_array[2]}
27-
PASSWORD=${db_settings_array[3]}
28-
DB_NAME=${db_settings_array[4]}
29-
30-
if [ -z "${HOST}" ] || [ -z "${PORT}" ] || [ -z "${USER}" ] || [ -z "${PASSWORD}" ] || [ -z "${DB_NAME}" ]; then
31-
echo "Failed to extract one or more components from DB settings"
32-
exit 1
33-
fi
34-
35-
echo "Extracted database name: ${DB_NAME}"
36-
echo "Extracted username: ${USER}"
37-
38-
# Check if the database exists
39-
# Use the full connection string to connect directly
40-
if [ "$( PGPASSWORD="${PASSWORD}" psql -h "${HOST}" -p "${PORT}" -U "${USER}" -d postgres -XtAc "SELECT 1 FROM pg_database WHERE datname='${DB_NAME}'" )" = '1' ]
41-
then
42-
echo "Database ${DB_NAME} already exists."
43-
else
44-
echo "Database ${DB_NAME} does not exist. Creating it..."
45-
# Connect to the default postgres database to create the new database
46-
PGPASSWORD="${PASSWORD}" psql -h "${HOST}" -p "${PORT}" -U "${USER}" -d postgres -c "CREATE DATABASE \"${DB_NAME}\";"
47-
fi
48-
49-
echo "running db migration with 'poetry run alembic upgrade head'..."
50-
poetry run alembic upgrade head

bin/_setup_db.sh

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Ensure the configured Postgres database exists, then run Alembic migrations.
# NOTE(review): this script appears to be meant to be *sourced* after
# _common_setup.sh, which provides CURRENT_DIR and the poetry env — confirm callers.
echo "ensuring db exists"

# Get the username, password, host, port, and database name.
# config.py prints the DB settings as its last output line, space-separated:
# "<host> <port> <user> <password> <db name>".
db_settings=$(poetry run python "$CURRENT_DIR/../gen3workflow/config.py" | tail -1)
if [ -z "${db_settings}" ]; then
    echo "'gen3workflow/config.py' did not return DB settings"
    exit 1
fi

# Use `read` instead of an unquoted array assignment (`arr=($db_settings)`) so
# that glob characters in the password (e.g. "*") are not expanded by the
# shell. The trailing `_extra` swallows any unexpected extra fields.
read -r HOST PORT USER PASSWORD DB_NAME _extra <<< "${db_settings}"

if [ -z "${HOST}" ] || [ -z "${PORT}" ] || [ -z "${USER}" ] || [ -z "${PASSWORD}" ] || [ -z "${DB_NAME}" ]; then
    echo "Failed to extract one or more components from DB settings"
    exit 1
fi

echo "Extracted database name: ${DB_NAME}"
echo "Extracted username: ${USER}"

# Check if the database exists.
# psql flags: -X skip psqlrc, -t tuples only, -A unaligned output, -c run query.
# Connect to the default "postgres" database since the target may not exist yet.
if [ "$( PGPASSWORD="${PASSWORD}" psql -h "${HOST}" -p "${PORT}" -U "${USER}" -d postgres -XtAc "SELECT 1 FROM pg_database WHERE datname='${DB_NAME}'" )" = '1' ]
then
    echo "Database ${DB_NAME} already exists."
else
    echo "Database ${DB_NAME} does not exist. Creating it..."
    # Connect to the default postgres database to create the new database
    PGPASSWORD="${PASSWORD}" psql -h "${HOST}" -p "${PORT}" -U "${USER}" -d postgres -c "CREATE DATABASE \"${DB_NAME}\";"
fi

echo "running db migration with 'poetry run alembic upgrade head'..."
poetry run alembic upgrade head

bin/run.sh

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/usr/bin/env bash
set -e

# Mostly simulates the production run of the app as described in the Dockerfile.
# Uses Gunicorn, multiple Uvicorn workers.
# Small config overrides for local dev, like hot reload when the code is
# modified, and logs to stdout.

# CURRENT_DIR is the repository root (one level above bin/).
CURRENT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
echo "$CURRENT_DIR"
export ENV="production"

source "${CURRENT_DIR}/bin/_common_setup.sh"

# TODO: if we need a DB later, run:
# source "${CURRENT_DIR}/bin/_setup_db.sh"

poetry run gunicorn \
    gen3workflow.app:app \
    -k uvicorn.workers.UvicornWorker \
    -c gunicorn.conf.py \
    --reload \
    --access-logfile - \
    --error-logfile -

bin/setup_prometheus.sh

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#!/usr/bin/env bash
# Prepares the prometheus multiproc folder to store the metrics from separate
# workers (per PID).
#
# This script is called by:
#   bin/_common_setup.sh (used by the Dockerfile entrypoint & bin/run.sh)
#     - So local runs setup necessary environment vars and folders for prometheus metrics
#   Test framework in conftest
#     - So test runs setup necessary environment vars and folders for prometheus metrics

# Usage:
#   ./setup_prometheus.sh [DIR] [SETUP_DIR]

# Default directory if no argument is provided
DIR=${1:-/var/tmp/prometheus_metrics}

# Determine whether to wipe the directory (default is to wipe)
SETUP_DIR=${2:-true}

set -ex

if [[ "$SETUP_DIR" == "true" ]]; then
    # Log $DIR here, not $PROMETHEUS_MULTIPROC_DIR: the latter is only
    # exported at the end of this script, so it would print empty or stale.
    echo "setting up $DIR. clearing existing files, ensuring it exists, chmod 755"
    rm -Rf "$DIR"
    mkdir -p "$DIR"
    chmod 755 "$DIR"
fi

# In the Docker image the app runs as the "gen3" user; give it ownership of
# the metrics dir when that user exists (no-op in local/dev environments).
if id -u gen3 &>/dev/null; then
    chown "$(id -u gen3)":"$(id -g gen3)" "$DIR"
fi

export PROMETHEUS_MULTIPROC_DIR="$DIR"
echo "PROMETHEUS_MULTIPROC_DIR is $PROMETHEUS_MULTIPROC_DIR"

bin/test.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ set -e
44
CURRENT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
55

66
source "${CURRENT_DIR}/_common_setup.sh"
7+
source "${CURRENT_DIR}/_setup_db.sh"
78

89
echo "running tests with 'pytest'..."
910
poetry run pytest -vv --cov=gen3workflow --cov=migrations --cov-report term-missing:skip-covered --cov-report xml

docs/metrics.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
## Metrics
2+
3+
Metrics can be exposed at a `/metrics` endpoint compatible with Prometheus scraping and visualized in Prometheus or
4+
Grafana, etc.
5+
6+
The metrics are defined in `gen3workflow/metrics.py` as follows:
7+
8+
* **gen3_workflow_api_requests_total**: API requests made to the Gen3-Workflow service.
9+
* **More metrics yet to be decided**
10+
11+
You can [run Prometheus locally](https://github.com/prometheus/prometheus) if you want to test or visualize these.

docs/openapi.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ paths:
4747
content:
4848
application/json:
4949
schema:
50+
additionalProperties: true
5051
title: Response Get Status Get
5152
type: object
5253
description: Successful Response
@@ -61,6 +62,7 @@ paths:
6162
content:
6263
application/json:
6364
schema:
65+
additionalProperties: true
6466
title: Response Get Status Status Get
6567
type: object
6668
description: Successful Response
@@ -75,6 +77,7 @@ paths:
7577
content:
7678
application/json:
7779
schema:
80+
additionalProperties: true
7881
title: Response Get Version Version Get
7982
type: object
8083
description: Successful Response
@@ -89,6 +92,7 @@ paths:
8992
content:
9093
application/json:
9194
schema:
95+
additionalProperties: true
9296
title: Response Service Info Ga4Gh Tes V1 Service Info Get
9397
type: object
9498
description: Successful Response
@@ -105,6 +109,7 @@ paths:
105109
content:
106110
application/json:
107111
schema:
112+
additionalProperties: true
108113
title: Response List Tasks Ga4Gh Tes V1 Tasks Get
109114
type: object
110115
description: Successful Response
@@ -120,6 +125,7 @@ paths:
120125
content:
121126
application/json:
122127
schema:
128+
additionalProperties: true
123129
title: Response Create Task Ga4Gh Tes V1 Tasks Post
124130
type: object
125131
description: Successful Response
@@ -143,6 +149,7 @@ paths:
143149
content:
144150
application/json:
145151
schema:
152+
additionalProperties: true
146153
title: Response Get Task Ga4Gh Tes V1 Tasks Task Id Get
147154
type: object
148155
description: Successful Response
@@ -172,6 +179,7 @@ paths:
172179
content:
173180
application/json:
174181
schema:
182+
additionalProperties: true
175183
title: Response Cancel Task Ga4Gh Tes V1 Tasks Task Id Cancel Post
176184
type: object
177185
description: Successful Response
@@ -451,6 +459,7 @@ paths:
451459
content:
452460
application/json:
453461
schema:
462+
additionalProperties: true
454463
title: Response Get Storage Info Storage Info Get
455464
type: object
456465
description: Successful Response

gen3workflow/app.py

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,21 @@
11
from fastapi import FastAPI
2+
from fastapi.security import HTTPAuthorizationCredentials
23
import httpx
34
from importlib.metadata import version
45
import os
6+
import time
57

68
from cdislogging import get_logger
79
from gen3authz.client.arborist.async_client import ArboristClient
8-
10+
from fastapi import Request, HTTPException
911
from gen3workflow import logger
1012
from gen3workflow.config import config
13+
from gen3workflow.metrics import Metrics
1114
from gen3workflow.routes.ga4gh_tes import router as ga4gh_tes_router
1215
from gen3workflow.routes.s3 import router as s3_router
1316
from gen3workflow.routes.storage import router as storage_router
1417
from gen3workflow.routes.system import router as system_router
18+
from gen3workflow.auth import Auth
1519

1620

1721
def get_app(httpx_client=None) -> FastAPI:
@@ -54,6 +58,49 @@ def get_app(httpx_client=None) -> FastAPI:
5458
logger=get_logger("gen3workflow.gen3authz", log_level=log_level),
5559
)
5660

61+
app.metrics = Metrics(
62+
enabled=config["ENABLE_PROMETHEUS_METRICS"],
63+
prometheus_dir=config["PROMETHEUS_MULTIPROC_DIR"],
64+
)
65+
66+
if app.metrics.enabled:
67+
app.mount("/metrics", app.metrics.get_asgi_app())
68+
69+
@app.middleware("http")
70+
async def middleware_log_response_and_api_metric(
71+
request: Request, call_next
72+
) -> None:
73+
"""
74+
This FastAPI middleware effectively allows pre and post logic to a request.
75+
76+
We are using this to log the response consistently across defined endpoints (including execution time).
77+
78+
Args:
79+
request (Request): the incoming HTTP request
80+
call_next (Callable): function to call (this is handled by FastAPI's middleware support)
81+
"""
82+
start_time = time.perf_counter()
83+
response = await call_next(request)
84+
response_time_seconds = time.perf_counter() - start_time
85+
86+
path = request.url.path
87+
method = request.method
88+
89+
# NOTE: If adding more endpoints to metrics, try making it configurable using a list of paths and methods in config.
90+
# For now, we are only interested in the "/ga4gh/tes/v1/tasks" endpoint for metrics.
91+
if method != "POST" or path != "/ga4gh/tes/v1/tasks":
92+
return response
93+
94+
metrics = app.metrics
95+
metrics.add_create_task_api_interaction(
96+
method=method,
97+
path=path,
98+
response_time_seconds=response_time_seconds,
99+
status_code=response.status_code,
100+
)
101+
102+
return response
103+
57104
return app
58105

59106

gen3workflow/auth.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from typing import Union
12
from authutils.token.fastapi import access_token
23
from fastapi import HTTPException, Security
34
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
@@ -65,6 +66,13 @@ async def get_token_claims(self) -> dict:
6566

6667
return token_claims
6768

69+
async def get_user_id(self) -> Union[str, None]:
    """
    Best-effort lookup of the current user's unique ID.

    Returns:
        The "sub" claim from the request's token claims, or None when the
        claims cannot be retrieved (e.g. no token, or an invalid one).
    """
    try:
        claims = await self.get_token_claims()
    except Exception:
        # No usable token: treat the caller as anonymous instead of failing.
        return None
    return claims.get("sub")
75+
6876
async def authorize(
6977
self,
7078
method: str,

0 commit comments

Comments
 (0)