Skip to content

Commit 89fef0d

Browse files
authored
✨Adding tracing in fastapi-based services (⚠️ devops) (ITISFoundation#2558)
1 parent a50b617 commit 89fef0d

File tree

41 files changed

+401
-302
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+401
-302
lines changed

.env-devel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ STORAGE_ENDPOINT=storage:8080
6767

6868
TRACING_ENABLED=1
6969
TRACING_ZIPKIN_ENDPOINT=http://jaeger:9411
70+
TRACING_THRIFT_COMPACT_ENDPOINT=http://jaeger:5775
7071

7172
TRAEFIK_SIMCORE_ZONE=internal_simcore_stack
7273

.github/workflows/ci-testing-deploy.yml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -354,9 +354,9 @@ jobs:
354354
restore-keys: |
355355
${{ runner.os }}-pip-
356356
- name: install
357-
run: ./ci/github/unit-testing/director_v2.bash install
357+
run: ./ci/github/unit-testing/director-v2.bash install
358358
- name: test
359-
run: ./ci/github/unit-testing/director_v2.bash test
359+
run: ./ci/github/unit-testing/director-v2.bash test
360360
- name: upload failed tests logs
361361
if: failure()
362362
uses: actions/upload-artifact@v2
@@ -747,7 +747,6 @@ jobs:
747747
name: codeclimate-${{ github.job }}-coverage
748748
path: codeclimate.${{ github.job }}_coverage.json
749749

750-
751750
unit-test-service-library:
752751
timeout-minutes: 14 # if this timeout gets too small, then split the tests
753752
name: "[unit] service-library"

ci/github/unit-testing/catalog.bash

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,26 @@
11
#!/bin/bash
22
# http://redsymbol.net/articles/unofficial-bash-strict-mode/
3-
set -o errexit # abort on nonzero exitstatus
4-
set -o nounset # abort on unbound variable
5-
set -o pipefail # don't hide errors within pipes
3+
set -o errexit # abort on nonzero exitstatus
4+
set -o nounset # abort on unbound variable
5+
set -o pipefail # don't hide errors within pipes
66
IFS=$'\n\t'
77

88
install() {
9-
bash ci/helpers/ensure_python_pip.bash
10-
pushd services/catalog; pip3 install -r requirements/ci.txt; popd
11-
pip list --verbose
9+
bash ci/helpers/ensure_python_pip.bash
10+
pushd services/catalog
11+
pip3 install -r requirements/ci.txt
12+
popd
13+
pip list --verbose
1214
}
1315

1416
test() {
15-
pytest --cov=simcore_service_catalog --durations=10 --cov-append \
16-
--color=yes --cov-report=term-missing --cov-report=xml --cov-config=.coveragerc \
17-
-v -m "not travis" services/catalog/tests/unit
17+
pytest --cov=simcore_service_catalog --durations=10 --cov-append \
18+
--color=yes --cov-report=term-missing --cov-report=xml --cov-config=.coveragerc \
19+
-v -m "not travis" services/catalog/tests/unit
1820
}
1921

2022
# Check if the function exists (bash specific)
21-
if declare -f "$1" > /dev/null
22-
then
23+
if declare -f "$1" >/dev/null; then
2324
# call arguments verbatim
2425
"$@"
2526
else

ci/github/unit-testing/director_v2.bash renamed to ci/github/unit-testing/director-v2.bash

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ test() {
1919
# these tests cannot be run in parallel
2020
pytest --cov=simcore_service_director_v2 --durations=10 --cov-append \
2121
--color=yes --cov-report=term-missing --cov-report=xml --cov-config=.coveragerc \
22-
-v -m "not travis" services/director-v2/tests/unit/with_dbs services/director-v2/tests/unit/with_swarm;
22+
-v -m "not travis" services/director-v2/tests/unit/with_swarm services/director-v2/tests/unit/with_dbs;
2323
}
2424

2525
# Check if the function exists (bash specific)

packages/models-library/src/models_library/settings/http_clients.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@
44

55

66
class ClientRequestSettings(BaseSettings):
7-
# NOTE: when updating the defaults please make sure to search for the env vars
8-
# in all the project, they also need to be updated inside the service-library
7+
# NOTE: These entries are used in some old services as well. These need to be updated if these
8+
# variable names or defaults are changed.
99
total_timeout: Optional[int] = Field(
1010
default=20,
11-
description="timeout used for outgoing http requests",
11+
description="timeout in seconds used for outgoing http requests",
1212
env="HTTP_CLIENT_REQUEST_TOTAL_TIMEOUT",
1313
)
1414

packages/pytest-simcore/src/pytest_simcore/docker_compose.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import pytest
1919
import yaml
20+
from _pytest.config import ExitCode
2021
from dotenv import dotenv_values
2122

2223
from .helpers import (
@@ -234,6 +235,15 @@ def pytest_exception_interact(node, call, report):
234235
save_docker_infos(failed_test_directory)
235236

236237

238+
@pytest.hookimpl()
def pytest_sessionfinish(session: pytest.Session, exitstatus: ExitCode) -> None:
    """Call ``save_docker_infos`` once the session ends, if tests failed.

    Nothing happens unless ``exitstatus`` equals ``ExitCode.TESTS_FAILED``.
    In that case the target directory is
    ``<session.fspath>/test_failures/<session.name>``.
    """
    if exitstatus != ExitCode.TESTS_FAILED:
        return

    # the session's root dir is guaranteed to exist
    dump_directory: Path = Path(session.fspath) / "test_failures" / session.name
    save_docker_infos(dump_directory)
245+
246+
237247
# HELPERS ---------------------------------------------
238248
def _minio_fix(service_environs: Dict) -> Dict:
239249
"""this hack ensures that S3 is accessed from the host at all time, thus pre-signed links work."""

packages/pytest-simcore/src/pytest_simcore/docker_swarm.py

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515
import pytest
1616
import tenacity
1717
import yaml
18+
from docker.errors import APIError
19+
from tenacity.before_sleep import before_sleep_log
20+
from tenacity.stop import stop_after_attempt, stop_after_delay
21+
from tenacity.wait import wait_exponential, wait_fixed
1822

1923
from .helpers.utils_docker import get_ip
2024

@@ -36,7 +40,7 @@ def _in_docker_swarm(
3640
docker_client.swarm.reload()
3741
inspect_result = docker_client.swarm.attrs
3842
assert type(inspect_result) == dict
39-
except docker.errors.APIError as error:
43+
except APIError as error:
4044
if raise_error:
4145
raise _NotInSwarmException() from error
4246
return False
@@ -45,8 +49,8 @@ def _in_docker_swarm(
4549

4650
def _attempt_for(retry_error_cls: Type[Exception]) -> tenacity.Retrying:
4751
return tenacity.Retrying(
48-
wait=tenacity.wait_exponential(),
49-
stop=tenacity.stop_after_delay(15),
52+
wait=wait_exponential(),
53+
stop=stop_after_delay(15),
5054
retry_error_cls=retry_error_cls,
5155
)
5256

@@ -77,17 +81,8 @@ def docker_swarm(
7781

7882
yield
7983

80-
for attempt in _attempt_for(retry_error_cls=_StillInSwarmException):
81-
with attempt:
82-
if _in_docker_swarm(docker_client):
83-
if not keep_docker_up:
84-
assert docker_client.swarm.leave(force=True)
85-
86-
if _in_docker_swarm(docker_client) and not keep_docker_up:
87-
# if still in swarm, raise an error to try and leave again
88-
raise _StillInSwarmException()
89-
if keep_docker_up:
90-
assert _in_docker_swarm(docker_client) is True
84+
if not keep_docker_up:
85+
assert docker_client.swarm.leave(force=True)
9186

9287
assert _in_docker_swarm(docker_client) is keep_docker_up
9388

@@ -109,9 +104,9 @@ def by_task_update(task: Dict) -> datetime:
109104

110105

111106
@tenacity.retry(
112-
wait=tenacity.wait_fixed(5),
113-
stop=tenacity.stop_after_attempt(20),
114-
before_sleep=tenacity.before_sleep_log(log, logging.INFO),
107+
wait=wait_fixed(5),
108+
stop=stop_after_attempt(20),
109+
before_sleep=before_sleep_log(log, logging.INFO),
115110
reraise=True,
116111
)
117112
def _wait_for_services(docker_client: docker.client.DockerClient) -> None:
@@ -237,4 +232,17 @@ def docker_stack(
237232
):
238233
time.sleep(WAIT_BEFORE_RETRY_SECS)
239234

235+
while docker_client.containers.list(
236+
filters={"label": f"com.docker.stack.namespace={stack}"}
237+
):
238+
time.sleep(WAIT_BEFORE_RETRY_SECS)
239+
240+
for attempt in _attempt_for(retry_error_cls=APIError):
241+
with attempt:
242+
list_of_volumes = docker_client.volumes.list(
243+
filters={"label": f"com.docker.stack.namespace={stack}"}
244+
)
245+
for volume in list_of_volumes:
246+
volume.remove(force=True)
247+
240248
_print_services(docker_client, "[AFTER REMOVED]")

packages/pytest-simcore/src/pytest_simcore/postgres_service.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
import tenacity
1313
from simcore_postgres_database.models.base import metadata
1414
from sqlalchemy.orm import sessionmaker
15+
from tenacity.before_sleep import before_sleep_log
16+
from tenacity.stop import stop_after_attempt
17+
from tenacity.wait import wait_fixed
1518

1619
from .helpers.utils_docker import get_service_published_port
1720

@@ -168,7 +171,7 @@ def postgres_db(
168171
postgres_dsn: Dict[str, str],
169172
postgres_engine: sa.engine.Engine,
170173
) -> Iterator[sa.engine.Engine]:
171-
""" An postgres database init with empty tables and an sqlalchemy engine connected to it """
174+
"""An postgres database init with empty tables and an sqlalchemy engine connected to it"""
172175

173176
# upgrades database from zero
174177
kwargs = postgres_dsn.copy()
@@ -196,7 +199,7 @@ def postgres_db(
196199
async def aiopg_engine(
197200
postgres_db: sa.engine.Engine, loop
198201
) -> Iterator[aiopg.sa.engine.Engine]:
199-
""" An aiopg engine connected to an initialized database """
202+
"""An aiopg engine connected to an initialized database"""
200203
from aiopg.sa import create_engine
201204

202205
engine = await create_engine(str(postgres_db.url))
@@ -234,13 +237,13 @@ def postgres_session(postgres_db: sa.engine.Engine) -> sa.orm.session.Session:
234237

235238

236239
@tenacity.retry(
237-
wait=tenacity.wait_fixed(5),
238-
stop=tenacity.stop_after_attempt(60),
239-
before_sleep=tenacity.before_sleep_log(log, logging.INFO),
240+
wait=wait_fixed(5),
241+
stop=stop_after_attempt(60),
242+
before_sleep=before_sleep_log(log, logging.WARNING),
240243
reraise=True,
241244
)
242245
def wait_till_postgres_is_responsive(url: str) -> None:
243-
print("Trying", url, "...")
244246
engine = sa.create_engine(url, isolation_level="AUTOCOMMIT")
245247
conn = engine.connect()
246248
conn.close()
249+
log.info("Connected with %s", url)
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from fastapi import FastAPI
2+
from fastapi_contrib.conf import settings
3+
from fastapi_contrib.tracing.middlewares import OpentracingMiddleware
4+
from fastapi_contrib.tracing.utils import setup_opentracing
5+
from settings_library.tracing import TracingSettings
6+
7+
8+
def setup_tracing(app: FastAPI, tracing_settings: TracingSettings):
    """Register a startup handler on ``app`` that configures opentracing.

    Nothing is configured at call time. When the "startup" event fires, the
    handler copies the client name and the thrift-compact endpoint host/port
    from ``tracing_settings`` into the ``fastapi_contrib`` settings object,
    then calls ``setup_opentracing`` and adds ``OpentracingMiddleware``.
    """

    async def _configure_tracing_on_startup() -> None:
        # fastapi_contrib reads its configuration from this shared settings object
        settings.service_name = tracing_settings.TRACING_CLIENT_NAME

        agent_endpoint = tracing_settings.TRACING_THRIFT_COMPACT_ENDPOINT
        settings.jaeger_host = agent_endpoint.host
        settings.jaeger_port = agent_endpoint.port

        setup_opentracing(app)
        app.add_middleware(OpentracingMiddleware)

    app.add_event_handler("startup", _configure_tracing_on_startup)
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from typing import Optional
2+
3+
from pydantic import Field
4+
5+
from .base import BaseCustomSettings
6+
7+
8+
class ClientRequestSettings(BaseCustomSettings):
    # Timeout settings for outgoing HTTP client requests.
    #
    # NOTE: These entries are used in some old services as well. Those need to
    # be updated if these variable names or defaults are changed.

    # overall timeout, in seconds
    HTTP_CLIENT_REQUEST_TOTAL_TIMEOUT: Optional[int] = Field(
        20,
        description="timeout in seconds used for outgoing http requests",
    )

    # connection acquisition timeout; unset (None) by default
    HTTP_CLIENT_REQUEST_AIOHTTP_CONNECT_TIMEOUT: Optional[int] = Field(
        None,
        description=(
            "Maximal number of seconds for acquiring a connection from pool. "
            "The time consists connection establishment for a new connection "
            "or waiting for a free connection from a pool if pool connection "
            "limits are exceeded. "
            "For pure socket connection establishment time use sock_connect."
        ),
    )

    # socket-level connect timeout
    HTTP_CLIENT_REQUEST_AIOHTTP_SOCK_CONNECT_TIMEOUT: Optional[int] = Field(
        5,
        description=(
            "aiohttp specific field used in ClientTimeout, "
            "timeout for connecting to a peer for a new connection not given a pool"
        ),
    )

0 commit comments

Comments
 (0)