Skip to content

Commit cd09172

Browse files
committed
Merge branch 'feature/event-driven-patches' of github.com:odeimaiz/osparc-simcore into feature/event-driven-patches
2 parents d060ac5 + a003be2 commit cd09172

File tree

35 files changed

+1223
-495
lines changed

35 files changed

+1223
-495
lines changed

.github/workflows/ci-testing-deploy.yml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1903,20 +1903,27 @@ jobs:
19031903
run: ./ci/github/system-testing/e2e.bash install
19041904
- name: test
19051905
run: ./ci/github/system-testing/e2e.bash test
1906+
- name: wait before dumping logs
1907+
if: ${{ failure() }}
1908+
run: sleep 5
19061909
- name: dump docker logs
1910+
if: ${{ failure() }}
19071911
id: docker_logs_dump
19081912
run: ./ci/github/system-testing/e2e.bash dump_docker_logs
19091913
- name: upload docker logs
1914+
if: ${{ failure() }}
19101915
uses: actions/upload-artifact@v4
19111916
with:
19121917
name: ${{ github.job }}_docker_logs
19131918
path: ./tests/e2e/test_failures
19141919
- name: upload screenshots
1920+
if: ${{ failure() }}
19151921
uses: actions/upload-artifact@v4
19161922
with:
19171923
name: ${{ github.job }}_screenshots
19181924
path: tests/e2e/screenshots
19191925
- name: upload e2e logs
1926+
if: ${{ failure() }}
19201927
uses: actions/upload-artifact@v4
19211928
with:
19221929
name: ${{ github.job }}_logs
@@ -1955,15 +1962,21 @@ jobs:
19551962
- name: test
19561963
run: |
19571964
./ci/github/system-testing/e2e-playwright.bash test
1965+
- name: wait before dumping logs
1966+
if: ${{ failure() }}
1967+
run: sleep 5
19581968
- name: dump docker logs
1969+
if: ${{ failure() }}
19591970
id: docker_logs_dump
19601971
run: ./ci/github/system-testing/e2e-playwright.bash dump_docker_logs
19611972
- name: upload docker logs
1973+
if: ${{ failure() }}
19621974
uses: actions/upload-artifact@v4
19631975
with:
19641976
name: ${{ github.job }}_docker_logs
19651977
path: ./tests/e2e-playwright/test_failures
19661978
- name: upload tracing if failed
1979+
if: ${{ failure() }}
19671980
uses: actions/upload-artifact@v4
19681981
with:
19691982
name: ${{ github.job }}_tracing

Makefile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -378,8 +378,8 @@ up-devel-frontend: .stack-simcore-development-frontend.yml .init-swarm ## Every
378378
@$(MAKE_C) services/dask-sidecar certificates
379379
# Deploy stack $(SWARM_STACK_NAME) [back-end]
380380
@docker stack deploy --detach=true --with-registry-auth -c $< $(SWARM_STACK_NAME)
381-
@$(MAKE) .deploy-vendors
382381
@$(MAKE) .deploy-ops
382+
@$(MAKE) .deploy-vendors
383383
@$(_show_endpoints)
384384
@$(MAKE_C) services/static-webserver/client follow-dev-logs
385385

@@ -389,8 +389,8 @@ ifeq ($(target),)
389389
@$(MAKE_C) services/dask-sidecar certificates
390390
# Deploy stack $(SWARM_STACK_NAME)
391391
@docker stack deploy --detach=true --with-registry-auth -c $< $(SWARM_STACK_NAME)
392-
@$(MAKE) .deploy-vendors
393392
@$(MAKE) .deploy-ops
393+
@$(MAKE) .deploy-vendors
394394
else
395395
# deploys ONLY $(target) service
396396
@docker compose --file $< up --detach $(target)
@@ -671,6 +671,7 @@ local-registry: .env ## creates a local docker registry and configure simcore to
671671
sudo mv /tmp/daemon.json /etc/docker/daemon.json &&\
672672
echo restarting engine... &&\
673673
sudo service docker restart &&\
674+
sleep 5 &&\
674675
echo done)
675676

676677
@$(if $(shell docker ps --format="{{.Names}}" | grep registry),,\

ci/github/system-testing/e2e-playwright.bash

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ dump_docker_logs() {
3737
for service_id in $(docker service ls -q); do
3838
service_name=$(docker service inspect "$service_id" --format="{{.Spec.Name}}")
3939
echo "Dumping logs for $service_name"
40-
(timeout 30 docker service logs --timestamps --tail=400 --details "$service_id" >"$out_dir/$service_name.log" 2>&1) || true
40+
(timeout 30 docker service logs --timestamps --tail=500 --details "$service_id" >"$out_dir/$service_name.log" 2>&1) || true
4141
done
4242
}
4343

packages/pytest-simcore/src/pytest_simcore/helpers/logging_tools.py

Lines changed: 42 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
import datetime
22
import logging
3-
from collections.abc import Iterator
3+
import warnings
4+
from collections.abc import Callable, Iterator
45
from contextlib import contextmanager
56
from dataclasses import dataclass, field
67
from types import SimpleNamespace
7-
from typing import TypeAlias
8+
from typing import Final, TypeAlias
89

910

1011
def _timedelta_as_minute_second_ms(delta: datetime.timedelta) -> str:
@@ -29,6 +30,19 @@ def _timedelta_as_minute_second_ms(delta: datetime.timedelta) -> str:
2930
return f"{sign}{result.strip()}"
3031

3132

33+
def _resolve(val: str | Callable[[], str], prefix: str, suffix: str) -> str:
34+
try:
35+
return f"{prefix}{val if isinstance(val, str) else val()}{suffix}"
36+
except Exception as exc: # pylint: disable=broad-exception-caught
37+
warnings.warn(
38+
f"Failed to generate {val} message: {exc!r}. "
39+
f"Fix the callable to return a string without raising exceptions.",
40+
UserWarning,
41+
stacklevel=3,
42+
)
43+
return f"❌❌❌ [{val} message generation failed TIP: Check how the {val} message is generated!] ❌❌❌"
44+
45+
3246
class DynamicIndentFormatter(logging.Formatter):
3347
indent_char: str = " "
3448
_cls_indent_level: int = 0
@@ -74,15 +88,26 @@ def setup(cls, logger: logging.Logger) -> None:
7488
DynamicIndentFormatter.setup(test_logger)
7589

7690

91+
# Message formatting constants
92+
_STARTING_PREFIX: Final[str] = "--> "
93+
_STARTING_SUFFIX: Final[str] = " ⏳"
94+
_DONE_PREFIX: Final[str] = "<-- "
95+
_DONE_SUFFIX: Final[str] = " ✅"
96+
_RAISED_PREFIX: Final[str] = "❌❌❌ Error: "
97+
_RAISED_SUFFIX: Final[str] = " ❌❌❌"
98+
99+
77100
@dataclass
78101
class ContextMessages:
79-
starting: str
80-
done: str
81-
raised: str = field(default="")
102+
starting: str | Callable[[], str]
103+
done: str | Callable[[], str]
104+
raised: str | Callable[[], str] = field(default="")
82105

83106
def __post_init__(self):
84107
if not self.raised:
85-
self.raised = f"{self.done} [with error]"
108+
self.raised = (
109+
lambda: f"{self.done if isinstance(self.done, str) else self.done()} [with raised error]"
110+
)
86111

87112

88113
LogLevelInt: TypeAlias = int
@@ -127,9 +152,9 @@ def log_context(
127152

128153
if isinstance(msg, str):
129154
ctx_msg = ContextMessages(
130-
starting=f"-> {msg} starting ...",
131-
done=f"<- {msg} done",
132-
raised=f"! {msg} raised",
155+
starting=f"{msg}",
156+
done=f"{msg}",
157+
raised=f"{msg}",
133158
)
134159
elif isinstance(msg, tuple):
135160
ctx_msg = ContextMessages(*msg)
@@ -140,13 +165,16 @@ def log_context(
140165
try:
141166
DynamicIndentFormatter.cls_increase_indent()
142167

143-
logger.log(level, ctx_msg.starting, *args, **kwargs)
168+
logger.log(
169+
level,
170+
_resolve(ctx_msg.starting, _STARTING_PREFIX, _STARTING_SUFFIX),
171+
*args,
172+
**kwargs,
173+
)
144174
with _increased_logger_indent(logger):
145175
yield SimpleNamespace(logger=logger, messages=ctx_msg)
146176
elapsed_time = datetime.datetime.now(tz=datetime.UTC) - started_time
147-
done_message = (
148-
f"{ctx_msg.done} ({_timedelta_as_minute_second_ms(elapsed_time)})"
149-
)
177+
done_message = f"{_resolve(ctx_msg.done, _DONE_PREFIX, _DONE_SUFFIX)} ({_timedelta_as_minute_second_ms(elapsed_time)})"
150178
logger.log(
151179
level,
152180
done_message,
@@ -156,9 +184,7 @@ def log_context(
156184

157185
except:
158186
elapsed_time = datetime.datetime.now(tz=datetime.UTC) - started_time
159-
error_message = (
160-
f"{ctx_msg.raised} ({_timedelta_as_minute_second_ms(elapsed_time)})"
161-
)
187+
error_message = f"{_resolve(ctx_msg.raised, _RAISED_PREFIX, _RAISED_SUFFIX)} ({_timedelta_as_minute_second_ms(elapsed_time)})"
162188
logger.exception(
163189
error_message,
164190
*args,

packages/pytest-simcore/src/pytest_simcore/helpers/playwright.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
wait_fixed,
4444
)
4545

46-
from .logging_tools import log_context
46+
from .logging_tools import ContextMessages, log_context
4747

4848
_logger = logging.getLogger(__name__)
4949

@@ -532,9 +532,10 @@ def wait_for_pipeline_state(
532532
if current_state in if_in_states:
533533
with log_context(
534534
logging.INFO,
535-
msg=(
536-
f"pipeline is in {current_state=}, waiting for one of {expected_states=}",
537-
f"pipeline is now in {current_state=}",
535+
msg=ContextMessages(
536+
starting=f"wait for one of {expected_states=}",
537+
done=lambda: f"wait for one of {expected_states=}, pipeline reached {current_state=}",
538+
raised=lambda: f"pipeline failed or timed out with {current_state}. Expected one of {expected_states=}",
538539
),
539540
):
540541
waiter = SocketIOProjectStateUpdatedWaiter(
@@ -551,7 +552,7 @@ def wait_for_pipeline_state(
551552
and current_state not in expected_states
552553
):
553554
pytest.fail(
554-
f"❌ Pipeline failed with state {current_state}. Expected one of {expected_states} ❌"
555+
f"❌ Pipeline failed fast with state {current_state}. Expected one of {expected_states} ❌"
555556
)
556557
return current_state
557558

packages/pytest-simcore/src/pytest_simcore/socketio.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ async def web_server(
7070

7171
@pytest.fixture
7272
async def server_url(web_server: URL) -> str:
73-
return f'{web_server.with_path("/")}'
73+
return f"{web_server.with_path('/')}"
7474

7575

7676
@pytest.fixture

packages/service-library/src/servicelib/rabbitmq/rpc_interfaces/webserver/functions/functions_rpc_interface.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
RegisteredFunctionJobCollection,
1818
)
1919
from models_library.functions import (
20+
FunctionJobStatus,
21+
FunctionOutputs,
2022
FunctionUserAccessRights,
2123
FunctionUserApiAccessRights,
2224
)
@@ -300,6 +302,82 @@ async def get_function_job(
300302
return TypeAdapter(RegisteredFunctionJob).validate_python(result)
301303

302304

305+
@log_decorator(_logger, level=logging.DEBUG)
306+
async def get_function_job_status(
307+
rabbitmq_rpc_client: RabbitMQRPCClient,
308+
*,
309+
user_id: UserID,
310+
function_job_id: FunctionJobID,
311+
product_name: ProductName,
312+
) -> FunctionJobStatus:
313+
result = await rabbitmq_rpc_client.request(
314+
WEBSERVER_RPC_NAMESPACE,
315+
TypeAdapter(RPCMethodName).validate_python("get_function_job_status"),
316+
function_job_id=function_job_id,
317+
user_id=user_id,
318+
product_name=product_name,
319+
)
320+
return TypeAdapter(FunctionJobStatus).validate_python(result)
321+
322+
323+
@log_decorator(_logger, level=logging.DEBUG)
324+
async def get_function_job_outputs(
325+
rabbitmq_rpc_client: RabbitMQRPCClient,
326+
*,
327+
user_id: UserID,
328+
function_job_id: FunctionJobID,
329+
product_name: ProductName,
330+
) -> FunctionOutputs:
331+
result = await rabbitmq_rpc_client.request(
332+
WEBSERVER_RPC_NAMESPACE,
333+
TypeAdapter(RPCMethodName).validate_python("get_function_job_outputs"),
334+
function_job_id=function_job_id,
335+
user_id=user_id,
336+
product_name=product_name,
337+
)
338+
return TypeAdapter(FunctionOutputs).validate_python(result)
339+
340+
341+
@log_decorator(_logger, level=logging.DEBUG)
342+
async def update_function_job_status(
343+
rabbitmq_rpc_client: RabbitMQRPCClient,
344+
*,
345+
user_id: UserID,
346+
product_name: ProductName,
347+
function_job_id: FunctionJobID,
348+
job_status: FunctionJobStatus,
349+
) -> FunctionJobStatus:
350+
result = await rabbitmq_rpc_client.request(
351+
WEBSERVER_RPC_NAMESPACE,
352+
TypeAdapter(RPCMethodName).validate_python("update_function_job_status"),
353+
function_job_id=function_job_id,
354+
job_status=job_status,
355+
user_id=user_id,
356+
product_name=product_name,
357+
)
358+
return TypeAdapter(FunctionJobStatus).validate_python(result)
359+
360+
361+
@log_decorator(_logger, level=logging.DEBUG)
362+
async def update_function_job_outputs(
363+
rabbitmq_rpc_client: RabbitMQRPCClient,
364+
*,
365+
user_id: UserID,
366+
product_name: ProductName,
367+
function_job_id: FunctionJobID,
368+
outputs: FunctionOutputs,
369+
) -> FunctionOutputs:
370+
result = await rabbitmq_rpc_client.request(
371+
WEBSERVER_RPC_NAMESPACE,
372+
TypeAdapter(RPCMethodName).validate_python("update_function_job_outputs"),
373+
function_job_id=function_job_id,
374+
outputs=outputs,
375+
user_id=user_id,
376+
product_name=product_name,
377+
)
378+
return TypeAdapter(FunctionOutputs).validate_python(result)
379+
380+
303381
@log_decorator(_logger, level=logging.DEBUG)
304382
async def delete_function_job(
305383
rabbitmq_rpc_client: RabbitMQRPCClient,

packages/service-library/src/servicelib/socketio_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
""" Common utilities for python-socketio library
1+
"""Common utilities for python-socketio library
22
33
44
NOTE: we intentionally avoided importing socketio here to avoid adding an extra dependency at
@@ -9,7 +9,6 @@
99

1010

1111
async def cleanup_socketio_async_pubsub_manager(server_manager):
12-
1312
# NOTE: this is ugly. It seems though that python-socketio does not
1413
# cleanup its background tasks properly.
1514
# https://github.com/miguelgrinberg/python-socketio/discussions/1092
@@ -35,6 +34,7 @@ async def cleanup_socketio_async_pubsub_manager(server_manager):
3534
for coro_name in [
3635
"AsyncServer._service_task",
3736
"AsyncSocket.schedule_ping",
37+
"AsyncSocket._send_ping",
3838
"AsyncPubSubManager._thread",
3939
]
4040
):

scripts/maintenance/computational-clusters/autoscaled_monitor/core.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -615,7 +615,11 @@ async def _cancel_all_jobs(
615615
the_cluster,
616616
dask_task.job_id,
617617
)
618-
if comp_task is not None and abort_in_db:
618+
if (
619+
comp_task is not None
620+
and comp_task.state not in ["FAILED", "SUCCESS", "ABORTED"]
621+
and abort_in_db
622+
):
619623
await db.abort_job_in_db(state, comp_task.project_id, comp_task.node_id)
620624

621625
rich.print("cancelled all tasks")

0 commit comments

Comments (0)