Skip to content

Commit 59d913e

Browse files
GitHK and Andrei Neagu authored
🐛 Dynamic services log issues with missing resources when scheduled (ITISFoundation#2566)
* legacy service fails if resources are missing
* refactor and fixed issue with missing resources
* added test for failing to get scheduled
* test is now returning the correct value
* reverting changes & fixing test
* if a pending message is present it will be displayed in the logs
* fix styling
* removed unrequired limits
* reverting
* updated message displayed to user
* fix camelcase
* fixing linter in CI
* trying to revert
* fixes issue with python linting
* revert & commit
* reverting change

Co-authored-by: Andrei Neagu <[email protected]>
1 parent 01a377a commit 59d913e

File tree

3 files changed

+47
-3
lines changed

3 files changed

+47
-3
lines changed

services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_api.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -239,16 +239,15 @@ def _make_pending() -> Tuple[ServiceState, str]:
239239
# it is looking for a service or something with no error message
240240
return _make_pending()
241241

242-
task_status = last_task["Status"]
243-
service_state, message = extract_task_state(task_status=task_status)
242+
service_state, message = extract_task_state(task_status=last_task["Status"])
244243

245244
# to avoid creating confusion for the user, always return the status
246245
# as pending while the dynamic-sidecar is starting, with
247246
# FAILED and COMPLETED and RUNNING being the only exceptions
248247
if service_state not in NO_PENDING_OVERWRITE:
249248
return ServiceState.PENDING, message
250249

251-
return extract_task_state(task_status=task_status)
250+
return service_state, message
252251

253252

254253
async def are_services_missing(

services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_docker_api.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# pylint: disable=redefined-outer-name
22
# pylint: disable=unused-argument
33

4+
import asyncio
45
from asyncio import BaseEventLoop
56
from typing import Any, AsyncIterator, Dict, List
67
from uuid import UUID, uuid4
@@ -29,6 +30,8 @@
2930

3031
pytestmark = pytest.mark.asyncio
3132

33+
MAX_INT64 = 9223372036854775807
34+
3235
# FIXTURES
3336

3437

@@ -262,6 +265,12 @@ async def _count_services_in_stack(
262265
return len(services)
263266

264267

268+
def _inject_impossible_resources(dynamic_sidecar_service_spec: Dict[str, Any]) -> None:
269+
dynamic_sidecar_service_spec["task_template"]["Resources"] = {
270+
"Reservations": {"NanoCPUs": MAX_INT64, "MemoryBytes": MAX_INT64}
271+
}
272+
273+
265274
# TESTS
266275

267276

@@ -392,6 +401,28 @@ async def test_dynamic_sidecar_in_running_state_and_node_id_is_recovered(
392401
assert dynamic_sidecar_state == (ServiceState.RUNNING, "")
393402

394403

404+
async def test_dynamic_sidecar_get_dynamic_sidecar_sate_fail_to_schedule(
405+
dynamic_sidecar_service_spec: Dict[str, Any],
406+
dynamic_sidecar_settings: DynamicSidecarSettings,
407+
cleanup_test_dynamic_sidecar_service: None,
408+
docker_swarm: None,
409+
) -> None:
410+
_inject_impossible_resources(dynamic_sidecar_service_spec)
411+
service_id = await docker_api.create_service_and_get_id(
412+
dynamic_sidecar_service_spec
413+
)
414+
assert service_id
415+
416+
# wait for the service to get scheduled
417+
await asyncio.sleep(0.2)
418+
419+
dynamic_sidecar_state = await docker_api.get_dynamic_sidecar_state(service_id)
420+
assert dynamic_sidecar_state == (
421+
ServiceState.PENDING,
422+
"no suitable node (insufficient resources on 1 node)",
423+
)
424+
425+
395426
async def test_are_services_missing(
396427
node_uuid: UUID,
397428
dynamic_sidecar_settings: DynamicSidecarSettings,

services/web/client/source/class/osparc/data/model/Node.js

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1019,6 +1019,20 @@ qx.Class.define("osparc.data.model.Node", {
10191019
break;
10201020
}
10211021
case "pending": {
1022+
if (data["service_message"]) {
1023+
const serviceId = data["service_uuid"];
1024+
const serviceName = this.getLabel();
1025+
const serviceMessage = data["service_message"];
1026+
const msg = `The service "${serviceName}" is waiting for available ` +
1027+
`resources. Please inform support and provide the following message ` +
1028+
`in case this does not resolve in a few minutes: "${serviceId}" ` +
1029+
`reported "${serviceMessage}"`;
1030+
const msgData = {
1031+
nodeId: this.getNodeId(),
1032+
msg: msg
1033+
};
1034+
this.fireDataEvent("showInLogger", msgData);
1035+
}
10221036
status.setInteractive("pending");
10231037
const interval = 10000;
10241038
qx.event.Timer.once(() => this.__nodeState(), this, interval);

0 commit comments

Comments
 (0)