Skip to content

Commit 7fd0848

Browse files
committed
added more descriptive debug message to pydra submitter
1 parent 76c933e commit 7fd0848

File tree

2 files changed

+38
-2
lines changed

2 files changed

+38
-2
lines changed

pydra/engine/helpers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ def load_result(checksum, cache_locations):
112112
if not cache_locations:
113113
return None
114114
# TODO: if there are issues with loading, we might need to
115-
# TODO: sleep and repeat loads (after checkin that there are no lock files!)
115+
# TODO: sleep and repeat loads (after checking that there are no lock files!)
116116
for location in cache_locations:
117117
if (location / checksum).exists():
118118
result_file = location / checksum / "_result.pklz"

pydra/engine/submitter.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Handle execution backends."""
22
import asyncio
3+
import pickle
34
from uuid import uuid4
45
from .workers import WORKERS
56
from .core import is_workflow
@@ -167,9 +168,14 @@ async def expand_workflow(self, wf, rerun=False):
167168
# don't block the event loop!
168169
await asyncio.sleep(1)
169170
if ii > 60:
171+
blocked = _list_blocked_tasks(graph_copy)
172+
get_runnable_tasks(graph_copy)
170173
raise Exception(
171174
"graph is not empty, but not able to get more tasks "
172-
"- something is wrong (e.g. with the filesystem)"
175+
"- something may have gone wrong when retrieving the results "
176+
"of predecessor tasks caused by a file-system error or a bug "
177+
"in the internal workflow logic.\n\nBlocked tasks\n-------------\n"
178+
+ "\n".join(blocked)
173179
)
174180
for task in tasks:
175181
# grab inputs if needed
@@ -281,3 +287,33 @@ async def prepare_runnable_with_state(runnable):
281287
runnable.state.prepare_inputs()
282288
logger.debug(f"Expanding {runnable} into {len(runnable.state.states_val)} states")
283289
return runnable.pickle_task()
290+
291+
292+
def _list_blocked_tasks(graph):
    """Describe the tasks that can't be run and the predecessors blocking them.

    Used to build a more helpful error message when a workflow graph is not
    empty but no further tasks can be scheduled.

    Parameters
    ----------
    graph
        Object exposing ``sorted_nodes`` (an iterable of tasks) and
        ``predecessors`` (a mapping from task name to its predecessor tasks).
        Every task is read for ``name``, ``checksum``, ``done`` and
        ``cache_locations`` (an iterable of directory paths).

    Returns
    -------
    list of str
        One message per blocked task, naming each predecessor that is not
        done together with the saved tasks in the cache that share its name.
        Empty when no task has an unfinished predecessor.
    """
    blocked = []
    for tsk in graph.sorted_nodes:
        # Each entry must be a single (predecessor, matches) tuple:
        # list.append() accepts exactly one argument, and the message below
        # unpacks the entries as pairs.
        blocking = [
            (pred, ", ".join(_saved_tasks_named(tsk.cache_locations, pred.name)))
            for pred in graph.predecessors[tsk.name]
            if not pred.done
        ]
        if blocking:
            blocked.append(
                f"\n{tsk.name} ({tsk.checksum}) is blocked by "
                + "; ".join(
                    f"{pred.name} ({pred.checksum}), which matches names of [{matching}]"
                    for pred, matching in blocking
                )
            )
    return blocked


def _saved_tasks_named(cache_locations, name):
    """Return ``"<name> (<work dir>)"`` labels for saved tasks called *name*.

    Every immediate sub-directory of each cache location that contains a
    ``_task.pklz`` file is inspected.
    """
    matching = []
    for cache_loc in cache_locations:
        # A cache location that was never created has nothing to report and
        # must not abort the construction of the diagnostic message.
        if not cache_loc.is_dir():
            continue
        for work_dir in cache_loc.iterdir():
            task_file = work_dir / "_task.pklz"
            if not task_file.exists():
                continue
            try:
                # NOTE: pickle executes arbitrary code on load; this is only
                # acceptable because the cache directories are expected to be
                # written by the workflow engine itself.
                with open(task_file, "rb") as f:
                    saved_tsk = pickle.load(f)
            except (OSError, EOFError, pickle.UnpicklingError):
                # Unreadable or partially written task file: skip it so the
                # caller's own, more descriptive, error is still raised.
                continue
            if saved_tsk.name == name:
                matching.append(f"{saved_tsk.name} ({work_dir.name})")
    return matching

0 commit comments

Comments
 (0)