Skip to content

Commit 0063027

Browse files
authored
revert: multiprocess kind processing (#746)
While this worked very well on Linux, it causes issues anywhere we can't `fork` to get a new process (most notably on Windows). The problem lies in the fact that in these cases, we spawn an entire new process, which re-imports taskgraph from scratch. This is fine in some cases, but in any case where global state has been modified in an earlier part of `TaskGraphGenerator._run`, we lose whatever side effects happened there, and end up failing in some way. Concretely: in gecko we add a bunch of `payload_builders` as part of registering the graph config. This code doesn't re-run in the spawned processes, so the payload builders don't exist there. There are workarounds for this: for example, redoing all the earlier work of `_run` in each subprocess, or perhaps finding some way to ensure all the needed state is passed explicitly. There's no quick and easy way to make this work though, and some thought should be given to the tradeoffs of doing it (vs. doing nothing, or spending the effort on a different way to parallelize) before proceeding.
1 parent 1fdbb6c commit 0063027

File tree

3 files changed

+37
-102
lines changed

3 files changed

+37
-102
lines changed

packages/pytest-taskgraph/src/pytest_taskgraph/fixtures/gen.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,8 @@ def _load_kinds(self, graph_config, target_kind=None):
6262
yield FakeKind(kind_name, "/fake", config, graph_config)
6363

6464

65-
class FakeGraphConfig(GraphConfig):
66-
def register(self):
67-
pass
68-
69-
7065
def fake_load_graph_config(root_dir):
71-
graph_config = FakeGraphConfig(
66+
graph_config = GraphConfig(
7267
{
7368
"trust-domain": "test-domain",
7469
"taskgraph": {
@@ -108,6 +103,7 @@ def fake_load_graph_config(root_dir):
108103
},
109104
root_dir,
110105
)
106+
graph_config.__dict__["register"] = lambda: None
111107
return graph_config
112108

113109

src/taskgraph/generator.py

Lines changed: 31 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,6 @@
55
import copy
66
import logging
77
import os
8-
from concurrent.futures import (
9-
FIRST_COMPLETED,
10-
ProcessPoolExecutor,
11-
wait,
12-
)
138
from dataclasses import dataclass
149
from typing import Callable, Dict, Optional, Union
1510

@@ -49,20 +44,16 @@ def _get_loader(self):
4944
loader = "taskgraph.loader.default:loader"
5045
return find_object(loader)
5146

52-
def load_tasks(self, parameters, kind_dependencies_tasks, write_artifacts):
53-
logger.debug(f"Loading tasks for kind {self.name}")
54-
55-
parameters = Parameters(**parameters)
47+
def load_tasks(self, parameters, loaded_tasks, write_artifacts):
5648
loader = self._get_loader()
5749
config = copy.deepcopy(self.config)
5850

59-
inputs = loader(
60-
self.name,
61-
self.path,
62-
config,
63-
parameters,
64-
list(kind_dependencies_tasks.values()),
65-
)
51+
kind_dependencies = config.get("kind-dependencies", [])
52+
kind_dependencies_tasks = {
53+
task.label: task for task in loaded_tasks if task.kind in kind_dependencies
54+
}
55+
56+
inputs = loader(self.name, self.path, config, parameters, loaded_tasks)
6657

6758
transforms = TransformSequence()
6859
for xform_path in config["transforms"]:
@@ -96,7 +87,6 @@ def load_tasks(self, parameters, kind_dependencies_tasks, write_artifacts):
9687
)
9788
for task_dict in transforms(trans_config, inputs)
9889
]
99-
logger.info(f"Generated {len(tasks)} tasks for kind {self.name}")
10090
return tasks
10191

10292
@classmethod
@@ -261,69 +251,6 @@ def _load_kinds(self, graph_config, target_kinds=None):
261251
except KindNotFound:
262252
continue
263253

264-
def _load_tasks(self, kinds, kind_graph, parameters):
265-
all_tasks = {}
266-
futures_to_kind = {}
267-
futures = set()
268-
edges = set(kind_graph.edges)
269-
270-
with ProcessPoolExecutor() as executor:
271-
272-
def submit_ready_kinds():
273-
"""Create the next batch of tasks for kinds without dependencies."""
274-
nonlocal kinds, edges, futures
275-
loaded_tasks = all_tasks.copy()
276-
kinds_with_deps = {edge[0] for edge in edges}
277-
ready_kinds = (
278-
set(kinds) - kinds_with_deps - set(futures_to_kind.values())
279-
)
280-
for name in ready_kinds:
281-
kind = kinds.get(name)
282-
if not kind:
283-
message = (
284-
f'Could not find the kind "{name}"\nAvailable kinds:\n'
285-
)
286-
for k in sorted(kinds):
287-
message += f' - "{k}"\n'
288-
raise Exception(message)
289-
290-
future = executor.submit(
291-
kind.load_tasks,
292-
dict(parameters),
293-
{
294-
k: t
295-
for k, t in loaded_tasks.items()
296-
if t.kind in kind.config.get("kind-dependencies", [])
297-
},
298-
self._write_artifacts,
299-
)
300-
futures.add(future)
301-
futures_to_kind[future] = name
302-
303-
submit_ready_kinds()
304-
while futures:
305-
done, _ = wait(futures, return_when=FIRST_COMPLETED)
306-
for future in done:
307-
if exc := future.exception():
308-
executor.shutdown(wait=False, cancel_futures=True)
309-
raise exc
310-
kind = futures_to_kind.pop(future)
311-
futures.remove(future)
312-
313-
for task in future.result():
314-
if task.label in all_tasks:
315-
raise Exception("duplicate tasks with label " + task.label)
316-
all_tasks[task.label] = task
317-
318-
# Update state for next batch of futures.
319-
del kinds[kind]
320-
edges = {e for e in edges if e[1] != kind}
321-
322-
# Submit any newly unblocked kinds
323-
submit_ready_kinds()
324-
325-
return all_tasks
326-
327254
def _run(self):
328255
logger.info("Loading graph configuration.")
329256
graph_config = load_graph_config(self.root_dir)
@@ -378,8 +305,31 @@ def _run(self):
378305
)
379306

380307
logger.info("Generating full task set")
381-
all_tasks = self._load_tasks(kinds, kind_graph, parameters)
308+
all_tasks = {}
309+
for kind_name in kind_graph.visit_postorder():
310+
logger.debug(f"Loading tasks for kind {kind_name}")
311+
312+
kind = kinds.get(kind_name)
313+
if not kind:
314+
message = f'Could not find the kind "{kind_name}"\nAvailable kinds:\n'
315+
for k in sorted(kinds):
316+
message += f' - "{k}"\n'
317+
raise Exception(message)
382318

319+
try:
320+
new_tasks = kind.load_tasks(
321+
parameters,
322+
list(all_tasks.values()),
323+
self._write_artifacts,
324+
)
325+
except Exception:
326+
logger.exception(f"Error loading tasks for kind {kind_name}:")
327+
raise
328+
for task in new_tasks:
329+
if task.label in all_tasks:
330+
raise Exception("duplicate tasks with label " + task.label)
331+
all_tasks[task.label] = task
332+
logger.info(f"Generated {len(new_tasks)} tasks for kind {kind_name}")
383333
full_task_set = TaskGraph(all_tasks, Graph(frozenset(all_tasks), frozenset()))
384334
yield self.verify("full_task_set", full_task_set, graph_config, parameters)
385335

test/test_generator.py

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,27 +3,16 @@
33
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
44

55

6-
from concurrent.futures import ProcessPoolExecutor
7-
86
import pytest
9-
from pytest_taskgraph import WithFakeKind, fake_load_graph_config
7+
from pytest_taskgraph import FakeKind, WithFakeKind, fake_load_graph_config
108

119
from taskgraph import generator, graph
1210
from taskgraph.generator import Kind, load_tasks_for_kind
1311
from taskgraph.loader.default import loader as default_loader
1412

1513

16-
class FakePPE(ProcessPoolExecutor):
17-
loaded_kinds = []
18-
19-
def submit(self, kind_load_tasks, *args):
20-
self.loaded_kinds.append(kind_load_tasks.__self__.name)
21-
return super().submit(kind_load_tasks, *args)
22-
23-
24-
def test_kind_ordering(mocker, maketgg):
14+
def test_kind_ordering(maketgg):
2515
"When task kinds depend on each other, they are loaded in postorder"
26-
mocked_ppe = mocker.patch.object(generator, "ProcessPoolExecutor", new=FakePPE)
2716
tgg = maketgg(
2817
kinds=[
2918
("_fake3", {"kind-dependencies": ["_fake2", "_fake1"]}),
@@ -32,7 +21,7 @@ def test_kind_ordering(mocker, maketgg):
3221
]
3322
)
3423
tgg._run_until("full_task_set")
35-
assert mocked_ppe.loaded_kinds == ["_fake1", "_fake2", "_fake3"]
24+
assert FakeKind.loaded_kinds == ["_fake1", "_fake2", "_fake3"]
3625

3726

3827
def test_full_task_set(maketgg):
@@ -286,5 +275,5 @@ def test_kind_load_tasks(monkeypatch, graph_config, parameters, datadir, kind_co
286275
kind = Kind(
287276
name="fake", path="foo/bar", config=kind_config, graph_config=graph_config
288277
)
289-
tasks = kind.load_tasks(parameters, {}, False)
278+
tasks = kind.load_tasks(parameters, [], False)
290279
assert tasks

0 commit comments

Comments
 (0)