|
10 | 10 | from concurrent.futures import ( |
11 | 11 | FIRST_COMPLETED, |
12 | 12 | ProcessPoolExecutor, |
| 13 | + ThreadPoolExecutor, |
13 | 14 | wait, |
14 | 15 | ) |
15 | 16 | from dataclasses import dataclass |
@@ -317,9 +318,22 @@ def _load_tasks_parallel(self, kinds, kind_graph, parameters): |
317 | 318 | futures = set() |
318 | 319 | edges = set(kind_graph.edges) |
319 | 320 |
|
320 | | - with ProcessPoolExecutor( |
321 | | - mp_context=multiprocessing.get_context("fork") |
322 | | - ) as executor: |
| 321 | + # use processes if available; this allows us to use multiple CPU cores |
| 322 | + # we should revisit this default when free-threaded python is more |
| 323 | + # stable and performant. in the meantime, allowing the usage of threads |
| 324 | + # can still be helpful when `fork` multiprocessing is not available |
| 325 | + # (like windows and mac), and gives users the option to try using |
| 326 | + # free threaded python to speed things up |
| 327 | + if "fork" in multiprocessing.get_all_start_methods() and not os.environ.get( |
| 328 | + "TASKGRAPH_USE_THREADS" |
| 329 | + ): |
| 330 | + executor = ProcessPoolExecutor( |
| 331 | + mp_context=multiprocessing.get_context("fork") |
| 332 | + ) |
| 333 | + else: |
| 334 | + executor = ThreadPoolExecutor(max_workers=os.process_cpu_count()) |
| 335 | + |
| 336 | + with executor: |
323 | 337 |
|
324 | 338 | def submit_ready_kinds(): |
325 | 339 | """Create the next batch of tasks for kinds without dependencies.""" |
@@ -433,13 +447,6 @@ def _run(self): |
433 | 447 | yield "kind_graph", kind_graph |
434 | 448 |
|
435 | 449 | logger.info("Generating full task set") |
436 | | - # Current parallel generation relies on multiprocessing, and forking. |
437 | | - # This causes problems on Windows and macOS due to how new processes |
438 | | - # are created there, and how doing so reinitializes global variables |
439 | | - # that are modified earlier in graph generation, that doesn't get |
440 | | - # redone in the new processes. Ideally this would be fixed, or we |
441 | | - # would take another approach to parallel kind generation. In the |
442 | | - # meantime, it's not supported outside of Linux. |
443 | 450 | if "fork" not in multiprocessing.get_all_start_methods() or os.environ.get( |
444 | 451 | "TASKGRAPH_SERIAL" |
445 | 452 | ): |
|
0 commit comments