feat: add support for parallel kind processing with threads

bhearsum · bhearsum · commit 1f3a474fb500 · 2025-10-24T15:08:30.000-04:00
Even with free-threaded Python 3.14, this is still a bit slower than multiprocessing on Linux, but it allows us to start experimenting with thread-based parallelism, and may allow users on macOS and Windows to see a speed-up immediately.
diff --git a/src/taskgraph/generator.py b/src/taskgraph/generator.py
@@ -10,6 +10,7 @@
 from concurrent.futures import (
     FIRST_COMPLETED,
     ProcessPoolExecutor,
+    ThreadPoolExecutor,
     wait,
 )
 from dataclasses import dataclass
@@ -317,10 +318,18 @@ def _load_tasks_parallel(self, kinds, kind_graph, parameters):
         futures = set()
         edges = set(kind_graph.edges)
 
-        with ProcessPoolExecutor(
-            mp_context=multiprocessing.get_context("fork")
-        ) as executor:
+        # Use processes if available; this allows us to use multiple CPU cores.
+        # We should revisit this default when free-threaded Python is more
+        # stable and performant. In the meantime, allowing the usage of threads
+        # can still be helpful when `fork` multiprocessing is not available
+        # (e.g. on Windows and macOS), and gives users the option to try using
+        # free-threaded Python to speed things up.
+        if "fork" in multiprocessing.get_all_start_methods() and not os.environ.get("TASKGRAPH_USE_THREADS"):
+            factory = lambda: ProcessPoolExecutor(mp_context=multiprocessing.get_context("fork"))
+        else:
+            factory = lambda: ThreadPoolExecutor(max_workers=os.process_cpu_count())
 
+        with factory() as executor:
             def submit_ready_kinds():
                 """Create the next batch of tasks for kinds without dependencies."""
                 nonlocal kinds, edges, futures
@@ -433,13 +442,6 @@ def _run(self):
         yield "kind_graph", kind_graph
 
         logger.info("Generating full task set")
-        # Current parallel generation relies on multiprocessing, and forking.
-        # This causes problems on Windows and macOS due to how new processes
-        # are created there, and how doing so reinitializes global variables
-        # that are modified earlier in graph generation, that doesn't get
-        # redone in the new processes. Ideally this would be fixed, or we
-        # would take another approach to parallel kind generation. In the
-        # meantime, it's not supported outside of Linux.
         if "fork" not in multiprocessing.get_all_start_methods() or os.environ.get(
             "TASKGRAPH_SERIAL"
         ):