# Patch summary
# -------------
# Adds an `xenna_stage_spec()` method next to the existing `ray_stage_spec()`
# in two files:
#   * nemo_curator/stages/file_partitioning.py
#   * nemo_curator/stages/text/download/base/url_generation.py
# In both files the new method returns the dict {"num_workers_per_node": 1}.
# The surrounding context shows that each `ray_stage_spec()` already flags the
# stage as a fan-out stage (IS_FANOUT_STAGE / "is_fanout_stage" set to True).
#
# NOTE(review): presumably the Xenna backend reads this spec to cap the stage
# at one worker per node -- confirm against the executor that consumes it.
# NOTE(review): the line breaks and indentation below were reconstructed from
# a whitespace-collapsed copy of the patch; verify them before applying.
diff --git a/nemo_curator/stages/file_partitioning.py b/nemo_curator/stages/file_partitioning.py
index 418699e5d0..a5c1a5c526 100644
--- a/nemo_curator/stages/file_partitioning.py
+++ b/nemo_curator/stages/file_partitioning.py
@@ -85,6 +85,11 @@ def ray_stage_spec(self) -> dict[str, Any]:
             RayStageSpecKeys.IS_FANOUT_STAGE: True,
         }
 
+    def xenna_stage_spec(self) -> dict[str, Any]:
+        return {
+            "num_workers_per_node": 1,
+        }
+
     def process(self, _: _EmptyTask) -> list[FileGroupTask]:
         """Process the initial task to create file group tasks.
 
diff --git a/nemo_curator/stages/text/download/base/url_generation.py b/nemo_curator/stages/text/download/base/url_generation.py
index eb4e419cac..278bf88ad0 100644
--- a/nemo_curator/stages/text/download/base/url_generation.py
+++ b/nemo_curator/stages/text/download/base/url_generation.py
@@ -81,3 +81,8 @@ def ray_stage_spec(self) -> dict[str, Any]:
         return {
             "is_fanout_stage": True,
         }
+
+    def xenna_stage_spec(self) -> dict[str, Any]:
+        return {
+            "num_workers_per_node": 1,
+        }