diff --git a/3rdparty/Megatron-Bridge b/3rdparty/Megatron-Bridge index f370f03..953aabf 160000 --- a/3rdparty/Megatron-Bridge +++ b/3rdparty/Megatron-Bridge @@ -1 +1 @@ -Subproject commit f370f03e4039f85c0f1657f21cda47e00ef853db +Subproject commit 953aabf75c0500180dc14a6a76cf9e7e7c4baec7 diff --git a/dfm/src/megatron/data/common/base_energon_datamodule.py b/dfm/src/megatron/data/common/base_energon_datamodule.py deleted file mode 100644 index 0bf711a..0000000 --- a/dfm/src/megatron/data/common/base_energon_datamodule.py +++ /dev/null @@ -1,362 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -from typing import Any, Dict, Literal, Optional - -from megatron.core import parallel_state -from megatron.energon import WorkerConfig, get_savable_loader, get_train_dataset - - -logger = logging.getLogger(__name__) - - -class EnergonMultiModalDataModule: - """ - A DataModule for handling multimodal datasets with images and text. - - This data module is designed to work with multimodal datasets that involve both images and text. - It provides a seamless interface to load training and validation data, manage batching, and handle - the state of the data pipeline across training epochs. The module integrates with the Megatron-Energon - framework for efficient data handling in large-scale distributed training. - - Attributes: - path (str): Path to the energon dataset. - tokenizer (Tokenizer): The tokenizer used for processing text. - image_processor (ImageProcessor): The image processor used for preprocessing images. - seq_length (int): The maximum sequence length for tokenized text. - micro_batch_size (int): The batch size for training and validation. - num_workers (int): Number of workers for data loading. - pin_memory (bool): Whether to pin memory in the DataLoader. - multimodal_sample_config (MultiModalSampleConfig): Configuration object for multimodal samples. - task_encoder (MultiModalTaskEncoder): Encoder responsible for encoding and batching samples. - init_global_step (int): The initial global step for the trainer, used for resuming training. - data_sampler (SequentialMegatronSampler): Sampler responsible for generating sequential samples. - train_dataloader_object (Optional): The DataLoader object for training data. - val_dataloader_object (Optional): The DataLoader object for validation data. 
- """ - - def __init__( - self, - path: str, - tokenizer, - image_processor, - seq_length: int = 2048, - micro_batch_size: int = 1, - global_batch_size: int = 1, - num_workers: int = 1, - num_val_workers: int | None = None, - pin_memory: bool = True, - shuffle_buffer_size: int = 100, - max_samples_per_sequence: int | None = None, - multimodal_sample_config: Optional[Any] = None, - task_encoder: Optional[Any] = None, - decoder_seq_length: Optional[int] = None, - packing_buffer_size: Optional[int] = None, - validation_task_encoder: Optional[Any] = None, - **kwargs, - ) -> None: - """ - Initialize the EnergonMultiModalDataModule. - - Parameters: - path (str): Path to the dataset. - tokenizer (Tokenizer): The tokenizer used for processing text. - image_processor (ImageProcessor): The image processor used for preprocessing images. - seq_length (int, optional): The maximum sequence length for tokenized text. Defaults to 2048. - micro_batch_size (int, optional): The batch size for training and validation. Defaults to 1. - num_workers (int, optional): Number of workers for data loading. Defaults to 1. - num_val_workers (int, optional): Number of workers for validation data loading. Defaults to num_workers. - pin_memory (bool, optional): Whether to pin memory in the DataLoader. Defaults to True. - multimodal_sample_config (MultiModalSampleConfig, optional): Configuration object for multimodal samples. - Defaults to MultiModalSampleConfig(). - shuffle_buffer_size (int, optional): Size of the shuffle buffer. Defaults to 100. - max_samples_per_sequence (int, optional): Maximum number of samples per sequence to load from memory. - Defaults to None (loads the whole tar file at once). - task_encoder (MultiModalTaskEncoder, optional): Encoder responsible for encoding and batching samples. - If not provided, a default (MultimodalTaskEncoder) encoder will be created. Defaults to None. - decoder_seq_length (int, optional): The max sequence length for the decoder. Used in encoder-decoder models - packing_buffer_size (int, optional): Size of the packing buffer for batched samples. Defaults to None. - validation_task_encoder (MultiModalTaskEncoder, optional): Encoder responsible for encoding - and batching samples for validation. Defaults to None and will be the same as task_encoder. - **kwargs: Additional keyword arguments. Will be passed to get_train_dataset() of Energon - """ - - super().__init__() - self.path = path - self.tokenizer = tokenizer - self.image_processor = image_processor - self.seq_length = seq_length - self.decoder_seq_length = decoder_seq_length - self.micro_batch_size = micro_batch_size - self.global_batch_size = global_batch_size - self.num_workers = num_workers - self.pin_memory = pin_memory - self.multimodal_sample_config = multimodal_sample_config - self.shuffle_buffer_size = shuffle_buffer_size - self.max_samples_per_sequence = max_samples_per_sequence - self.task_encoder = task_encoder - self.init_global_step = 0 - self.train_dataloader_object = None - self.val_dataloader_object = None - self.packing_buffer_size = packing_buffer_size - self.validation_task_encoder = validation_task_encoder or self.task_encoder - self.num_val_workers = num_val_workers or self.num_workers - self.kwargs = kwargs - - def datasets_provider(self, worker_config, split: Literal["train", "val"] = "val"): - """ - Provide the dataset for training or validation. - - This method retrieves the dataset for the specified split (either 'train' or 'val') and configures - it according to the worker configuration. 
- - Parameters: - worker_config: Configuration for the data loader workers. - split (Literal['train', 'val'], optional): The data split to retrieve ('train' or 'val'). Defaults to 'val'. - - Returns: - Dataset: The dataset configured for the specified split. - """ - - if split not in {"train", "val"}: - raise ValueError("Invalid value for split. Allowed values are 'train' or 'val'.") - - if split == "train": - task_encoder = self.task_encoder - else: - task_encoder = self.validation_task_encoder - - _dataset = get_train_dataset( - self.path, - batch_size=self.micro_batch_size, - task_encoder=task_encoder, - worker_config=worker_config, - packing_buffer_size=self.packing_buffer_size, - split_part=split, - shuffle_buffer_size=self.shuffle_buffer_size, - max_samples_per_sequence=self.max_samples_per_sequence, - **self.kwargs, - ) - - return _dataset - - def build(self): - return self.train_dataloader(), self.val_dataloader() - - def train_dataloader(self) -> Any: - """ - Initialize and return the training DataLoader. - - This method initializes the DataLoader for the training dataset. It uses the global step - from the trainer to configure the data sampler and ensures that the parallel state is initialized - correctly for distributed training. - - Returns: - TRAIN_DATALOADERS: The DataLoader for the training dataset. - """ - - logger.info(f"Multimodal train dataloader initializing with init_global_step {self.init_global_step}") - if self.train_dataloader_object: - return self.train_dataloader_object - if not parallel_state.is_initialized(): - logger.info( - f"Muiltimodal data loader parallel state is not initialized," - f"using default worker config with no_workers {self.num_workers}" - ) - worker_config = WorkerConfig.default_worker_config(self.num_workers) - else: - rank = parallel_state.get_data_parallel_rank() - world_size = parallel_state.get_data_parallel_world_size() - data_parallel_group = parallel_state.get_data_parallel_group() - logger.info( - f" Multimodal train dataloader initializing with" - f"rank {rank} world_size {world_size} data_parallel_group {data_parallel_group} ****** " - ) - worker_config = WorkerConfig( - rank=rank, - world_size=world_size, - num_workers=self.num_workers, - data_parallel_group=data_parallel_group, - worker_debug_path=None, - worker_log_level=0, - ) - train_dataset = self.datasets_provider(worker_config, split="train") - energon_dataloader = get_savable_loader(train_dataset, worker_config=worker_config) - self.train_dataloader_object = energon_dataloader - return EnergonDataloader(self.train_dataloader_object) - - def val_dataloader(self): - """ - Initialize and return the validation DataLoader. - - This method initializes the DataLoader for the validation dataset. It ensures that the parallel state - is initialized correctly for distributed training and returns a configured DataLoader object. - - Returns: - EVAL_DATALOADERS: The DataLoader for the validation dataset. 
- """ - if self.val_dataloader_object: - return self.val_dataloader_object - - if not parallel_state.is_initialized(): - logger.info( - f"Muiltimodal val data loader parallel state is not initialized," - f"using default worker config with no_workers {self.num_workers}" - ) - worker_config = WorkerConfig.default_worker_config(self.num_val_workers) - else: - rank = parallel_state.get_data_parallel_rank() - world_size = parallel_state.get_data_parallel_world_size() - data_parallel_group = parallel_state.get_data_parallel_group() - - logger.info(f"rank {rank} world_size {world_size} data_parallel_group {data_parallel_group}") - worker_config = WorkerConfig( - rank=rank, - world_size=world_size, - num_workers=self.num_workers, - data_parallel_group=data_parallel_group, - worker_debug_path=None, - worker_log_level=0, - ) - val_dataset = self.datasets_provider(worker_config, split="val") - energon_loader = get_savable_loader(val_dataset, worker_config=worker_config) - self.val_dataloader_object = energon_loader - return EnergonDataloader(self.val_dataloader_object) - - def test_dataloader(self) -> None: - """ - Return None as test dataset split does not exist. - - This method overrides the test_dataloader method and returns None since the test dataset split - is not defined or used in this module. - - Returns: - None - """ - logger.warning("Multimodal dataloader test dataset split does not exist") - return None - - def state_dict(self) -> Dict[str, Any]: - """ - Save the state of the data module. - - This method is called when saving a checkpoint. It generates and saves the state of the data module, - including the state of the dataloader and the number of consumed samples. - - Returns: - Dict[str, Any]: A dictionary containing the state of the data module. - """ - - if self.trainer: - dataloader_obj = self.trainer.train_dataloader - - state = [] - # All ranks should be zero except the dp rank. - if ( - parallel_state.get_context_parallel_rank() - or parallel_state.get_pipeline_model_parallel_rank() - or parallel_state.get_tensor_model_parallel_rank() - or parallel_state.get_expert_model_parallel_rank() - ) == 0: - # Save_state_global in energon assumes that we call it for only the first rank within each group that - # shares the same dataloader state. By making sure that current rank is the first rank in a model - # parallel group, we ensure this. - state = dataloader_obj.save_state_global(global_dst_rank=0) - - consumed_samples = self.data_sampler.compute_consumed_samples( - self.trainer.global_step - self.init_global_step - ) - - if state is None: - state = [] # Megatron core requires all the states on all the ranks to have same python - # type. Energon sends the state as a list - logger.info(f"Multimodal data loader saving dataloader state dict consumed samples {consumed_samples}") - return {"dataloader_state": state, "consumed_samples": consumed_samples} - - logger.warning("trainer object not connected to data module object returning empty state") - return {} - - def load_state_dict(self, state_dict: Dict[str, Any]) -> None: - """ - Load the state of the data module from a checkpoint. - - This method is called when loading a checkpoint. It restores the state of the data module, - including the state of the dataloader and the number of consumed samples. - - Parameters: - state_dict (Dict[str, Any]): The state dictionary containing the saved state of the data module. 
- """ - if not "dataloader_state" in state_dict: - logger.warning( - f"Data loader state cannot be resumed from state_dict, " - f"it does not have the required key dataloader_state. It has {state_dict.keys()}" - ) - return - - state = state_dict["dataloader_state"] - try: - if self.trainer: - self.trainer.datamodule.train_dataloader().restore_state_global(state) - logger.info("Multimodal dataloader state restored") - else: - logger.error(f"Cannot restore state from state_dict {state_dict}") - raise ValueError( - "Cannot restore state from state_dict: " - "Is the trainer object is initialized and attached to datamodule???" - ) - except Exception as e: - logger.warning( - f"Failed to dataloader restore state due to [Please ensure you are using same version " - f"of energon while saving and loading, Continuing without restoring data loader] : {e}" - ) - - try: - from megatron.core.num_microbatches_calculator import update_num_microbatches - - except (ImportError, ModuleNotFoundError): - logger.warning("Megatron num_microbatches_calculator not found, using Apex version.") - from apex.transformer.pipeline_parallel.utils import update_num_microbatches - - consumed_samples = state_dict["consumed_samples"] - self.data_sampler.init_consumed_samples = consumed_samples - self.data_sampler.prev_consumed_samples = consumed_samples - logger.info(f"Multimodal dataloader load state dict with consumed_samples {consumed_samples}") - update_num_microbatches( - consumed_samples=consumed_samples, - consistency_check=False, - ) - - -class EnergonDataloader: - """A wrapper to use Megatron Energon dataloader with the Megatron-LM training loop.""" - - def __init__(self, dataloader): - self._dataloader = dataloader - self._iter = iter(cyclic_iter(dataloader)) - - def __next__(self): - return self._iter.__next__() - - def __iter__(self): - return self._iter.__iter__() - - def save_state(self): - return self._dataloader.save_state_rank() - - -def cyclic_iter(iter): - while True: - for x in iter: - yield x diff --git a/dfm/src/megatron/data/common/diffusion_energon_datamodule.py b/dfm/src/megatron/data/common/diffusion_energon_datamodule.py index f59a55f..83d83e8 100644 --- a/dfm/src/megatron/data/common/diffusion_energon_datamodule.py +++ b/dfm/src/megatron/data/common/diffusion_energon_datamodule.py @@ -19,11 +19,11 @@ from dataclasses import dataclass from typing import Any, Dict, Literal +from megatron.bridge.data.energon.base_energon_datamodule import EnergonMultiModalDataModule from megatron.bridge.data.utils import DatasetBuildContext, DatasetProvider from megatron.energon import DefaultTaskEncoder, get_train_dataset from torch import int_repr -from dfm.src.megatron.data.common.base_energon_datamodule import EnergonMultiModalDataModule from dfm.src.megatron.data.dit.dit_taskencoder import DiTTaskEncoder diff --git a/uv.lock b/uv.lock index 84987ec..3534f6b 100644 --- a/uv.lock +++ b/uv.lock @@ -1460,7 +1460,7 @@ wheels = [ [[package]] name = "emerging-optimizers" version = "0.1.0" -source = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=cf9909b777ffac18e05b67a6708282cadc000942#cf9909b777ffac18e05b67a6708282cadc000942" } +source = { git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0#d5363b4a418128cd8111983b191c4b8869a9766b" } dependencies = [ { name = "absl-py" }, { name = "torch", marker = "sys_platform == 'never'" }, @@ -1488,48 +1488,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/c1/ea/53f2148663b321f21b5a606bd5f191517cf40b7072c0497d3c92c4a13b1e/executing-2.2.1-py2.py3-none-any.whl", hash = "sha256:760643d3452b4d777d295bb167ccc74c64a81df23fb5e08eff250c425a4b2017", size = 28317, upload-time = "2025-09-01T09:48:08.5Z" }, ] -[[package]] -name = "fastapi" -version = "0.1.17" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.13' and sys_platform == 'darwin'", - "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", - "python_full_version == '3.12.*' and platform_machine != 'aarch64' and sys_platform == 'linux'", - "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'", - "python_full_version == '3.12.*' and sys_platform != 'darwin' and sys_platform != 'linux'", - "python_full_version == '3.11.*' and sys_platform == 'darwin'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux'", - "python_full_version < '3.11' and sys_platform == 'darwin'", - "python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux'", - "python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux'", -] -dependencies = [ - { name = "pydantic", marker = "python_full_version != '3.12.*' or sys_platform != 'darwin'" }, - { name = "starlette", version = "0.50.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version != '3.12.*' or sys_platform != 'darwin'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/fb/ca/e336cae3818f843392ce9e1e6f8cfc4b8dbdc1c2fcf81b34f7e6255ea05f/fastapi-0.1.17.tar.gz", hash = "sha256:a9a9b6cc32c38bab27a6549b94c44a30c70b485bc789d03de3aa8725f3394be5", size = 2826896, upload-time = "2019-01-05T13:39:54.842Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ce/04/3639503a8a1bac37e747597e687997c58b302be92bc633049376f59e7dd7/fastapi-0.1.17-py3-none-any.whl", hash = "sha256:a6aaad2f60684477480ac9d7a1c95e67f4696a722f184db467494bfdd5b8f29d", size = 105578, upload-time = "2019-01-05T13:39:45.597Z" }, -] - [[package]] name = "fastapi" version = "0.121.1" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version == '3.12.*' and sys_platform == 'darwin'", -] dependencies = [ - { name = "annotated-doc", marker = "python_full_version == '3.12.*' and sys_platform == 'darwin'" }, - { name = "pydantic", marker = "python_full_version == '3.12.*' and sys_platform == 'darwin'" }, - { name = "starlette", version = "0.49.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.12.*' and sys_platform == 'darwin'" }, - { name = "typing-extensions", marker = "python_full_version == '3.12.*' and sys_platform == 'darwin'" }, + { name = "annotated-doc" }, + { name = "pydantic" }, + { name = "starlette" }, + { name = "typing-extensions" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/6b/a4/29e1b861fc9017488ed02ff1052feffa40940cb355ed632a8845df84ce84/fastapi-0.121.1.tar.gz", hash = "sha256:b6dba0538fd15dab6fe4d3e5493c3957d8a9e1e9257f56446b5859af66f32441", size = 342523, upload-time = "2025-11-08T21:48:14.068Z" } wheels = [ @@ -2568,7 +2535,7 @@ wheels = [ [[package]] name = "mamba-ssm" version = "2.2.6.post3" -source = { registry = "https://pypi.org/simple" } +source = { git = "https://github.com/yfw/mamba?branch=general_stride_fix#62bc4455e3e4b2cc6c920b47edc888a9785c9ce3" } dependencies = [ { name = "einops" }, { name = "ninja" }, @@ -2578,7 +2545,6 @@ dependencies = [ { name = "transformers" }, { name = "triton", marker = "sys_platform == 'never'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b6/0c/9373a469ff7a33bdd0644e55fa45165ba3900274dcf7fe9f10ccc232aef9/mamba_ssm-2.2.6.post3.tar.gz", hash = "sha256:826a3cdb651959f191dac64502f8a29627d9116fe6bb7c57e4f562da1aea7bf3", size = 113913, upload-time = "2025-10-10T06:00:44.939Z" } [[package]] name = "markdown" @@ -2853,7 +2819,7 @@ requires-dist = [ { name = "datasets" }, { name = "flash-linear-attention" }, { name = "hydra-core", specifier = ">1.3,<=1.3.2" }, - { name = "mamba-ssm" }, + { name = "mamba-ssm", git = "https://github.com/yfw/mamba?branch=general_stride_fix" }, { name = "megatron-core", extras = ["dev", "mlm"], directory = "3rdparty/Megatron-Bridge/3rdparty/Megatron-LM" }, { name = "nemo-run", marker = "extra == 'recipes'", specifier = ">=0.5.0a0,<0.6.0" }, { name = "nvdlfw-inspect", marker = "extra == 'tensor-inspect'", specifier = "==0.2.1" }, @@ -2869,7 +2835,7 @@ requires-dist = [ { name = "timm" }, { name = "tqdm", specifier = ">=4.67.1" }, { name = "transformer-engine", extras = ["pytorch"], git = "https://github.com/NVIDIA/TransformerEngine.git?rev=release_v2.9" }, - { name = "transformers", specifier = ">=4.57.1" }, + { name = "transformers", specifier = ">=4.57.1,<5.0.0" }, { name = "typing-extensions" }, { name = "wandb", specifier = ">=0.19.10" }, ] @@ -2926,6 +2892,7 @@ dev = [ { name = "causal-conv1d" }, { name = "einops" }, { name = "emerging-optimizers" }, + { name = "fastapi" }, { name = "flash-linear-attention" }, { name = "flashinfer-python" }, { name = "mamba-ssm" }, @@ -2938,7 +2905,6 @@ dev = [ { name = "onnxscript", version = "0.5.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, { name = "onnxscript", version = "0.5.7.dev20251112", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, { name = "opentelemetry-api" }, - { name = "setuptools" }, { name = "tensorstore", version = "0.1.74", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or python_full_version >= '3.13'" }, { name = "tensorstore", version = "0.1.79", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' and python_full_version < '3.13'" }, { name = "tqdm" }, @@ -2955,42 +2921,49 @@ mlm = [ [package.metadata] requires-dist = [ - { name = "av", marker = "extra == 'dev'", specifier = "<16.0.0" }, + { name = "av", marker = "extra == 'dev'" }, + { name = "av", marker = "extra == 'lts'" }, { name = "causal-conv1d", marker = "extra == 'dev'", specifier = "~=1.5" }, + { name = "causal-conv1d", marker = "extra == 'lts'", specifier = "~=1.5" }, { name = "einops", marker = "extra == 'dev'", specifier = "~=0.8" }, - { name = "einops", marker = "extra == 'lts'" }, - { name = "emerging-optimizers", marker = "extra == 
'dev'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=cf9909b777ffac18e05b67a6708282cadc000942" }, + { name = "einops", marker = "extra == 'lts'", specifier = "~=0.8" }, + { name = "emerging-optimizers", marker = "extra == 'dev'", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0" }, + { name = "fastapi", marker = "extra == 'dev'", specifier = "~=0.50" }, + { name = "fastapi", marker = "extra == 'lts'", specifier = "~=0.50" }, { name = "flash-linear-attention", marker = "extra == 'dev'", specifier = "~=0.3.2" }, { name = "flashinfer-python", marker = "extra == 'dev'" }, + { name = "flashinfer-python", marker = "extra == 'lts'" }, { name = "flask-restful", marker = "extra == 'mlm'" }, { name = "mamba-ssm", marker = "extra == 'dev'", specifier = "~=2.2" }, + { name = "mamba-ssm", marker = "extra == 'lts'", specifier = "~=2.2" }, { name = "megatron-energon", extras = ["av-decode"], marker = "extra == 'dev'", specifier = "~=6.0" }, + { name = "megatron-energon", extras = ["av-decode"], marker = "extra == 'lts'", specifier = "~=6.0" }, { name = "multi-storage-client", marker = "extra == 'dev'", specifier = "~=0.27" }, - { name = "numpy", specifier = "<2.0.0" }, + { name = "multi-storage-client", marker = "extra == 'lts'", specifier = "~=0.27" }, + { name = "numpy" }, { name = "nv-grouped-gemm", marker = "extra == 'dev'", specifier = "~=1.1" }, - { name = "nvidia-modelopt", extras = ["torch"], marker = "sys_platform != 'darwin' and extra == 'dev'", specifier = ">=0.33.0a0,<0.34.0" }, - { name = "nvidia-resiliency-ext", marker = "extra == 'dev'", specifier = ">=0.4.0a0,<0.5.0" }, + { name = "nv-grouped-gemm", marker = "extra == 'lts'", specifier = "~=1.1" }, + { name = "nvidia-modelopt", extras = ["torch"], marker = "sys_platform != 'darwin' and extra == 'dev'" }, + { name = "nvidia-resiliency-ext", marker = "extra == 'dev'" }, { name = "nvtx", marker = "extra == 'dev'", specifier = "~=0.2" }, - { name = "nvtx", marker = "extra == 'lts'" }, + { name = "nvtx", marker = "extra == 'lts'", specifier = "~=0.2" }, { name = "onnxscript", marker = "extra == 'dev'" }, + { name = "onnxscript", marker = "extra == 'lts'" }, { name = "opentelemetry-api", marker = "extra == 'dev'", specifier = "~=1.33.1" }, + { name = "opentelemetry-api", marker = "extra == 'lts'", specifier = "~=1.33.1" }, { name = "packaging", specifier = ">=24.2" }, { name = "sentencepiece", marker = "extra == 'mlm'" }, - { name = "setuptools", marker = "extra == 'dev'", specifier = "<80.0.0" }, - { name = "setuptools", marker = "extra == 'lts'", specifier = "<80.0.0" }, { name = "tensorstore", marker = "extra == 'dev'", specifier = "~=0.1,!=0.1.46,!=0.1.72" }, - { name = "tensorstore", marker = "extra == 'lts'", specifier = "!=0.1.46,!=0.1.72" }, + { name = "tensorstore", marker = "extra == 'lts'", specifier = "~=0.1,!=0.1.46,!=0.1.72" }, { name = "tiktoken", marker = "extra == 'mlm'" }, { name = "torch" }, { name = "tqdm", marker = "extra == 'dev'" }, { name = "tqdm", marker = "extra == 'lts'" }, - { name = "transformer-engine", extras = ["pytorch"], marker = "extra == 'dev'", git = "https://github.com/NVIDIA/TransformerEngine.git?rev=release_v2.9" }, - { name = "transformers", marker = "extra == 'lts'" }, + { name = "transformer-engine", extras = ["core-cu13", "pytorch"], marker = "extra == 'dev'", specifier = ">=2.9.0a0,<2.10.0" }, { name = "transformers", marker = "extra == 'mlm'" }, { name = "wandb", marker = "extra == 'mlm'" }, { name = "wget", marker = "extra == 'dev'" }, { name = "wget", 
marker = "extra == 'lts'" }, - { name = "zarr", marker = "extra == 'lts'" }, ] provides-extras = ["mlm", "dev", "lts"] @@ -3017,7 +2990,6 @@ docs = [ { name = "sphinx-autodoc2" }, { name = "sphinx-copybutton" }, ] -flash-mla = [{ name = "flash-mla", git = "https://github.com/deepseek-ai/FlashMLA?rev=9edee0c022cd0938148a18e334203b0aab43aa19" }] linting = [ { name = "black", specifier = "==24.4.2" }, { name = "flake8", specifier = "==7.1.0" }, @@ -3025,6 +2997,10 @@ linting = [ { name = "pylint", specifier = "==3.2.6" }, { name = "ruff", specifier = "~=0.9.0" }, ] +no-pypi-wheels = [ + { name = "emerging-optimizers", git = "https://github.com/NVIDIA-NeMo/Emerging-Optimizers.git?rev=v0.1.0" }, + { name = "flash-mla", git = "https://github.com/deepseek-ai/FlashMLA?rev=9edee0c022cd0938148a18e334203b0aab43aa19" }, +] test = [ { name = "coverage" }, { name = "nemo-run", git = "https://github.com/NVIDIA-NeMo/Run.git?rev=01a9a8ba360f7b2908728ad0516e0ad9d936966d" }, @@ -3209,8 +3185,7 @@ dependencies = [ { name = "click" }, { name = "cloudpickle" }, { name = "databricks-sdk" }, - { name = "fastapi", version = "0.1.17", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version != '3.12.*' or sys_platform != 'darwin'" }, - { name = "fastapi", version = "0.121.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.12.*' and sys_platform == 'darwin'" }, + { name = "fastapi" }, { name = "gitpython" }, { name = "importlib-metadata" }, { name = "opentelemetry-api" }, @@ -6075,7 +6050,7 @@ resolution-markers = [ dependencies = [ { name = "colorama", marker = "python_full_version < '3.11'" }, { name = "sphinx", version = "8.1.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "starlette", version = "0.50.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "starlette", marker = "python_full_version < '3.11'" }, { name = "uvicorn", marker = "python_full_version < '3.11'" }, { name = "watchfiles", marker = "python_full_version < '3.11'" }, { name = "websockets", marker = "python_full_version < '3.11'" }, @@ -6106,8 +6081,7 @@ resolution-markers = [ dependencies = [ { name = "colorama", marker = "python_full_version >= '3.11'" }, { name = "sphinx", version = "8.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "starlette", version = "0.49.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.12.*' and sys_platform == 'darwin'" }, - { name = "starlette", version = "0.50.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and sys_platform != 'darwin') or (python_full_version == '3.11.*' and sys_platform == 'darwin') or (python_full_version >= '3.13' and sys_platform == 'darwin')" }, + { name = "starlette", marker = "python_full_version >= '3.11'" }, { name = "uvicorn", marker = "python_full_version >= '3.11'" }, { name = "watchfiles", marker = "python_full_version >= '3.11'" }, { name = "websockets", marker = "python_full_version >= '3.11'" }, @@ -6270,48 +6244,15 @@ wheels = [ name = "starlette" version = "0.49.3" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version == '3.12.*' and sys_platform == 'darwin'", -] dependencies = [ - { name = "anyio", marker = "python_full_version == '3.12.*' and sys_platform == 'darwin'" }, - { name = "typing-extensions", marker 
= "python_full_version == '3.12.*' and sys_platform == 'darwin'" }, + { name = "anyio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/de/1a/608df0b10b53b0beb96a37854ee05864d182ddd4b1156a22f1ad3860425a/starlette-0.49.3.tar.gz", hash = "sha256:1c14546f299b5901a1ea0e34410575bc33bbd741377a10484a54445588d00284", size = 2655031, upload-time = "2025-11-01T15:12:26.13Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/a3/e0/021c772d6a662f43b63044ab481dc6ac7592447605b5b35a957785363122/starlette-0.49.3-py3-none-any.whl", hash = "sha256:b579b99715fdc2980cf88c8ec96d3bf1ce16f5a8051a7c2b84ef9b1cdecaea2f", size = 74340, upload-time = "2025-11-01T15:12:24.387Z" }, ] -[[package]] -name = "starlette" -version = "0.50.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.13' and sys_platform == 'darwin'", - "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", - "python_full_version == '3.12.*' and platform_machine != 'aarch64' and sys_platform == 'linux'", - "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'", - "python_full_version == '3.12.*' and sys_platform != 'darwin' and sys_platform != 'linux'", - "python_full_version == '3.11.*' and sys_platform == 'darwin'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux'", - "python_full_version < '3.11' and sys_platform == 'darwin'", - "python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux'", - "python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux'", -] -dependencies = [ - { name = "anyio", marker = "python_full_version != '3.12.*' or sys_platform != 'darwin'" }, - { name = "typing-extensions", marker = "(python_full_version < '3.13' and sys_platform != 'darwin') or (python_full_version < '3.12' and sys_platform == 'darwin')" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/ba/b8/73a0e6a6e079a9d9cfa64113d771e421640b6f679a52eeb9b32f72d871a1/starlette-0.50.0.tar.gz", hash = "sha256:a2a17b22203254bcbc2e1f926d2d55f3f9497f769416b3190768befe598fa3ca", size = 2646985, upload-time = "2025-11-01T15:25:27.516Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d9/52/1064f510b141bd54025f9b55105e26d1fa970b9be67ad766380a3c9b74b0/starlette-0.50.0-py3-none-any.whl", hash = "sha256:9e5391843ec9b6e472eed1365a78c8098cfceb7a74bfd4d6b1c0c0095efb3bca", size = 74033, upload-time = "2025-11-01T15:25:25.461Z" }, -] - [[package]] name = "sympy" version = "1.14.0"