[Refactor] LLM data structures

Vincent Moens · Vincent Moens · commit 73c7b0ac4d37 · 2025-03-05T17:09:27.000-08:00
ghstack-source-id: 8483fe0 Pull Request resolved: #2834
diff --git a/docs/source/reference/data.rst b/docs/source/reference/data.rst
@@ -1133,6 +1133,9 @@ efficient sampling.
     get_dataloader
     ConstantKLController
     AdaptiveKLController
+    LLMData
+    LLMInput
+    LLMOutput
 
 
 Utils
diff --git a/torchrl/data/__init__.py b/torchrl/data/__init__.py
@@ -8,6 +8,9 @@
     ConstantKLController,
     create_infinite_iterator,
     get_dataloader,
+    LLMData,
+    LLMInput,
+    LLMOutput,
     PairwiseDataset,
     PromptData,
     PromptTensorDictTokenizer,
@@ -103,96 +106,99 @@
 from .utils import check_no_exclusive_keys, consolidate_spec, contains_lazy_spec
 
 __all__ = [
+    "AdaptiveKLController",
+    "Binary",
+    "BinaryDiscreteTensorSpec",
     "BinaryToDecimal",
-    "HashToInt",
-    "MCTSForest",
-    "QueryModule",
-    "RandomProjectionHash",
-    "SipHash",
-    "TensorDictMap",
-    "TensorMap",
-    "Tree",
-    "MultiStep",
+    "Bounded",
+    "BoundedContinuous",
+    "BoundedTensorSpec",
+    "Categorical",
+    "Choice",
+    "Composite",
+    "CompositeSpec",
+    "ConstantKLController",
+    "DEVICE_TYPING",
+    "DiscreteTensorSpec",
     "Flat2TED",
     "FlatStorageCheckpointer",
     "H5Combine",
     "H5Split",
     "H5StorageCheckpointer",
+    "HashToInt",
     "ImmutableDatasetWriter",
+    "LLMData",
+    "LLMInput",
+    "LLMOutput",
     "LazyMemmapStorage",
     "LazyStackStorage",
+    "LazyStackedCompositeSpec",
+    "LazyStackedTensorSpec",
     "LazyTensorStorage",
     "ListStorage",
     "ListStorageCheckpointer",
+    "MCTSForest",
+    "MultiCategorical",
+    "MultiDiscreteTensorSpec",
+    "MultiOneHot",
+    "MultiOneHotDiscreteTensorSpec",
+    "MultiStep",
     "Nested2TED",
     "NestedStorageCheckpointer",
+    "NonTensor",
+    "NonTensorSpec",
+    "OneHot",
+    "OneHotDiscreteTensorSpec",
+    "PairwiseDataset",
     "PrioritizedReplayBuffer",
     "PrioritizedSampler",
     "PrioritizedSliceSampler",
+    "PromptData",
+    "PromptTensorDictTokenizer",
+    "QueryModule",
+    "RandomProjectionHash",
     "RandomSampler",
     "RemoteTensorDictReplayBuffer",
     "ReplayBuffer",
     "ReplayBufferEnsemble",
+    "RewardData",
+    "RolloutFromModel",
     "RoundRobinWriter",
     "SamplerEnsemble",
     "SamplerWithoutReplacement",
+    "SipHash",
     "SliceSampler",
     "SliceSamplerWithoutReplacement",
+    "Stacked",
+    "StackedComposite",
     "Storage",
     "StorageCheckpointerBase",
     "StorageEnsemble",
     "StorageEnsembleCheckpointer",
     "TED2Flat",
     "TED2Nested",
+    "TensorDictMap",
     "TensorDictMaxValueWriter",
     "TensorDictPrioritizedReplayBuffer",
     "TensorDictReplayBuffer",
     "TensorDictRoundRobinWriter",
+    "TensorDictTokenizer",
+    "TensorMap",
+    "TensorSpec",
     "TensorStorage",
     "TensorStorageCheckpointer",
-    "Writer",
-    "WriterEnsemble",
-    "AdaptiveKLController",
-    "ConstantKLController",
-    "create_infinite_iterator",
-    "get_dataloader",
-    "PairwiseDataset",
-    "PromptData",
-    "PromptTensorDictTokenizer",
-    "RewardData",
-    "RolloutFromModel",
-    "TensorDictTokenizer",
     "TokenizedDatasetLoader",
-    "Binary",
-    "BinaryDiscreteTensorSpec",
-    "Bounded",
-    "BoundedContinuous",
-    "BoundedTensorSpec",
-    "Categorical",
-    "Choice",
-    "Composite",
-    "CompositeSpec",
-    "DEVICE_TYPING",
-    "DiscreteTensorSpec",
-    "LazyStackedCompositeSpec",
-    "LazyStackedTensorSpec",
-    "MultiCategorical",
-    "MultiDiscreteTensorSpec",
-    "MultiOneHot",
-    "MultiOneHotDiscreteTensorSpec",
-    "NonTensor",
-    "NonTensorSpec",
-    "OneHot",
-    "OneHotDiscreteTensorSpec",
-    "Stacked",
-    "StackedComposite",
-    "TensorSpec",
+    "Tree",
     "Unbounded",
     "UnboundedContinuous",
     "UnboundedContinuousTensorSpec",
     "UnboundedDiscrete",
     "UnboundedDiscreteTensorSpec",
+    "Writer",
+    "WriterEnsemble",
     "check_no_exclusive_keys",
     "consolidate_spec",
     "contains_lazy_spec",
+    "create_infinite_iterator",
+    "get_dataloader",
 ]
diff --git a/torchrl/data/llm/__init__.py b/torchrl/data/llm/__init__.py
@@ -11,18 +11,21 @@
 )
 from .prompt import PromptData, PromptTensorDictTokenizer
 from .reward import PairwiseDataset, RewardData
-from .utils import AdaptiveKLController, ConstantKLController, RolloutFromModel
+from .utils import AdaptiveKLController, ConstantKLController, RolloutFromModel, LLMData, LLMOutput, LLMInput
 
 __all__ = [
-    "create_infinite_iterator",
-    "get_dataloader",
-    "TensorDictTokenizer",
-    "TokenizedDatasetLoader",
+    "AdaptiveKLController",
+    "ConstantKLController",
+    "LLMData",
+    "LLMInput",
+    "LLMOutput",
+    "PairwiseDataset",
     "PromptData",
     "PromptTensorDictTokenizer",
-    "PairwiseDataset",
     "RewardData",
-    "AdaptiveKLController",
-    "ConstantKLController",
     "RolloutFromModel",
+    "TensorDictTokenizer",
+    "TokenizedDatasetLoader",
+    "create_infinite_iterator",
+    "get_dataloader",
 ]
diff --git a/torchrl/data/llm/utils.py b/torchrl/data/llm/utils.py
@@ -7,10 +7,11 @@
 import abc
 import collections
 import importlib
+from typing import TypeVar
 
 import numpy as np
 import torch
-from tensordict import TensorDict
+from tensordict import TensorClass, TensorDict
 from torch import nn, Tensor
 from torch.nn import functional as F
 
@@ -541,3 +542,88 @@ def step_scheduler(self):
             # remove all values
             while len(self._kl_queue):
                 self._kl_queue.remove(self._kl_queue[0])
+
+LLMInpOut = TypeVar("LLMInpOut")
+
+class LLMInput(TensorClass["nocast"]):
+    """Represents the input to a Large Language Model (LLM).
+
+    Attributes:
+        tokens (torch.Tensor): The input tokens as a tensor.
+        attention_mask (torch.Tensor, optional): The attention mask for the input tokens. Defaults to `None`.
+        token_list (list[int] | list[list[int]], optional): The input tokens as a list of integers or a list of lists of integers. Defaults to `None`.
+        text (str | list[str], optional): The input text as a string or a list of strings. Defaults to `None`.
+
+    .. seealso:: :class:`~torchrl.data.LLMOutput` and :class:`~torchrl.data.LLMData`.
+
+    """
+    tokens: torch.Tensor
+    attention_mask: torch.Tensor | None = None
+    token_list: list[int] | list[list[int]] | None = None
+    text: str | list[str] | None = None
+
+class LLMOutput(TensorClass["nocast"]):
+    """Represents the output from a Large Language Model (LLM).
+
+    Attributes:
+        tokens (torch.Tensor): The output tokens as a tensor.
+        tokens_response (torch.Tensor, optional): The response tokens generated by the model. Defaults to `None`.
+
+            .. note:: the response is the sequence of tokens output by a model, excluding the input
+                tokens.
+
+        token_list (list[int] | list[list[int]], optional): The output tokens as a list of integers or a list of lists of integers. Defaults to `None`.
+        tokens_response_list (list[list[int]], optional): The response tokens generated by the model as a list of lists of integers. Defaults to `None`.
+        logits (torch.Tensor, optional): The logits of the output tokens. Defaults to `None`.
+        log_probs (torch.Tensor, optional): The log probabilities of the output tokens. Defaults to `None`.
+        text (str | list[str], optional): The output text as a string or a list of strings. Defaults to `None`.
+
+    .. seealso:: :class:`~torchrl.data.LLMInput` and :class:`~torchrl.data.LLMData`.
+
+    """
+    tokens: torch.Tensor
+    tokens_response: torch.Tensor | None = None
+    token_list: list[int] | list[list[int]] | None = None
+    tokens_response_list: list[list[int]] | None = None
+    logits: torch.Tensor | None = None
+    log_probs: torch.Tensor | None = None
+    text: str | list[str] | None = None
+
+    @classmethod
+    def from_vllm_output(cls: type[LLMInpOut], vllm_output) -> LLMInpOut:
+        # placeholder
+        raise NotImplementedError
+
+class LLMData(TensorClass["nocast"]):
+    """Represents the input or output of a Large Language Model (LLM).
+
+    Other algorithm-specific attributes such as `reward`, `advantages` or done states are handled automatically by the
+    envs and, therefore, are not included in this class.
+
+    Attributes:
+        tokens (torch.Tensor): The input/output tokens as a tensor.
+        attention_mask (torch.Tensor, optional): The attention mask for the input tokens. Defaults to `None`.
+        tokens_response (torch.Tensor, optional): The response tokens generated by the model. Defaults to `None`.
+
+            .. note:: the response is the sequence of tokens output by a model, excluding the input
+                tokens.
+
+        token_list (list[int] | list[list[int]], optional): The output tokens as a list of integers or a list of lists
+            of integers. Defaults to `None`.
+        tokens_response_list (list[list[int]], optional): The response tokens generated by the model as a list of
+            lists of integers. Defaults to `None`.
+        logits (torch.Tensor, optional): The logits of the output tokens. Defaults to `None`.
+        log_probs (torch.Tensor, optional): The log probabilities of the output tokens. Defaults to `None`.
+        text (str | list[str], optional): The output text as a string or a list of strings. Defaults to `None`.
+
+    .. seealso:: :class:`~torchrl.data.LLMInput` and :class:`~torchrl.data.LLMOutput`.
+
+    """
+    tokens: torch.Tensor
+    tokens_response: torch.Tensor | None = None
+    attention_mask: torch.Tensor | None = None
+    token_list: list[int] | list[list[int]] | None = None
+    tokens_response_list: list[list[int]] | None = None
+    logits: torch.Tensor | None = None
+    log_probs: torch.Tensor | None = None
+    text: str | list[str] | None = None