[misc] feat: add more utils of tensordict (verl-project#4322)

vermouth1992 · PeterSH6 · gemini-code-assist[bot] · web-flow · commit 312263169b02 · 2025-11-27T20:07:37.000+08:00
### What does this PR do? - Add get/get_keys/pop/pop_keys of tensordict ### Checklist Before Starting - [ ] Search for similar PRs. Paste at least one query link here: ... - [ ] Format the PR title as `[{modules}] {type}: {description}` (This will be checked by the CI) - `{modules}` include `fsdp`, `megatron`, `sglang`, `vllm`, `rollout`, `trainer`, `ci`, `training_utils`, `recipe`, `hardware`, `deployment`, `ray`, `worker`, `single_controller`, `misc`, `perf`, `model`, `algo`, `env`, `tool`, `ckpt`, `doc`, `data` - If this PR involves multiple modules, separate them with `,` like `[megatron, fsdp, doc]` - `{type}` is in `feat`, `fix`, `refactor`, `chore`, `test` - If this PR breaks any API (CLI arguments, config, function signature, etc.), add `[BREAKING]` to the beginning of the title. - Example: `[BREAKING][fsdp, megatron] feat: dynamic batching` ### Test > For changes that can not be tested by CI (e.g., algorithm implementation, new model support), validate by experiment(s) and show results like training curve plots, evaluation results, etc. ### API and Usage Example > Demonstrate how the API changes if any, and provide usage example(s) if possible. ```python # Add code snippet or script demonstrating how to use this ``` ### Design & Code Changes > Demonstrate the high-level design if this PR is complex, and list the specific changes. ### Checklist Before Submitting > [!IMPORTANT] > Please check all the following items before requesting a review, otherwise the reviewer might deprioritize this PR for review. - [ ] Read the [Contribute Guide](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md). - [ ] Apply [pre-commit checks](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md#code-linting-and-formatting): `pre-commit install && pre-commit run --all-files --show-diff-on-failure --color=always` - [ ] Add / Update [the documentation](https://github.com/volcengine/verl/tree/main/docs). 
- [ ] Add unit or end-to-end test(s) to [the CI workflow](https://github.com/volcengine/verl/tree/main/.github/workflows) to cover all the code. If not feasible, explain why: ... - [ ] Once your PR is ready for CI, send a message in [the `ci-request` channel](https://verl-project.slack.com/archives/C091TCESWB1) in [the `verl` Slack workspace](https://join.slack.com/t/verl-project/shared_invite/zt-3855yhg8g-CTkqXu~hKojPCmo7k_yXTQ). (If not accessible, please try [the Feishu group (飞书群)](https://applink.larkoffice.com/client/chat/chatter/add_by_link?link_token=772jd4f1-cd91-441e-a820-498c6614126a).) --------- Co-authored-by: Guangming Sheng <petershengwhu@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
diff --git a/tests/test_protocol_v2_on_cpu.py b/tests/test_protocol_v2_on_cpu.py
@@ -328,17 +328,65 @@ def test_chunk_concat():
 
 
 def test_pop():
-    obs = torch.randn(100, 10)
-    act = torch.randn(100, 3)
-    dataset = tu.get_tensordict({"obs": obs, "act": act}, non_tensor_dict={"2": 2, "1": 1})
+    obs = torch.randn(3, 10)
+    act = torch.randn(3, 3)
+    labels = ["a", ["b"], []]
+    dataset = tu.get_tensordict({"obs": obs, "act": act, "labels": labels}, non_tensor_dict={"2": 2, "1": 1})
+
+    dataset1 = copy.deepcopy(dataset)
+
+    # test pop keys
+    popped_dataset = tu.pop_keys(dataset, keys=["obs", "2"])
+
+    assert popped_dataset.batch_size[0] == 3
+
+    assert popped_dataset.keys() == {"obs", "2"}
+    assert torch.all(torch.eq(popped_dataset["obs"], obs)).item()
+    assert popped_dataset["2"] == 2
+
+    assert dataset.keys() == {"act", "1", "labels"}
+
+    # test pop non-existent key
+    with pytest.raises(KeyError):
+        tu.pop_keys(dataset, keys=["obs", "2"])
+
+    # test single pop
+    # NonTensorData
+    assert tu.pop(dataset1, key="2") == 2
+    # NonTensorStack
+    assert tu.pop(dataset1, key="labels") == ["a", ["b"], []]
+    # Tensor
+    assert torch.all(torch.eq(tu.pop(dataset1, key="obs"), obs)).item()
+
+
+def test_get():
+    obs = torch.randn(3, 10)
+    act = torch.randn(3, 3)
+    labels = ["a", ["b"], []]
+    dataset = tu.get_tensordict({"obs": obs, "act": act, "labels": labels}, non_tensor_dict={"2": 2, "1": 1})
+
+    # test get keys
+    popped_dataset = tu.get_keys(dataset, keys=["obs", "2"])
+
+    assert popped_dataset.batch_size[0] == 3
 
-    poped_dataset = tu.pop(dataset, keys=["obs", "2"])
+    assert torch.all(torch.eq(popped_dataset["obs"], dataset["obs"])).item()
 
-    assert poped_dataset.batch_size[0] == 100
+    assert popped_dataset["2"] == dataset["2"]
 
-    assert poped_dataset.keys() == {"obs", "2"}
+    # test get non-existent key
+    with pytest.raises(KeyError):
+        tu.get_keys(dataset, keys=["obs", "3"])
 
-    assert dataset.keys() == {"act", "1"}
+    # test single get
+    # NonTensorData
+    assert tu.get(dataset, key="2") == 2
+    # NonTensorStack
+    assert tu.get(dataset, key="labels") == ["a", ["b"], []]
+    # Tensor
+    assert torch.all(torch.eq(tu.get(dataset, key="obs"), obs)).item()
+    # Non-existent key
+    assert tu.get(dataset, key="3", default=3) == 3
 
 
 def test_repeat():
@@ -531,7 +579,7 @@ def test_dataproto_no_batch():
     selected = data.select("labels")
 
     assert selected["labels"] == labels
-    pop_data = tu.pop(data, keys=["labels"])
+    pop_data = tu.pop_keys(data, keys=["labels"])
     assert pop_data["labels"] == labels
     assert "labels" not in data
 
diff --git a/verl/utils/tensordict_utils.py b/verl/utils/tensordict_utils.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import logging
-from typing import Iterator
+from typing import Any, Iterable
 
 import torch
 from tensordict import TensorDict
@@ -256,7 +256,8 @@ def union_tensor_dict(tensor_dict1: TensorDict, tensor_dict2: TensorDict) -> Ten
     )
     for key in tensor_dict2.keys():
         if key not in tensor_dict1.keys():
-            tensor_dict1[key] = tensor_dict2[key]
+            # tensor_dict2[key] unwraps non-tensor entries; .get(key) keeps the NonTensorData/NonTensorStack wrapper.
+            tensor_dict1[key] = tensor_dict2.get(key)
         else:
             if isinstance(tensor_dict2[key], torch.Tensor):
                 assert tensor_dict1[key].equal(tensor_dict2[key]), (
@@ -325,10 +326,59 @@ def assert_tensordict_eq(tensordict1: TensorDict, tensordict2: TensorDict):
             assert val == val2
 
 
-def pop(tensordict: TensorDict, keys: Iterator[str]) -> TensorDict:
+def get(tensordict: TensorDict, key: str, default=None) -> Any:
+    if key not in tensordict:
+        return default
+
+    output = tensordict.get(key)
+    if isinstance(output, torch.Tensor):
+        return output
+    elif isinstance(output, NonTensorStack):
+        return output.tolist()
+    else:
+        assert isinstance(output, NonTensorData)
+        return output.data
+
+
+def get_keys(tensordict: TensorDict, keys: Iterable[str]) -> TensorDict:
+    tensor_output = {}
+    non_tensor_output = {}
+    for key in keys:
+        if key not in tensordict.keys():
+            raise KeyError(f"key {key} not in tensordict")
+        output = tensordict.get(key)
+        if isinstance(output, torch.Tensor):
+            tensor_output[key] = output
+        elif isinstance(output, NonTensorStack):
+            tensor_output[key] = output.tolist()
+        else:
+            assert isinstance(output, NonTensorData)
+            non_tensor_output[key] = output.data
+
+    return get_tensordict(tensor_output, non_tensor_output)
+
+
+def pop(tensordict: TensorDict, key: str, default=None) -> Any:
+    _sentinel = object()
+    output = tensordict.pop(key, _sentinel)
+    if output is _sentinel:
+        return default
+
+    if isinstance(output, torch.Tensor):
+        return output
+    elif isinstance(output, NonTensorStack):
+        return output.tolist()
+    else:
+        assert isinstance(output, NonTensorData)
+        return output.data
+
+
+def pop_keys(tensordict: TensorDict, keys: Iterable[str]) -> TensorDict:
     tensor_output = {}
     non_tensor_output = {}
     for key in keys:
+        if key not in tensordict.keys():
+            raise KeyError(f"key {key} not in tensordict")
         output = tensordict.get(key)
         if isinstance(output, torch.Tensor):
             tensor_output[key] = tensordict.pop(key)