Commit e00df3a

Rename grounding to vqa (#124)
1 parent 9c88884 commit e00df3a

19 files changed (+141 −142 lines)

configs/dev/ci_config.json (1 addition, 1 deletion)

@@ -2,7 +2,7 @@
   "dataset_mixture": {
     "datasets": [
       {
-        "grounding": "dummy"
+        "vqa": "dummy"
       },
       {
         "repo_id": "lerobot/droid_100",

configs/examples/value_config.json (1 addition, 1 deletion)

@@ -5,7 +5,7 @@
       "repo_id": "physical-intelligence/libero"
     },
     {
-      "grounding": "clevr"
+      "vqa": "clevr"
     }
   ],
   "weights": [
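Each entry in the `datasets` list selects a dataset type by which key it sets: `repo_id` for a LeRobot dataset, `vqa` for a VQA dataset. A minimal sketch of how such a mixture config can be classified — `classify` is a hypothetical helper for illustration, not part of OpenTau:

```python
import json

# Abridged mixture config mirroring configs/examples/value_config.json.
CONFIG = json.loads("""
{
  "dataset_mixture": {
    "datasets": [
      {"repo_id": "physical-intelligence/libero"},
      {"vqa": "clevr"}
    ]
  }
}
""")

def classify(entry: dict) -> str:
    """Return 'lerobot' or 'vqa' depending on which key the entry sets."""
    # Setting both or neither key is invalid, matching DatasetConfig's rule.
    if ("repo_id" in entry) == ("vqa" in entry):
        raise ValueError("Exactly one of `repo_id` or `vqa` should be set.")
    return "lerobot" if "repo_id" in entry else "vqa"

kinds = [classify(e) for e in CONFIG["dataset_mixture"]["datasets"]]
```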

docs/source/concepts.rst (2 additions, 2 deletions)

@@ -16,7 +16,7 @@ The dataset format is versioned (currently v2.1) and utilizes parquet files for
 There are currently two types of datasets:

 * ``LeRobotDataset``: For robotic data.
-* ``GroundingDataset``: For VLM datasets such as Visual Question Answering (VQA) or visual grounding.
+* ``VQADataset``: For VLM training datasets.

 These datasets are used to train policies.

@@ -25,7 +25,7 @@ DatasetMixture
 To train policies on multiple datasets simultaneously, OpenTau uses ``opentau.datasets.dataset_mixture.WeightedDatasetMixture``.
 This class:

-* Combines multiple ``LeRobotDataset`` and ``GroundingDataset`` instances.
+* Combines multiple ``LeRobotDataset`` and ``VQADataset`` instances.
 * Different weights can be assigned to each dataset to control the sampling frequency.
 * Aggregates statistics from all constituent datasets to ensure consistent normalization across the mixture.
 * Resamples the action output frequency to match the action frequency specified in the configuration.
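The weighted-sampling behavior described above can be sketched with stdlib tools. This is a toy illustration of the idea, not the actual ``WeightedDatasetMixture`` implementation; dataset contents and weights are made up:

```python
import random

# Stand-in datasets: any indexable collections, deliberately different sizes.
robot_ds = ["robot_sample"] * 100
vqa_ds = ["vqa_sample"] * 300

def sample_mixture(datasets, weights, n, seed=0):
    """Draw n items; weights control how often each dataset is chosen,
    independent of its size."""
    rng = random.Random(seed)
    out = []
    for _ in range(n):
        ds = rng.choices(datasets, weights=weights, k=1)[0]
        out.append(ds[rng.randrange(len(ds))])
    return out

# The smaller robot dataset dominates the batch because of its 0.8 weight.
batch = sample_mixture([robot_ds, vqa_ds], weights=[0.8, 0.2], n=1000)
```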

src/opentau/__init__.py (3 additions, 3 deletions)

@@ -25,7 +25,7 @@
 - `available_tasks_per_env`: Mapping of environments to their available tasks
 - `available_datasets_per_env`: Mapping of environments to their compatible datasets
 - `available_real_world_datasets`: List of real-world robot datasets
-- `available_grounding_datasets`: Registry for grounding datasets (populated via decorator)
+- `available_vqa_datasets`: Registry for VQA datasets (populated via decorator)
 - `available_policies`: List of available policy types (e.g., "pi0", "pi05", "value")
 - `available_policies_per_env`: Mapping of environments to their compatible policies

@@ -142,7 +142,7 @@
     "lerobot/usc_cloth_sim",
 ]

-available_grounding_datasets = {}
+available_vqa_datasets = {}

 available_datasets = sorted(
     set(itertools.chain(*available_datasets_per_env.values(), available_real_world_datasets))

@@ -177,4 +177,4 @@ def decorator(cls):
     return register


-register_grounding_dataset = registry_factory(available_grounding_datasets)
+register_vqa_dataset = registry_factory(available_vqa_datasets)
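The diff implies a decorator-registry pattern: `registry_factory(registry)` yields a decorator that records classes in the given dict, so `available_vqa_datasets` fills up as dataset modules are imported for side effects. A minimal sketch of one plausible shape of that pattern (not OpenTau's exact code):

```python
# registry_factory builds a register decorator bound to a specific dict.
def registry_factory(registry: dict):
    def register(name: str):
        def decorator(cls):
            registry[name] = cls  # record the class under the given name
            return cls            # leave the class itself unchanged
        return decorator
    return register

available_vqa_datasets = {}
register_vqa_dataset = registry_factory(available_vqa_datasets)

# Importing a module containing this definition populates the registry.
@register_vqa_dataset("dummy")
class DummyVQADataset:
    pass
```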

src/opentau/configs/default.py (7 additions, 7 deletions)

@@ -53,9 +53,9 @@ class DatasetConfig:

     Args:
         repo_id: HuggingFace repository ID for the dataset. Exactly one of
-            `repo_id` or `grounding` must be set.
-        grounding: Grounding dataset identifier. Exactly one of `repo_id` or
-            `grounding` must be set.
+            `repo_id` or `vqa` must be set.
+        vqa: VQA dataset identifier. Exactly one of `repo_id` or
+            `vqa` must be set.
         root: Root directory where the dataset will be stored (e.g. 'dataset/path').
             Defaults to None.
         episodes: List of episode indices to use from the dataset. If None, all

@@ -72,13 +72,13 @@ class DatasetConfig:
             standard feature names. Defaults to None.

     Raises:
-        ValueError: If both or neither of `repo_id` and `grounding` are set, or
+        ValueError: If both or neither of `repo_id` and `vqa` are set, or
             if `data_features_name_mapping` is provided.
             is provided.
     """

     repo_id: str | None = None
-    grounding: str | None = None
+    vqa: str | None = None
     # Root directory where the dataset will be stored (e.g. 'dataset/path').
     root: str | None = None
     episodes: list[int] | None = None

@@ -98,8 +98,8 @@ class DatasetConfig:

     def __post_init__(self):
         """Validate dataset configuration and register custom mappings if provided."""
-        if (self.repo_id is None) == (self.grounding is None):
-            raise ValueError("Exactly one of `repo_id` or `grounding` for Dataset config should be set.")
+        if (self.repo_id is None) == (self.vqa is None):
+            raise ValueError("Exactly one of `repo_id` or `vqa` for Dataset config should be set.")

         # data_features_name_mapping have to be provided if it is not already in standard_data_format_mapping.py

src/opentau/datasets/__init__.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@
1616
This module provides a comprehensive toolkit for loading, creating, managing, and
1717
processing datasets for training vision-language-action (VLA) models. It supports
1818
both robot learning datasets (with actions and states) and vision-language
19-
grounding datasets (for multimodal understanding tasks).
19+
vqa datasets (for multimodal understanding tasks).
2020
2121
The module is organized into several key components:
2222
2323
- **Core Datasets**: LeRobotDataset for robot learning data with support for
2424
temporal alignment, multi-modal data, and version compatibility.
25-
- **Grounding Datasets**: Vision-language datasets (CLEVR, COCO-QA, PIXMO, VSR)
25+
- **VQA Datasets**: Vision-language datasets (CLEVR, COCO-QA, PIXMO, VSR)
2626
for training visual understanding without robot actions.
2727
- **Dataset Mixtures**: WeightedDatasetMixture for combining multiple datasets
2828
with controlled sampling proportions.
@@ -53,7 +53,7 @@
5353
Main Modules:
5454
5555
- **lerobot_dataset**: Core dataset implementation for robot learning data.
56-
- **grounding**: Vision-language grounding datasets (CLEVR, COCO-QA, PIXMO, VSR).
56+
- **vqa**: Vision-language vqa datasets (CLEVR, COCO-QA, PIXMO, VSR).
5757
- **dataset_mixture**: Weighted combination of multiple datasets.
5858
- **factory**: Factory functions for creating datasets from configurations.
5959
- **utils**: Utility functions for I/O, metadata management, and validation.
@@ -76,9 +76,9 @@
7676
>>> from opentau.datasets.factory import make_dataset
7777
>>> dataset = make_dataset(dataset_cfg, train_cfg)
7878
79-
Access grounding datasets:
79+
Access vqa datasets:
8080
81-
>>> from opentau import available_grounding_datasets
82-
>>> print(list(available_grounding_datasets.keys()))
81+
>>> from opentau import available_vqa_datasets
82+
>>> print(list(available_vqa_datasets.keys()))
8383
['clevr', 'cocoqa', 'dummy', 'pixmo', 'vsr']
8484
"""

src/opentau/datasets/factory.py (17 additions, 18 deletions)

@@ -25,7 +25,7 @@
 The factory supports two types of datasets:
 1. LeRobot datasets: Standard robot learning datasets loaded from HuggingFace
    repositories with configurable delta timestamps for temporal alignment.
-2. Grounding datasets: Vision-language grounding datasets (CLEVR, COCO-QA,
+2. VQA datasets: Vision-language VQA datasets (CLEVR, COCO-QA,
    PIXMO, VSR, etc.) for multimodal learning tasks.

 Key Features:

@@ -36,7 +36,7 @@
    during dataset creation.
 - Imagenet stats override: Optionally replaces dataset statistics with
   ImageNet normalization statistics for camera features.
-- Grounding dataset registration: Supports extensible grounding dataset
+- VQA dataset registration: Supports extensible VQA dataset
   registration through side-effect imports.

 Functions:

@@ -68,12 +68,12 @@
 import torch

 # NOTE: Don't delete; imported for side effects.
-import opentau.datasets.grounding.clevr  # noqa: F401
-import opentau.datasets.grounding.cocoqa  # noqa: F401
-import opentau.datasets.grounding.dummy  # noqa: F401
-import opentau.datasets.grounding.pixmo  # noqa: F401
-import opentau.datasets.grounding.vsr  # noqa: F401
-from opentau import available_grounding_datasets
+import opentau.datasets.vqa.clevr  # noqa: F401
+import opentau.datasets.vqa.cocoqa  # noqa: F401
+import opentau.datasets.vqa.dummy  # noqa: F401
+import opentau.datasets.vqa.pixmo  # noqa: F401
+import opentau.datasets.vqa.vsr  # noqa: F401
+from opentau import available_vqa_datasets
 from opentau.configs.default import DatasetConfig
 from opentau.configs.train import TrainPipelineConfig
 from opentau.datasets.dataset_mixture import WeightedDatasetMixture

@@ -169,23 +169,22 @@ def make_dataset(
         "episode_end_idx", "current_idx", "last_step", "episode_index", and "timestamp". Defaults to False.

     Raises:
-        ValueError: If exactly one of `cfg.grounding` and `cfg.repo_id` is not provided.
-        ValueError: If `cfg.grounding` is not a supported grounding dataset.
+        ValueError: If exactly one of `cfg.vqa` and `cfg.repo_id` is not provided.
+        ValueError: If `cfg.vqa` is not a supported VQA dataset.

     Returns:
         BaseDataset or Tuple[BaseDataset, BaseDataset]: A single dataset or a tuple of (train_dataset, val_dataset) if val_freq > 0.
     """
     image_transforms = ImageTransforms(cfg.image_transforms) if cfg.image_transforms.enable else None

-    if isinstance(cfg.grounding, str) + isinstance(cfg.repo_id, str) != 1:
-        raise ValueError("Exactly one of `cfg.grounding` and `cfg.repo_id` should be provided.")
+    if isinstance(cfg.vqa, str) + isinstance(cfg.repo_id, str) != 1:
+        raise ValueError("Exactly one of `cfg.vqa` and `cfg.repo_id` should be provided.")

-    if isinstance(cfg.grounding, str):
-        ds_cls = available_grounding_datasets.get(cfg.grounding)
+    if isinstance(cfg.vqa, str):
+        ds_cls = available_vqa_datasets.get(cfg.vqa)
         if ds_cls is None:
             raise ValueError(
-                f"Unknown grounding dataset '{cfg.grounding}'. "
-                f"Supported datasets are: {available_grounding_datasets.keys()}"
+                f"Unknown vqa dataset '{cfg.vqa}'. Supported datasets are: {available_vqa_datasets.keys()}"
             )
         # TODO support dataset-specific arg / kwargs
         dataset = ds_cls(train_cfg)

@@ -210,8 +209,8 @@ def make_dataset(
         return_advantage_input=return_advantage_input,
     )

-    # TODO grounding datasets implement stats in original feature names, but camera_keys are standardized names
-    if not isinstance(cfg.grounding, str) and "dummy" not in cfg.repo_id and cfg.use_imagenet_stats:
+    # TODO vqa datasets implement stats in original feature names, but camera_keys are standardized names
+    if not isinstance(cfg.vqa, str) and "dummy" not in cfg.repo_id and cfg.use_imagenet_stats:
         for key in dataset.meta.camera_keys:
             for stats_type, stats in IMAGENET_STATS.items():
                 if key not in dataset.meta.stats:
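The dispatch logic in `make_dataset` above uses two small tricks: summing two `isinstance` booleans counts how many of the mutually exclusive fields are set, and an unknown `vqa` name produces an error listing the supported registry keys. A self-contained sketch of just that logic (the registry contents and return values here are illustrative, not OpenTau's actual classes):

```python
# Hypothetical registry standing in for opentau's available_vqa_datasets.
available_vqa_datasets = {"clevr": object, "cocoqa": object}

def resolve(vqa=None, repo_id=None):
    # isinstance(...) returns a bool; summing two bools counts how many are set.
    if isinstance(vqa, str) + isinstance(repo_id, str) != 1:
        raise ValueError("Exactly one of `cfg.vqa` and `cfg.repo_id` should be provided.")
    if isinstance(vqa, str):
        ds_cls = available_vqa_datasets.get(vqa)
        if ds_cls is None:
            raise ValueError(
                f"Unknown vqa dataset '{vqa}'. Supported datasets are: {list(available_vqa_datasets)}"
            )
        return ("vqa", ds_cls)
    return ("lerobot", repo_id)

kind, _ = resolve(vqa="clevr")
```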

src/opentau/datasets/lerobot_dataset.py (5 additions, 5 deletions)

@@ -52,8 +52,8 @@
     Metadata manager for LeRobot datasets with Hub integration, version
     checking, and statistics loading.

-GroundingDatasetMetadata
-    Metadata manager for grounding datasets.
+VQADatasetMetadata
+    Metadata manager for VQA datasets.

 BaseDataset
     Base PyTorch Dataset class with common functionality.

@@ -259,8 +259,8 @@ def shapes(self) -> dict:
         return {key: tuple(ft["shape"]) for key, ft in self.features.items()}


-class GroundingDatasetMetadata(DatasetMetadata):
-    """Metadata class for grounding datasets (vision-language datasets)."""
+class VQADatasetMetadata(DatasetMetadata):
+    """Metadata class for VQA datasets (vision-language datasets)."""

     pass

@@ -585,7 +585,7 @@ class BaseDataset(torch.utils.data.Dataset):
     """Base class for all robot learning datasets.

     This abstract base class provides common functionality for both LeRobotDataset
-    and GroundingDataset, including data format standardization, image processing,
+    and VQADataset, including data format standardization, image processing,
     and vector padding. It ensures all datasets conform to a standard format
     regardless of their source or structure.

src/opentau/datasets/grounding/__init__.py renamed to src/opentau/datasets/vqa/__init__.py (16 additions, 16 deletions)

@@ -12,56 +12,56 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-"""Vision-language grounding datasets for multimodal learning.
+"""Vision-language VQA datasets for multimodal learning.

 This module provides datasets for training vision-language-action models on
-image-text grounding tasks without requiring robot actions. Grounding datasets
+image-text VQA tasks without requiring robot actions. VQA datasets
 are designed to help models learn visual understanding, spatial reasoning,
-and language grounding capabilities that can be transferred to robotic tasks.
+and language understanding capabilities that can be transferred to robotic tasks.

-Grounding datasets differ from standard robot learning datasets in that they:
+VQA datasets differ from standard robot learning datasets in that they:
 - Provide images, prompts, and responses but no robot actions or states
 - Use zero-padding for state and action features to maintain compatibility
-- Focus on visual question answering, spatial reasoning, and object grounding
+- Focus on visual question answering, spatial reasoning, and object localization
 - Enable training on large-scale vision-language data without robot hardware

 The module uses a registration system where datasets are registered via the
-`@register_grounding_dataset` decorator, making them available through the
-`available_grounding_datasets` registry.
+`@register_vqa_dataset` decorator, making them available through the
+`available_vqa_datasets` registry.

 Available Datasets:
 - CLEVR: Compositional Language and Elementary Visual Reasoning dataset
   for visual question answering with synthetic scenes.
 - COCO-QA: Visual question answering dataset based on COCO images,
   filtered for spatial reasoning tasks.
-- PIXMO: Pixel-level manipulation grounding dataset for object
+- PIXMO: Pixel-level manipulation VQA dataset for object
   localization and manipulation tasks.
 - VSR: Visual Spatial Reasoning dataset for true/false statement
-  grounding about spatial relationships in images.
+  VQA about spatial relationships in images.
 - dummy: Synthetic test dataset with simple black, white, and gray
   images for testing infrastructure.

 Classes:
-    GroundingDataset: Base class for all grounding datasets, providing
+    VQADataset: Base class for all VQA datasets, providing
     common functionality for metadata creation, data format conversion,
     and zero-padding of state/action features.

 Modules:
-    base: Base class and common functionality for grounding datasets.
+    base: Base class and common functionality for VQA datasets.
     clevr: CLEVR dataset implementation.
     cocoqa: COCO-QA dataset implementation.
     dummy: Dummy test dataset implementation.
     pixmo: PIXMO dataset implementation.
     vsr: VSR dataset implementation.

 Example:
-    Use a grounding dataset in training configuration:
+    Use a VQA dataset in training configuration:
     >>> from opentau.configs.default import DatasetConfig
-    >>> cfg = DatasetConfig(grounding="cocoqa")
+    >>> cfg = DatasetConfig(vqa="cocoqa")
     >>> dataset = make_dataset(cfg, train_cfg)

-    Access available grounding datasets:
-    >>> from opentau import available_grounding_datasets
-    >>> print(list(available_grounding_datasets.keys()))
+    Access available VQA datasets:
+    >>> from opentau import available_vqa_datasets
+    >>> print(list(available_vqa_datasets.keys()))
     ['clevr', 'cocoqa', 'dummy', 'pixmo', 'vsr']
 """
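The docstring above notes that VQA datasets zero-pad state and action features so action-free samples stay batch-compatible with robot samples. A minimal illustration of that idea — the dimensions and the `pad_vqa_sample` helper are made up for the sketch, not OpenTau's actual shapes or API:

```python
# VQA samples carry image/prompt/response but no robot state or actions, so
# state and action features are zero-padded to the shapes robot samples use.
STATE_DIM = 8       # hypothetical robot state dimension
ACTION_HORIZON = 4  # hypothetical action-chunk length
ACTION_DIM = 7      # hypothetical per-step action dimension

def pad_vqa_sample(sample: dict) -> dict:
    """Return a copy of the sample with zeroed state/action features added."""
    padded = dict(sample)
    padded["state"] = [0.0] * STATE_DIM
    padded["actions"] = [[0.0] * ACTION_DIM for _ in range(ACTION_HORIZON)]
    return padded

item = pad_vqa_sample({"prompt": "How many cubes are red?", "response": "2"})
```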
