diff --git a/internnav/dataset/internvla_n1_lerobot_dataset.py b/internnav/dataset/internvla_n1_lerobot_dataset.py
index d1c9ebdb..8c2d9084 100644
--- a/internnav/dataset/internvla_n1_lerobot_dataset.py
+++ b/internnav/dataset/internvla_n1_lerobot_dataset.py
@@ -1371,7 +1371,7 @@ def __getitem__(self, i):
 def make_supervised_data_module(tokenizer: transformers.PreTrainedTokenizer, data_args) -> Dict:
     """Make dataset and collator for supervised fine-tuning."""
     train_datasets = []
-    if data_args.iion_dataset_use:
+    if data_args.iign_dataset_use:
         train_datasets.append(VLLNDataset(tokenizer=tokenizer, data_args=data_args))
     if data_args.vln_dataset_use:
         train_datasets.append(NavPixelGoalDataset(tokenizer=tokenizer, data_args=data_args))
diff --git a/internnav/dataset/vlln_lerobot_dataset.py b/internnav/dataset/vlln_lerobot_dataset.py
index 40590531..44c64d10 100644
--- a/internnav/dataset/vlln_lerobot_dataset.py
+++ b/internnav/dataset/vlln_lerobot_dataset.py
@@ -15,21 +15,21 @@
 from .rope2d import get_rope_index_2, get_rope_index_25
 
 # Define placeholders for dataset paths
-IION_split1 = {
+IIGN_split1 = {
     "data_path": "projects/VL-LN-Bench/traj_data/mp3d_split1",
     "height": 125,
     "pitch_1": 0,
     "pitch_2": 30,
 }
 
-IION_split2 = {
+IIGN_split2 = {
     "data_path": "projects/VL-LN-Bench/traj_data/mp3d_split2",
     "height": 125,
     "pitch_1": 0,
     "pitch_2": 30,
 }
 
-IION_split3 = {
+IIGN_split3 = {
     "data_path": "projects/VL-LN-Bench/traj_data/mp3d_split3",
     "height": 125,
     "pitch_1": 0,
@@ -37,9 +37,9 @@
 }
 
 data_dict = {
-    "iion_split1": IION_split1,
-    "iion_split2": IION_split2,
-    "iion_split3": IION_split3,
+    "iign_split1": IIGN_split1,
+    "iign_split2": IIGN_split2,
+    "iign_split3": IIGN_split3,
 }
 
 IGNORE_INDEX = -100
@@ -55,14 +55,14 @@ class VLLNDataset(Dataset):
     """
-    Dataset for 'Vision-Language'-'Language-Navigation' (VL-LN) / IION-style training.
+    Dataset for 'Vision-Language'-'Language-Navigation' (VL-LN) / IIGN-style training.
 
     Args:
         tokenizer (transformers.PreTrainedTokenizer): Tokenizer used to
            encode the chat template and produce `input_ids` / `labels`.
         data_args: A config-like object that must provide at least:
-            - iion_dataset_use (str): comma-separated dataset names, optionally
-              with sampling rate suffix like `iion_split1%50`.
+            - iign_dataset_use (str): comma-separated dataset names, optionally
+              with sampling rate suffix like `iign_split1%50`.
            - model_type (str): decides which rope-index function to use.
            - sample_step (int): stride for sampling start frames.
            - pixel_goal_only (bool): whether to keep only pixel-goal samples.
@@ -74,7 +74,7 @@ class VLLNDataset(Dataset):
 
     def __init__(self, tokenizer: transformers.PreTrainedTokenizer, data_args):
         super(VLLNDataset, self).__init__()
-        dataset = data_args.iion_dataset_use.split(",")
+        dataset = data_args.iign_dataset_use.split(",")
         dataset_list = data_list(dataset)
         rank0_print(f"Loading datasets: {dataset_list}")
         self.video_max_total_pixels = getattr(data_args, "video_max_total_pixels", 1664 * 28 * 28)
diff --git a/internnav/trainer/internvla_n1_argument.py b/internnav/trainer/internvla_n1_argument.py
index bc8e0361..5dd6e56d 100644
--- a/internnav/trainer/internvla_n1_argument.py
+++ b/internnav/trainer/internvla_n1_argument.py
@@ -29,7 +29,7 @@ class DataArguments:
     video_min_frame_pixels: int = field(default=4 * 28 * 28)
 
     vln_dataset_use: str = field(default="")
-    iion_dataset_use: str = field(default="")
+    iign_dataset_use: str = field(default="")
     sample_step: int = field(default=4)
     num_history: Optional[int] = field(default=8)
     predict_step_num: Optional[int] = field(default=32)
diff --git a/scripts/train/qwenvl_train/train_system2_vlln.sh b/scripts/train/qwenvl_train/train_system2_vlln.sh
index 2ac79998..14bf9c20 100644
--- a/scripts/train/qwenvl_train/train_system2_vlln.sh
+++ b/scripts/train/qwenvl_train/train_system2_vlln.sh
@@ -27,7 +27,7 @@ max_pixels=313600
 min_pixels=3136
 
 # Dataset configuration (replace with public dataset names)
-iion_datasets=iion_split1,iion_split2 #,iion_split3
+iign_datasets=iign_split1,iign_split2 #,iign_split3
 
 # Output configuration
 run_name=InternVLA-N1-vlln
@@ -38,7 +38,7 @@ srun torchrun --nnodes=$SLURM_NNODES --nproc_per_node=8 \
     internnav/trainer/internvla_vlln_trainer.py \
     --deepspeed ${deepspeed} \
     --model_name_or_path "${llm}" \
-    --iion_dataset_use ${iion_datasets} \
+    --iign_dataset_use ${iign_datasets} \
     --data_flatten False \
     --tune_mm_vision True \
     --tune_mm_mlp True \
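Note: the `VLLNDataset` docstring above documents that entries in `--iign_dataset_use` are comma-separated names, each optionally carrying a sampling-rate suffix such as `iign_split1%50`. The `data_list` helper that consumes the comma-split names is not part of this diff; the snippet below is a minimal sketch of how such a resolver could look, assuming only that `data_dict` maps names to the config dicts defined above. The function name, the `sampling_rate` key, and the percent-suffix semantics are illustrative assumptions, not the repository's actual implementation.

    # Hypothetical sketch (not from this diff): resolve names like
    # "iign_split1%50" against the module-level `data_dict`.
    def data_list(names):
        resolved = []
        for name in names:
            sampling_rate = 1.0
            if "%" in name:
                # An optional "%<percent>" suffix selects a fraction of the split.
                name, rate = name.split("%")
                sampling_rate = float(rate) / 100.0
            config = dict(data_dict[name])  # copy so the shared template stays untouched
            config["sampling_rate"] = sampling_rate
            resolved.append(config)
        return resolved

Under these assumptions, `data_list(data_args.iign_dataset_use.split(","))` in `VLLNDataset.__init__` would yield one config per requested split, e.g. `iign_split1%50,iign_split2` resolves to split1 sampled at 50% plus all of split2.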