2 changes: 1 addition & 1 deletion internnav/dataset/internvla_n1_lerobot_dataset.py

@@ -1371,7 +1371,7 @@ def __getitem__(self, i):
 def make_supervised_data_module(tokenizer: transformers.PreTrainedTokenizer, data_args) -> Dict:
     """Make dataset and collator for supervised fine-tuning."""
     train_datasets = []
-    if data_args.iion_dataset_use:
+    if data_args.iign_dataset_use:
         train_datasets.append(VLLNDataset(tokenizer=tokenizer, data_args=data_args))
     if data_args.vln_dataset_use:
         train_datasets.append(NavPixelGoalDataset(tokenizer=tokenizer, data_args=data_args))
20 changes: 10 additions & 10 deletions internnav/dataset/vlln_lerobot_dataset.py

@@ -15,31 +15,31 @@
 from .rope2d import get_rope_index_2, get_rope_index_25

 # Define placeholders for dataset paths
-IION_split1 = {
+IIGN_split1 = {
     "data_path": "projects/VL-LN-Bench/traj_data/mp3d_split1",
     "height": 125,
     "pitch_1": 0,
     "pitch_2": 30,
 }

-IION_split2 = {
+IIGN_split2 = {
     "data_path": "projects/VL-LN-Bench/traj_data/mp3d_split2",
     "height": 125,
     "pitch_1": 0,
     "pitch_2": 30,
 }

-IION_split3 = {
+IIGN_split3 = {
     "data_path": "projects/VL-LN-Bench/traj_data/mp3d_split3",
     "height": 125,
     "pitch_1": 0,
     "pitch_2": 30,
 }

 data_dict = {
-    "iion_split1": IION_split1,
-    "iion_split2": IION_split2,
-    "iion_split3": IION_split3,
+    "iign_split1": IIGN_split1,
+    "iign_split2": IIGN_split2,
+    "iign_split3": IIGN_split3,
 }

 IGNORE_INDEX = -100
@@ -55,14 +55,14 @@

 class VLLNDataset(Dataset):
     """
-    Dataset for 'Vision-Language'-'Language-Navigation' (VL-LN) / IION-style training.
+    Dataset for 'Vision-Language'-'Language-Navigation' (VL-LN) / IIGN-style training.

     Args:
         tokenizer (transformers.PreTrainedTokenizer): Tokenizer used to encode
             the chat template and produce `input_ids` / `labels`.
         data_args: A config-like object that must provide at least:
-            - iion_dataset_use (str): comma-separated dataset names, optionally
-              with sampling rate suffix like `iion_split1%50`.
+            - iign_dataset_use (str): comma-separated dataset names, optionally
+              with sampling rate suffix like `iign_split1%50`.
             - model_type (str): decides which rope-index function to use.
             - sample_step (int): stride for sampling start frames.
             - pixel_goal_only (bool): whether to keep only pixel-goal samples.
@@ -74,7 +74,7 @@ class VLLNDataset(Dataset):

     def __init__(self, tokenizer: transformers.PreTrainedTokenizer, data_args):
         super(VLLNDataset, self).__init__()
-        dataset = data_args.iion_dataset_use.split(",")
+        dataset = data_args.iign_dataset_use.split(",")
         dataset_list = data_list(dataset)
         rank0_print(f"Loading datasets: {dataset_list}")
         self.video_max_total_pixels = getattr(data_args, "video_max_total_pixels", 1664 * 28 * 28)
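For reference, the docstring above says `iign_dataset_use` takes comma-separated dataset names with an optional sampling-rate suffix (e.g. `iign_split1%50`), and `__init__` resolves them through `data_list` against `data_dict`. The helper itself is not part of this diff, so the sketch below is only an assumption about its behavior; the name `resolve_datasets`, the `sampling_rate` key, and the returned list of dicts are illustrative, not the actual implementation.

```python
# Hedged sketch of how the comma-separated `iign_dataset_use` string might be
# resolved against `data_dict`. The real `data_list` helper is not shown in this
# diff; the function name, the `sampling_rate` key, and the returned structure
# are assumptions made for illustration only.
def resolve_datasets(spec: str) -> list[dict]:
    resolved = []
    for name in spec.split(","):
        name = name.strip()
        if not name:
            continue
        # Optional sampling-rate suffix, e.g. "iign_split1%50" -> use 50% of the split.
        if "%" in name:
            name, rate = name.split("%")
            sampling_rate = float(rate) / 100.0
        else:
            sampling_rate = 1.0
        cfg = dict(data_dict[name])            # e.g. IIGN_split1 from the module above
        cfg["sampling_rate"] = sampling_rate   # assumed field name
        resolved.append(cfg)
    return resolved

# Example: resolve_datasets("iign_split1%50,iign_split2") would yield the two
# mp3d split configs, the first tagged with sampling_rate=0.5.
```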
2 changes: 1 addition & 1 deletion internnav/trainer/internvla_n1_argument.py

@@ -29,7 +29,7 @@ class DataArguments:
     video_min_frame_pixels: int = field(default=4 * 28 * 28)

     vln_dataset_use: str = field(default="")
-    iion_dataset_use: str = field(default="")
+    iign_dataset_use: str = field(default="")
     sample_step: int = field(default=4)
     num_history: Optional[int] = field(default=8)
     predict_step_num: Optional[int] = field(default=32)
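The rename only works end to end if the dataclass field, the dataset code, and the launch flag all agree. A minimal sketch of that wiring, assuming the trainer fills its arguments with `transformers.HfArgumentParser` (the usual pattern for such training scripts); `DataArgumentsSketch` is a trimmed-down stand-in for the real `DataArguments`, not the actual class:

```python
# Minimal sketch, assuming transformers.HfArgumentParser fills the dataclass
# from the CLI. Only the fields relevant to the rename are shown here.
from dataclasses import dataclass, field

import transformers


@dataclass
class DataArgumentsSketch:
    vln_dataset_use: str = field(default="")
    iign_dataset_use: str = field(default="")  # renamed from iion_dataset_use


parser = transformers.HfArgumentParser(DataArgumentsSketch)
(data_args,) = parser.parse_args_into_dataclasses(
    ["--iign_dataset_use", "iign_split1,iign_split2"]
)
# The flag passed by the launch script below lands in the same-named field,
# which VLLNDataset then splits on commas.
assert data_args.iign_dataset_use == "iign_split1,iign_split2"
```

If the shell script kept the old `--iion_dataset_use` spelling, the argument parser would reject it as unrecognized, which is why the launch script is updated in the same PR.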
4 changes: 2 additions & 2 deletions scripts/train/qwenvl_train/train_system2_vlln.sh

@@ -27,7 +27,7 @@ max_pixels=313600
 min_pixels=3136

 # Dataset configuration (replace with public dataset names)
-iion_datasets=iion_split1,iion_split2 #,iion_split3
+iign_datasets=iign_split1,iign_split2 #,iign_split3

 # Output configuration
 run_name=InternVLA-N1-vlln
@@ -38,7 +38,7 @@ srun torchrun --nnodes=$SLURM_NNODES --nproc_per_node=8 \
     internnav/trainer/internvla_vlln_trainer.py \
     --deepspeed ${deepspeed} \
     --model_name_or_path "${llm}" \
-    --iion_dataset_use ${iion_datasets} \
+    --iign_dataset_use ${iign_datasets} \
     --data_flatten False \
     --tune_mm_vision True \
     --tune_mm_mlp True \