Skip to content

Commit c5ee60b

Browse files
author
Ye Shaokai
committed
some bug fixes
1 parent 06e75af commit c5ee60b

File tree

4 files changed

+138
-138
lines changed

4 files changed

+138
-138
lines changed

.vscode/launch.json

Lines changed: 134 additions & 134 deletions
Original file line number | Diff line number | Diff line change
@@ -219,144 +219,144 @@
219219
// }
220220

221221
//shaokai
222-
// {
223-
// "version": "0.2.0",
224-
// "configurations": [
225-
// {
226-
// "name": "Run LLAVA Training with torchrun",
227-
// "type": "debugpy",
228-
// "request": "launch",
229-
// "module": "torch.distributed.run",
230-
// "env": {
231-
// "CUDA_VISIBLE_DEVICES": "0",
232-
// "OMP_NUM_THREADS": "8",
233-
// "NCCL_IB_DISABLE": "0",
234-
// "NCCL_IB_GID_INDEX": "3",
235-
// "NCCL_SOCKET_IFNAME": "eth0",
236-
// "HF_HOME": "/data/shaokai",
237-
// "NCCL_DEBUG": "INFO",
238-
// "ACCELERATE_CPU_AFFINITY": "1",
239-
// "WANDB_API_KEY": "4474ec79de023b0c3ffb43588ab6163264f875db",
240-
// "PYTHONPATH": "/data/shaokai/LLaVA-NeXT:/usr/local/lib/python3.10/site-packages/decord-0.6.0-py3.10-linux-x86_64.egg/"
241-
// },
242-
// "args": [
243-
// "--nproc_per_node=1",
244-
// "--nnodes=1",
245-
// "--node_rank=0",
246-
// "--master_addr=127.0.0.1",
247-
// "--master_port=29500",
248-
// "llava/train/train_mem.py",
249-
// "--deepspeed", "scripts/zero3.json",
250-
// "--model_name_or_path", "lmms-lab/llava-onevision-qwen2-0.5b-ov",
251-
// "--version", "qwen_1_5",
252-
// "--data_path", "scripts/train/simple_avion_top5_gt_and_direct.yaml",
253-
// "--video_folder", "/data/shaokai/EK100_512/",
254-
// "--mm_tunable_parts", "mm_vision_tower,mm_mlp_adapter,mm_language_model",
255-
// "--mm_vision_tower_lr", "2e-6",
256-
// "--vision_tower", "google/siglip-so400m-patch14-384",
257-
// "--mm_projector_type", "mlp2x_gelu",
258-
// "--mm_vision_select_layer", "-2",
259-
// "--mm_use_im_start_end", "False",
260-
// "--mm_use_im_patch_token", "False",
261-
// "--group_by_modality_length", "True",
262-
// "--image_aspect_ratio", "anyres_max_9",
263-
// "--image_grid_pinpoints", "(1x1),...,(6x6)",
264-
// "--mm_patch_merge_type", "spatial_unpad",
265-
// "--bf16", "True",
266-
// "--run_name", "dpo_test",
267-
// "--output_dir", "experiments/dpo_test",
268-
// "--num_train_epochs", "1",
269-
// "--per_device_train_batch_size", "4",
270-
// "--per_device_eval_batch_size", "4",
271-
// "--gradient_accumulation_steps", "2",
272-
// "--evaluation_strategy", "steps",
273-
// "--save_strategy", "steps",
274-
// "--save_steps", "1000",
275-
// "--save_total_limit", "1",
276-
// "--learning_rate", "1e-5",
277-
// "--weight_decay", "0.",
278-
// "--warmup_ratio", "0.03",
279-
// "--lr_scheduler_type", "cosine",
280-
// "--logging_steps", "1",
281-
// "--tf32", "True",
282-
// "--model_max_length", "32768",
283-
// "--gradient_checkpointing", "True",
284-
// "--dataloader_num_workers", "4",
285-
// "--lazy_preprocess", "True",
286-
// "--report_to", "wandb",
287-
// "--torch_compile", "True",
288-
// "--torch_compile_backend", "inductor",
289-
// "--dataloader_drop_last", "True",
290-
// "--frames_upbound", "4",
291-
// "--root", "/data/shaokai/EK100_512/EK100",
292-
// "--action_predictions", "/data/shaokai/AVION_PREDS/avion_pred_ids_val.json",
293-
// "--val_metadata", "/data/shaokai/epic-kitchens-100-annotations/EPIC_100_validation.csv",
294-
// "--llava_num_frames", "4",
295-
// "--clip_length", "4",
296-
// "--action_representation", "official_key",
297-
// "--topk_predictions", "5",
298-
// "--eval_steps", "1",
299-
// "--vision_supervision", "three_tokens",
300-
// "--vision_token_training", "all_layers",
301-
// "--action_types", "97,300,3806",
302-
// "--learn_neighbor_actions", "True"
303-
// ],
304-
// "console": "integratedTerminal",
305-
// "justMyCode": false,
306-
// "cwd": "${workspaceFolder}"
307-
// }
308-
// ]
309-
// }
310-
311-
312222
{
313-
"version": "0.2.0",
314-
"configurations": [
315-
{
316-
"name": "Run LLAVA Training with torchrun",
317-
"type": "debugpy",
318-
"request": "launch",
319-
"module": "torch.distributed.run",
320-
"env": {
321-
"CUDA_VISIBLE_DEVICES": "0",
322-
"OMP_NUM_THREADS": "8",
323-
"NCCL_IB_DISABLE": "0",
324-
"NCCL_IB_GID_INDEX": "3",
325-
"NCCL_SOCKET_IFNAME": "eth0",
326-
"HF_HOME": "/data/shaokai",
327-
"NCCL_DEBUG": "INFO",
328-
"ACCELERATE_CPU_AFFINITY": "1",
329-
"WANDB_API_KEY": "4474ec79de023b0c3ffb43588ab6163264f875db",
330-
"PYTHONPATH": "/data/shaokai/LLaVA-NeXT:/usr/local/lib/python3.10/site-packages/decord-0.6.0-py3.10-linux-x86_64.egg/"
331-
},
332-
"args": [
333-
"--nproc_per_node=1",
334-
"--nnodes=1",
335-
"--node_rank=0",
336-
"--master_addr=127.0.0.1",
337-
"--master_port=29500",
338-
"llava/action/ek_eval.py",
339-
"--pretrained_name", "experiments/dev_0.5b_4f_avion_top5_and_direct_neighbor",
340-
"--root", "/data/shaokai/EK100",
341-
"--train-metadata", "/data/shaokai/epic-kitchens-100-annotations/EPIC_100_train.csv",
342-
"--val-metadata", "/data/shaokai/epic-kitchens-100-annotations/EPIC_100_validation.csv",
343-
"--llava_num_frames", "4",
344-
"--clip-length", "4",
345-
"--action_predictions","/data/shaokai/TIM_PREDS/tim_pred_ids_val.json",
346-
"--action_representation", "official_key",
347-
"--topk_predictions", "5",
348-
"--test_type", "temporal_cot",
349-
"--output_dir", "test_0.5b_direct",
350-
"--learn_neighbor_actions"
351-
],
352-
"console": "integratedTerminal",
353-
"justMyCode": false,
354-
"cwd": "${workspaceFolder}"
355-
}
356-
]
223+
"version": "0.2.0",
224+
"configurations": [
225+
{
226+
"name": "Run LLAVA Training with torchrun",
227+
"type": "debugpy",
228+
"request": "launch",
229+
"module": "torch.distributed.run",
230+
"env": {
231+
"CUDA_VISIBLE_DEVICES": "0",
232+
"OMP_NUM_THREADS": "8",
233+
"NCCL_IB_DISABLE": "0",
234+
"NCCL_IB_GID_INDEX": "3",
235+
"NCCL_SOCKET_IFNAME": "eth0",
236+
"HF_HOME": "/data/shaokai",
237+
"NCCL_DEBUG": "INFO",
238+
"ACCELERATE_CPU_AFFINITY": "1",
239+
"WANDB_API_KEY": "4474ec79de023b0c3ffb43588ab6163264f875db",
240+
"PYTHONPATH": "/data/shaokai/LLaVA-NeXT:/usr/local/lib/python3.10/site-packages/decord-0.6.0-py3.10-linux-x86_64.egg/"
241+
},
242+
"args": [
243+
"--nproc_per_node=1",
244+
"--nnodes=1",
245+
"--node_rank=0",
246+
"--master_addr=127.0.0.1",
247+
"--master_port=29500",
248+
"llava/train/train_mem.py",
249+
"--deepspeed", "scripts/zero3.json",
250+
"--model_name_or_path", "lmms-lab/llava-onevision-qwen2-0.5b-ov",
251+
"--version", "qwen_1_5",
252+
"--data_path", "scripts/train/simple_avion_top5_gt_and_direct.yaml",
253+
"--video_folder", "/data/shaokai/EK100_512/",
254+
"--mm_tunable_parts", "mm_vision_tower,mm_mlp_adapter,mm_language_model",
255+
"--mm_vision_tower_lr", "2e-6",
256+
"--vision_tower", "google/siglip-so400m-patch14-384",
257+
"--mm_projector_type", "mlp2x_gelu",
258+
"--mm_vision_select_layer", "-2",
259+
"--mm_use_im_start_end", "False",
260+
"--mm_use_im_patch_token", "False",
261+
"--group_by_modality_length", "True",
262+
"--image_aspect_ratio", "anyres_max_9",
263+
"--image_grid_pinpoints", "(1x1),...,(6x6)",
264+
"--mm_patch_merge_type", "spatial_unpad",
265+
"--bf16", "True",
266+
"--run_name", "dpo_test",
267+
"--output_dir", "experiments/dpo_test",
268+
"--num_train_epochs", "1",
269+
"--per_device_train_batch_size", "4",
270+
"--per_device_eval_batch_size", "4",
271+
"--gradient_accumulation_steps", "2",
272+
"--evaluation_strategy", "steps",
273+
"--save_strategy", "steps",
274+
"--save_steps", "1000",
275+
"--save_total_limit", "1",
276+
"--learning_rate", "1e-5",
277+
"--weight_decay", "0.",
278+
"--warmup_ratio", "0.03",
279+
"--lr_scheduler_type", "cosine",
280+
"--logging_steps", "1",
281+
"--tf32", "True",
282+
"--model_max_length", "32768",
283+
"--gradient_checkpointing", "True",
284+
"--dataloader_num_workers", "4",
285+
"--lazy_preprocess", "True",
286+
"--report_to", "wandb",
287+
"--torch_compile", "True",
288+
"--torch_compile_backend", "inductor",
289+
"--dataloader_drop_last", "True",
290+
"--frames_upbound", "4",
291+
"--root", "/data/shaokai/EK100_512/EK100",
292+
"--action_predictions", "/data/shaokai/AVION_PREDS/avion_pred_ids_val.json",
293+
"--val_metadata", "/data/shaokai/epic-kitchens-100-annotations/EPIC_100_validation.csv",
294+
"--llava_num_frames", "4",
295+
"--clip_length", "4",
296+
"--action_representation", "official_key",
297+
"--topk_predictions", "5",
298+
"--eval_steps", "1",
299+
"--vision_supervision", "three_tokens",
300+
"--vision_token_training", "all_layers",
301+
"--action_types", "97,300,3806",
302+
"--perspective", "first_person"
303+
],
304+
"console": "integratedTerminal",
305+
"justMyCode": false,
306+
"cwd": "${workspaceFolder}"
307+
}
308+
]
357309
}
358310

359311

312+
// {
313+
// "version": "0.2.0",
314+
// "configurations": [
315+
// {
316+
// "name": "Run LLAVA Training with torchrun",
317+
// "type": "debugpy",
318+
// "request": "launch",
319+
// "module": "torch.distributed.run",
320+
// "env": {
321+
// "CUDA_VISIBLE_DEVICES": "0",
322+
// "OMP_NUM_THREADS": "8",
323+
// "NCCL_IB_DISABLE": "0",
324+
// "NCCL_IB_GID_INDEX": "3",
325+
// "NCCL_SOCKET_IFNAME": "eth0",
326+
// "HF_HOME": "/data/shaokai",
327+
// "NCCL_DEBUG": "INFO",
328+
// "ACCELERATE_CPU_AFFINITY": "1",
329+
// "WANDB_API_KEY": "4474ec79de023b0c3ffb43588ab6163264f875db",
330+
// "PYTHONPATH": "/data/shaokai/LLaVA-NeXT:/usr/local/lib/python3.10/site-packages/decord-0.6.0-py3.10-linux-x86_64.egg/"
331+
// },
332+
// "args": [
333+
// "--nproc_per_node=1",
334+
// "--nnodes=1",
335+
// "--node_rank=0",
336+
// "--master_addr=127.0.0.1",
337+
// "--master_port=29500",
338+
// "llava/action/ek_eval.py",
339+
// "--pretrained_name", "experiments/dev_0.5b_4f_avion_top5_and_direct_neighbor",
340+
// "--root", "/data/shaokai/EK100",
341+
// "--train-metadata", "/data/shaokai/epic-kitchens-100-annotations/EPIC_100_train.csv",
342+
// "--val-metadata", "/data/shaokai/epic-kitchens-100-annotations/EPIC_100_validation.csv",
343+
// "--llava_num_frames", "4",
344+
// "--clip-length", "4",
345+
// "--action_predictions","/data/shaokai/TIM_PREDS/tim_pred_ids_val.json",
346+
// "--action_representation", "official_key",
347+
// "--topk_predictions", "5",
348+
// "--test_type", "temporal_cot",
349+
// "--output_dir", "test_0.5b_direct",
350+
// "--learn_neighbor_actions"
351+
// ],
352+
// "console": "integratedTerminal",
353+
// "justMyCode": false,
354+
// "cwd": "${workspaceFolder}"
355+
// }
356+
// ]
357+
// }
358+
359+
360360
// {
361361
// "version": "0.2.0",
362362
// "configurations": [

llava/train/llava_trainer.py

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -496,10 +496,7 @@ def __init__(self,
496496
self.model_max_length = model_max_length
497497

498498
def evaluate(self, eval_dataset=None, ignore_keys=None, metric_key_prefix="eval", eval_result_folder = None):
499-
500-
print ('debug')
501-
print (self.eval_args)
502-
499+
503500
accuracy = evaluate_on_EK100(self.eval_args, self.model, self.tokenizer, eval_result_folder = eval_result_folder)
504501
metrics = {f"{metric_key_prefix}_EK100_accuracy": accuracy}
505502
self.log(metrics)

llava/train/train.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -203,6 +203,7 @@ class EK100EvalArguments:
203203
test_type: str = 'base'
204204
learn_neighbor_actions: bool = False
205205
perspective: str = "first_person"
206+
pseudo_folder: str = ""
206207

207208
def maybe_zero_3(param, ignore_status=False, name=None):
208209
from deepspeed import zero

llava/train/train_dpo_new.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -223,6 +223,8 @@ class EK100EvalArguments:
223223
n_narrations: int = -1
224224
test_type: str = 'base'
225225
learn_neighbor_actions: bool = False
226+
perspective: str = "first_person"
227+
pseudo_folder: str = ""
226228

227229

228230
def maybe_zero_3(param, ignore_status=False, name=None):

0 commit comments

Comments
 (0)