"""
Instead of running the whole validation set, run inference on a single clip
selected by its uid.
"""
4+ from llava .action .ek_eval import prepare_llava
5+ from llava .action .generate_interval_pred import get_lookup_dict
6+ from llava .action .inference import llava_inference
7+
# Filesystem inputs: `root` is handed to the video loader, `val_metadata` to
# get_lookup_dict.
root = '/data/shaokai/EK100_512/EK100'
val_metadata = '/data/shaokai/epic-kitchens-100-annotations/EPIC_100_validation.csv'

# Number of frames requested from the video loader (its `clip_length`).
n_frames = 32
# Second positional argument given to get_lookup_dict.
action_representation = 'GT_random_narration'
12+
def get_frames_by_uid(uid, root):
    """Load the frames of the clip that ``uid`` identifies.

    ``uid`` is split on ``_``. The first two fields, re-joined with ``_`` and
    with every ``-`` turned into ``/``, form the video path given to the
    loader. The two remaining fields are parsed as floats and used as the
    start and end of the clip.

    Args:
        uid: Clip identifier made of exactly four ``_``-separated fields.
        root: Forwarded to ``avion_video_loader`` as its first argument.

    Returns:
        The frames produced by ``avion_video_loader``. The time metadata the
        loader returns alongside them is dropped.

    Raises:
        ValueError: If ``uid`` does not carry exactly two fields after the
            first two, or if one of those two is not a valid float.
    """
    from llava.action.utils import avion_video_loader

    fields = uid.split('_')
    vid_path = '_'.join(fields[:2]).replace('-', '/')
    # Tuple unpacking enforces that exactly two timestamp fields remain.
    start_field, end_field = fields[2:]
    start_timestamp = float(start_field)
    end_timestamp = float(end_field)
    print(vid_path, start_timestamp, end_timestamp)

    loader_options = dict(
        chunk_len=15,
        clip_length=n_frames,
        threads=1,
        fast_rrc=False,
        fast_rcc=False,
        jitter=False,
    )
    loaded = avion_video_loader(
        root, vid_path, 'MP4', start_timestamp, end_timestamp, **loader_options
    )
    # The loader yields a (frames, time metadata) pair; only frames are used.
    frames, _time_meta = loaded
    return frames
33+
def inference_task_by_uid(checkpoint_folder, uid, task, mc_data=None,
                          **inference_kwargs):
    """Run ``llava_inference`` on the single clip identified by ``uid``.

    The model is prepared from ``checkpoint_folder``, the clip's frames are
    loaded with ``get_frames_by_uid`` and both are handed to
    ``llava_inference``.

    Args:
        checkpoint_folder: Passed to ``prepare_llava``.
        uid: Clip identifier, passed to ``get_frames_by_uid``.
        task: Used as ``test_type`` for both ``get_lookup_dict`` and
            ``llava_inference``.
        mc_data: Fifth positional argument of ``llava_inference``.
            NOTE(review): no value for it existed in this module; confirm
            whether ``None`` is acceptable for the tasks you run.
        **inference_kwargs: Extra keyword arguments forwarded verbatim to
            ``llava_inference`` (for example ``temperature``, ``time_meta``,
            ``learn_neighbor_actions``, ``meta_data``, ``perspective``,
            ``include_time_instruction``). They override the values this
            function fills in (``test_type``, ``clip_length``,
            ``num_frames``). Anything omitted is left to ``llava_inference``.

    Returns:
        Whatever ``llava_inference`` returns for the clip.
    """
    # The fourth value returned by prepare_llava is not needed here.
    tokenizer, model, image_processor, _ = prepare_llava(checkpoint_folder)

    frames = get_frames_by_uid(uid, root)

    if 'temporal_cot' in task:
        # NOTE(review): the call is kept from the original, but its result is
        # not consumed anywhere yet. It presumably should feed `meta_data`;
        # wire it in once the structure of the returned table is confirmed.
        get_lookup_dict(val_metadata,
                        action_representation,
                        test_type=task,
                        pseudo_folder='')

    # Values this module can supply itself. Both frame counts reuse the
    # n_frames that the clip was loaded with -- TODO confirm that
    # llava_inference expects clip_length and num_frames to be equal.
    call_kwargs = {
        'test_type': task,
        'clip_length': n_frames,
        'num_frames': n_frames,
    }
    # Caller-provided settings take precedence over the defaults above.
    call_kwargs.update(inference_kwargs)

    pred = llava_inference(
        frames,
        tokenizer,
        model,
        image_processor,
        mc_data,
        **call_kwargs,
    )
    return pred
0 commit comments