11import json
22import csv
33import os
4+ import argparse
5+ from action .utils import generate_label_map , MultiChoiceGenerator
6+ from pathlib import Path
7+
8+
9+ GEN_TYPES = ['naive' , 'random_mc' , 'avion_mc' ]
410
def datetime2sec(timestamp):
    """Convert an 'HH:MM:SS[.fff]' timestamp string to seconds.

    Parameters
    ----------
    timestamp : str
        Time string with colon-separated hours, minutes and seconds;
        the seconds part may carry a fractional component.

    Returns
    -------
    float
        Total number of seconds.
    """
    # Renamed the parameter: the original shadowed the builtin `str`.
    hh, mm, ss = timestamp.split(':')
    return int(hh) * 3600 + int(mm) * 60 + float(ss)
814
def generate_label_map(dataset):
    """Build the action label space for one of the supported datasets.

    Parameters
    ----------
    dataset : str
        One of 'ek100_cls', 'charades_ego', or 'egtea'.

    Returns
    -------
    labels : list
        Per-action narration labels (a list of narration lists for
        'ek100_cls', a flat list of strings otherwise).
    mapping_vn2act : dict
        Maps a verb-noun key (or label string) to its action index.
    verb_ids, noun_ids : dict
        EPIC-KITCHENS verb/noun class-id -> name mappings. Empty for the
        non-ek100 datasets.

    Raises
    ------
    NotImplementedError
        If `dataset` is not one of the supported names.
    """
    # Bug fix: initialize these up front. The original only assigned them in
    # the 'ek100_cls' branch but returned them unconditionally, raising
    # UnboundLocalError for 'charades_ego' and 'egtea'.
    verb_ids = {}
    noun_ids = {}
    if dataset == 'ek100_cls':
        print("Preprocess ek100 action label space")
        vn_list = []
        mapping_vn2narration = {}
        for f in [
            '/data/shaokai/epic-kitchens-100-annotations/EPIC_100_train.csv',
            '/data/shaokai/epic-kitchens-100-annotations/EPIC_100_validation.csv',
        ]:
            # Context manager so the csv file handle is not leaked
            # (original passed a bare open() to csv.reader).
            with open(f) as fp:
                csv_reader = csv.reader(fp)
                _ = next(csv_reader)  # skip the header
                for row in csv_reader:
                    # Column layout assumed from the EPIC-KITCHENS-100 csv:
                    # row[8]=narration, row[9]=verb, row[10]=verb_class,
                    # row[11]=noun, row[12]=noun_class -- TODO confirm.
                    vn = '{}:{}'.format(int(row[10]), int(row[12]))
                    narration = row[8]
                    if row[10] not in verb_ids:
                        verb_ids[row[10]] = row[9]
                    if row[12] not in noun_ids:
                        noun_ids[row[12]] = row[11]
                    if vn not in vn_list:
                        vn_list.append(vn)
                    # Collect every narration seen for this verb:noun pair.
                    mapping_vn2narration.setdefault(vn, []).append(narration)
        # NOTE: lexicographic sort of 'verb:noun' strings, kept as in the
        # original so action indices stay identical.
        vn_list = sorted(vn_list)
        print('# of action= {}'.format(len(vn_list)))
        mapping_vn2act = {vn: i for i, vn in enumerate(vn_list)}
        labels = [list(set(mapping_vn2narration[vn_list[i]])) for i in range(len(mapping_vn2act))]
        print(labels[:5])
    elif dataset == 'charades_ego':
        print("=> preprocessing charades_ego action label space")
        vn_list = []
        labels = []
        with open('datasets/CharadesEgo/CharadesEgo/Charades_v1_classes.txt') as f:
            csv_reader = csv.reader(f)
            for row in csv_reader:
                vn = row[0][:4]          # class id prefix, e.g. 'c093'
                vn_list.append(vn)
                labels.append(row[0][5:])  # narration text after the id
        mapping_vn2act = {vn: i for i, vn in enumerate(vn_list)}
        print(labels[:5])
    elif dataset == 'egtea':
        print("=> preprocessing egtea action label space")
        labels = []
        with open('datasets/EGTEA/action_idx.txt') as f:
            for row in f:
                row = row.strip()
                # Drop the trailing index token, keep the action name.
                narration = ' '.join(row.split(' ')[:-1])
                labels.append(narration.replace('_', ' ').lower())
        mapping_vn2act = {label: i for i, label in enumerate(labels)}
        print(len(labels), labels[:5])
    else:
        raise NotImplementedError
    return labels, mapping_vn2act, verb_ids, noun_ids
68-
69-
70- def parse_train_ann (ann_file , verb_ids , noun_ids ):
15+ def generate_train_ann (ann_file , verb_ids , noun_ids , gen_type = 'naive' ):
16+ assert gen_type in GEN_TYPES
7117 # epic kitchen uses csv
7218 csv_reader = csv .reader (open (ann_file ))
7319 _ = next (csv_reader )
7420 ret = []
21+ ann_root = Path (ann_file ).parent
22+ if gen_type == "random_mc" :
23+ mc_generator = MultiChoiceGenerator (ann_root )
24+
7525 for row in csv_reader :
76- # start_frame, end_frame = row[6], row[7]
7726 start_timestamp , end_timestamp = datetime2sec (row [4 ]), datetime2sec (row [5 ])
78- narration = f' { verb_ids [ row [ 10 ]] } { noun_ids [ row [ 12 ]] } '
27+
7928 pid , vid = row [1 :3 ]
80- vid_path = '{}-{}' .format (pid , vid )
81- conversation = generate_naive_conversation (narration )
29+ vid_path = '{}-{}' .format (pid , vid )
30+
31+ if gen_type == 'naive' :
32+ # here we directly use the names
33+ verb_noun = f'{ verb_ids [row [10 ]]} { noun_ids [row [12 ]]} '
34+ conversation = generate_naive_conversation (verb_noun )
35+ elif gen_type == "random_mc" :
36+ # here we use the index
37+ vn_str = f'{ row [10 ]} :{ row [12 ]} '
38+ mc_data = mc_generator .generate_multi_choice (vn_str , 5 )
39+ options = mc_data ['option' ][0 ]
40+ gt_answer_letter = mc_data ['gt_answer_letter' ][0 ]
41+ gt_answer_name = mc_data ['gt_answer_name' ][0 ]
42+ conversation = generate_random_mc_conversation (options , gt_answer_letter , gt_answer_name )
43+
8244 data = {'video' : vid_path ,
8345 'conversations' : conversation ,
8446 'id' : vid_path ,
@@ -92,19 +54,35 @@ def parse_train_ann(ann_file, verb_ids, noun_ids):
9254 ret .append (data )
9355 return ret
9456
def generate_naive_conversation(vn_str: str):
    """Build a single-turn LLaVA-style conversation for one clip.

    Parameters
    ----------
    vn_str : str
        Ground-truth answer, a 'verb noun' pair.

    Returns
    -------
    list[dict]
        Two turns: the fixed human question (with the <image> token) and
        the gpt answer containing `vn_str` verbatim.
    """
    # in this version, we do not care about diversifying the questions
    return [
        {"from": "human", "value": "<image>\nthe video is taken from egocentric view. What action is the person performing? Hint: provide your answer in verb-noun pair. "},
        {"from": "gpt", "value": f"{vn_str}"}
    ]
10163
def generate_random_mc_conversation(options: list[str], gt_answer_letter, gt_answer_name):
    """Build a single-turn multiple-choice conversation for one clip.

    Parameters
    ----------
    options : list[str]
        Lettered answer options embedded (via str()) in the question.
    gt_answer_letter : str
        Letter of the correct option, e.g. 'A'.
    gt_answer_name : str
        Text of the correct option.

    Returns
    -------
    list[dict]
        Two turns: the human multiple-choice question and the gpt answer
        formatted as '<letter>. <name>'.
    """
    return [
        {"from": "human", "value": f"<image>\nthe video is taken from egocentric view. What action is the person performing? Please select the letter for the right answer {options} "},
        {"from": "gpt", "value": f"{gt_answer_letter}. {gt_answer_name}"}
    ]
def get_args(argv=None):
    """Parse command-line arguments for EPIC-KITCHENS VQA generation.

    Parameters
    ----------
    argv : list[str] | None
        Explicit argument list (useful in tests); when None, argparse
        falls back to sys.argv[1:].

    Returns
    -------
    argparse.Namespace
        With `train_metadata` (path to the EPIC_100 train csv) and
        `out_folder` (destination directory for the generated jsonl).
    """
    parser = argparse.ArgumentParser(description="For generating VQA for EPIC-KITCHEN")
    parser.add_argument('--train_metadata',
                        default='/data/shaokai/epic-kitchens-100-annotations/EPIC_100_train.csv',
                        type=str)
    parser.add_argument('--out_folder',
                        default='/data/shaokai/EK100_in_LLAVA/',
                        type=str)
    return parser.parse_args(argv)
76+
77+ def main ():
78+ args = get_args ()
79+ ann_file = args .train_metadata
80+ inst_train_folder = args .out_folder
81+ print (ann_file )
82+ anno_path = Path (ann_file ).parent
83+ labels , mapping_vn2act , verb_ids , noun_ids = generate_label_map (anno_path )
84+ conv_lst = generate_train_ann (ann_file , verb_ids , noun_ids , gen_type = 'random_mc' )
10385
104- ann_file = "/data/shaokai/epic-kitchens-100-annotations/EPIC_100_train.csv"
105- labels , mapping_vn2act , verb_ids , noun_ids = generate_label_map ('ek100_cls' )
106- conv_lst = parse_train_ann (ann_file , verb_ids , noun_ids )
107- inst_train_folder = '/data/shaokai/EK100_in_LLAVA/'
10886 os .makedirs (inst_train_folder , exist_ok = True )
10987
11088 # save it to a jsonl
@@ -113,6 +91,5 @@ def main():
11391 f .write (json .dumps (conv ) + '\n ' )
11492
11593
116-
# Script entry point; the trailing web-page residue ("0 commit comments")
# that followed the guard was not Python and has been removed.
if __name__ == "__main__":
    main()