Skip to content

Commit 030c047

Browse files
author
Ye Shaokai
committed
WIP
1 parent 4b31a16 commit 030c047

File tree

3 files changed

+34
-25
lines changed

3 files changed

+34
-25
lines changed

llava/action/chatgpt_utils.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -386,6 +386,7 @@ def __init__(self,
386386

387387
self.do_visualization = do_visualization
388388
self.vis_folder = f"{self.gpt_model}_{self.gen_type}_{self.question_type}_{self.perspective}"
389+
os.makedirs(self.vis_folder, exist_ok = True)
389390
self.data = self.init_data()
390391

391392
def save_visualization(self,frames, uid):
@@ -450,15 +451,17 @@ def init_data(self):
450451
'end_second': end_second,
451452
'vid_path': vid_path
452453
}
453-
454454
return ret
455455

456-
def multi_process_run(self, n_samples = -1, disable_api_calling = False):
456+
def multi_process_run(self, offset= 0, n_samples = -1, disable_api_calling = False):
457457
# inside GPT inference annotator
458458

459-
if n_samples != -1:
460-
indices = list(range(len(self.data)))[:n_samples]
459+
if n_samples == -1:
460+
# do not use offset if n_samples is -1
461+
assert offset == 0
461462

463+
if n_samples != -1:
464+
indices = list(range(len(self.data)))[offset:offset + n_samples]
462465
num_chunks = os.cpu_count() if not self.debug else 2
463466

464467
indices_groups = self.split_indices(indices, num_chunks)
@@ -472,7 +475,7 @@ def multi_process_run(self, n_samples = -1, disable_api_calling = False):
472475
for future in futures:
473476
result_dict = future.result()
474477
combined_results.update(result_dict)
475-
478+
print (combined_results)
476479
if self.debug:
477480
print (combined_results)
478481
if combined_results and 'mc_' in self.question_type:
@@ -560,9 +563,7 @@ def predict_images(self, images, parsed_item):
560563

561564
if 'o1' in self.gpt_model:
562565
system_prompt += format_prompt
563-
564-
#print (system_prompt)
565-
566+
566567
if self.handobj_root is not None:
567568
system_prompt += f"""To further assist you, we mark hands and object when they are visible. The left hand is marked with a bounding box that contains letter L and the right hand's bounding box contains letter R. The object is marked as 'O'."""
568569

llava/action/make_visualizations.py

Lines changed: 24 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -12,21 +12,27 @@
1212

1313
from llava.action.chatgpt_utils import GPTInferenceAnnotator
1414

15-
root = '/data/EK100/EK100_320p_15sec_30fps_libx264'
16-
annotation_file = '/data/epic_kitchen/epic-kitchens-100-annotations/EPIC_100_validation.csv'
17-
avion_prediction_file = '/data/epic_kitchen/AVION_PREDS/avion_pred_ids_val.json'
18-
tim_prediction_file = '/data/epic_kitchen/TIM_PREDS/tim_pred_ids_val.json'
19-
n_frames = 4
15+
# root = '/data/EK100/EK100_320p_15sec_30fps_libx264'
16+
# annotation_file = '/data/epic_kitchen/epic-kitchens-100-annotations/EPIC_100_validation.csv'
17+
# avion_prediction_file = '/data/epic_kitchen/AVION_PREDS/avion_pred_ids_val.json'
18+
# tim_prediction_file = '/data/epic_kitchen/TIM_PREDS/tim_pred_ids_val.json'
19+
20+
root = '/data/shaokai/EK100_512/EK100'
21+
annotation_file = '/data/shaokai/epic-kitchens-100-annotations/EPIC_100_validation.csv'
22+
avion_prediction_file = '/data/shaokai/AVION_PREDS/avion_pred_ids_val.json'
23+
tim_prediction_file = '/data/shaokai/TIM_PREDS/tim_pred_ids_val.json'
24+
25+
n_frames = 32
2026
topk = 5
2127
action_representation = 'GT_random_narration'
22-
gpt_model = 'gpt-4o-mini-2024-07-18'
23-
#gpt_model = 'gpt-4o-2024-08-06'
28+
#gpt_model = 'gpt-4o-mini-2024-07-18'
29+
gpt_model = 'gpt-4o-2024-08-06'
2430
perspective = 'first_person'
2531
benchmark_testing = True
2632

2733

2834

29-
def visualize_with_random(n_samples, question_type = 'mc_'):
35+
def visualize_with_random(n_samples, offset = 0, question_type = 'mc_'):
3036
"""
3137
Here we should test gpt-4o, gpt-4o-mini with different prompts
3238
"""
@@ -43,9 +49,9 @@ def visualize_with_random(n_samples, question_type = 'mc_'):
4349
do_visualization = True,
4450
topk = topk)
4551

46-
inferencer.multi_process_run(n_samples, disable_api_calling=False)
52+
inferencer.multi_process_run(n_samples = n_samples, offset = offset, disable_api_calling=False)
4753

48-
def visualize_with_gpt_with_tim(n_samples, question_type = 'mc_'):
54+
def visualize_with_gpt_with_tim(n_samples, offset = 0, question_type = 'mc_'):
4955
"""
5056
Here we should test gpt-4o, gpt-4o-mini with different prompts
5157
"""
@@ -62,10 +68,10 @@ def visualize_with_gpt_with_tim(n_samples, question_type = 'mc_'):
6268
do_visualization = True,
6369
topk = topk)
6470

65-
inferencer.multi_process_run(n_samples, disable_api_calling=False)
71+
inferencer.multi_process_run(n_samples = n_samples, offset = offset, disable_api_calling=False)
6672

6773

68-
def visualize_with_gpt_with_avion(n_samples, question_type = 'mc_'):
74+
def visualize_with_gpt_with_avion(n_samples, offset = 0, question_type = 'mc_'):
6975
"""
7076
Here we should test gpt-4o, gpt-4o-mini with different prompts
7177
"""
@@ -82,9 +88,11 @@ def visualize_with_gpt_with_avion(n_samples, question_type = 'mc_'):
8288
do_visualization = True,
8389
topk = topk)
8490

85-
inferencer.multi_process_run(n_samples, disable_api_calling=False)
91+
inferencer.multi_process_run(n_samples = n_samples, offset = offset, disable_api_calling=False)
8692

8793
if __name__ == '__main__':
88-
#visualize_with_random(1, question_type = "mc_")
89-
#visualize_with_gpt_with_tim(1, question_type = "mc_")
90-
visualize_with_gpt_with_avion(1, question_type = "mc_")
94+
95+
question_type = "gpt-gt-reason"
96+
#visualize_with_random(20, offset = 40, question_type = "gpt-gt-reason")
97+
#visualize_with_gpt_with_tim(20, offset = 40, question_type = "gpt-gt-reason")
98+
visualize_with_gpt_with_avion(20, offset = 40, question_type = "gpt-gt-reason")

llava/action/utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -232,7 +232,7 @@ def format_task_related_prompt(question, question_type, meta_data = None, perspe
232232
"""
233233

234234
if perspective == "first_person":
235-
perspective_prefix = "You are seeing this video from egocentric view and you are the person. Your hands are sometimes interacting with objects. What action are you doing?"
235+
perspective_prefix = "You are seeing this video from egocentric view and you are the person. Your hands are sometimes interacting with objects. What action are you doing? Note that you need to use first person perspective."
236236
elif perspective == "third_person":
237237
perspective_prefix = "The video is taken from egocentric view. The person's hands are sometimes interacting with objects. What action is the person doing?"
238238

0 commit comments

Comments
 (0)