Skip to content

Commit c6a1a25

Browse files
author
Ye Shaokai
committed
WIP
1 parent ee20055 commit c6a1a25

File tree

2 files changed

+14
-9
lines changed

2 files changed

+14
-9
lines changed

llava/action/make_visualizations.py

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -25,8 +25,8 @@
2525
n_frames = 32
2626
topk = 5
2727
action_representation = 'GT_random_narration'
28-
#gpt_model = 'gpt-4o-mini-2024-07-18'
29-
gpt_model = 'gpt-4o-2024-08-06'
28+
gpt_model = 'gpt-4o-mini-2024-07-18'
29+
#gpt_model = 'gpt-4o-2024-08-06'
3030
perspective = 'first_person'
3131
benchmark_testing = True
3232

@@ -89,10 +89,15 @@ def visualize_with_gpt_with_avion(n_samples, offset = 0, question_type = 'mc_'):
8989
topk = topk)
9090

9191
inferencer.multi_process_run(n_samples = n_samples, offset = offset, disable_api_calling=False)
92+
93+
94+
def visualize_with_llava(uid, ):
95+
"""
96+
97+
"""
98+
99+
92100

93101
if __name__ == '__main__':
94102

95-
question_type = "gpt-gt-reason"
96-
#visualize_with_random(20, offset = 40, question_type = "gpt-gt-reason")
97-
#visualize_with_gpt_with_tim(20, offset = 40, question_type = "gpt-gt-reason")
98-
visualize_with_gpt_with_avion(20, offset = 40, question_type = "gpt-gt-reason")
103+
visualize_with_gpt_with_avion(10, offset = 100, question_type = "caption")

llava/action/utils.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -232,7 +232,7 @@ def format_task_related_prompt(question, question_type, meta_data = None, perspe
232232
"""
233233

234234
if perspective == "first_person":
235-
perspective_prefix = "You are seeing this video from egocentric view and you are the person. Your hands are sometimes interacting with objects. What action are you doing? Note that you need to use first person perspective."
235+
perspective_prefix = "You are seeing this video from egocentric view and you are the person. Your hands are sometimes interacting with objects. What action are you doing? "
236236
elif perspective == "third_person":
237237
perspective_prefix = "The video is taken from egocentric view. The person's hands are sometimes interacting with objects. What action is the person doing?"
238238

@@ -262,8 +262,8 @@ def format_task_related_prompt(question, question_type, meta_data = None, perspe
262262

263263
elif question_type == "temporal_detection":
264264
ret = question
265-
elif question_type == "gpt-gt-reason":
266-
ret = f"{perspective_prefix}Describe in details what you see from the video frames."
265+
elif question_type == "gpt-gt-reason" or question_type == "caption":
266+
ret = f"{perspective_prefix} Describe in details what you see from the video frames. You must talk in the first person perspective. Try to focus on what you are doing. "
267267

268268
elif question_type == "triple_direct_answer":
269269
assert meta_data

0 commit comments

Comments
 (0)