Merge branch 'shaokai/dev' of github.com:yeshaokai/LLaVA-NeXT into shaokai/dev

Haozhe Qi · Haozhe Qi · commit 20a11bbb1862 · 2025-02-15T23:56:47.000+01:00
diff --git a/llava/action/benchmark.py b/llava/action/benchmark.py
@@ -2,21 +2,28 @@
 # benchmark gpt-4o on tim_mcq_top5_500
 # benchmark gpt-4o on random_mcq_top5_500
 from llava.action.chatgpt_utils import GPTInferenceAnnotator
+import glob
+import json
+import os
+# root = '/data/EK100/EK100_320p_15sec_30fps_libx264'
+# annotation_file = '/data/epic_kitchen/epic-kitchens-100-annotations/EPIC_100_validation.csv'
+# avion_prediction_file = '/data/epic_kitchen/AVION_PREDS/avion_pred_ids_val.json'
+# tim_prediction_file = '/data/epic_kitchen/TIM_PREDS/tim_pred_ids_val.json'
 
-root = '/data/EK100/EK100_320p_15sec_30fps_libx264'
-annotation_file = '/data/epic_kitchen/epic-kitchens-100-annotations/EPIC_100_validation.csv'
-avion_prediction_file = '/data/epic_kitchen/AVION_PREDS/avion_pred_ids_val.json'
-tim_prediction_file = '/data/epic_kitchen/TIM_PREDS/tim_pred_ids_val.json'
-n_frames = 4
+root = '/data/shaokai/EK100/'
+annotation_file = '/data/shaokai/epic-kitchens-100-annotations/EPIC_100_validation.csv'
+avion_prediction_file = '/data/shaokai/AVION_PREDS/avion_pred_ids_val.json'
+tim_prediction_file = '/data/shaokai/TIM_PREDS/tim_pred_ids_val.json'
+
+
+n_frames = 8
 topk = 5
 action_representation = 'GT_random_narration'
-#gpt_model = 'gpt-4o-mini-2024-07-18'
-gpt_model = 'gpt-4o-2024-08-06'
 perspective = 'first_person'
 benchmark_testing = True
 
 
-def benchmark_avion_mcq(n_samples):
+def benchmark_avion_mcq(n_samples, gpt_model):
 
     inferencer = GPTInferenceAnnotator(gpt_model,
                                        root,
@@ -29,9 +36,10 @@ def benchmark_avion_mcq(n_samples):
                                         perspective = perspective,
                                         benchmark_testing = benchmark_testing,
                                         topk = topk)
-    inferencer.multi_process_run(n_samples)
+    inferencer.multi_process_run(n_samples = n_samples,
+                                 offset = 0)
                                        
-def benchmark_tim_mcq(n_samples):
+def benchmark_tim_mcq(n_samples, gpt_model):
     
     inferencer = GPTInferenceAnnotator(gpt_model,
                                         root,
@@ -44,9 +52,9 @@ def benchmark_tim_mcq(n_samples):
                                         perspective = perspective,
                                         benchmark_testing = benchmark_testing,
                                         topk = topk) 
-    inferencer.multi_process_run(n_samples)    
+    inferencer.multi_process_run(n_samples = n_samples, offset = 0)    
 
-def benchmark_random_mcq(n_samples):
+def benchmark_random_mcq(n_samples, gpt_model):
     inferencer = GPTInferenceAnnotator(gpt_model,
                                        root,
                                        annotation_file,
@@ -59,10 +67,26 @@ def benchmark_random_mcq(n_samples):
                                         benchmark_testing = benchmark_testing,
                                         topk = topk) 
     
-    inferencer.multi_process_run(n_samples)
+    inferencer.multi_process_run(n_samples = n_samples, offset = 0)
+    
+def calcuate_acc_from_jsons(json_folder):
+    """Print each '*.json' file in json_folder followed by its accuracy."""
+    for file in glob.glob(os.path.join(json_folder, '*.json')):
+        print (file)
+        # Context manager closes the handle; the bare open() leaked it.
+        with open(file) as f:
+            preds = json.load(f)
+        correct = sum(v['gt_name'] == v['chatgpt_answer'] for v in preds.values())
+        # An empty result file reports 0.0 rather than dividing by zero.
+        print ('acc ', correct/len(preds) if preds else 0.0)
+
     
     
 if __name__ == '__main__':
-    #benchmark_avion_mcq(100)
-    benchmark_tim_mcq(100)
-    #benchmark_random_mcq(100)    
+    # benchmark_avion_mcq(-1, 'gpt-4o-mini-2024-07-18')
+    # benchmark_tim_mcq(-1, 'gpt-4o-mini-2024-07-18')
+    # benchmark_random_mcq(-1, 'gpt-4o-mini-2024-07-18')
+    # benchmark_avion_mcq(-1, 'gpt-4o-2024-08-06')
+    # benchmark_tim_mcq(-1, 'gpt-4o-2024-08-06')
+    # benchmark_random_mcq(-1, 'gpt-4o-2024-08-06')    
+    calcuate_acc_from_jsons('gpt_full_benchmark_results')
diff --git a/llava/action/chatgpt_utils.py b/llava/action/chatgpt_utils.py
@@ -462,6 +462,8 @@ def multi_process_run(self, offset= 0, n_samples = -1, disable_api_calling = Fal
 
         if n_samples != -1:
             indices = list(range(len(self.data)))[offset:offset + n_samples]
+        else:
+            indices = list(range(len(self.data)))
         num_chunks = os.cpu_count() if not self.debug else 2
 
         indices_groups = self.split_indices(indices, num_chunks)
@@ -481,9 +483,7 @@ def multi_process_run(self, offset= 0, n_samples = -1, disable_api_calling = Fal
         if combined_results and 'mc_' in self.question_type:
             calculation = calculate_gpt_accuracy(data = combined_results)
 
-        prefix = self.gen_type
-        assert n_samples != -1
-        checkpoint_name = f"{prefix}_{self.action_representation}_top{self.topk}_{self.clip_length}f_{n_samples}samples.json"
+        checkpoint_name = f"{self.gpt_model}_{self.gen_type}_{self.action_representation}_top{self.topk}_{self.clip_length}f_{n_samples}samples.json"
 
         if self.do_visualization:
             self.checkpoint(combined_results, os.path.join(self.vis_folder, checkpoint_name))