updates

Ye Shaokai · Ye Shaokai · commit c614e7e99bfc · 2025-02-15T21:51:08.000+01:00
diff --git a/llava/action/benchmark.py b/llava/action/benchmark.py
@@ -2,7 +2,9 @@
 # benchmark gpt-4o on tim_mcq_top5_500
 # benchmark gpt-4o on random_mcq_top5_500
 from llava.action.chatgpt_utils import GPTInferenceAnnotator
-
+import glob
+import json
+import os
 # root = '/data/EK100/EK100_320p_15sec_30fps_libx264'
 # annotation_file = '/data/epic_kitchen/epic-kitchens-100-annotations/EPIC_100_validation.csv'
 # avion_prediction_file = '/data/epic_kitchen/AVION_PREDS/avion_pred_ids_val.json'
@@ -14,7 +16,7 @@
 tim_prediction_file = '/data/shaokai/TIM_PREDS/tim_pred_ids_val.json'
 
 
-n_frames = 16
+n_frames = 8
 topk = 5
 action_representation = 'GT_random_narration'
 perspective = 'first_person'
@@ -67,12 +69,24 @@ def benchmark_random_mcq(n_samples, gpt_model):
     
     inferencer.multi_process_run(n_samples = n_samples, offset = 0)
     
+def calcuate_acc_from_jsons(json_folder):
+    """Print the exact-match accuracy of each ``*.json`` prediction file in ``json_folder``."""
+    for file in glob.glob(os.path.join(json_folder, '*.json')):
+        print (file)
+        with open(file) as f:  # context manager closes the handle (original leaked it)
+            preds = json.load(f)
+        # Booleans sum as 0/1: count entries whose answer equals the ground truth.
+        correct = sum(v['gt_name'] == v['chatgpt_answer'] for v in preds.values())
+        # Guard the division: an empty result file reports 0.0 instead of raising.
+        print ('acc ', correct/len(preds) if preds else 0.0)
+
+    
     
 if __name__ == '__main__':
-    benchmark_avion_mcq(-1, 'gpt-4o-mini-2024-07-18')
-    benchmark_tim_mcq(-1, 'gpt-4o-mini-2024-07-18')
-    benchmark_random_mcq(-1, 'gpt-4o-mini-2024-07-18')
-    benchmark_avion_mcq(-1, 'gpt-4o-2024-08-06')
-    benchmark_tim_mcq(-1, 'gpt-4o-2024-08-06')
-    benchmark_random_mcq(-1, 'gpt-4o-2024-08-06')    
-    
+    # benchmark_avion_mcq(-1, 'gpt-4o-mini-2024-07-18')
+    # benchmark_tim_mcq(-1, 'gpt-4o-mini-2024-07-18')
+    # benchmark_random_mcq(-1, 'gpt-4o-mini-2024-07-18')
+    # benchmark_avion_mcq(-1, 'gpt-4o-2024-08-06')
+    # benchmark_tim_mcq(-1, 'gpt-4o-2024-08-06')
+    # benchmark_random_mcq(-1, 'gpt-4o-2024-08-06')    
+    calcuate_acc_from_jsons('gpt_full_benchmark_results')
diff --git a/llava/action/chatgpt_utils.py b/llava/action/chatgpt_utils.py
@@ -483,7 +483,6 @@ def multi_process_run(self, offset= 0, n_samples = -1, disable_api_calling = Fal
         if combined_results and 'mc_' in self.question_type:
             calculation = calculate_gpt_accuracy(data = combined_results)
 
-        assert n_samples != -1
         checkpoint_name = f"{self.gpt_model}_{self.gen_type}_{self.action_representation}_top{self.topk}_{self.clip_length}f_{n_samples}samples.json"
 
         if self.do_visualization: