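Fix double initialization of the torch.distributed process group

Guard dist.init_process_group() with dist.is_initialized() in ek_eval.py.
Also re-enable prediction-analysis logging and saving, drop the --early_stop
option, and fold the Analysis loader into PredictionAnalysis (per-rank save
path built from save_folder and rank).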

Ye Shaokai · Ye Shaokai · commit 2a02a4f1675a · 2024-10-16T08:15:08.000+02:00
diff --git a/action/ek_eval.py b/action/ek_eval.py
@@ -24,7 +24,8 @@
 from collections import Counter 
 import torch.distributed as dist
 
-dist.init_process_group(backend='nccl')
+if not dist.is_initialized():
+    dist.init_process_group(backend='nccl')
 rank = dist.get_rank()
 torch.cuda.set_device(rank)
 
@@ -282,7 +283,7 @@ def __init__(
         self.ann_root = Path(metadata).parent
         self.mc_generator = MultiChoiceGenerator(self.ann_root)
         self.rank = dist.get_rank()
-        self.prediction_analysis = PredictionAnalysis(f'prediction_analysis_buf_rank{self.rank}.json')
+        self.prediction_analysis = PredictionAnalysis(rank = self.rank)
         
     def __getitem__(self, i):
         frames, label, time_meta = self.get_raw_item(
@@ -340,7 +341,7 @@ def get_args_parser():
     parser.add_argument('--action_predictions', default=None, type=str, help='path to action predictions')
     parser.add_argument('--topk_predictions', default = 5, type =int)
     parser.add_argument('--llava_checkpoint', default = None, type = str)
-    parser.add_argument('--early_stop', default = None, type = int)
+
     
     return parser
 
@@ -542,10 +543,7 @@ def evaluate_on_EK100(eval_args,
         # let's evaluate 1000 samples to get the complete picture       
         if finish_early and idx> (1000 / dist.get_world_size()):
             break                     
-
-        if eval_args.early_stop and idx > eval_args.early_stop:
-            break
-
+     
         # Update running corrects and total samples
         
         llava_correct, llava_pred = ensemble_llava_evaluation(
@@ -565,14 +563,14 @@ def evaluate_on_EK100(eval_args,
 
         # log the predictions into prediciton analysis
 
-        # val_dataset.prediction_analysis.log(global_index,
-        #                                     llava_pred,
-        #                                     gt_name,
-        #                                     predictions[str(global_index)],
-        #                                     time_meta['start_second'].item(),
-        #                                     time_meta['end_second'].item(),
-        #                                     time_meta['vid_path'],
-        #                                     dataset_name = 'EK100')
+        val_dataset.prediction_analysis.log(global_index,
+                                            llava_pred,
+                                            gt_name,
+                                            predictions[str(global_index)],
+                                            time_meta['start_second'].item(),
+                                            time_meta['end_second'].item(),
+                                            time_meta['vid_path'],
+                                            dataset_name = 'EK100')
 
         
 
@@ -623,7 +621,7 @@ def evaluate_on_EK100(eval_args,
             logger.info(f'Global Avion Accuracy: {global_avion_accuracy:.4f}')
         logger.info(f'Final Global Accuracy: {global_accuracy:.4f}')
 
-    #val_dataset.prediction_analysis.save()
+    val_dataset.prediction_analysis.save()
     
     return global_accuracy
 
diff --git a/action/prediction_analysis.py b/action/prediction_analysis.py
@@ -1,6 +1,6 @@
 import json
 import glob
-
+import os
 class PredictionAnalysis:
     """
     We save data that can be used for ad-hoc analysis
@@ -19,8 +19,11 @@ class PredictionAnalysis:
         vid_path: ''
     }
     """
-    def __init__(self, save_path):
-        self.save_path = save_path
+    def __init__(self, save_folder = '.', rank = 0):
+        self.save_folder = save_folder
+        self.rank = rank
+        self.prefix = 'prediction_analysis_buf'
+        self.save_path = os.path.join(save_folder, f'{self.prefix}_rank{rank}.json')       
         self.data = {}
     def log(self, 
             global_index,
@@ -50,52 +53,50 @@ def save(self):
             json.dump(self.data, f, indent = 4)
 
 
-class Analysis:
-    """
-
-    This same code should be applied to the training too.
-
-    collect all the wrong top-1 prediction from avion
-    collect all the wrong top-1 prediction from llava
-
-    Determine percentage of wrong llava prediction that has wrong verb only
-    Determine percentage of wrong llava prediction that has wrong noun only
-    Determine percentage of wrong llava prediciton that has both verb and noun wrong
-    Determine percentage of wrong llava prediction that was wrong because the answer not in the top k
-    """
-    pass
-
-    def __init__(self, prefix):
-
-        files = glob.glob(prefix + '*')
-
-        self.data = {}
-
-        for file in files:
-            print ('loading pred checkpoint from: ', file)
-            with open(file, 'r') as f:
-                _data = json.load(f)
-                self.data.update(_data)
+    def load(self):
+        save_folder = self.save_folder
+        if self.rank == 0:
+            files = glob.glob(os.path.join(save_folder,self.prefix + '*'))
+            for file in files:
+                print ('loading pred checkpoint from: ', file)
+                with open(file, 'r') as f:
+                    _data = json.load(f)
+                    self.data.update(_data)
 
-        # add some assertion for number of keys in the data
+            print (sorted(list(self.data.keys()), key = lambda x: int(x)))
 
     def wrong_verb(self):
 
         N = len(self.data)
+        llava_wrong_verb_collections = []
+        llava_wrong_noun_collections = []
+        llava_wrong_verb_noun_collections = []
 
-        wrong_verb_collections = []
-        wrong_noun_collections = []
-        wrong_verb_noun_collections = []
+        avion_wrong_verb_collections = []
+        avion_wrong_noun_collections = []
+        avion_wrong_verb_noun_collections = []
 
         wrong_llava_collections = []
         wrong_avion_collections = []
 
-        indices = sorted(self.data.keys())
+        indices = sorted(list(self.data.keys()), key = lambda x: int(x))
 
         for index in indices:
             items = self.data[index]
-        
-
+            llava_pred = items['llava_pred']
+            gt_name = items['gt_name']
+            # only replacing the first : 
+            avion_pred = items['avion_preds']['predictions'][0].replace(':', ' ', 1)
+            
+            if llava_pred != gt_name:
+                wrong_llava_collections.append((llava_pred, gt_name))
+            if avion_pred!= gt_name:
+                # pred, gt
+                wrong_avion_collections.append((avion_pred, gt_name))
+            
 
 if __name__ == '__main__':
-    pass
+
+
+    prediction_analysis = PredictionAnalysis(save_folder = '/storage-rcp-pure/upmwmathis_scratch/shaokai/LLaVA-NeXT')
+    prediction_analysis.load()