Update result aggregation logic in the ResultExplorer to match new naming schema (#495)

R-Palazzo · web-flow · commit 38acb1bcb697 · 2025-11-19T15:19:41.000Z
diff --git a/sdgym/result_explorer/result_handler.py b/sdgym/result_explorer/result_handler.py
@@ -16,6 +16,7 @@
 metainfo_PREFIX = 'metainfo'
 RESULTS_FILE_PREFIX = 'results'
 NUM_DIGITS_DATE = 10
+REGEX_SYNTHESIZER_NAME = r'\s*\(\d+\)\s*$'
 
 
 class ResultsHandler(ABC):
@@ -120,7 +121,15 @@ def _get_column_name_infos(self, folder_to_results):
     def _process_results(self, results):
         """Process results to ensure they are unique and each dataset has all synthesizers."""
         aggregated_results = pd.concat(results, ignore_index=True)
-        aggregated_results = aggregated_results.drop_duplicates(subset=['Dataset', 'Synthesizer'])
+        aggregated_results['Synthesizer'] = (
+            aggregated_results['Synthesizer']
+            .astype(str)
+            .str.replace(REGEX_SYNTHESIZER_NAME, '', regex=True)
+            .str.strip()
+        )
+        aggregated_results = aggregated_results.drop_duplicates(
+            subset=['Dataset', 'Synthesizer'], keep='first'
+        )
         all_synthesizers = aggregated_results['Synthesizer'].unique()
         dataset_synth_counts = aggregated_results.groupby('Dataset')['Synthesizer'].nunique()
         valid_datasets = dataset_synth_counts[dataset_synth_counts == len(all_synthesizers)].index
diff --git a/tests/unit/result_explorer/test_result_handler.py b/tests/unit/result_explorer/test_result_handler.py
@@ -110,12 +110,12 @@ def test__process_results(self):
         results = [
             pd.DataFrame({
                 'Dataset': ['A', 'A', 'B', 'B', 'C'],
-                'Synthesizer': ['Synth1', 'Synth2', 'Synth1', 'Synth2', 'Synth1'],
+                'Synthesizer': ['Synth1', 'Synth2(1)', 'Synth1', 'Synth2(1)', 'Synth1'],
                 'Quality_Score': [0.5, 0.6, 0.7, 0.6, 0.8],
             }),
             pd.DataFrame({
                 'Dataset': ['D', 'D', 'D'],
-                'Synthesizer': ['Synth1', 'Synth2', 'Synth1'],
+                'Synthesizer': ['Synth1(2)', 'Synth2', 'Synth1(2)'],
                 'Quality_Score': [0.7, 0.8, 0.9],
             }),
         ]