Skip to content

Commit 38acb1b

Browse files
authored
Update result aggregation logic in the ResultExplorer to match new naming schema (#495)
1 parent d4ae9e2 commit 38acb1b

File tree

2 files changed

+12
-3
lines changed

2 files changed

+12
-3
lines changed

sdgym/result_explorer/result_handler.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
metainfo_PREFIX = 'metainfo'
1717
RESULTS_FILE_PREFIX = 'results'
1818
NUM_DIGITS_DATE = 10
19+
REGEX_SYNTHESIZER_NAME = r'\s*\(\d+\)\s*$'
1920

2021

2122
class ResultsHandler(ABC):
@@ -120,7 +121,15 @@ def _get_column_name_infos(self, folder_to_results):
120121
def _process_results(self, results):
121122
"""Process results to ensure they are unique and each dataset has all synthesizers."""
122123
aggregated_results = pd.concat(results, ignore_index=True)
123-
aggregated_results = aggregated_results.drop_duplicates(subset=['Dataset', 'Synthesizer'])
124+
aggregated_results['Synthesizer'] = (
125+
aggregated_results['Synthesizer']
126+
.astype(str)
127+
.str.replace(REGEX_SYNTHESIZER_NAME, '', regex=True)
128+
.str.strip()
129+
)
130+
aggregated_results = aggregated_results.drop_duplicates(
131+
subset=['Dataset', 'Synthesizer'], keep='first'
132+
)
124133
all_synthesizers = aggregated_results['Synthesizer'].unique()
125134
dataset_synth_counts = aggregated_results.groupby('Dataset')['Synthesizer'].nunique()
126135
valid_datasets = dataset_synth_counts[dataset_synth_counts == len(all_synthesizers)].index

tests/unit/result_explorer/test_result_handler.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -110,12 +110,12 @@ def test__process_results(self):
110110
results = [
111111
pd.DataFrame({
112112
'Dataset': ['A', 'A', 'B', 'B', 'C'],
113-
'Synthesizer': ['Synth1', 'Synth2', 'Synth1', 'Synth2', 'Synth1'],
113+
'Synthesizer': ['Synth1', 'Synth2(1)', 'Synth1', 'Synth2(1)', 'Synth1'],
114114
'Quality_Score': [0.5, 0.6, 0.7, 0.6, 0.8],
115115
}),
116116
pd.DataFrame({
117117
'Dataset': ['D', 'D', 'D'],
118-
'Synthesizer': ['Synth1', 'Synth2', 'Synth1'],
118+
'Synthesizer': ['Synth1(2)', 'Synth2', 'Synth1(2)'],
119119
'Quality_Score': [0.7, 0.8, 0.9],
120120
}),
121121
]

0 commit comments

Comments
 (0)