Skip to content

Commit 8b294e4

Browse files
lwalew and chrbrunk authored
feat: update scoring (#52)
* feat: update scoring alpha
* feat: update scoring functions
* feat: update bond length dist
* feat: update conformer selection
* chore: add todos
* feat: update most benchmarks for scoring
* feat: let UI distinguish benchmark categories
* test: updated tests
* test: fix tests
* feat: update solvent rdf score
* feat: update water rdf score
* feat: update solvent rdf score
* fix: divide score by 2

---------

Co-authored-by: Christoph Brunken <[email protected]>
1 parent 165c029 commit 8b294e4

File tree

17 files changed

+183
-95
lines changed

17 files changed

+183
-95
lines changed

src/mlipaudit/app.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,14 +230,16 @@ def main():
230230
ring_planarity,
231231
small_molecule_minimization,
232232
bond_length_distribution,
233-
water_radial_distribution,
234-
solvent_radial_distribution,
235233
reactivity,
236234
],
237235
"Biomolecules": [
238236
folding_stability,
239237
sampling,
240238
],
239+
"Molecular Liquids": [
240+
water_radial_distribution,
241+
solvent_radial_distribution,
242+
],
241243
"General": [stability, scaling],
242244
}
243245

@@ -255,6 +257,7 @@ def main():
255257
pages_to_show = [leaderboard] + (
256258
page_categories["Small Molecules"]
257259
+ page_categories["Biomolecules"]
260+
+ page_categories["Molecular Liquids"]
258261
+ page_categories["General"]
259262
)
260263

src/mlipaudit/benchmarks/__init__.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,4 +99,24 @@
9999
StabilityBenchmark,
100100
ScalingBenchmark,
101101
]
102+
102103
BENCHMARK_NAMES = [b.name for b in BENCHMARKS]
104+
105+
BENCHMARK_CATEGORIES = {
106+
"Small Molecules": [
107+
ConformerSelectionBenchmark,
108+
DihedralScanBenchmark,
109+
TautomersBenchmark,
110+
NoncovalentInteractionsBenchmark,
111+
RingPlanarityBenchmark,
112+
SmallMoleculeMinimizationBenchmark,
113+
BondLengthDistributionBenchmark,
114+
ReactivityBenchmark,
115+
],
116+
"Biomolecules": [FoldingStabilityBenchmark, SamplingBenchmark],
117+
"Molecular Liquids": [
118+
WaterRadialDistributionBenchmark,
119+
SolventRadialDistributionBenchmark,
120+
],
121+
"General": [StabilityBenchmark],
122+
}

src/mlipaudit/benchmarks/bond_length_distribution/bond_length_distribution.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
"temperature_kelvin": 300.0,
4444
}
4545

46-
AVG_DEVIATION_SCORE_THRESHOLD = 0.05
46+
DEVIATION_SCORE_THRESHOLD = 0.05
4747

4848

4949
class Molecule(BaseModel):
@@ -218,23 +218,23 @@ def analyze(self) -> BondLengthDistributionResult:
218218
trajectory[:, pattern_indices[0]] - trajectory[:, pattern_indices[1]],
219219
axis=1,
220220
)
221-
deviation_trajectory = list(
221+
deviation_trajectory = np.abs(
222222
bond_length_trajectory - reference_bond_distance
223223
)
224224

225225
molecule_result = BondLengthDistributionMoleculeResult(
226226
molecule_name=molecule_output.molecule_name,
227-
deviation_trajectory=deviation_trajectory,
228-
avg_deviation=statistics.mean(deviation_trajectory),
227+
deviation_trajectory=list(deviation_trajectory),
228+
avg_deviation=float(np.mean(deviation_trajectory)),
229229
)
230230
results.append(molecule_result)
231231

232232
avg_deviation = statistics.mean(r.avg_deviation for r in results)
233233

234234
score = compute_benchmark_score(
235-
[avg_deviation],
235+
[[r.avg_deviation for r in results]],
236236
[
237-
AVG_DEVIATION_SCORE_THRESHOLD,
237+
DEVIATION_SCORE_THRESHOLD,
238238
],
239239
)
240240

src/mlipaudit/benchmarks/conformer_selection/conformer_selection.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@
3131

3232
WIGGLE_DATASET_FILENAME = "wiggle150_dataset.json"
3333

34-
AVG_MAE_SCORE_THRESHOLD = 0.5
35-
AVG_RMSE_SCORE_THRESHOLD = 1.5
34+
MAE_SCORE_THRESHOLD = 0.5
35+
RMSE_SCORE_THRESHOLD = 1.5
3636

3737

3838
class ConformerSelectionMoleculeResult(BaseModel):
@@ -251,8 +251,8 @@ def analyze(self) -> ConformerSelectionResult:
251251
avg_rmse = statistics.mean(r.rmse for r in results)
252252

253253
score = compute_benchmark_score(
254-
[avg_mae, avg_rmse],
255-
[AVG_MAE_SCORE_THRESHOLD, AVG_RMSE_SCORE_THRESHOLD],
254+
[[r.mae for r in results], [r.rmse for r in results]],
255+
[MAE_SCORE_THRESHOLD, RMSE_SCORE_THRESHOLD],
256256
)
257257

258258
return ConformerSelectionResult(

src/mlipaudit/benchmarks/dihedral_scan/dihedral_scan.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232

3333
TORSIONNET_DATASET_FILENAME = "TorsionNet500.json"
3434

35-
MAE_BARRIER_HEIGHT_SCORE_THRESHOLD = 1.0
35+
BARRIER_HEIGHT_SCORE_THRESHOLD = 1.0
3636

3737

3838
class Fragment(BaseModel):
@@ -256,10 +256,9 @@ def analyze(self) -> DihedralScanResult:
256256

257257
results.append(fragment_result)
258258

259-
mae_barrier_height = statistics.mean(r.barrier_height_error for r in results)
260259
score = compute_benchmark_score(
261-
[mae_barrier_height],
262-
[MAE_BARRIER_HEIGHT_SCORE_THRESHOLD],
260+
[[r.barrier_height_error for r in results]],
261+
[BARRIER_HEIGHT_SCORE_THRESHOLD],
263262
)
264263

265264
return DihedralScanResult(

src/mlipaudit/benchmarks/folding_stability/folding_stability.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@
5757
"temperature_kelvin": 300.0,
5858
}
5959

60-
MIN_RMSD_SCORE_THRESHOLD = 2.0
61-
MAX_TM_SCORE_THRESHOLD = 0.5
60+
RMSD_SCORE_THRESHOLD = 2.0
61+
TM_SCORE_THRESHOLD = 0.5
6262

6363

6464
class FoldingStabilityMoleculeResult(BaseModel):
@@ -260,20 +260,20 @@ def analyze(self) -> FoldingStabilityResult:
260260
)
261261
molecule_results.append(molecule_result)
262262

263-
min_rmsd = min(r.avg_rmsd for r in molecule_results)
264-
max_tm_score = max(r.avg_tm_score for r in molecule_results)
265-
266263
score = compute_benchmark_score(
267-
[min_rmsd, max_tm_score],
268-
[MIN_RMSD_SCORE_THRESHOLD, MAX_TM_SCORE_THRESHOLD],
264+
[
265+
[r.avg_rmsd for r in molecule_results],
266+
[r.avg_tm_score for r in molecule_results],
267+
],
268+
[RMSD_SCORE_THRESHOLD, TM_SCORE_THRESHOLD],
269269
)
270270

271271
return FoldingStabilityResult(
272272
molecules=molecule_results,
273273
avg_rmsd=statistics.mean(r.avg_rmsd for r in molecule_results),
274-
min_rmsd=min_rmsd,
274+
min_rmsd=min(r.avg_rmsd for r in molecule_results),
275275
avg_tm_score=statistics.mean(r.avg_tm_score for r in molecule_results),
276-
max_tm_score=max_tm_score,
276+
max_tm_score=max(r.avg_tm_score for r in molecule_results),
277277
avg_match=statistics.mean(r.avg_match for r in molecule_results),
278278
max_abs_deviation_radius_of_gyration=max(
279279
r.max_abs_deviation_radius_of_gyration for r in molecule_results

src/mlipaudit/benchmarks/noncovalent_interactions/noncovalent_interactions.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,7 @@
5858
"B": "Boron",
5959
}
6060

61-
MAE_INTERACTION_ENERGY_SCORE_THRESHOLD = 1.0
62-
RMSE_INTERACTION_ENERGY_SCORE_THRESHOLD = 1.0
61+
INTERACTION_ENERGY_SCORE_THRESHOLD = 1.0
6362

6463

6564
class NoncovalentInteractionsSystemResult(BenchmarkResult):
@@ -262,17 +261,16 @@ def _compute_metrics_from_system_results(
262261
)
263262

264263
all_deviations = [system_results.deviation for system_results in results]
265-
mae_interaction_energy_all = np.mean(np.abs(all_deviations))
266-
rmse_interaction_energy_all = np.sqrt(np.mean(np.array(all_deviations) ** 2))
264+
abs_deviations = [np.abs(dev) for dev in all_deviations]
267265

268266
score = compute_benchmark_score(
269-
[mae_interaction_energy_all, rmse_interaction_energy_all],
270-
[
271-
MAE_INTERACTION_ENERGY_SCORE_THRESHOLD,
272-
RMSE_INTERACTION_ENERGY_SCORE_THRESHOLD,
273-
],
267+
[abs_deviations],
268+
[INTERACTION_ENERGY_SCORE_THRESHOLD],
274269
)
275270

271+
mae_interaction_energy_all = np.mean(abs_deviations)
272+
rmse_interaction_energy_all = np.sqrt(np.mean(np.array(all_deviations) ** 2))
273+
276274
return NoncovalentInteractionsResult(
277275
systems=results,
278276
n_skipped_unallowed_elements=n_skipped_unallowed_elements,

src/mlipaudit/benchmarks/reactivity/reactivity.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@
4545

4646
GRAMBOW_DATASET_FILENAME = "grambow_dataset.json"
4747

48-
MAE_ACTIVATION_ENERGY_SCORE_THRESHOLD = 3.0
49-
MAE_ENTHALPY_OF_REACTION_SCORE_THRESHOLD = 2.0
48+
ACTIVATION_ENERGY_SCORE_THRESHOLD = 3.0
49+
ENTHALPY_OF_REACTION_SCORE_THRESHOLD = 2.0
5050

5151

5252
class Molecule(BaseModel):
@@ -285,17 +285,17 @@ def analyze(self) -> ReactivityResult:
285285
for reaction_result in result.values()
286286
])
287287

288-
mae_activation_energy = float(np.mean(ea_abs_errors))
289-
mae_enthalpy_of_reaction = float(np.mean(dh_abs_errors))
290-
291288
score = compute_benchmark_score(
292-
[mae_activation_energy, mae_enthalpy_of_reaction],
289+
[list(ea_abs_errors), list(dh_abs_errors)],
293290
[
294-
MAE_ACTIVATION_ENERGY_SCORE_THRESHOLD,
295-
MAE_ENTHALPY_OF_REACTION_SCORE_THRESHOLD,
291+
ACTIVATION_ENERGY_SCORE_THRESHOLD,
292+
ENTHALPY_OF_REACTION_SCORE_THRESHOLD,
296293
],
297294
)
298295

296+
mae_activation_energy = float(np.mean(ea_abs_errors))
297+
mae_enthalpy_of_reaction = float(np.mean(dh_abs_errors))
298+
299299
return ReactivityResult(
300300
reaction_results=result,
301301
mae_activation_energy=mae_activation_energy,

src/mlipaudit/benchmarks/ring_planarity/ring_planarity.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
"temperature_kelvin": 300.0,
4545
}
4646

47-
MAE_DEVIATION_SCORE_THRESHOLD = 0.05
47+
DEVIATION_SCORE_THRESHOLD = 0.05
4848

4949

5050
def deviation_from_plane(coords: np.ndarray) -> float:
@@ -237,7 +237,7 @@ def analyze(self) -> RingPlanarityResult:
237237

238238
mae_deviation = statistics.mean(r.avg_deviation for r in results)
239239
score = compute_benchmark_score(
240-
[mae_deviation], [MAE_DEVIATION_SCORE_THRESHOLD]
240+
[[r.avg_deviation for r in results]], [DEVIATION_SCORE_THRESHOLD]
241241
)
242242

243243
return RingPlanarityResult(

src/mlipaudit/benchmarks/sampling/sampling.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
import functools
1616
import logging
17-
import statistics
1817
from collections import defaultdict
1918

2019
import numpy as np
@@ -221,6 +220,8 @@ class SamplingResult(BenchmarkResult):
221220
dihedral distribution for each residue type.
222221
outliers_ratio_sidechain_dihedrals: The ratio of outliers in the sidechain
223222
dihedral distribution for each residue type.
223+
score: The final score for the benchmark between
224+
0 and 1.
224225
"""
225226

226227
systems: list[SamplingSystemResult]
@@ -458,8 +459,8 @@ def analyze(self) -> SamplingResult:
458459

459460
score = compute_benchmark_score(
460461
[
461-
statistics.mean(avg_outliers_ratio_backbone.values()),
462-
statistics.mean(avg_outliers_ratio_sidechain.values()),
462+
list(avg_outliers_ratio_backbone.values()),
463+
list(avg_outliers_ratio_sidechain.values()),
463464
],
464465
[
465466
OUTLIERS_RATIO_BACKBONE_SCORE_THRESHOLD,

0 commit comments

Comments
 (0)