Skip to content

Commit 3cf9cb3

Browse files
FFroehlich and dweindl authored
Fix override check and add noise formula check (#48)
* fix timepoint specific mappings and make more flexible * add check for nontrivial noise formulas * fix pep * add test * fixup * Update petab/lint.py Co-authored-by: Daniel Weindl <[email protected]> * fix according to review * fix and add more tests * add test explainers Co-authored-by: Daniel Weindl <[email protected]>
1 parent 1d49112 commit 3cf9cb3

File tree

2 files changed

+92
-23
lines changed

2 files changed

+92
-23
lines changed

petab/lint.py

Lines changed: 58 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import logging
55
import numbers
66
import re
7-
from typing import Optional, Iterable, Union
7+
from typing import Optional, Iterable, Any
88
from collections import Counter
99

1010
import libsbml
@@ -527,67 +527,103 @@ def assert_parameter_estimate_is_boolean(parameter_df: pd.DataFrame) -> None:
527527

528528

529529
def measurement_table_has_timepoint_specific_mappings(
530-
measurement_df: pd.DataFrame) -> bool:
530+
measurement_df: pd.DataFrame,
531+
allow_scalar_numeric_noise_parameters: bool = False,
532+
allow_scalar_numeric_observable_parameters: bool = False,
533+
) -> bool:
531534
"""
532535
Are there time-point or replicate specific parameter assignments in the
533536
measurement table.
534537
535538
Arguments:
536-
measurement_df: PEtab measurement table
539+
measurement_df:
540+
PEtab measurement table
541+
542+
allow_scalar_numeric_noise_parameters:
543+
ignore scalar numeric assignments to noiseParameter placeholders
544+
545+
allow_scalar_numeric_observable_parameters:
546+
ignore scalar numeric assignments to observableParameter
547+
placeholders
537548
538549
Returns:
539-
True if there are time-point or replicate specific parameter
540-
assignments in the measurement table, False otherwise.
550+
True if there are time-point or replicate specific (non-numeric)
551+
parameter assignments in the measurement table, False otherwise.
541552
"""
542553
# since we edit it, copy it first
543554
measurement_df = copy.deepcopy(measurement_df)
544555

545-
def is_numeric(x: Union[str, numbers.Number]) -> bool:
556+
def is_scalar_float(x: Any):
546557
"""
547-
Checks whether x can be transformed into a (list of) float(s)
558+
Checks whether input is a number or can be transformed into a number
559+
via float
548560
:param x:
549-
number or string containing numbers separated by ;
561+
input
550562
:return:
551-
True if conversion is possible for all values
563+
True if is or can be converted to number, False otherwise.
552564
"""
553565
if isinstance(x, numbers.Number):
554566
return True
555-
if not isinstance(x, str):
556-
return False
557567
try:
558-
[float(y) for y in x.split(';')]
568+
float(x)
559569
return True
560570
except (ValueError, TypeError):
561571
return False
562572

563573
# mask numeric values
564-
for col in [OBSERVABLE_PARAMETERS, NOISE_PARAMETERS]:
574+
for col, allow_scalar_numeric in [
575+
(OBSERVABLE_PARAMETERS, allow_scalar_numeric_observable_parameters),
576+
(NOISE_PARAMETERS, allow_scalar_numeric_noise_parameters)
577+
]:
565578
if col not in measurement_df:
566579
continue
567-
measurement_df.loc[measurement_df[col].apply(is_numeric), col] = np.nan
580+
581+
measurement_df[col] = measurement_df[col].apply(str)
582+
583+
if allow_scalar_numeric:
584+
measurement_df.loc[
585+
measurement_df[col].apply(is_scalar_float), col
586+
] = np.nan
568587

569588
grouping_cols = core.get_notnull_columns(
570589
measurement_df,
571590
[OBSERVABLE_ID,
572591
SIMULATION_CONDITION_ID,
573592
PREEQUILIBRATION_CONDITION_ID,
574593
OBSERVABLE_PARAMETERS,
575-
NOISE_PARAMETERS,
576-
])
577-
grouped_df = measurement_df.groupby(grouping_cols,
578-
dropna=False).size().reset_index()
594+
NOISE_PARAMETERS])
595+
grouped_df = measurement_df.groupby(grouping_cols, dropna=False)
579596

580597
grouping_cols = core.get_notnull_columns(
581-
grouped_df,
598+
measurement_df,
582599
[OBSERVABLE_ID,
583600
SIMULATION_CONDITION_ID,
584601
PREEQUILIBRATION_CONDITION_ID])
585-
grouped_df2 = grouped_df.groupby(grouping_cols).size().reset_index()
602+
grouped_df2 = measurement_df.groupby(grouping_cols)
586603

587604
# data frame has timepoint specific overrides if grouping by noise
588605
# parameters and observable parameters in addition to observable,
589606
# condition and preeq id yields more groups
590-
return len(grouped_df.index) != len(grouped_df2.index)
607+
return len(grouped_df) != len(grouped_df2)
608+
609+
610+
def observable_table_has_nontrivial_noise_formula(
611+
observable_df: pd.DataFrame) -> bool:
612+
"""
613+
Does any observable have a noise formula that is not just a single
614+
parameter?
615+
616+
Arguments:
617+
observable_df: PEtab observable table
618+
619+
Returns:
620+
True if any noise formula does not consist of a single identifier,
621+
False otherwise.
622+
"""
623+
624+
return not observable_df[NOISE_FORMULA].apply(
625+
lambda x: re.match(r'^[\w_\.]+$', x) is not None
626+
).all()
591627

592628

593629
def measurement_table_has_observable_parameter_numeric_overrides(
@@ -598,7 +634,7 @@ def measurement_table_has_observable_parameter_numeric_overrides(
598634
measurement_df: PEtab measurement table
599635
600636
Returns:
601-
True if there any numbers to override observable parameters,
637+
True if there are any numbers to override observable/noise parameters,
602638
False otherwise.
603639
"""
604640
if OBSERVABLE_PARAMETERS not in measurement_df:

tests/test_lint.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,17 +57,50 @@ def test_measurement_table_has_timepoint_specific_mappings():
5757
PREEQUILIBRATION_CONDITION_ID: [nan, nan],
5858
TIME: [1.0, 2.0],
5959
OBSERVABLE_PARAMETERS: ['obsParOverride', ''],
60-
NOISE_PARAMETERS: ['', '']
60+
NOISE_PARAMETERS: ['1.0', 1.0]
6161
})
6262

6363
assert lint.measurement_table_has_timepoint_specific_mappings(
6464
measurement_df) is True
6565

66+
# both measurements differ anyway
6667
measurement_df.loc[1, OBSERVABLE_ID] = 'obs2'
68+
assert lint.measurement_table_has_timepoint_specific_mappings(
69+
measurement_df) is False
6770

71+
# mixed numeric string
72+
measurement_df.loc[1, OBSERVABLE_ID] = 'obs1'
73+
measurement_df.loc[1, OBSERVABLE_PARAMETERS] = 'obsParOverride'
6874
assert lint.measurement_table_has_timepoint_specific_mappings(
6975
measurement_df) is False
7076

77+
# different numeric values
78+
measurement_df.loc[1, NOISE_PARAMETERS] = 2.0
79+
assert lint.measurement_table_has_timepoint_specific_mappings(
80+
measurement_df) is True
81+
assert lint.measurement_table_has_timepoint_specific_mappings(
82+
measurement_df, allow_scalar_numeric_noise_parameters=True) is False
83+
84+
85+
def test_observable_table_has_nontrivial_noise_formula():
86+
# Ensure we fail if we have nontrivial noise formulas
87+
88+
observable_df = pd.DataFrame(data={
89+
OBSERVABLE_ID: ['0obsPar1noisePar', '2obsPar0noisePar'],
90+
OBSERVABLE_FORMULA: ['1.0',
91+
'1.0'],
92+
NOISE_FORMULA: ['noiseParameter1_0obsPar1noisePar + 3.0',
93+
'1.0']
94+
})
95+
96+
assert lint.observable_table_has_nontrivial_noise_formula(observable_df)\
97+
is True
98+
99+
observable_df.loc[0, NOISE_FORMULA] = 'sigma1'
100+
101+
assert lint.observable_table_has_nontrivial_noise_formula(observable_df) \
102+
is False
103+
71104

72105
def test_assert_overrides_match_parameter_count():
73106
# Ensure we recognize and fail if we have wrong number of overrides

0 commit comments

Comments
 (0)