Skip to content

Commit 535f2be

Browse files
committed
add an option to drop filter rules for a given dataset
1 parent ee60e02 commit 535f2be

File tree

4 files changed

+106
-15
lines changed

4 files changed

+106
-15
lines changed

doc/sphinx/source/vp/filters.rst

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,33 @@ append a list of filter rules to the rules obtained by the mechanisms described
307307
The value of ``added_filter_rules`` should be a list of rules with the same format as ``filter_rules``.
308308

309309

310+
.. _drop_filter_rules:
311+
312+
Dropping filter rules for selected datasets
313+
-------------------------------------------
314+
315+
Sometimes it might be necessary to drop the filter rules for a dataset while keeping all other rules intact.
316+
This is possible with the ``drop_internal_rules`` key, which will drop all dataset-scoped rules applying to a given dataset.
317+
Since ``drop_internal_rules`` is applied before ``added_filter_rules`` it can be utilized to reset the rules for a given dataset
318+
while keeping all other internal rules.
319+
320+
.. code:: yaml
321+
322+
use_cuts: "internal"
323+
pdf: "NNPDF40_nnlo_as_01180"
324+
325+
dataset_inputs:
326+
- { dataset: ATLAS_Z0J_8TEV_PT-Y }
327+
- { dataset: ATLAS_Z0J_8TEV_PT-M }
328+
329+
theoryid: 40_000_000
330+
331+
drop_internal_rules:
332+
- ATLAS_Z0J_8TEV_PT-Y
333+
334+
actions_:
335+
- groups_chi2_table
336+
310337
311338
Examples
312339
--------
@@ -345,13 +372,13 @@ less than NNLO (i.e LO or NLO). I check what the process type of
345372

346373
.. code:: ipython
347374
348-
In [1]: from validphys.loader import Loader
375+
In [1]: from validphys.loader import Loader
349376
350-
In [2]: l = Loader()
377+
In [2]: l = Loader()
351378
352-
In [3]: cd = l.check_commondata("CMSDY2D12")
379+
In [3]: cd = l.check_commondata("CMSDY2D12")
353380
354-
In [4]: cd.process_type
381+
In [4]: cd.process_type
355382
Out[4]: 'EWK_RAP'
356383
357384
Then cross check this against ``NNPDF.CommonData.kinLabels`` to see that

validphys2/src/validphys/config.py

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1372,7 +1372,13 @@ def parse_added_filter_rules(self, rules: (list, type(None)) = None):
13721372
"""
13731373
return tuple(AddedFilterRule(**rule) for rule in rules) if rules else None
13741374

1375-
@functools.lru_cache
1375+
def parse_drop_internal_rules(self, drop_internal_rules: list | None = None):
1376+
"""Turns drop_internal_rules into a tuple for internal caching."""
1377+
if drop_internal_rules is None:
1378+
return tuple()
1379+
return tuple(drop_internal_rules)
1380+
1381+
@functools.cache
13761382
def produce_rules(
13771383
self,
13781384
theoryid,
@@ -1382,8 +1388,20 @@ def produce_rules(
13821388
filter_rules=None,
13831389
default_filter_rules_recorded_spec_=None,
13841390
added_filter_rules: (tuple, type(None)) = None,
1391+
drop_internal_rules: tuple = tuple(),
13851392
):
1386-
"""Produce filter rules based on the user defined input and defaults."""
1393+
"""Produce filter rules based on the user defined input and defaults.
1394+
1395+
It is possible to overwrite or extend the internal rules from the runcard
1396+
using the following variables:
1397+
1398+
``filter_rules``: tuple(rules)
1399+
Drop all internal rules and take these instead
1400+
``added_filter_rules``: tuple(rules)
1401+
Extend the internal rules with these
1402+
``drop_internal_rules``: tuple(dataset names)
1403+
Drop internal dataset-specific rules, it is applied before ``added_filter_rules``
1404+
"""
13871405

13881406
theory_parameters = theoryid.get_description()
13891407

@@ -1397,15 +1415,20 @@ def produce_rules(
13971415
filter_rules = default_filter_rules_input()
13981416

13991417
try:
1400-
rule_list = [
1401-
Rule(
1402-
initial_data=rule,
1403-
defaults=defaults,
1404-
theory_parameters=theory_parameters,
1405-
loader=self.loader,
1418+
rule_list = []
1419+
for rule in filter_rules:
1420+
# Don't load rules that are to be dropped
1421+
if rule.dataset in drop_internal_rules:
1422+
continue
1423+
1424+
rule_list.append(
1425+
Rule(
1426+
initial_data=rule,
1427+
defaults=defaults,
1428+
theory_parameters=theory_parameters,
1429+
loader=self.loader,
1430+
)
14061431
)
1407-
for rule in filter_rules
1408-
]
14091432
except RuleProcessingError as e:
14101433
raise ConfigError(f"Error Processing filter rules: {e}") from e
14111434

validphys2/src/validphys/tests/conftest.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
settings.register_profile("extratime", deadline=1500)
1717
settings.load_profile("extratime")
1818

19+
lhapdf.setVerbosity(0)
20+
1921

2022
# Fortunately py.test works much like reportengine and providers are
2123
# connected by argument names.

validphys2/src/validphys/tests/test_filter_rules.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ def test_good_rules():
101101
l = Loader()
102102
rules = [mkrule(inp) for inp in good_rules]
103103
dsnames = ['ATLAS_1JET_8TEV_R06_PTY', 'NMC_NC_NOTFIXED_EM-F2']
104-
variants = ["legacy","legacy_dw"]
104+
variants = ["legacy", "legacy_dw"]
105105
for dsname, v in zip(dsnames, variants):
106106
ds = l.check_dataset(
107107
dsname, cuts='internal', rules=tuple(rules), theoryid=THEORYID, variant=v
@@ -137,3 +137,42 @@ def test_added_rules():
137137
assert np.isnan(tb["empty data"].iloc[1, 1])
138138
assert tb["empty data"]["ndata"].iloc[0] == 0
139139
assert np.all(tb[1:]["fewer data"] != tb[1:]["Original"])
140+
141+
142+
def test_drop_internal_rules(data_internal_cuts_config, test_dataset="CMS_Z0J_8TEV_PT-Y"):
    """Check that the key ``drop_internal_rules`` works as expected:
    - Drops all cuts for a given dataset
    - It is applied before ``added_filter_rules``
    """
    assert test_dataset in [
        i["dataset"] for i in data_internal_cuts_config["dataset_inputs"]
    ], "If you updated the test DATA, please update this test as well"

    def test_fun(**config):
        """Use some internal validphys function which will for sure use cuts and separate
        the results for the test dataset.
        """
        # Get data and predictions separated by dataset (drop grouping)
        ret = API.group_result_central_table_no_table(**config).droplevel(0)
        # Now separate the test dataset from the rest
        df_test = ret.loc[test_dataset]
        df_rest = ret.drop(index=test_dataset)
        return df_test, df_rest

    # Use internal cuts
    def_test, def_all = test_fun(**data_internal_cuts_config)

    # Drop all rules for the test dataset only
    drop_test, drop_all = test_fun(**data_internal_cuts_config, drop_internal_rules=[test_dataset])

    assert len(drop_test) > len(def_test), "Cuts have not been dropped!"
    assert len(drop_all) == len(def_all), "Drop cuts have affected other datasets!"

    # Add a new rule for this dataset while dropping all previous rules
    new_rule = {"dataset": test_dataset, "rule": "pT >= 80"}
    add_test, _ = test_fun(
        **data_internal_cuts_config,
        added_filter_rules=[new_rule],
        drop_internal_rules=[test_dataset],
    )
    # Compare the rows obtained with the added rule against the rows obtained
    # with no rules at all. ``len(new_rule)`` would only count the keys of the
    # rule dictionary, so it says nothing about whether the rule was applied.
    assert len(add_test) < len(drop_test), "New rule has not been added after dropping the cuts!"

0 commit comments

Comments
 (0)