Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
68b7667
Add TopCP query
kyungeonchoi Mar 4, 2025
0c96c6d
flake8
kyungeonchoi Mar 4, 2025
499a0be
Fix serialization
kyungeonchoi Mar 4, 2025
0b9a759
Add example
kyungeonchoi Mar 4, 2025
ae89359
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 5, 2025
0b0303e
change field names to snake case
kyungeonchoi Mar 10, 2025
936c6ba
Update example
kyungeonchoi Mar 10, 2025
b5b8c8e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 10, 2025
47ed2c6
Add YAML support
kyungeonchoi Mar 10, 2025
06f6c38
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 10, 2025
b922029
set pytest asyncio
kyungeonchoi Mar 10, 2025
ee77e8a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 10, 2025
800d1c7
Load input yaml as string instead of dict
kyungeonchoi Mar 14, 2025
9613773
Drop run options like run_parton
kyungeonchoi Mar 14, 2025
7eba6c3
Drop obsolete fields
kyungeonchoi Mar 17, 2025
b6bb91f
Update reco.yaml
kyungeonchoi Mar 19, 2025
35c5e98
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 19, 2025
902c1a8
Use ruamel.yaml for yaml serialization
kyungeonchoi Apr 2, 2025
dd14db4
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 2, 2025
92bc705
Add tests for TopCP
kyungeonchoi Apr 4, 2025
24dca9d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 4, 2025
4deb263
Remove obsolete fields in transform requests (#565)
ponyisi Mar 4, 2025
437c6f2
Switch S3 client to boto3 (#572)
ponyisi Mar 26, 2025
62c974d
Update codecov execution (#575)
ponyisi Mar 26, 2025
46f5092
Use uv to set up python version in tests (#574)
ponyisi Mar 26, 2025
d439a86
[pre-commit.ci] pre-commit autoupdate
pre-commit-ci[bot] Mar 31, 2025
283bf22
Pushdown progress bar spec & fix tests (#569)
ponyisi Apr 2, 2025
608fea0
Fix docstring for XRootD DID finder
ponyisi Apr 8, 2025
df60499
Upgrade local download concurrency control to use aioboto3's built-in…
ponyisi Apr 10, 2025
258a61e
Support fail_on_missing_trees option (#580)
ponyisi Apr 10, 2025
c9db057
Bump pypa/gh-action-pypi-publish in the actions group
dependabot[bot] Apr 7, 2025
3ae18ac
Merge remote-tracking branch 'origin/master' into add-topcp-query
kyungeonchoi Apr 14, 2025
871e9d5
Revert to plain text
kyungeonchoi Apr 15, 2025
8751dc6
Fix test for topcp
kyungeonchoi Apr 16, 2025
19b065e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions examples/TopCP_Dict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from servicex import query, dataset, deliver

# Input files, addressed by XRootD URL on the CERN open-data EOS endpoint.
input_files = dataset.FileList(
    [
        "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000001.pool.root.1",  # noqa: E501
        "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000002.pool.root.1",  # noqa: E501
        "root://eospublic.cern.ch//eos/opendata/atlas/rucio/data16_13TeV/DAOD_PHYSLITE.37019878._000003.pool.root.1",  # noqa: E501
    ]
)

# TopCP query driven by the reco.yaml that sits next to this example.
topcp_query = query.TopCP(reco_yaml="reco.yaml", max_events=1000)

# A single-sample request: one name, one dataset, one query.
spec = {
    "Sample": [
        {
            "Name": "TopCP_Dict",
            "Dataset": input_files,
            "Query": topcp_query,
        },
    ]
}

print(f"Files: {deliver(spec)}")
40 changes: 40 additions & 0 deletions examples/reco.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
CommonServices:
systematicsHistogram: 'listOfSystematics'

PileupReweighting: {}

EventCleaning:
runEventCleaning: False
runGRL: False

Electrons:
- containerName: 'AnaElectrons'
crackVeto: True
IFFClassification: {}
WorkingPoint:
- selectionName: 'loose'
identificationWP: 'TightLH'
isolationWP: 'NonIso'
noEffSF: True
- selectionName: 'tight'
identificationWP: 'TightLH'
isolationWP: 'Tight_VarRad'
noEffSF: True
PtEtaSelection:
minPt: 25000.0
maxEta: 2.47
useClusterEta: True

# After configuring each container, many variables will be saved automatically.
Output:
treeName: 'reco'
vars: []
metVars: []
containers:
# Format should follow: '<suffix>:<output container>'
el_: 'AnaElectrons'
'': 'EventInfo'
commands:
# Turn output branches on and off with 'enable' and 'disable'

AddConfigBlocks: []
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ FuncADL_ATLASxAOD = "servicex.func_adl.func_adl_dataset:FuncADLQuery_ATLASxAOD"
FuncADL_CMS = "servicex.func_adl.func_adl_dataset:FuncADLQuery_CMS"
PythonFunction = "servicex.python_dataset:PythonFunction"
UprootRaw = "servicex.uproot_raw.uproot_raw:UprootRawQuery"
TopCP = "servicex.topcp.topcp:TopCPQuery"

[project.entry-points.'servicex.dataset']
Rucio = "servicex.dataset_identifier:RucioDatasetIdentifier"
Expand Down
137 changes: 137 additions & 0 deletions servicex/topcp/topcp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# Copyright (c) 2025, IRIS-HEP
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# pydantic 2 API

import pydantic
from pathlib import Path

# from servicex.models import DocStringBaseModel
from typing import Optional, Union
from ..query_core import QueryStringGenerator


@pydantic.dataclasses.dataclass
class TopCPQuery(QueryStringGenerator):
    """Query options for the ``topcp`` code generator.

    The fields hold the paths of the YAML configuration files and the
    switches that are serialised into the selection string.
    """

    yaml_tag = "!TopCP"
    default_codegen = "topcp"

    reco_yaml: Union[Path, str, None] = None
    """Location of the reco.yaml file"""
    parton_yaml: Union[Path, str, None] = None
    """Location of the parton.yaml file"""
    particle_yaml: Union[Path, str, None] = None
    """Location of the particle.yaml file"""
    max_events: Optional[int] = -1
    """How many events to process"""
    parton: Optional[bool] = False
    """Switch for the parton-level analysis"""
    particle: Optional[bool] = False
    """Switch for the particle-level analysis"""
    no_reco: Optional[bool] = False
    """Switch that turns the detector-level analysis off"""
    no_systematics: Optional[bool] = True
    """Switch that turns the computation of systematics off"""
    no_filter: Optional[bool] = False
    """Keep every event regardless of analysis filters (the decision is still saved)"""

@pydantic.model_validator(mode="after")
def check_reco_yaml(self):
    """Ensure a reco YAML file is configured whenever reco is enabled.

    Returns:
        The validated instance, unchanged.

    Raises:
        ValueError: if ``no_reco`` is ``False`` while ``reco_yaml`` is ``None``.
    """
    # Identity check on purpose: a ``None`` value must not count as "enabled".
    reco_requested = self.no_reco is False
    if reco_requested and self.reco_yaml is None:
        raise ValueError("reco is enabled but reco.yaml is missing!")
    return self

@pydantic.model_validator(mode="after")
def no_input_yaml(self):
    """Ensure at least one of the three YAML inputs is configured.

    Returns:
        The validated instance, unchanged.

    Raises:
        ValueError: if the reco, parton and particle YAML paths are all ``None``.
    """
    yaml_inputs = (self.reco_yaml, self.parton_yaml, self.particle_yaml)
    if all(path is None for path in yaml_inputs):
        raise ValueError("No yaml provided!")
    return self

@pydantic.model_validator(mode="after")
def no_parton_yaml(self):
    """Ensure a parton YAML file is configured when ``parton`` is switched on.

    Returns:
        The validated instance, unchanged.

    Raises:
        ValueError: if ``parton`` is ``True`` while ``parton_yaml`` is ``None``.
    """
    parton_requested = self.parton is True
    if parton_requested and self.parton_yaml is None:
        raise ValueError("parton is set to True but no parton.yaml provided!")
    return self

@pydantic.model_validator(mode="after")
def no_paricle_yaml(self):
    """Ensure a particle YAML file is configured when ``particle`` is switched on.

    The method name keeps its original spelling so the class interface is
    unchanged.

    Returns:
        The validated instance, unchanged.

    Raises:
        ValueError: if ``particle`` is ``True`` while ``particle_yaml`` is ``None``.
    """
    particle_requested = self.particle is True
    if particle_requested and self.particle_yaml is None:
        raise ValueError("particle is set to True but no particle.yaml provided!")
    return self

@pydantic.model_validator(mode="after")
def no_run(self):
    """Reject a configuration in which no analysis level would run.

    Returns:
        The validated instance, unchanged.

    Raises:
        ValueError: if ``no_reco`` is ``True`` while both ``particle`` and
            ``parton`` are ``False``.
    """
    reco_disabled = self.no_reco is True
    truth_levels_disabled = self.particle is False and self.parton is False
    if reco_disabled and truth_levels_disabled:
        raise ValueError("Wrong configuration - no reco, no particle, no parton!")
    return self

def generate_selection_string(self):
    """Serialise this query into the JSON selection string.

    Every configured YAML path is opened and parsed with ``yaml.safe_load``;
    the parsed content (or ``None`` when the path is not set) is embedded in
    the payload next to the option flags.

    Returns:
        str: JSON document with the keys ``RecoYAML``, ``PartonYAML``,
        ``ParticleYAML``, ``NEvents``, ``RunParton``, ``RunParticle``,
        ``NoReco``, ``RunSystematics`` and ``NoFilter``.

    Raises:
        OSError: if a configured YAML file cannot be opened.
        yaml.YAMLError: if a configured file is not valid YAML.
    """
    import json

    def _load_yaml(path):
        """Return the parsed content of the YAML file at *path*, or None if unset."""
        if not path:
            return None
        # Imported here so PyYAML is only needed when a file is actually read.
        import yaml

        # Binary mode lets PyYAML detect the encoding itself instead of
        # depending on the platform's default text encoding.
        with open(path, "rb") as yaml_file:
            return yaml.safe_load(yaml_file)

    # NOTE(review): the PR discussion suggested renaming these keys to
    # "reco" / "parton" / "particle" and deriving the run flags from which
    # YAML files are supplied. The keys are left as-is here because the
    # consumer of this payload is not visible from this file.
    query = {
        "RecoYAML": _load_yaml(self.reco_yaml),
        "PartonYAML": _load_yaml(self.parton_yaml),
        "ParticleYAML": _load_yaml(self.particle_yaml),
        "NEvents": self.max_events,
        "RunParton": self.parton,
        "RunParticle": self.particle,
        "NoReco": self.no_reco,
        # NOTE(review): the key reads "RunSystematics" but carries the
        # ``no_systematics`` flag unchanged - confirm the consumer expects
        # this polarity before touching it.
        "RunSystematics": self.no_systematics,
        "NoFilter": self.no_filter,
    }

    return json.dumps(query)

@classmethod
def from_yaml(cls, _, node):
    """Build a query from a YAML node whose value is a JSON object.

    Args:
        _: Unused first argument of the YAML constructor callback.
        node: YAML node; its ``value`` attribute holds the JSON text.

    Returns:
        An instance of ``cls`` created with the decoded object's members
        passed as keyword arguments.

    Raises:
        ValueError: if the node value is not valid JSON or does not decode
            to a JSON object.
    """
    import json

    fields = json.loads(node.value)
    if not isinstance(fields, dict):
        raise ValueError(
            f"Expected a JSON object of field values, got {type(fields).__name__}"
        )
    # Expand into keyword arguments; passing the dict positionally would
    # assign the whole mapping to the first declared field.
    return cls(**fields)
Loading