Skip to content

Commit 154bd4c

Browse files
authored
Merge pull request #1170 from stephenswat/extras/event_trimmer
Add event trimmer Python tool to extras
2 parents b1557b1 + 80c8c55 commit 154bd4c

File tree

5 files changed

+430
-0
lines changed

5 files changed

+430
-0
lines changed

extras/event_trimmer/.gitignore

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Python-generated files
2+
__pycache__/
3+
*.py[oc]
4+
build/
5+
dist/
6+
wheels/
7+
*.egg-info
8+
9+
# Virtual environments
10+
.venv

extras/event_trimmer/README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Event trimmer
2+
3+
This tool takes an input event and extracts the data for a small number of particles. It is designed to be useful for debugging all kinds of reconstruction issues. For example, the invocation:
4+
5+
```
6+
python main.py -p 1 -p 5 -i 2 input output
7+
```
8+
9+
will read event 2 (`-i`) from the directory `input`, extract particles 1 and 5 (`-p`) from it, and write the trimmed event to the `output` directory.

extras/event_trimmer/main.py

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
import pandas
2+
import argparse
3+
import logging
4+
import pathlib
5+
6+
7+
log = logging.getLogger("event_trimmer")


def _read_table(path, label):
    """Load the CSV file at ``path`` and log its row count under ``label``."""
    table = pandas.read_csv(path)
    log.info("Read data for %d %s from %s", table.shape[0], label, path)
    return table


def _write_table(table, path, label):
    """Write ``table`` to ``path`` as CSV and log its row count under ``label``.

    The parent directory is created on demand, so an output directory that
    does not exist yet can be given on the command line.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    table.to_csv(path, index=False)
    log.info("Wrote data for %d %s to %s", table.shape[0], label, path)


def main(argv=None):
    """Trim one event down to the data belonging to a few particles.

    Reads ``particles_initial``, ``particles_final``, ``hits``,
    ``measurements`` and ``cells`` CSV files for the requested event from the
    input directory, keeps only the rows that belong to the requested
    particles and writes the result as event 0 into the output directory,
    together with a freshly generated ``measurement-simhit-map`` file.

    Measurements carry no particle ID of their own: the boolean mask computed
    on the hits table is applied to the measurements table, i.e. row ``i`` of
    the measurements is treated as belonging to row ``i`` of the hits. Kept
    measurements are renumbered 0..N-1 in row order and the cells are
    renumbered accordingly.

    Args:
        argv: Command-line arguments without the program name. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "input",
        type=pathlib.Path,
        help="input event directory",
    )

    parser.add_argument(
        "output",
        type=pathlib.Path,
        help="output event directory",
    )

    parser.add_argument(
        "-i", "--event-id", help="event ID in input directory", default=0, type=int
    )

    parser.add_argument(
        "-p",
        "--particle-id",
        help="particle ID to filter",
        type=int,
        required=True,
        action="append",
    )

    args = parser.parse_args(argv)

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )

    to_keep = args.particle_id

    log.info(
        "Keeping %d particles: %s", len(to_keep), ", ".join(str(x) for x in to_keep)
    )

    origin_event_prefix = "event%09d-" % args.event_id
    # The trimmed event is always written out as event number 0.
    destination_event_prefix = "event%09d-" % 0

    def origin(name):
        return args.input / (origin_event_prefix + name)

    def destination(name):
        return args.output / (destination_event_prefix + name)

    # Logic for processing the particle initial and final states
    for stage in ("initial", "final"):
        file_name = "particles_%s.csv" % stage
        particles_df = _read_table(origin(file_name), "%s input particles" % stage)
        _write_table(
            particles_df[particles_df["particle_id"].isin(to_keep)],
            destination(file_name),
            "%s output particles" % stage,
        )

    # Logic for processing hits
    hits_df = _read_table(origin("hits.csv"), "input hits")
    # Computed once; the same mask is reused for the measurements below.
    keep_mask = hits_df["particle_id"].isin(to_keep)
    _write_table(hits_df[keep_mask], destination("hits.csv"), "output hits")

    # Logic for processing measurements
    measurements_df = _read_table(origin("measurements.csv"), "input measurements")
    if measurements_df.shape[0] != hits_df.shape[0]:
        # The hit mask is applied to the measurements row by row, so a length
        # mismatch means the selection below cannot be trusted.
        log.warning(
            "Hit and measurement tables differ in length (%d vs %d); "
            "row-wise matching of measurements to hits may be wrong",
            hits_df.shape[0],
            measurements_df.shape[0],
        )
    measurements_filtered_df = measurements_df[keep_mask].copy()
    # Key the renumbering on the measurement_id *values* (not on the row
    # index), because those values are what the cells table refers to.
    measurement_ids = measurements_filtered_df["measurement_id"].tolist()
    meas_id_map = {old: new for new, old in enumerate(measurement_ids)}
    measurements_filtered_df["measurement_id"] = measurements_filtered_df[
        "measurement_id"
    ].map(meas_id_map)
    _write_table(
        measurements_filtered_df,
        destination("measurements.csv"),
        "output measurements",
    )

    # Logic for building the simhit map: kept hits and kept measurements are
    # both written in row order, so the mapping between them is the identity.
    kept_count = measurements_filtered_df.shape[0]
    simhit_map_df = pandas.DataFrame(
        {
            "measurement_id": list(range(kept_count)),
            "hit_id": list(range(kept_count)),
        }
    )
    _write_table(
        simhit_map_df,
        destination("measurement-simhit-map.csv"),
        "output measurement-to-hit mappings",
    )

    # Logic for processing cells
    cells_df = _read_table(origin("cells.csv"), "input cells")
    keep_cells = cells_df["measurement_id"].isin(measurement_ids)
    cells_filtered_df = cells_df[keep_cells].copy()
    cells_filtered_df["measurement_id"] = cells_filtered_df["measurement_id"].map(
        meas_id_map
    )
    _write_table(cells_filtered_df, destination("cells.csv"), "output cells")


if __name__ == "__main__":
    main()
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
[project]
2+
name = "traccc-event-trimmer"
3+
version = "0.1.0"
4+
description = "Tool for slimming down events"
5+
readme = "README.md"
6+
requires-python = ">=3.13"
7+
dependencies = [
8+
"black>=25.9.0",
9+
"pandas>=2.3.2",
10+
]

0 commit comments

Comments
 (0)