Skip to content

Commit 69b8dd3

Browse files
committed
feat: Implement a complete workflow for EIC acceptance data conversion, analysis, and plotting.
1 parent f338ca8 commit 69b8dd3

14 files changed

+45147
-1
lines changed

analysis/acceptance/aa_helpers.py

Lines changed: 315 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,315 @@
1+
#!/usr/bin/env python3
2+
"""
3+
EIC Center Forward Plotting and Data Processing Utility
4+
5+
This module provides functions for:
6+
1. Creating plots with calibrated background images (pixel to mm conversion)
7+
2. Converting and concatenating CSV files to Feather format with unique event IDs
8+
"""
9+
10+
import argparse
11+
import sys
12+
import matplotlib.pyplot as plt
13+
import matplotlib.image as mpimg
14+
import pandas as pd
15+
import glob
16+
from pathlib import Path
17+
from typing import List, Dict, Tuple, Optional
18+
19+
# Background image used when the caller does not supply one.
DEFAULT_BCK_IMAGE = "eic_center_forward_bw.png"

# Default pixel <-> millimeter calibration.
# Two correspondence points, each pairing a physical position (x_mm, y_mm)
# with the matching image position (x_pixel, y_pixel).
DEFAULT_BCK_SCALE_POINTS = [
    {
        "mm": {"x": 0.0, "y": 0.0},
        "pixel": {"x": 371.0, "y": 281.0},
    },
    {
        "mm": {"x": 4937.0, "y": 2622.0},
        "pixel": {"x": 739.0, "y": 85.0},
    },
]
28+
29+
30+
def create_plot_with_background(
    figsize: Tuple[int, int] = (20, 10),
    bck_image: str = DEFAULT_BCK_IMAGE,
    bck_scale_points: List[Dict] = DEFAULT_BCK_SCALE_POINTS
) -> Tuple[plt.Figure, plt.Axes]:
    """
    Create a matplotlib plot with a calibrated background image.

    The image is drawn so that the axes are expressed in millimeters rather
    than pixels. The pixel-to-mm mapping is linear and independent per axis,
    derived from the first two entries of ``bck_scale_points``.

    Parameters
    ----------
    figsize : tuple of int, optional
        Figure size in inches (width, height). Default is (20, 10).
    bck_image : str, optional
        Path to the background image file. Default is ``DEFAULT_BCK_IMAGE``
        ("eic_center_forward_bw.png").
    bck_scale_points : list of dict, optional
        Calibration points for pixel-to-mm conversion; the first two entries
        are used. Each point must have the structure
        {"mm": {"x": x_mm, "y": y_mm}, "pixel": {"x": x_px, "y": y_px}}.
        The two points must differ in both pixel x and pixel y, otherwise
        no scale can be derived.

    Returns
    -------
    fig : matplotlib.figure.Figure
        The created figure object
    ax : matplotlib.axes.Axes
        The axes object with the calibrated background image

    Raises
    ------
    FileNotFoundError
        If the background image file cannot be found
    ValueError
        If fewer than two calibration points are given, a point is missing
        a required key, or the two points share a pixel x or y coordinate

    Examples
    --------
    >>> fig, ax = create_plot_with_background()
    >>> ax.plot([0, 1000], [0, 500], 'r-')  # Plot in mm coordinates
    >>> plt.show()
    """
    # Extract calibration points; malformed input is reported as the
    # documented ValueError instead of leaking IndexError/KeyError/TypeError.
    try:
        p0 = bck_scale_points[0]
        p1 = bck_scale_points[1]

        p0_x_mm = p0["mm"]["x"]
        p0_y_mm = p0["mm"]["y"]
        p0_x_pixel = p0["pixel"]["x"]
        p0_y_pixel = p0["pixel"]["y"]

        p1_x_mm = p1["mm"]["x"]
        p1_y_mm = p1["mm"]["y"]
        p1_x_pixel = p1["pixel"]["x"]
        p1_y_pixel = p1["pixel"]["y"]
    except (IndexError, KeyError, TypeError) as err:
        raise ValueError(
            "bck_scale_points must contain two points shaped like "
            '{"mm": {"x": ..., "y": ...}, "pixel": {"x": ..., "y": ...}}'
        ) from err

    pixel_dx = p1_x_pixel - p0_x_pixel
    pixel_dy = p1_y_pixel - p0_y_pixel
    if pixel_dx == 0 or pixel_dy == 0:
        # A zero pixel span would divide by zero below.
        raise ValueError(
            "Calibration points must differ in both pixel x and pixel y "
            f"(got pixel dx={pixel_dx}, dy={pixel_dy})"
        )

    # Linear mapping per axis: coord_mm = scale * coord_pixel + offset
    x_scale = (p1_x_mm - p0_x_mm) / pixel_dx
    x_offset = p0_x_mm - x_scale * p0_x_pixel

    y_scale = (p1_y_mm - p0_y_mm) / pixel_dy
    y_offset = p0_y_mm - y_scale * p0_y_pixel  # note: y_scale will often be negative

    def pixel_to_mm_x(x_pixel: float) -> float:
        """Convert x-coordinate from pixels to millimeters."""
        return x_scale * x_pixel + x_offset

    def pixel_to_mm_y(y_pixel: float) -> float:
        """Convert y-coordinate from pixels to millimeters."""
        return y_scale * y_pixel + y_offset

    # Load image
    try:
        image = mpimg.imread(bck_image)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"Background image not found: {bck_image}") from err

    height_pixel, width_pixel = image.shape[:2]

    # Image extent in millimeters, computed from the image's pixel bounds.
    left_mm = pixel_to_mm_x(0)
    right_mm = pixel_to_mm_x(width_pixel)
    top_mm = pixel_to_mm_y(0)  # origin='upper' => row 0 is the top
    bottom_mm = pixel_to_mm_y(height_pixel)

    # Create plot with calibrated background
    fig, ax = plt.subplots(figsize=figsize)
    ax.imshow(
        image,
        extent=(left_mm, right_mm, bottom_mm, top_mm),
        origin="upper",
        interpolation="nearest",
    )

    ax.set_xlabel("X (mm)")
    ax.set_ylabel("Y (mm)")
    ax.set_title("EIC Center Forward View")

    return fig, ax
129+
130+
131+
def concat_csvs_with_unique_events(files: List[str]) -> pd.DataFrame:
    """
    Load and concatenate multiple CSV files with globally unique event IDs.

    Each file is read in the order given. Its ``event`` column is shifted by
    a running offset equal to one more than the largest shifted event ID seen
    so far, so IDs from later files never collide with earlier ones. Files
    that contribute no usable event IDs (header-only files, or an ``event``
    column that is entirely missing values) leave the offset unchanged.

    Parameters
    ----------
    files : list of str
        List of paths to CSV files to concatenate

    Returns
    -------
    pd.DataFrame
        Concatenated DataFrame with unique event IDs and a fresh 0..N-1 index

    Raises
    ------
    ValueError
        If no files are provided, a file cannot be parsed, or a file has no
        'event' column
    FileNotFoundError
        If any of the CSV files cannot be found

    Examples
    --------
    >>> files = ['data1.csv', 'data2.csv', 'data3.csv']
    >>> df = concat_csvs_with_unique_events(files)
    >>> print(f"Total events: {len(df['event'].unique())}")
    """
    if not files:
        raise ValueError("No files provided for concatenation")

    dfs = []
    offset = 0

    for file in files:
        try:
            df = pd.read_csv(file)
        except FileNotFoundError as err:
            raise FileNotFoundError(f"CSV file not found: {file}") from err
        except Exception as err:
            raise ValueError(f"Error reading CSV file {file}: {err}") from err

        if 'event' not in df.columns:
            raise ValueError(f"CSV file {file} does not contain an 'event' column")

        if df.empty:
            # A header-only file has no event IDs. Taking max() of the empty
            # column would yield NaN and poison the offset for every file
            # that follows, so keep the frame but leave the offset alone.
            dfs.append(df)
            print(f"Loaded 0 rows from {file} (no events)")
            continue

        # Adjust event IDs to ensure global uniqueness
        df['event'] = df['event'] + offset
        first_event = df['event'].min()
        last_event = df['event'].max()

        # max() is NaN when every event ID in the file is missing; only
        # advance the offset when there is a real maximum to build on.
        if pd.notna(last_event):
            offset = last_event + 1

        dfs.append(df)
        print(f"Loaded {len(df)} rows from {file} (events {first_event}-{last_event})")

    # Concatenate all DataFrames
    combined_df = pd.concat(dfs, ignore_index=True)
    print(f"Total: {len(combined_df)} rows with {len(combined_df['event'].unique())} unique events")

    return combined_df
192+
193+
194+
def convert_to_feather(
    input_files: List[str],
    output_file: str,
    use_glob: bool = False
) -> pd.DataFrame:
    """
    Convert CSV files to Feather format with unique event IDs.

    The inputs are either used as literal file paths or, with
    ``use_glob=True``, expanded as glob patterns. The files are concatenated
    by ``concat_csvs_with_unique_events`` and the result is written to
    ``output_file`` in Feather format.

    Parameters
    ----------
    input_files : list of str
        List of file paths or glob patterns (if use_glob=True)
    output_file : str
        Path for the output Feather file
    use_glob : bool, optional
        If True, treat input_files as glob patterns. Matches of each pattern
        are processed in sorted order, and a file matched by more than one
        pattern is processed only once. Default is False.

    Returns
    -------
    pd.DataFrame
        The concatenated DataFrame that was saved to Feather format

    Raises
    ------
    ValueError
        If no files are found or provided

    Examples
    --------
    >>> # Using specific files
    >>> df = convert_to_feather(['file1.csv', 'file2.csv'], 'output.feather')

    >>> # Using glob pattern
    >>> df = convert_to_feather(['data/*.csv'], 'output.feather', use_glob=True)
    """
    if use_glob:
        # Expand glob patterns. Overlapping patterns (e.g. "run*.csv" and
        # "run1_*.csv") can match the same file; loading it twice would
        # duplicate its rows under different event IDs, so each resolved
        # path is kept only the first time it is seen.
        files = []
        seen = set()
        for pattern in input_files:
            matched_files = sorted(glob.glob(pattern))
            if not matched_files:
                print(f"Warning: Pattern '{pattern}' matched no files")
                continue

            print(f"Pattern '{pattern}' matched {len(matched_files)} files")
            for path in matched_files:
                key = Path(path).resolve()
                if key in seen:
                    print(f"Skipping '{path}': already matched by an earlier pattern")
                    continue
                seen.add(key)
                files.append(path)
    else:
        # Use files as-is
        files = input_files

    if not files:
        raise ValueError("No files to process")

    print(f"\nProcessing {len(files)} CSV files...")

    # Concatenate CSVs with unique events
    df = concat_csvs_with_unique_events(files)

    # Save to Feather format
    df.to_feather(output_file)
    print(f"\nSaved {len(df)} rows to {output_file}")

    return df
261+
262+
263+
def main(argv: Optional[List[str]] = None):
    """
    Command-line entry point: convert CSV files to a single Feather file.

    Parses the command line, delegates the conversion to
    ``convert_to_feather`` and reports the outcome. Supports both explicit
    file lists and glob patterns (``--glob``).

    Parameters
    ----------
    argv : list of str, optional
        Arguments to parse instead of the process command line. When None
        (the default), argparse reads ``sys.argv[1:]``.

    Raises
    ------
    SystemExit
        With status 2 when argparse rejects the arguments, or status 1 when
        the conversion fails (the error is printed to stderr).
    """
    parser = argparse.ArgumentParser(
        description="Convert CSV files to Feather format with unique event IDs",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Convert specific files
%(prog)s file1.csv file2.csv file3.csv -o output.feather

# Use glob pattern (quotes are important!)
%(prog)s "data/*.csv" -o output.feather --glob

# Multiple glob patterns
%(prog)s "data1/*.csv" "data2/*.csv" -o output.feather --glob

# Mix of patterns with glob
%(prog)s "run1_*.csv" "run2_*.csv" -o combined.feather --glob
"""
    )

    parser.add_argument('input_files', nargs='+', help='Input CSV files or glob patterns (when using --glob)')
    parser.add_argument('-o', '--output', required=True, help='Output Feather file path')
    parser.add_argument('--glob', action='store_true', help='Treat input arguments as glob patterns')
    parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output')
    args = parser.parse_args(argv)

    # Top-level boundary: any failure becomes a one-line message on stderr
    # and a non-zero exit status instead of a traceback.
    try:
        # Convert files to Feather format
        df = convert_to_feather(
            input_files=args.input_files,
            output_file=args.output,
            use_glob=args.glob
        )

        if args.verbose:
            print("\nDataFrame info:")
            # DataFrame.info() writes its report itself and returns None,
            # so it must not be wrapped in print().
            df.info()
            print("\nFirst few rows:")
            print(df.head())

    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    print(f"\nConversion complete! Output saved to: {args.output}")
312+
313+
314+
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)