1+ #!/usr/bin/env python3
2+ """
3+ EIC Center Forward Plotting and Data Processing Utility
4+
5+ This module provides functions for:
6+ 1. Creating plots with calibrated background images (pixel to mm conversion)
7+ 2. Converting and concatenating CSV files to Feather format with unique event IDs
8+ """
9+
10+ import argparse
11+ import sys
12+ import matplotlib .pyplot as plt
13+ import matplotlib .image as mpimg
14+ import pandas as pd
15+ import glob
16+ from pathlib import Path
17+ from typing import List , Dict , Tuple , Optional
18+
# Background image used when the caller does not supply one.
DEFAULT_BCK_IMAGE = "eic_center_forward_bw.png"

# Two reference points that tie image pixels to physical coordinates.
# Each entry pairs a position in millimeters ("mm") with the pixel position
# ("pixel") of the same spot in the background image; the linear
# pixel -> mm mapping is derived from these two correspondences.
DEFAULT_BCK_SCALE_POINTS = [
    {
        "mm": {"x": 0.0, "y": 0.0},
        "pixel": {"x": 371.0, "y": 281.0},
    },
    {
        "mm": {"x": 4937.0, "y": 2622.0},
        "pixel": {"x": 739.0, "y": 85.0},
    },
]
28+
29+
def create_plot_with_background(
    figsize: Tuple[int, int] = (20, 10),
    bck_image: str = DEFAULT_BCK_IMAGE,
    bck_scale_points: List[Dict] = DEFAULT_BCK_SCALE_POINTS,
) -> Tuple[plt.Figure, plt.Axes]:
    """
    Create a matplotlib plot with a calibrated background image.

    The image is drawn so that the axes are expressed in millimeters rather
    than pixels. The pixel -> mm mapping is an independent linear map per axis
    (``mm = scale * pixel + offset``) derived from two calibration points.

    Parameters
    ----------
    figsize : tuple of int, optional
        Figure size in inches (width, height). Default is (20, 10).
    bck_image : str, optional
        Path to the background image file. Default is ``DEFAULT_BCK_IMAGE``
        ("eic_center_forward_bw.png").
    bck_scale_points : list of dict, optional
        Calibration points for pixel-to-mm conversion; the first two entries
        are used. Each point must have the structure
        ``{"mm": {"x": x_mm, "y": y_mm}, "pixel": {"x": x_px, "y": y_px}}``.
        The two points must differ in both x and y, in pixels and in mm,
        otherwise no linear map can be derived.

    Returns
    -------
    fig : matplotlib.figure.Figure
        The created figure object
    ax : matplotlib.axes.Axes
        The axes object with the calibrated background image

    Raises
    ------
    FileNotFoundError
        If the background image file cannot be found
    ValueError
        If fewer than two calibration points are given, a point is missing a
        required key, a coordinate is not numeric, or the two points coincide
        along either axis.

    Examples
    --------
    >>> fig, ax = create_plot_with_background()
    >>> ax.plot([0, 1000], [0, 500], 'r-')  # Plot in mm coordinates
    >>> plt.show()
    """
    # Pull out the two correspondences and their spans. Any structural problem
    # (too few points, missing keys, non-numeric values) is reported as the
    # ValueError this function documents instead of a raw lookup/type error.
    try:
        p0 = bck_scale_points[0]
        p1 = bck_scale_points[1]

        p0_x_mm = p0["mm"]["x"]
        p0_y_mm = p0["mm"]["y"]
        p0_x_pixel = p0["pixel"]["x"]
        p0_y_pixel = p0["pixel"]["y"]

        p1_x_mm = p1["mm"]["x"]
        p1_y_mm = p1["mm"]["y"]
        p1_x_pixel = p1["pixel"]["x"]
        p1_y_pixel = p1["pixel"]["y"]

        x_span_mm = p1_x_mm - p0_x_mm
        y_span_mm = p1_y_mm - p0_y_mm
        x_span_pixel = p1_x_pixel - p0_x_pixel
        y_span_pixel = p1_y_pixel - p0_y_pixel
    except (IndexError, KeyError, TypeError) as err:
        raise ValueError(
            "bck_scale_points must contain two points shaped like "
            '{"mm": {"x": ..., "y": ...}, "pixel": {"x": ..., "y": ...}}'
        ) from err

    # A zero pixel span would divide by zero below.
    if x_span_pixel == 0 or y_span_pixel == 0:
        raise ValueError(
            "Calibration points must have different pixel coordinates along both axes"
        )
    # A zero mm span would give scale 0 and collapse the image extent.
    if x_span_mm == 0 or y_span_mm == 0:
        raise ValueError(
            "Calibration points must have different mm coordinates along both axes"
        )

    # Linear mapping per axis: coord_mm = scale * coord_pixel + offset
    x_scale = x_span_mm / x_span_pixel
    x_offset = p0_x_mm - x_scale * p0_x_pixel

    y_scale = y_span_mm / y_span_pixel
    y_offset = p0_y_mm - y_scale * p0_y_pixel  # note: y_scale will often be negative

    def pixel_to_mm_x(x_pixel: float) -> float:
        """Convert x-coordinate from pixels to millimeters."""
        return x_scale * x_pixel + x_offset

    def pixel_to_mm_y(y_pixel: float) -> float:
        """Convert y-coordinate from pixels to millimeters."""
        return y_scale * y_pixel + y_offset

    # Load image
    try:
        image = mpimg.imread(bck_image)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"Background image not found: {bck_image} ") from err

    height_pixel, width_pixel = image.shape[:2]

    # Image extent in millimeters, taken at the outer pixel edges
    left_mm = pixel_to_mm_x(0)
    right_mm = pixel_to_mm_x(width_pixel)
    top_mm = pixel_to_mm_y(0)  # origin='upper' => row 0 is the top
    bottom_mm = pixel_to_mm_y(height_pixel)

    # Create plot with calibrated background; extent is (left, right, bottom, top)
    fig, ax = plt.subplots(figsize=figsize)
    ax.imshow(
        image,
        extent=(left_mm, right_mm, bottom_mm, top_mm),
        origin="upper",
        interpolation="nearest",
    )

    ax.set_xlabel("X (mm)")
    ax.set_ylabel("Y (mm)")
    ax.set_title("EIC Center Forward View")

    return fig, ax
129+
130+
def concat_csvs_with_unique_events(files: List[str]) -> pd.DataFrame:
    """
    Load and concatenate multiple CSV files with globally unique event IDs.

    Each file's ``event`` column is shifted by a running offset equal to the
    largest shifted event ID seen so far plus one, so IDs from different files
    do not collide as long as each file's own IDs are non-negative.

    Files that contain a header but no rows contribute no events and leave the
    running offset untouched. Without this, an empty file's maximum would be
    NaN and would turn the IDs of all later files into NaN.

    Parameters
    ----------
    files : list of str
        List of paths to CSV files to concatenate

    Returns
    -------
    pd.DataFrame
        Concatenated DataFrame with unique event IDs. If every input file has
        zero rows, the result is an empty DataFrame.

    Raises
    ------
    ValueError
        If no files are provided, a file cannot be parsed, or a file lacks an
        'event' column
    FileNotFoundError
        If any of the CSV files cannot be found

    Examples
    --------
    >>> files = ['data1.csv', 'data2.csv', 'data3.csv']
    >>> df = concat_csvs_with_unique_events(files)
    >>> print(f"Total events: {len(df['event'].unique())}")
    """
    if not files:
        raise ValueError("No files provided for concatenation")

    frames = []        # non-empty frames, already shifted
    empty_frames = []  # zero-row frames, used only if nothing else was loaded
    offset = 0

    for path in files:
        try:
            df = pd.read_csv(path)
        except FileNotFoundError as err:
            raise FileNotFoundError(f"CSV file not found: {path} ") from err
        except Exception as err:
            raise ValueError(f"Error reading CSV file {path} : {err} ") from err

        if 'event' not in df.columns:
            raise ValueError(f"CSV file {path} does not contain an 'event' column")

        if df.empty:
            # No rows: max() would be NaN and poison the offset for later files.
            empty_frames.append(df)
            print(f"Loaded 0 rows from {path} (no events)")
            continue

        # Adjust event IDs to ensure global uniqueness
        df['event'] = df['event'] + offset
        first_event = df['event'].min()
        last_event = df['event'].max()
        offset = last_event + 1  # Set offset for next file

        frames.append(df)
        print(f"Loaded {len(df)} rows from {path} (events {first_event} -{last_event} )")

    # Empty frames are left out of the concat so they cannot change column
    # dtypes; they are used only when there is nothing else to return.
    combined_df = pd.concat(frames if frames else empty_frames, ignore_index=True)
    print(f"Total: {len(combined_df)} rows with {len(combined_df['event'].unique())} unique events")

    return combined_df
192+
193+
def convert_to_feather(
    input_files: List[str],
    output_file: str,
    use_glob: bool = False,
) -> pd.DataFrame:
    """
    Convert CSV files to Feather format with unique event IDs.

    The inputs are either taken as literal file paths or, with
    ``use_glob=True``, expanded as glob patterns. The resolved files are
    concatenated via ``concat_csvs_with_unique_events`` and the result is
    written to ``output_file`` with ``DataFrame.to_feather``.

    Parameters
    ----------
    input_files : list of str
        List of file paths or glob patterns (if use_glob=True)
    output_file : str
        Path for the output Feather file
    use_glob : bool, optional
        If True, treat input_files as glob patterns. Default is False.
        Matches of each pattern are processed in sorted order. A file matched
        by more than one pattern is processed only once, at its first
        occurrence, so overlapping patterns do not duplicate its rows.

    Returns
    -------
    pd.DataFrame
        The concatenated DataFrame that was saved to Feather format

    Raises
    ------
    ValueError
        If no files are found or provided

    Examples
    --------
    >>> # Using specific files
    >>> df = convert_to_feather(['file1.csv', 'file2.csv'], 'output.feather')

    >>> # Using glob pattern
    >>> df = convert_to_feather(['data/*.csv'], 'output.feather', use_glob=True)
    """
    if use_glob:
        # Expand glob patterns, keeping first-seen order and dropping repeats
        files = []
        already_listed = set()
        for pattern in input_files:
            matched_files = sorted(glob.glob(pattern))
            if not matched_files:
                print(f"Warning: Pattern '{pattern} ' matched no files")
                continue
            print(f"Pattern '{pattern} ' matched {len(matched_files)} files")
            for path in matched_files:
                if path not in already_listed:
                    already_listed.add(path)
                    files.append(path)
    else:
        # Use files as-is
        files = input_files

    if not files:
        raise ValueError("No files to process")

    print(f"\n Processing {len(files)} CSV files...")

    # Concatenate CSVs with unique events
    df = concat_csvs_with_unique_events(files)

    # Save to Feather format
    df.to_feather(output_file)
    print(f"\n Saved {len(df)} rows to {output_file} ")

    return df
261+
262+
def main():
    """
    Command-line entry point: convert CSV files to a single Feather file.

    Parses the command line (input files or glob patterns, ``-o/--output``,
    ``--glob``, ``-v/--verbose``), runs ``convert_to_feather`` and, in verbose
    mode, prints a summary of the resulting DataFrame. Any exception raised
    during conversion is reported on stderr and the process exits with
    status 1.
    """
    parser = argparse.ArgumentParser(
        description="Convert CSV files to Feather format with unique event IDs",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Convert specific files
%(prog)s file1.csv file2.csv file3.csv -o output.feather

# Use glob pattern (quotes are important!)
%(prog)s "data/*.csv" -o output.feather --glob

# Multiple glob patterns
%(prog)s "data1/*.csv" "data2/*.csv" -o output.feather --glob

# Mix of patterns with glob
%(prog)s "run1_*.csv" "run2_*.csv" -o combined.feather --glob
"""
    )

    parser.add_argument('input_files', nargs='+', help='Input CSV files or glob patterns (when using --glob)')
    parser.add_argument('-o', '--output', required=True, help='Output Feather file path')
    parser.add_argument('--glob', action='store_true', help='Treat input arguments as glob patterns')
    parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose output')
    args = parser.parse_args()

    try:
        # Convert files to Feather format
        df = convert_to_feather(
            input_files=args.input_files,
            output_file=args.output,
            use_glob=args.glob
        )

        if args.verbose:
            print("\n DataFrame info:")
            # DataFrame.info() writes its report itself and returns None;
            # wrapping it in print() would emit a stray "None" line.
            df.info()
            print("\n First few rows:")
            print(df.head())

    except Exception as e:
        # Top-level boundary: report any failure and exit non-zero.
        print(f"Error: {e} ", file=sys.stderr)
        sys.exit(1)

    print(f"\n Conversion complete! Output saved to: {args.output} ")
313+
314+ if __name__ == "__main__" :
315+ main ()
0 commit comments