2424from xlsxwriter import Workbook
2525
2626from aurora_cycler_manager .config import get_config
27- from aurora_cycler_manager .data_bundle import (
27+ from aurora_cycler_manager .data_parse import (
2828 SampleDataBundle ,
2929 get_cycles_summary ,
3030 get_cycling ,
@@ -83,22 +83,47 @@ def _sort_times(start_times: list | np.ndarray, end_times: list | np.ndarray) ->
8383 # Sort by reverse end time, then by start time
8484 sorted_positions = np .lexsort ((valid_ends * - 1 , valid_starts ))
8585 sorted_starts = valid_starts [sorted_positions ]
86-
87- # Remove duplicate start times, keep only the first element (longest)
88- unique_mask = np .concatenate (([True ], sorted_starts [1 :] != sorted_starts [:- 1 ]))
86+ sorted_ends = valid_ends [sorted_positions ]
87+
88+ # Keep only non-overlapping intervals
89+ keep_mask = np .ones (len (sorted_starts ), dtype = bool )
90+ current_max_end = - np .inf
91+ for i in range (len (sorted_starts )):
92+ if sorted_starts [i ] >= current_max_end :
93+ current_max_end = sorted_ends [i ]
94+ elif sorted_ends [i ] <= current_max_end :
95+ keep_mask [i ] = False
96+ else :
97+ current_max_end = sorted_ends [i ]
8998
9099 # Map back to original indices
91- return valid_indices [sorted_positions [unique_mask ]]
100+ return valid_indices [sorted_positions [keep_mask ]]
92101
93102
94- def merge_metadata (job_files : list [Path ], metadatas : list [dict ]) -> dict :
103+ def merge_metadata (job_files : list [Path ], metadatas : list [dict ], sample_id : str ) -> dict :
95104 """Merge several job metadata, add provenance, replace sample data with latest from db."""
96- sample_id = metadatas [ 0 ]. get ( "sample_data" , {}). get ( "Sample ID" , "" )
105+ # Get sample data from database
97106 sample_data = get_sample_data (sample_id )
98- # Merge glossary dicts
107+
108+ # Flatten / merge glossary dicts
99109 glossary = {}
100- for g in [m .get ("glossary" , {}) for m in metadatas ]:
101- glossary .update (g )
110+ for m in metadatas :
111+ g = m .get ("glossary" , {})
112+ if isinstance (g , list ):
113+ for item in g :
114+ glossary .update (item )
115+ elif g :
116+ glossary .update (g )
117+
118+ # Flatten job_data to one list
119+ job_data = []
120+ for m in metadatas :
121+ jd = m .get ("job_data" , {})
122+ if isinstance (jd , list ):
123+ job_data .extend (jd )
124+ elif jd :
125+ job_data .append (jd )
126+
102127 return {
103128 "provenance" : {
104129 "aurora_metadata" : {
@@ -110,10 +135,12 @@ def merge_metadata(job_files: list[Path], metadatas: list[dict]) -> dict:
110135 "datetime" : datetime .now (timezone .utc ).isoformat (),
111136 },
112137 },
113- "original_file_provenance" : {str (f ): m ["provenance" ] for f , m in zip (job_files , metadatas , strict = True )},
138+ "original_file_provenance" : {
139+ str (f ): m .get ("provenance" ) for f , m in zip (job_files , metadatas , strict = True )
140+ },
114141 },
115142 "sample_data" : sample_data ,
116- "job_data" : [ m . get ( " job_data" , {}) for m in metadatas ] ,
143+ "job_data" : job_data ,
117144 "glossary" : glossary ,
118145 }
119146
@@ -157,7 +184,18 @@ def calc_dq(df: pl.DataFrame) -> pl.DataFrame:
157184def merge_dfs (dfs : list [pl .DataFrame ]) -> tuple [pl .DataFrame , pl .DataFrame | None ]:
158185 """Merge cycling dataframes and add cycles. Seperate out EIS."""
159186 for i , df in enumerate (dfs ):
160- dfs [i ] = df .with_columns (pl .lit (i ).alias ("job_number" ))
187+ exprs = [pl .lit (i ).alias ("job_number" )]
188+ if "loop_number" not in df .columns :
189+ exprs .append (pl .lit (0 ).alias ("loop_number" ))
190+ if "cycle_number" not in df .columns :
191+ if "Cycle" in df .columns :
192+ exprs .append (pl .col ("Cycle" ).alias ("cycle_number" ))
193+ else :
194+ exprs .append (pl .lit (0 ).alias ("cycle_number" ))
195+ dfs [i ] = df .with_columns (exprs )
196+
197+ if "dQ (mAh)" not in df .columns :
198+ dfs [i ] = calc_dq (dfs [i ])
161199
162200 df = pl .concat (dfs , how = "diagonal" )
163201
@@ -172,13 +210,6 @@ def merge_dfs(dfs: list[pl.DataFrame]) -> tuple[pl.DataFrame, pl.DataFrame | Non
172210
173211 if not df .is_empty ():
174212 df = df .sort ("uts" )
175- if "loop_number" not in df .columns :
176- df = df .with_columns (pl .lit (0 ).alias ("loop_number" ))
177- else :
178- df = df .with_columns (pl .col ("loop_number" ).fill_null (0 ))
179-
180- if "dQ (mAh)" not in df .columns :
181- df = calc_dq (df )
182213
183214 # Increment step if any job, cycle, or loop changes
184215 df = df .with_columns (pl .struct (["job_number" , "cycle_number" , "loop_number" ]).rle_id ().add (1 ).alias ("Step" ))
@@ -206,7 +237,7 @@ def merge_dfs(dfs: list[pl.DataFrame]) -> tuple[pl.DataFrame, pl.DataFrame | Non
206237 )
207238
208239 # Join back to main dataframe
209- df = df .join (step_stats .select (["Step" , "Cycle" ]), on = "Step" , how = "left" )
240+ df = df .drop ( "Cycle" , strict = False ). join (step_stats .select (["Step" , "Cycle" ]), on = "Step" , how = "left" )
210241
211242 # EIS merge - find last non-zero cycle before the EIS
212243 if eis_df is not None :
@@ -769,7 +800,7 @@ def analyse_sample(sample_id: str) -> SampleDataBundle:
769800 df , eis_df = merge_dfs (dfs )
770801
771802 # Merge metadatas together
772- metadata = merge_metadata (job_files , metadatas )
803+ metadata = merge_metadata (job_files , metadatas , sample_id )
773804
774805 # Get sample and job data
775806 sample_data = metadata .get ("sample_data" , {})
@@ -941,9 +972,9 @@ def shrink_all_samples(sampleid_contains: str = "") -> None:
941972 sampleid_contains (str, optional): only shrink samples with this string in the sampleid
942973
943974 """
944- for batch_folder in Path (CONFIG ["Processed snapshots folder path" ]).iterdir ():
945- if batch_folder .is_dir ():
946- for sample_folder in batch_folder .iterdir ():
975+ for run_folder in Path (CONFIG ["Data folder path" ]).iterdir ():
976+ if run_folder .is_dir ():
977+ for sample_folder in run_folder .iterdir ():
947978 sample_id = sample_folder .name
948979 if sampleid_contains and sampleid_contains not in sample_id :
949980 continue
@@ -983,9 +1014,9 @@ def analyse_all_samples(
9831014 else :
9841015 samples_to_analyse = []
9851016
986- for batch_folder in Path (CONFIG ["Processed snapshots folder path" ]).iterdir ():
987- if batch_folder .is_dir ():
988- for sample in batch_folder .iterdir ():
1017+ for run_folder in Path (CONFIG ["Data folder path" ]).iterdir ():
1018+ if run_folder .is_dir ():
1019+ for sample in run_folder .iterdir ():
9891020 if sampleid_contains and sampleid_contains not in sample .name :
9901021 continue
9911022 if mode != "always" and sample .name not in samples_to_analyse :