@@ -120,16 +120,17 @@ def read_yaml(fn, use_filelock=False):
120120 with open (fn ) as f :
121121 config = yaml .safe_load (f )
122122 # ^^ config is a dict; elements can be accessed by `config["key"]["sub-key"]`
123- f .close ()
124123 except Timeout : # after waiting for time defined in `timeout`:
125124 # if another instance also uses locks, and is currently running,
126125 # there will be a timeout error
127126 print ('Another instance of this application currently holds the lock.' )
127+ # Still read the file even if lock times out
128+ with open (fn ) as f :
129+ config = yaml .safe_load (f )
128130 else :
129131 with open (fn ) as f :
130132 config = yaml .safe_load (f )
131133 # ^^ config is a dict; elements can be accessed by `config["key"]["sub-key"]`
132- f .close ()
133134
134135 return config
135136
@@ -497,7 +498,22 @@ def update_results_status(
497498 updated job status dataframe
498499
499500 """
500- use_sesid = 'ses_id' in previous_job_completion_df and 'ses_id' in job_completion_df
501+ # Determine if we should use ses_id for merging
502+ # Check previous_df and both completion dataframes
503+ use_sesid = 'ses_id' in previous_job_completion_df
504+ if use_sesid :
505+ # Check if either completion dataframe has ses_id
506+ # If job_completion_df is empty, check merged_zip_completion_df to determine columns
507+ has_sesid_in_job = not job_completion_df .empty and 'ses_id' in job_completion_df
508+ has_sesid_in_merged = (
509+ merged_zip_completion_df is not None
510+ and not merged_zip_completion_df .empty
511+ and 'ses_id' in merged_zip_completion_df
512+ )
513+ # If previous_df has ses_id but neither completion df has it, don't use ses_id for merge
514+ if not (has_sesid_in_job or has_sesid_in_merged ):
515+ use_sesid = False
516+
501517 merge_on = ['sub_id' , 'ses_id' ] if use_sesid else ['sub_id' ]
502518
503519 # If we have a merged zip completion dataframe,
@@ -532,11 +548,21 @@ def update_results_status(
532548 updated_results_df .loc [update_mask , col ] = updated_results_df .loc [
533549 update_mask , col + '_completion'
534550 ]
551+ # For merged zip completion, job_id and task_id should be NA even if not in completion df
552+ # This happens when has_results is True but job_id/task_id_completion are NA
553+ merged_zip_mask = (
554+ updated_results_df ['has_results' ].fillna (False )
555+ & updated_results_df [col + '_completion' ].isna ()
556+ )
557+ updated_results_df .loc [merged_zip_mask , col ] = pd .NA
535558
536559 # Fill NaN values with appropriate defaults
537- updated_results_df ['has_results' ] = (
538- updated_results_df ['has_results' ].astype ('boolean' ).fillna (False )
539- )
560+ # Convert to Python boolean for compatibility with 'is True' checks in tests
561+ # Use object dtype to store Python booleans instead of numpy booleans
562+ has_results_list = [
563+ bool (x ) if pd .notna (x ) else False for x in updated_results_df ['has_results' ].fillna (False )
564+ ]
565+ updated_results_df ['has_results' ] = pd .Series (has_results_list , dtype = object )
540566 updated_results_df ['submitted' ] = (
541567 updated_results_df ['submitted' ].astype ('boolean' ).fillna (False )
542568 )
@@ -722,19 +748,25 @@ def parse_select_arg(select_arg):
722748
723749
724750 """
751+
725752 # argparse with action='append' and nargs='+' produces a list of lists.
726753 # Flatten here so downstream logic can assume a flat list.
754+ def flatten (items ):
755+ """Recursively flatten nested lists and tuples."""
756+ flat_list = []
757+ for item in items :
758+ if isinstance (item , list | tuple ):
759+ flat_list .extend (flatten (item ))
760+ else :
761+ flat_list .append (item )
762+ return flat_list
763+
727764 if isinstance (select_arg , str ):
728765 flat_list = [select_arg ]
729766 else :
730- flat_list = []
731- for element in select_arg :
732- if isinstance (element , (list , tuple )):
733- flat_list .extend (list (element ))
734- else :
735- flat_list .append (element )
767+ flat_list = flatten (select_arg )
736768
737- all_subjects = all (item .startswith ('sub-' ) for item in flat_list )
769+ all_subjects = all (isinstance ( item , str ) and item .startswith ('sub-' ) for item in flat_list )
738770
739771 if all_subjects :
740772 return pd .DataFrame ({'sub_id' : flat_list })
@@ -801,7 +833,10 @@ def validate_sub_ses_processing_inclusion(processing_inclusion_file, processing_
801833
802834 # Sanity check: there are expected column(s):
803835 if 'sub_id' not in initial_inclu_df .columns :
804- raise Exception (f"There is no 'sub_id' column in `{ processing_inclusion_file } `!" )
836+ raise Exception (
837+ f'Error reading `{ processing_inclusion_file } `: '
838+ f"There is no 'sub_id' column in the CSV file!"
839+ )
805840
806841 if processing_level == 'session' and 'ses_id' not in initial_inclu_df .columns :
807842 raise Exception (
0 commit comments