@@ -501,7 +501,7 @@ def move_prop(self,project_id,old_node,new_node,prop,dd,parent_node=None,require
501501 # ndf.at[ndf['cases.submitter_id']==case_id,prop] = p[case_id]
502502 if 'visit_id' in ndf and 'visit_id' in odf :
503503 if len (list (set (ndf .visit_id ))) == len (ndf ) and len (list (set (odf .visit_id ))) == len (odf ):
504- print ("\t \t \t All records in old/new nodes '{}'/'{}' have a unique visit_id, merging '{}' into new_node on 'visit_id' ." .format (old_node ,new_node , prop ))
504+ print ("\t \t \t Merging on visit_id: All '{}' records in old/new nodes '{}'/'{}' have unique visit_ids ." .format (prop , old_node ,new_node ))
505505 pdf = odf [['visit_id' ,prop ]] # prop dataframe
506506 df = pd .merge (left = ndf , right = pdf , how = 'left' , left_on = 'visit_id' , right_on = 'visit_id' )
507507 else :
@@ -717,7 +717,7 @@ def change_prop_name(self,project_id,node,props,name='temp',force=False):
717717
718718 return df
719719
720- def drop_props (self ,project_id ,node ,props ,name = 'temp' ,warn = True ):
720+ def drop_props (self ,project_id ,node ,props ,name = 'temp' ,drop_nn = True ):
721721 """
722722 Function drops the list of props from column headers of a node TSV.
723723 Args:
@@ -742,32 +742,37 @@ def drop_props(self,project_id,node,props,name='temp',warn=True):
742742 print ("\t \t No '{0}' TSV found in project '{1}'. Nothing changed." .format (node ,project_id ))
743743 return
744744
745- not_dropped ,dropped = [],[]
745+ dropped , not_found , not_dropped ,errors = [],[], [],[]
746746
747747 for prop in props :
748748 if prop in df :
749749 nn = df .loc [df [prop ].notnull ()]
750750 if not nn .empty :
751- if warn is True :
752- print ("\n \n Warning!\n \t \t Found {0} non-null records for '{1}' in '{2}' TSV. Existing data not dropped!" .format (len (nn ),prop ,node ))
751+ print ("\n \n Warning!\n \t \t Found {0} non-null records for '{1}' in '{2}' TSV!" .format (len (nn ),prop ,node ))
752+ if drop_nn is not True :
753+ print ("\t \t Existing '{}' data not dropped from '{}' TSV!\n \n \t \t Set 'drop_nn' to True to override this warning." )
753754 return df
754- else :
755- print ("\n \n Warning!\n \t \t Found {0} non-null records for '{1}' in '{2}' TSV. Existing data not dropped!" .format (len (nn ),prop ,node ))
756- try :
757- df = df .drop (columns = [prop ])
758- dropped .append (prop )
759- except Exception as e :
760- not_dropped .append (prop )
761- continue
755+ try :
756+ df = df .drop (columns = [prop ],errors = 'raise' )
757+ dropped .append (prop )
758+ except Exception as e :
759+ not_dropped .append (prop )
760+ errors .append (e )
761+ continue
762+ else :
763+ not_found .append (prop )
764+
765+ if len (not_found ) > 0 :
766+ print ("\t \t Warning: These props were not found in the '{}' TSV: {}" .format (node ,not_dropped ))
762767
763768 if len (not_dropped ) > 0 :
764- print ("\t \t Warning! Some props were NOT dropped from '{}' TSV:\n \t \t {} " .format (node ,not_dropped ))
769+ print ("\t \t Warning! Some props were NOT dropped from '{}' TSV: {} {} " .format (node ,not_dropped , list ( set ( errors )) ))
765770
766771 if len (dropped ) > 0 :
767- print ("\t \t props dropped from '{}' and data written to TSV '{}': \n \t \t {}" .format (node ,filename ,dropped ))
772+ print ("\t \t props dropped from '{}' and data written to '{}' TSV: {}" .format (node ,node ,dropped ))
768773 df = self .write_tsv (df ,project_id ,node )
769774 else :
770- print ("\t No props dropped. '{}' TSV unchanged." .format (node ))
775+ print ("\t \ t No props dropped. '{}' TSV unchanged." .format (node ))
771776
772777 return df
773778
@@ -787,10 +792,19 @@ def change_enum(self,project_id,node,prop,enums,name='temp'):
787792
788793 df = self .read_tsv (project_id ,node ,name )
789794
795+ if df is None :
796+ print ("\t \t No '{}' TSV found in project '{}'. No TSVs changed." .format (node ,project_id ))
797+ return
798+
790799 if prop not in df :
791800 print ("\t \t '{}' not found in '{}' TSV! Nothing changed." .format (prop ,node ))
792801 return df
793802
803+ nn = df .loc [df [prop ].notnull ()]
804+ if nn .empty :
805+ print ("\t \t All null '{}' enum values in '{}' TSV! Nothing changed." .format (prop ,node ))
806+ return df
807+
794808 changed ,not_changed = [],[]
795809
796810 for old_enum in list (enums .keys ()):
@@ -799,7 +813,7 @@ def change_enum(self,project_id,node,prop,enums,name='temp'):
799813 old_total = len (df .loc [df [prop ]== old_enum ])
800814
801815 if old_total == 0 :
802- print ("\t \t No records found with prop '{}' equal to '{}'. Values in TSV include: '{}'" .format (prop ,old_enum ,df [prop ].value_counts ()))
816+ print ("\t \t No records found with prop '{}' equal to '{}'; '{}' TSV unchanged. Values include: '{}'" .format (prop ,old_enum , node ,df [prop ].value_counts ()))
803817 continue
804818
805819 if new_enum == 'null' :
@@ -824,8 +838,7 @@ def change_enum(self,project_id,node,prop,enums,name='temp'):
824838 print ("\t \t Enum values NOT changed in '{}' TSV: {}" .format (node ,not_changed ))
825839
826840 if len (changed ) > 0 :
827- df .to_csv (filename ,sep = '\t ' ,index = False ,encoding = 'utf-8' )
828- print ("\t \t Enum values changed in '{}' node and TSV written to file '{}': {}" .format (node ,filename ,changed ))
841+ self .write_tsv (df ,project_id ,node ,name )
829842
830843 else :
831844 print ("\t \t No enum values were changed in '{}' node. No TSVs changed." .format (node ))
@@ -1123,7 +1136,7 @@ def get_submission_order(self,dd,project_id,name='temp',suffix='tsv',missing_nod
11231136 print ("\t Submission Order: \n \t \t {}" .format (suborder ))
11241137 return suborder
11251138
1126- def submit_tsvs (self ,project_id ,suborder ,check_done = False ,rm_temp = False ,drop_ids = False ,name = 'temp' ):
1139+ def submit_tsvs (self ,project_id ,suborder ,check_done = False ,remove_done = False ,drop_ids = False ,name = 'temp' ):
11271140 """
11281141 Submits all the TSVs in 'suborder' dictionary obtained by running, e.g.:
11291142 suborder = stag_mig.get_submission_order(stag_dd,project_id,name='temp',suffix='tsv')
@@ -1184,7 +1197,7 @@ def submit_tsvs(self,project_id,suborder,check_done=False,rm_temp=False,drop_ids
11841197 print ("\t {}" .format (e ))
11851198 else :
11861199 print ("\t Previously submitted file already exists in done directory:\n \t \t {}\n " .format (done_file ))
1187- if rm_temp is True :
1200+ if remove_done is True :
11881201 rm_cmd = ['rm' ,filename ]
11891202 try :
11901203 output = subprocess .check_output (rm_cmd , stderr = subprocess .STDOUT ).decode ('UTF-8' )
0 commit comments