Skip to content

Commit 008cba0

Browse files
committed
updates
1 parent 60f205f commit 008cba0

File tree

1 file changed

+34
-21
lines changed

1 file changed

+34
-21
lines changed

migration/migration.py

Lines changed: 34 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -501,7 +501,7 @@ def move_prop(self,project_id,old_node,new_node,prop,dd,parent_node=None,require
501501
# ndf.at[ndf['cases.submitter_id']==case_id,prop] = p[case_id]
502502
if 'visit_id' in ndf and 'visit_id' in odf:
503503
if len(list(set(ndf.visit_id))) == len(ndf) and len(list(set(odf.visit_id))) == len(odf):
504-
print("\t\t\tAll records in old/new nodes '{}'/'{}' have a unique visit_id, merging '{}' into new_node on 'visit_id'.".format(old_node,new_node,prop))
504+
print("\t\t\tMerging on visit_id: All '{}' records in old/new nodes '{}'/'{}' have unique visit_ids.".format(prop,old_node,new_node))
505505
pdf = odf[['visit_id',prop]] # prop dataframe
506506
df = pd.merge(left=ndf, right=pdf, how='left', left_on='visit_id', right_on='visit_id')
507507
else:
@@ -717,7 +717,7 @@ def change_prop_name(self,project_id,node,props,name='temp',force=False):
717717

718718
return df
719719

720-
def drop_props(self,project_id,node,props,name='temp',warn=True):
720+
def drop_props(self,project_id,node,props,name='temp',drop_nn=True):
721721
"""
722722
Function drops the list of props from column headers of a node TSV.
723723
Args:
@@ -742,32 +742,37 @@ def drop_props(self,project_id,node,props,name='temp',warn=True):
742742
print("\t\tNo '{0}' TSV found in project '{1}'. Nothing changed.".format(node,project_id))
743743
return
744744

745-
not_dropped,dropped = [],[]
745+
dropped,not_found,not_dropped,errors = [],[],[],[]
746746

747747
for prop in props:
748748
if prop in df:
749749
nn = df.loc[df[prop].notnull()]
750750
if not nn.empty:
751-
if warn is True:
752-
print("\n\nWarning!\n\t\tFound {0} non-null records for '{1}' in '{2}' TSV. Existing data not dropped!".format(len(nn),prop,node))
751+
print("\n\nWarning!\n\t\tFound {0} non-null records for '{1}' in '{2}' TSV!".format(len(nn),prop,node))
752+
if drop_nn is not True:
753+
print("\t\tExisting '{}' data not dropped from '{}' TSV!\n\n\t\tSet 'drop_nn' to True to override this warning.")
753754
return df
754-
else:
755-
print("\n\nWarning!\n\t\tFound {0} non-null records for '{1}' in '{2}' TSV. Existing data not dropped!".format(len(nn),prop,node))
756-
try:
757-
df = df.drop(columns=[prop])
758-
dropped.append(prop)
759-
except Exception as e:
760-
not_dropped.append(prop)
761-
continue
755+
try:
756+
df = df.drop(columns=[prop],errors='raise')
757+
dropped.append(prop)
758+
except Exception as e:
759+
not_dropped.append(prop)
760+
errors.append(e)
761+
continue
762+
else:
763+
not_found.append(prop)
764+
765+
if len(not_found) > 0:
766+
print("\t\tWarning: These props were not found in the '{}' TSV: {}".format(node,not_dropped))
762767

763768
if len(not_dropped) > 0:
764-
print("\t\tWarning! Some props were NOT dropped from '{}' TSV:\n\t\t{}".format(node,not_dropped))
769+
print("\t\tWarning! Some props were NOT dropped from '{}' TSV: {} {}".format(node,not_dropped,list(set(errors))))
765770

766771
if len(dropped) > 0:
767-
print("\t\tprops dropped from '{}' and data written to TSV '{}':\n\t\t{}".format(node,filename,dropped))
772+
print("\t\tprops dropped from '{}' and data written to '{}' TSV: {}".format(node,node,dropped))
768773
df = self.write_tsv(df,project_id,node)
769774
else:
770-
print("\tNo props dropped. '{}' TSV unchanged.".format(node))
775+
print("\t\tNo props dropped. '{}' TSV unchanged.".format(node))
771776

772777
return df
773778

@@ -787,10 +792,19 @@ def change_enum(self,project_id,node,prop,enums,name='temp'):
787792

788793
df = self.read_tsv(project_id,node,name)
789794

795+
if df is None:
796+
print("\t\tNo '{}' TSV found in project '{}'. No TSVs changed.".format(node,project_id))
797+
return
798+
790799
if prop not in df:
791800
print("\t\t'{}' not found in '{}' TSV! Nothing changed.".format(prop,node))
792801
return df
793802

803+
nn = df.loc[df[prop].notnull()]
804+
if nn.empty:
805+
print("\t\tAll null '{}' enum values in '{}' TSV! Nothing changed.".format(prop,node))
806+
return df
807+
794808
changed,not_changed = [],[]
795809

796810
for old_enum in list(enums.keys()):
@@ -799,7 +813,7 @@ def change_enum(self,project_id,node,prop,enums,name='temp'):
799813
old_total = len(df.loc[df[prop]==old_enum])
800814

801815
if old_total == 0:
802-
print("\t\tNo records found with prop '{}' equal to '{}'. Values in TSV include: '{}'".format(prop,old_enum,df[prop].value_counts()))
816+
print("\t\tNo records found with prop '{}' equal to '{}'; '{}' TSV unchanged. Values include: '{}'".format(prop,old_enum,node,df[prop].value_counts()))
803817
continue
804818

805819
if new_enum == 'null':
@@ -824,8 +838,7 @@ def change_enum(self,project_id,node,prop,enums,name='temp'):
824838
print("\t\tEnum values NOT changed in '{}' TSV: {}".format(node,not_changed))
825839

826840
if len(changed) > 0:
827-
df.to_csv(filename,sep='\t',index=False,encoding='utf-8')
828-
print("\t\tEnum values changed in '{}' node and TSV written to file '{}': {}".format(node,filename,changed))
841+
self.write_tsv(df,project_id,node,name)
829842

830843
else:
831844
print("\t\tNo enum values were changed in '{}' node. No TSVs changed.".format(node))
@@ -1123,7 +1136,7 @@ def get_submission_order(self,dd,project_id,name='temp',suffix='tsv',missing_nod
11231136
print("\tSubmission Order: \n\t\t{}".format(suborder))
11241137
return suborder
11251138

1126-
def submit_tsvs(self,project_id,suborder,check_done=False,rm_temp=False,drop_ids=False,name='temp'):
1139+
def submit_tsvs(self,project_id,suborder,check_done=False,remove_done=False,drop_ids=False,name='temp'):
11271140
"""
11281141
Submits all the TSVs in 'suborder' dictionary obtained by running, e.g.:
11291142
suborder = stag_mig.get_submission_order(stag_dd,project_id,name='temp',suffix='tsv')
@@ -1184,7 +1197,7 @@ def submit_tsvs(self,project_id,suborder,check_done=False,rm_temp=False,drop_ids
11841197
print("\t{}".format(e))
11851198
else:
11861199
print("\tPreviously submitted file already exists in done directory:\n\t\t{}\n".format(done_file))
1187-
if rm_temp is True:
1200+
if remove_done is True:
11881201
rm_cmd = ['rm',filename]
11891202
try:
11901203
output = subprocess.check_output(rm_cmd, stderr=subprocess.STDOUT).decode('UTF-8')

0 commit comments

Comments
 (0)