@@ -1850,13 +1850,15 @@ def createFinalSpreadsheets(detailed_BGC_listing_with_Pop_and_GCF_map_file, zol_
18501850 'OG Consensus Direction' , 'Tajima\' s D' , 'Proportion of Filtered Codon Alignment is Segregating Sites' ,
18511851 'Entropy' , 'Upstream Region Entropy' , 'Median Beta-RD-gc' , 'Max Beta-RD-gc' ,
18521852 'Proportion of sites which are highly ambiguous in codon alignment' ,
1853- 'Proportion of sites which are highly ambiguous in trimmed codon alignment' , 'Median GC' , 'Median GC Skew' ]
1854-
1855- if zol_high_qual_flag :
1856- zol_sheet_header += ['GARD Partitions Based on Recombination Breakpoints' ,
1857- 'Number of Sites Identified as Under Positive or Negative Selection by FUBAR' ,
1858- 'Average delta(Beta, Alpha) by FUBAR across sites' ,
1859- 'Proportion of Sites Under Selection which are Positive' ]
1853+ 'Proportion of sites which are highly ambiguous in trimmed codon alignment' , 'Median GC' , 'Median GC Skew' ,
1854+ 'BGC score (GECCO weights)' , 'Viral score (V-Score)' ]
1855+
1856+ # TODO: consider restoring the columns below - doing so shifts the fixed spreadsheet column letters used for the conditional formatting (coloring) further down:
1857+ #if zol_high_qual_flag:
1858+ # zol_sheet_header += ['GARD Partitions Based on Recombination Breakpoints',
1859+ # 'Number of Sites Identified as Under Positive or Negative Selection by FUBAR',
1860+ # 'Average delta(Beta, Alpha) by FUBAR across sites',
1861+ # 'Proportion of Sites Under Selection which are Positive']
18601862
18611863 zol_sheet_header += ['KO Annotation (E-value)' , 'PGAP Annotation (E-value)' , 'PaperBLAST Annotation (E-value)' , 'CARD Annotation (E-value)' ,
18621864 'IS Finder (E-value)' , 'MIBiG Annotation (E-value)' , 'VOG Annotation (E-value)' , 'VFDB Annotation (E-value)' ,
@@ -1892,9 +1894,9 @@ def createFinalSpreadsheets(detailed_BGC_listing_with_Pop_and_GCF_map_file, zol_
18921894 if i == 0 : continue
18931895 line = line .strip ()
18941896 ls = line .split ('\t ' )
1895- row = [gcf , ls [0 ], ls [1 ], ls [2 ], comp_cons [gcf ][ls [0 ]]] + ls [3 :16 ] + ls [17 :]
1897+ row = [gcf , ls [0 ], ls [1 ], ls [2 ], comp_cons [gcf ][ls [0 ]]] + ls [3 :18 ] + ls [19 :]
18961898 if zol_high_qual_flag :
1897- row = [gcf , ls [0 ], ls [1 ], ls [2 ], comp_cons [gcf ][ls [0 ]]] + ls [3 :20 ] + ls [21 :]
1899+ row = [gcf , ls [0 ], ls [1 ], ls [2 ], comp_cons [gcf ][ls [0 ]]] + ls [3 :22 ] + ls [23 :]
18981900 zctf_handle .write ('\t ' .join (row ) + '\n ' )
18991901 num_rows += 1
19001902 zctf_handle .close ()
@@ -1904,7 +1906,8 @@ def createFinalSpreadsheets(detailed_BGC_listing_with_Pop_and_GCF_map_file, zol_
19041906 'Number of Sites Identified as Under Positive or Negative Selection by FUBAR' , 'Average delta(Beta, Alpha) by FUBAR across sites' ,
19051907 'Proportion of Sites Under Selection which are Positive' , 'Proportion of Filtered Codon Alignment is Segregating Sites' ,
19061908 'Entropy' , 'Upstream Region Entropy' , 'Median Beta-RD-gc' , 'Max Beta-RD-gc' , 'Proportion of sites which are highly ambiguous in codon alignment' ,
1907- 'Proportion of sites which are highly ambiguous in trimmed codon alignment' , 'Median GC' , 'Median GC Skew' ])
1909+ 'Proportion of sites which are highly ambiguous in trimmed codon alignment' , 'Median GC' , 'Median GC Skew' ,
1910+ 'BGC score (GECCO weights)' , 'Viral score (V-Score)' ])
19081911
19091912 zr_data = loadTableInPandaDataFrame (zol_combined_tsv_file , zr_numeric_columns )
19101913 zr_data .to_excel (writer , sheet_name = 'zol Results' , index = False , na_rep = "NA" )
@@ -1970,11 +1973,19 @@ def createFinalSpreadsheets(detailed_BGC_listing_with_Pop_and_GCF_map_file, zol_
19701973 {'type' : '2_color_scale' , 'min_color' : "#c7afb4" , 'min_type' : 'num' , 'max_type' : 'num' ,
19711974 'max_color' : "#965663" , "min_value" : - 2.0 , "max_value" : 2.0 })
19721975
1976+ # BGC score
1977+ zr_sheet .conditional_format ('S2:S' + str (num_rows ), {'type' : '2_color_scale' , 'min_color' : "#f5aca4" , 'min_type' : 'num' , 'max_type' : 'num' ,
1978+ 'max_color' : "#c75246" , "min_value" : - 7.0 , "max_value" : 13.0 })
1979+
1980+ # V-Score
1981+ zr_sheet .conditional_format ('T2:T' + str (num_rows ), {'type' : '2_color_scale' , 'min_color' : "#dfccff" , 'min_type' : 'num' , 'max_type' : 'num' ,
1982+ 'max_color' : "#715a96" , "min_value" : 0.0 , "max_value" : 10.0 })
1983+
19731984
19741985 # create MIBiG mapping spreadsheet
1975- mibig_json_tar_url = 'https://dl.secondarymetabolites.org/mibig/mibig_json_3.1 .tar.gz'
1976- mibig_json_tar_dir = scratch_dir + 'mibig_json_3.1 /'
1977- mibig_json_tar_file = scratch_dir + 'mibig_json_3.1 .tar.gz'
1986+ mibig_json_tar_url = 'https://dl.secondarymetabolites.org/mibig/mibig_json_4.0 .tar.gz'
1987+ mibig_json_tar_dir = scratch_dir + 'mibig_json_4.0 /'
1988+ mibig_json_tar_file = scratch_dir + 'mibig_json_4.0 .tar.gz'
19781989
19791990 if not os .path .isdir (mibig_json_tar_dir ):
19801991 if os .path .isfile (mibig_json_tar_file ):
@@ -2007,7 +2018,7 @@ def createFinalSpreadsheets(detailed_BGC_listing_with_Pop_and_GCF_map_file, zol_
20072018 json_bgc_file = mibig_json_tar_dir + f
20082019 with open (json_bgc_file ) as json_data :
20092020 data = json .load (json_data )
2010- for comp in data ['cluster' ][ ' compounds' ]:
2021+ for comp in data ['compounds' ]:
20112022 for key in comp :
20122023 if key == 'compound' :
20132024 mibig_bgc_compounds [bgc ].append (comp [key ])
0 commit comments