Skip to content

Commit a888d36

Browse files
author
Christopher Henry
committed
Fixing bugs in gapfilling
1 parent ee1872c commit a888d36

File tree

5 files changed

+91
-22
lines changed

5 files changed

+91
-22
lines changed

modelseedpy/core/annotationontology.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
"tmscore",
3838
"rmsd",
3939
"hmmscore",
40+
"score"
4041
]
4142

4243
def convert_to_search_role(role):
@@ -60,9 +61,9 @@ def __init__(self, parent, event, term, probability=1, scores={}, ref_entity=Non
6061
self.ref_entity = ref_entity
6162
self.entity_type = entity_type
6263
self.scores = scores
63-
for item in self.scores:
64-
if item not in allowable_score_types:
65-
logger.warning(item + " not an allowable score type!")
64+
#for item in self.scores:
65+
#if item not in allowable_score_types:
66+
#logger.warning(item + " not an allowable score type!")
6667

6768
def to_data(self):
6869
output = {
@@ -485,6 +486,8 @@ def get_events_from_priority_list(self,priority_list):
485486
elif item == "Merge":
486487
if len(event.method) > 5 and event.method[0:5] == "Merge" and event.id not in event_list:
487488
selected_merge = event.id
489+
elif item in event.description or item in event.id:
490+
event_list.append(event.id)
488491
if selected_merge:
489492
event_list.append(selected_merge)
490493
return event_list

modelseedpy/core/msgapfill.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ def __init__(
116116
}
117117
)
118118

119-
def test_gapfill_database(self, media, target=None, before_filtering=True):
119+
def test_gapfill_database(self, media, target=None, before_filtering=True,active_reactions=[]):
120120
# Testing if gapfilling can work before filtering
121121
if target:
122122
self.gfpkgmgr.getpkg("GapfillingPkg").set_base_objective(target,None)
@@ -126,7 +126,7 @@ def test_gapfill_database(self, media, target=None, before_filtering=True):
126126
target = target[13:]
127127
#Setting media
128128
self.gfpkgmgr.getpkg("KBaseMediaPkg").build_package(media)
129-
if self.gfpkgmgr.getpkg("GapfillingPkg").test_gapfill_database():
129+
if self.gfpkgmgr.getpkg("GapfillingPkg").test_gapfill_database(active_reactions):
130130
return True
131131
if self.gfpkgmgr.getpkg("GapfillingPkg").test_solution.status == 'infeasible':
132132
return False
@@ -162,14 +162,17 @@ def test_and_adjust_gapfilling_conditions(self,medias,targets,thresholds,prefilt
162162
"medias":[],
163163
"targets":[],
164164
"thresholds":[],
165-
"conditions":[]
165+
"conditions":[],
166+
"active_reactions":[]
166167
}
167168
logger.debug("Testing unfiltered database")
168169
for i,media in enumerate(medias):
169-
if self.test_gapfill_database(media,targets[i],before_filtering=True):
170+
active_reactions = []
171+
if self.test_gapfill_database(media,targets[i],before_filtering=True,active_reactions=active_reactions):
170172
output["medias"].append(media)
171173
output["targets"].append(targets[i])
172174
output["thresholds"].append(thresholds[i])
175+
output["active_reactions"].append(active_reactions)
173176
output["conditions"].append({
174177
"media": media,
175178
"is_max_threshold": False,
@@ -179,25 +182,29 @@ def test_and_adjust_gapfilling_conditions(self,medias,targets,thresholds,prefilt
179182
# Filtering
180183
if prefilter:
181184
logger.debug("Filtering database")
182-
self.prefilter(growth_conditions=output["conditions"])
185+
self.prefilter(growth_conditions=output["conditions"],active_reaction_sets=output["active_reactions"])
183186
medias = []
184187
targets = []
185188
thresholds = []
186189
conditions = []
190+
active_reaction_sets = []
187191
logger.debug("Testing filtered database")
188192
for i,media in enumerate(output["medias"]):
189-
if self.test_gapfill_database(media,output["targets"][i],before_filtering=False):
193+
active_reactions = []
194+
if self.test_gapfill_database(media,output["targets"][i],before_filtering=False,active_reactions=active_reactions):
190195
medias.append(media)
191196
targets.append(output["targets"][i])
192197
thresholds.append(output["thresholds"][i])
193198
conditions.append(output["conditions"][i])
199+
active_reaction_sets.append(active_reactions)
194200
output["medias"] = medias
195201
output["targets"] = targets
196202
output["thresholds"] = thresholds
197203
output["conditions"] = conditions
204+
output["active_reactions"] = active_reaction_sets
198205
return output
199206

200-
def prefilter(self,test_conditions=None,growth_conditions=[],use_prior_filtering=False,base_filter_only=False):
207+
def prefilter(self,test_conditions=None,growth_conditions=[],use_prior_filtering=False,base_filter_only=False,active_reaction_sets=[]):
201208
"""Prefilters the database by removing any reactions that break specified ATP tests
202209
Parameters
203210
----------
@@ -215,7 +222,8 @@ def prefilter(self,test_conditions=None,growth_conditions=[],use_prior_filtering
215222
self.test_conditions,
216223
growth_conditions=growth_conditions,
217224
base_filter=base_filter,
218-
base_filter_only=base_filter_only
225+
base_filter_only=base_filter_only,
226+
active_reaction_sets=active_reaction_sets
219227
)
220228
gf_filter = self.gfpkgmgr.getpkg("GapfillingPkg").modelutl.get_attributes(
221229
"gf_filter", {}
@@ -355,6 +363,7 @@ def run_global_gapfilling(
355363
self.gfpkgmgr.getpkg("GapfillingPkg").set_media(media)
356364
#Copying model and either making it the base model or adding to the model list
357365
model_cpy = self.gfmodel.copy()
366+
358367
if i == 0:
359368
merged_model = model_cpy
360369
else:
@@ -400,7 +409,9 @@ def run_global_gapfilling(
400409
self.mdlutl.printlp(model=merged_model,filename="GlobalGapfill",print=True)
401410

402411
# Running gapfilling and checking solution
412+
print("Runninng global optimization")
403413
sol = merged_model.optimize()
414+
print("Global optimization complete")
404415
logger.info(
405416
f"gapfill solution objective value {sol.objective_value} ({sol.status}) for media {media}"
406417
)

modelseedpy/core/msgrowthphenotypes.py

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -281,11 +281,15 @@ def gapfill_model_for_phenotype(
281281

282282
return gfresults
283283

284-
285284
class MSGrowthPhenotypes:
286285
def __init__(
287-
self, base_media=None, base_uptake=0, base_excretion=1000, global_atom_limits={}
286+
self, base_media=None, base_uptake=0, base_excretion=1000, global_atom_limits={}, id=None, name=None, source=None, source_id=None, type=None
288287
):
288+
self.id = id
289+
self.name = name
290+
self.source = source
291+
self.source_id = source_id
292+
self.type = type
289293
self.base_media = base_media
290294
self.phenotypes = DictList()
291295
self.base_uptake = base_uptake
@@ -304,7 +308,7 @@ def from_compound_hash(
304308
type="growth"
305309
):
306310
growthpheno = MSGrowthPhenotypes(
307-
base_media, base_uptake, base_excretion, global_atom_limits
311+
base_media=base_media, base_uptake=base_uptake, base_excretion=base_excretion, global_atom_limits=global_atom_limits, id=None, name=None, source=None, source_id=None, type=None
308312
)
309313
new_phenos = []
310314
for cpd in compounds:
@@ -323,7 +327,7 @@ def from_kbase_object(
323327
global_atom_limits={},
324328
):
325329
growthpheno = MSGrowthPhenotypes(
326-
base_media, base_uptake, base_excretion, global_atom_limits
330+
base_media=base_media, base_uptake=base_uptake, base_excretion=base_excretion, global_atom_limits=global_atom_limits, id=data["id"], name=data["name"], source=data["source"], source_id=data["source_id"], type=data["type"]
327331
)
328332
new_phenos = []
329333
for pheno in data["phenotypes"]:
@@ -414,6 +418,27 @@ def from_ms_file(
414418
growthpheno.add_phenotypes(new_phenos)
415419
return growthpheno
416420

421+
def to_kbase_json(self,genome_ref):
422+
pheno_data = {
423+
"id": self.id,
424+
"name": self.name,
425+
"source": self.source,
426+
"source_id": self.source_id,
427+
"type": self.type,
428+
"phenotypes": [],
429+
"genome_ref": genome_ref
430+
}
431+
for pheno in self.phenotypes:
432+
pheno_data["phenotypes"].append({
433+
"id": pheno.id,
434+
"name": pheno.name,
435+
"media_ref": pheno.media.info.ref,
436+
"normalizedGrowth": pheno.experimental_value,
437+
"geneko_refs": pheno.gene_ko,
438+
"additionalcompound_refs": pheno.additional_compounds
439+
})
440+
return pheno_data
441+
417442
def build_super_media(self):
418443
super_media = None
419444
for pheno in self.phenotypes:

modelseedpy/core/msmodelutl.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -852,13 +852,21 @@ def add_ms_reaction(self, rxn_dict, compartment_trans=["c0", "e0"]):
852852
#################################################################################
853853
# Functions related to utility functions
854854
#################################################################################
855-
def assign_reliability_scores_to_reactions(self):
855+
def assign_reliability_scores_to_reactions(self,active_reaction_sets=[]):
856856
"""Assigns a reliability score to every model reaction which indicates how likely the reaction is to be accurate and to take place
857857
858858
Returns
859859
-------
860860
{ reaction ID<string> : { reaction direction<string> : score<float> } }
861861
"""
862+
active_rxn_dictionary={}
863+
for item in active_reaction_sets:
864+
for array in item:
865+
if array[0] not in active_rxn_dictionary:
866+
active_rxn_dictionary[array[0]] = {}
867+
if array[1] not in active_rxn_dictionary[array[0]]:
868+
active_rxn_dictionary[array[0]][array[1]] = 0
869+
active_rxn_dictionary[array[0]][array[1]]+=1
862870
if self.reliability_scores == None:
863871
self.reliability_scores = {}
864872
biochem = ModelSEEDBiochem.get()
@@ -927,6 +935,15 @@ def assign_reliability_scores_to_reactions(self):
927935
self.reliability_scores[reaction.id] = {}
928936
self.reliability_scores[reaction.id][">"] = 1000
929937
self.reliability_scores[reaction.id]["<"] = 1000
938+
for_multiplier = 1
939+
rev_multiplier = 1
940+
if reaction.id in active_rxn_dictionary:
941+
if ">" in active_rxn_dictionary[reaction.id]:
942+
for_multiplier += 0.1*active_rxn_dictionary[reaction.id][">"]
943+
if "<" in active_rxn_dictionary[reaction.id]:
944+
rev_multiplier += 0.1*active_rxn_dictionary[reaction.id]["<"]
945+
self.reliability_scores[reaction.id][">"] = self.reliability_scores[reaction.id][">"]*for_multiplier
946+
self.reliability_scores[reaction.id]["<"] = self.reliability_scores[reaction.id]["<"]*rev_multiplier
930947
return self.reliability_scores
931948

932949
def is_core(self,rxn):
@@ -1636,7 +1653,8 @@ def reaction_expansion_test(
16361653
binary_search=True,
16371654
attribute_label="gf_filter",
16381655
positive_growth=[],
1639-
resort_by_score=True
1656+
resort_by_score=True,
1657+
active_reaction_sets=[]
16401658
):
16411659
"""Adds reactions in reaction list one by one and appplies tests, filtering reactions that fail
16421660
@@ -1659,7 +1677,7 @@ def reaction_expansion_test(
16591677
self.breaking_reaction = None
16601678
filtered_list = []
16611679
if resort_by_score:
1662-
scores = self.assign_reliability_scores_to_reactions()
1680+
scores = self.assign_reliability_scores_to_reactions(active_reaction_sets=active_reaction_sets)
16631681
reaction_list = sorted(reaction_list, key=lambda x: scores[x[0].id][x[1]])
16641682
for item in reaction_list:
16651683
logger.debug(item[0].id+":"+item[1]+":"+str(scores[item[0].id][item[1]]))

modelseedpy/fbapkg/gapfillingpkg.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import json
99
from optlang.symbolics import Zero, add
1010
from cobra import Model, Reaction, Metabolite
11+
from cobra.flux_analysis import pfba
1112
from cobra.io import (
1213
load_json_model,
1314
save_json_model,
@@ -717,7 +718,7 @@ def run_test_conditions(self, condition_list, solution=None, max_iterations=10):
717718
return None
718719
return solution
719720

720-
def test_gapfill_database(self):
721+
def test_gapfill_database(self,active_reactions=[]):
721722
self.reset_objective_minimum(0,False)
722723
self.model.objective = self.original_objective
723724
self.test_solution = self.model.optimize()
@@ -731,6 +732,11 @@ def test_gapfill_database(self):
731732
self.model.objective = self.parameters["gfobj"]
732733
if self.test_solution.objective_value < self.parameters["minimum_obj"] or self.test_solution.status == 'infeasible':
733734
return False
735+
#Running pFBA to determine active reactions for nonzero objective
736+
solution = pfba(self.model)
737+
for rxn in self.model.reactions:
738+
if solution.fluxes[rxn.id] > 0:
739+
active_reactions.append([rxn.id,">"])
734740
return True
735741

736742
def reset_objective_minimum(self, min_objective,reset_params=True):
@@ -749,7 +755,7 @@ def reset_objective_minimum(self, min_objective,reset_params=True):
749755
if min_objective < 0:
750756
self.pkgmgr.getpkg("ObjConstPkg").constraints["objc"]["1"].ub = min_objective
751757

752-
def filter_database_based_on_tests(self,test_conditions,growth_conditions=[],base_filter=None,base_target="rxn00062_c0",base_filter_only=False,all_noncore=True):
758+
def filter_database_based_on_tests(self,test_conditions,growth_conditions=[],base_filter=None,base_target="rxn00062_c0",base_filter_only=False,all_noncore=True,active_reaction_sets=[]):
753759
#Saving the current media
754760
current_media = self.current_media()
755761
#Clearing element uptake constraints
@@ -776,20 +782,26 @@ def filter_database_based_on_tests(self,test_conditions,growth_conditions=[],bas
776782
if not base_filter_only:
777783
with self.model:
778784
rxnlist = []
785+
rxndict = {}
779786
for reaction in self.model.reactions:
780787
if reaction.id in self.gapfilling_penalties:
788+
rxndict[reaction.id] = 1
781789
if "reverse" in self.gapfilling_penalties[reaction.id]:
782790
rxnlist.append([reaction, "<"])
783791
if "forward" in self.gapfilling_penalties[reaction.id]:
784792
rxnlist.append([reaction, ">"])
785793
elif all_noncore and not self.modelutl.is_core(reaction):
794+
rxndict[reaction.id] = 1
786795
if reaction.lower_bound < 0:
787796
rxnlist.append([reaction, "<"])
788797
if reaction.upper_bound > 0:
789798
rxnlist.append([reaction, ">"])
799+
print("Full model:",len(self.modelutl.model.reactions))
800+
print("Gapfilling count:",len(self.gapfilling_penalties))
801+
print("Reaction list:",len(rxndict))
790802
filtered_list = self.modelutl.reaction_expansion_test(
791-
rxnlist, test_conditions
792-
)
803+
rxnlist, test_conditions,active_reaction_sets=active_reaction_sets
804+
)#,positive_growth=growth_conditions
793805
#Adding base filter reactions to model
794806
if base_filter != None:
795807
gf_filter_att = self.modelutl.get_attributes("gf_filter", {})

0 commit comments

Comments
 (0)