diff --git a/reconstruction/ecoli/dataclasses/process/translation.py b/reconstruction/ecoli/dataclasses/process/translation.py index 62a2cac2b..08aedf3d8 100755 --- a/reconstruction/ecoli/dataclasses/process/translation.py +++ b/reconstruction/ecoli/dataclasses/process/translation.py @@ -129,13 +129,45 @@ def _build_monomer_data(self, raw_data, sim_data): for p in raw_data.protein_half_lives_pulsed_silac } + # Get protease assignments and degradation contributions (in fraction) from Gupta et al. + protease_dict = { + p['id']: {'protease_assignment': p['protease_assignment'], + 'ClpP_fraction': p['ClpP'], + 'Lon_fraction': p['Lon'], + 'HslV_fraction': p['HslV'], + 'Unexplained_fraction': p['Unexplained'] + } + for p in raw_data.priority_protease_assignments_1 + } + deg_rate = np.zeros(len(all_proteins)) + deg_rate_source_id = np.full(len(all_proteins), None) + protease_assignment = np.full(len(all_proteins), None) + ClpP_contribution = np.full(len(all_proteins), None) + Lon_contribution = np.full(len(all_proteins), None) + HslV_contribution = np.full(len(all_proteins), None) + Unexplained_contribution = np.full(len(all_proteins), None) + for i, protein in enumerate(all_proteins): # Use measured degradation rates if available if protein['id'] in measured_deg_rates: deg_rate[i] = measured_deg_rates[protein['id']] + deg_rate_source_id[i] = 'CL_measured_deg_rates_2020' + if protein['id'] in protease_dict.keys(): + protease_assignment[i] = protease_dict[protein['id']]['protease_assignment'] + ClpP_contribution[i] = protease_dict[protein['id']]['ClpP_fraction'] + Lon_contribution[i] = protease_dict[protein['id']]['Lon_fraction'] + HslV_contribution[i] = protease_dict[protein['id']]['HslV_fraction'] + Unexplained_contribution[i] = protease_dict[protein['id']]['Unexplained_fraction'] elif protein['id'] in pulsed_silac_deg_rates: deg_rate[i] = pulsed_silac_deg_rates[protein['id']] + deg_rate_source_id[i] = 'Nagar_et_al_2021' + if protein['id'] in protease_dict.keys(): + protease_assignment[i] = protease_dict[protein['id']]['protease_assignment'] + ClpP_contribution[i] = protease_dict[protein['id']]['ClpP_fraction'] + Lon_contribution[i] = protease_dict[protein['id']]['Lon_fraction'] + HslV_contribution[i] = protease_dict[protein['id']]['HslV_fraction'] + Unexplained_contribution[i] = protease_dict[protein['id']]['Unexplained_fraction'] # If measured rates are unavailable, use N-end rule else: seq = protein['seq'] @@ -145,17 +177,35 @@ def _build_monomer_data(self, raw_data, sim_data): # is cleaved n_end_residue = seq[protein['cleavage_of_initial_methionine']] deg_rate[i] = n_end_rule_deg_rates[n_end_residue] + deg_rate_source_id[i] = 'N_end_rule' + if protein['id'] in protease_dict.keys(): + protease_assignment[i] = protease_dict[protein['id']]['protease_assignment'] + ClpP_contribution[i] = protease_dict[protein['id']]['ClpP_fraction'] + Lon_contribution[i] = protease_dict[protein['id']]['Lon_fraction'] + HslV_contribution[i] = protease_dict[protein['id']]['HslV_fraction'] + Unexplained_contribution[i] = protease_dict[protein['id']]['Unexplained_fraction'] max_protein_id_length = max( len(protein_id) for protein_id in protein_ids_with_compartments) max_cistron_id_length = max( len(cistron_id) for cistron_id in cistron_ids) + max_deg_source_id_length = max( + len(source_id) for source_id in deg_rate_source_id) + max_protease_length = max( + len(protease_id) for protease_id in protease_assignment if protease_id is not None) + monomer_data = np.zeros( n_proteins, dtype = [ ('id', 'U{}'.format(max_protein_id_length)), ('cistron_id', 'U{}'.format(max_cistron_id_length)), ('deg_rate', 'f8'), + ('deg_rate_source', 'U{}'.format(max_deg_source_id_length)), + ('protease_assignment', 'U{}'.format(max_protease_length)), + ('ClpP_fraction', 'f8'), + ('Lon_fraction', 'f8'), + ('HslV_fraction', 'f8'), + ('Unexplained_fraction', 'f8'), ('length', 'i8'), ('aa_counts', '{}i8'.format(n_amino_acids)), ('mw', 'f8'), @@ -165,6 +215,12 @@ def _build_monomer_data(self, raw_data, sim_data): monomer_data['id'] = protein_ids_with_compartments monomer_data['cistron_id'] = cistron_ids monomer_data['deg_rate'] = deg_rate + monomer_data['deg_rate_source'] = deg_rate_source_id + monomer_data['protease_assignment'] = protease_assignment + monomer_data['ClpP_fraction'] = ClpP_contribution + monomer_data['Lon_fraction'] = Lon_contribution + monomer_data['HslV_fraction'] = HslV_contribution + monomer_data['Unexplained_fraction'] = Unexplained_contribution monomer_data['length'] = lengths monomer_data['aa_counts'] = aa_counts monomer_data['mw'] = mws @@ -173,6 +229,12 @@ def _build_monomer_data(self, raw_data, sim_data): 'id': None, 'cistron_id': None, 'deg_rate': deg_rate_units, + 'deg_rate_source': None, + 'protease_assignment': None, + 'ClpP_fraction': None, + 'Lon_fraction': None, + 'HslV_fraction': None, + 'Unexplained_fraction': None, 'length': units.aa, 'aa_counts': units.aa, 'mw': units.g / units.mol, diff --git a/reconstruction/ecoli/flat/priority_protease_assignments_1.tsv b/reconstruction/ecoli/flat/priority_protease_assignments_1.tsv new file mode 100644 index 000000000..4c99df0ed --- /dev/null +++ b/reconstruction/ecoli/flat/priority_protease_assignments_1.tsv @@ -0,0 +1,84 @@ +# Generated by /Users/noravivancogonzalez/code/wcEcoli/reconstruction/ecoli/scripts/protein_half_lives/convert_to_flat_Clim_protease_assignments.py on Fri Jan 24 13:17:32 2025 +"id" "protease_assignment" "ClpP" "Lon" "HslV" "Unexplained" +"EG10156-MONOMER" "ClpP only" 1.0 0.0 0.0 0.0 +"MDLB-MONOMER" "ClpP only" 1.0 0.0 0.0 0.0 +"EG10927-MONOMER" "ClpP only" 1.0 0.0 0.0 0.0 +"EG10618-MONOMER" "ClpP only" 1.0 0.0 0.0 0.0 +"AERGLYC3PDEHYDROG-MONOMER" "ClpP only" 0.9937311622959765 0.0 0.0 0.0 +"EG10823-MONOMER" "ClpP only" 0.9659422539717736 0.0 0.0 0.0 +"EG10690-MONOMER" "ClpP only" 0.9657849990624624 0.0 0.0 0.0 +"EG10159-MONOMER" "ClpP only" 0.9332863287710182 0.0 0.0 0.002226409812859236 +"EG11415-MONOMER" "ClpP only" 0.922363377760449 0.0005893978041944058 0.07704722443535654 0.0 +"DIOHBUTANONEPSYN-MONOMER" "ClpP only" 0.9177149685544131 0.0 0.0 0.03080823441384567 +"G6894-MONOMER" "ClpP only" 0.9107693760253536 0.04671986243041855 0.0 0.04251076154422791 +"EG11783-MONOMER" "ClpP only" 0.9042073852846886 0.0 0.0 0.04529315476535885 +"PUTA-MONOMER" "ClpP only" 0.8871341718156508 0.1044186480620873 0.0 0.00844718012226184 +"G6523-MONOMER" "ClpP only" 0.8812505962987498 0.003450591289189 0.08948491166159973 0.01740777605986891 +"L-LACTDEHYDROGFMN-MONOMER" "ClpP only" 0.8492170014759718 0.0 0.1507829985240282 0.0 +"EG10236-MONOMER" "ClpP only" 0.8486929501138306 0.1439037976463487 0.007403252239820689 0.0 +"G7596-MONOMER" "ClpP only" 0.8447298689611913 0.0 0.0 0.0436535558551446 +"ACYLCOADEHYDROG-MONOMER" "ClpP only" 0.8392680900877766 0.1572280914339504 0.003503818478273008 0.0 +"EG10230-MONOMER" "ClpP only" 0.8377173065892295 0.1622826934107704 0.0 0.0 +"EG10651-MONOMER" "ClpP only" 0.8170292992029067 0.0 0.1752663277407187 0.007704373056374535 +"EG10347-MONOMER" "ClpP only" 0.8078909105784067 0.1045677715676293 0.04688910375167573 0.04065221410228815 +"ZNUC-MONOMER" "ClpP only" 0.7914905910036573 0.0 0.1582851092504883 0.05022429974585437 +"EG10241-MONOMER" "ClpP only" 0.7870329116821451 0.2129670883178549 0.0 0.0 +"RPOC-MONOMER" "ClpP only" 0.7846968320571005 0.1823434125346945 0.0329597554082049 0.0 +"YHES-MONOMER" "ClpP only" 0.7805642482198102 0.1353550155769331 0.0 0.0 +"PD00214" "ClpP only" 0.7795353683613133 0.03610032375508478 0.1843643078836019 0.0 +"RPOS-MONOMER" "ClpP only" 0.7357291473762771 0.226135438282211 0.0 0.03813541434151197 +"RPOB-MONOMER" "ClpP only" 0.7309776222084695 0.2690223777915304 0.0 0.0 +"G6569-MONOMER" "ClpP only" 0.7281643947294957 0.1817684516842942 0.0 0.04850711108196488 +"PD03270" "ClpP only" 0.7227684806789549 0.2447732088266698 0.0 0.03245831049437534 +"EG11064-MONOMER" "ClpP only" 0.7123202686805574 0.272512936642312 0.01516679467713069 0.0 +"EG11734-MONOMER" "ClpP only" 0.711802083069992 0.0 0.0 0.004316674368707835 +"G7656-MONOMER" "ClpP only" 0.7082784410977435 0.03479064480700757 0.0 0.03302909287877121 +"CYSD-MONOMER" "ClpP only" 0.688662917546952 0.1115797554856638 0.1927477493909631 0.007009577576421178 +"EG10625-MONOMER" "ClpP only" 0.6841540907534327 0.1464216490037121 0.0 0.02721687308469743 +"EG10686-MONOMER" "ClpP only" 0.6700160697874652 0.2007026163492992 0.1060950222390016 0.02318629162423417 +"EG10900-MONOMER" "ClpP only" 0.6682086471822161 0.258233226826272 0.0735581259915118 0.0 +"EG11440-MONOMER" "ClpP only" 0.6672590150979288 0.2766356738246408 0.03814846359807092 0.01795684747935953 +"G7715-MONOMER" "ClpP only" 0.6286097880340189 0.0 0.1028724158166868 0.0 +"G7214-MONOMER" "ClpP only" 0.6243641277512176 0.2361253543139149 0.09549104127295707 0.04401947666191024 +"G6890-MONOMER" "Lon only" 0.2261716322246332 0.7239754464173729 0.0 0.04985292135799384 +"PD03938" "Lon only" 0.2740979583819161 0.6914877822544573 0.0 0.03441425936362659 +"G6737-MONOMER" "Lon only" 0.3092136466178453 0.6681864447006194 0.0 0.0225999086815353 +"RPOD-MONOMER" "Lon only" 0.2230379815549332 0.6600080251012231 0.1148842845538507 0.002069708789992873 +"PD02936" "Lon only" 0.3539811062862402 0.6460188937137596 0.0 0.0 +"RED-THIOREDOXIN2-MONOMER" "Lon only" 0.2375501665183758 0.6324608004447548 0.04892339319781415 0.08106563983905513 +"UHPA-MONOMER" "HslV only" 0.08064451280400928 0.1069606013447386 0.6398231926635632 0.1725716931876888 +"EG12402-MONOMER" "Additive: ClpP, Lon, HslV" 0.2320819185431029 0.3377665653021024 0.4301515161547947 0.0 +"G6472-MONOMER" "Additive: ClpP, Lon, HslV" 0.3856805416024634 0.2210715452608285 0.3932479131367082 0.0 +"EG11830-MONOMER" "Additive: ClpP, Lon, HslV" 0.4305894409043098 0.2988507994073917 0.2705597596882984 0.0 +"EG11249-MONOMER" "Additive: ClpP, Lon, HslV" 0.3333333333333333 0.3333333333333333 0.3333333333333333 0.0 +"EG12352-MONOMER" "Additive: ClpP, Lon, HslV" 0.186928038870193 0.5917709288562806 0.2213010322735264 0.0 +"EG11534-MONOMER" "Additive: ClpP, Lon" 0.5296343929777583 0.4703656070222417 0.0 0.0 +"EG10426-MONOMER" "Additive: ClpP, Lon, HslV" 0.4077213064750552 0.3705897358338249 0.2216889576911199 0.0 +"EG10534-MONOMER" "Additive: ClpP, Lon, HslV" 0.5211809250433617 0.2679823803062132 0.2108366946504252 0.0 +"G7532-MONOMER" "Additive: ClpP, Lon, HslV" 0.3314420580080509 0.3408207405900246 0.3277372014019244 0.0 +"EG10844-MONOMER" "Additive: ClpP, Lon, HslV" 0.4641646680853521 0.1982252055938427 0.3376101263208054 0.0 +"EG11100-MONOMER" "Additive: ClpP, Lon, HslV" 0.3834618395223929 0.4187882865673612 0.1977498739102457 0.0 +"MONOMER0-741" "Additive: ClpP, Lon, HslV" 0.2255737593700952 0.4391041354051837 0.3353221052247211 0.0 +"G7395-MONOMER" "Additive: ClpP, Lon, HslV" 0.3064778311322938 0.3488833520416983 0.3446388168260079 0.0 +"EG50003-MONOMER" "Additive: ClpP, Lon, HslV" 0.3384821868859559 0.3434217240948128 0.3180960890192313 0.0 +"EG11874-MONOMER" "Additive: ClpP, Lon, HslV" 0.42127961222654 0.3246575119991103 0.2540628757743498 0.0 +"EG12866-MONOMER" "Additive: ClpP, Lon, HslV" 0.2909542232976917 0.5723306340023894 0.1367151426999189 0.0 +"GLND-MONOMER" "Additive: ClpP, Lon, HslV" 0.5316359967808061 0.3805469956499178 0.08247835860958329 0.005338648959692819 +"EG10776-MONOMER" "Additive: ClpP, Lon, HslV" 0.5461372414440729 0.3357934868701596 0.1051035201725958 0.01296575151317166 +"G7326-MONOMER" "Additive: ClpP, Lon, HslV" 0.5510491078882992 0.2270753205452031 0.2055586875236938 0.01631688404280404 +"EG10687-MONOMER" "Additive: ClpP, Lon, HslV" 0.4557810755518802 0.1734706554945651 0.3511420348708367 0.01960623408271815 +"EG12308-MONOMER" "Additive: ClpP, Lon, HslV" 0.5604658979126905 0.3948864591542473 0.01806362826155732 0.02658401467150481 +"G7057-MONOMER" "Additive: ClpP, Lon, HslV" 0.2432732282010168 0.2817675496854042 0.4475986837846367 0.02736053832894218 +"EG11187-MONOMER" "Additive: ClpP, Lon, HslV" 0.5364887775582662 0.1841433392989469 0.2488544653771275 0.03051341776565957 +"EG12332-MONOMER" "Additive: ClpP, Lon, HslV" 0.480731838615018 0.3768468634634417 0.111832163755529 0.03058913416601136 +"EG10598-MONOMER" "Additive: ClpP, Lon, HslV" 0.3091086079177796 0.06515504837649867 0.5883054330852214 0.03743091062050029 +"EG11410-MONOMER" "Additive: ClpP, Lon, HslV" 0.5487222182238243 0.2477210061293566 0.1650716058273788 0.03848516981944023 +"EG11784-MONOMER" "Additive: ClpP, Lon" 0.5612420121591221 0.3933877171339142 0.0 0.04537027070696378 +"EG12386-MONOMER" "Additive: ClpP, Lon, HslV" 0.3532386803630403 0.5281476337180628 0.06633685728686786 0.05227682863202891 +"EG10975-MONOMER" "Additive: ClpP, Lon, HslV" 0.4234175529995474 0.3247293027557136 0.1964805599793681 0.05537258426537083 +"THI-P-KIN-MONOMER" "Additive: ClpP, Lon, HslV" 0.2322266738247203 0.4390768368112499 0.2713839554040631 0.05731253395996667 +"EG12690-MONOMER" "Additive: ClpP, Lon, HslV" 0.2779719382201942 0.277364353473086 0.378556090604755 0.066107617701965 +"PD03831" "Additive: ClpP, Lon" 0.5199578915782244 0.4031223236631027 0.0 0.07691978475867288 +"AROK-MONOMER" "Additive: ClpP, Lon, HslV" 0.3194359283352617 0.4295110879755015 0.1671518602623916 0.0839011234268451 +"G7263-MONOMER" "Additive: ClpP, Lon, HslV" 0.1835257388193141 0.2060544659018618 0.5209322548615479 0.0894875404172763 +"EG12289-MONOMER" "Additive: ClpP, Lon, HslV" 0.3525269917056569 0.3211387340856484 0.2269161276492065 0.09941814655948829 diff --git a/reconstruction/ecoli/knowledge_base_raw.py b/reconstruction/ecoli/knowledge_base_raw.py index 3468ee18b..c8f53d2f0 100644 --- a/reconstruction/ecoli/knowledge_base_raw.py +++ b/reconstruction/ecoli/knowledge_base_raw.py @@ -57,6 +57,7 @@ "ppgpp_regulation.tsv", "ppgpp_regulation_added.tsv", "ppgpp_regulation_removed.tsv", + "priority_protease_assignments_1.tsv", "protein_half_lives_measured.tsv", "protein_half_lives_n_end_rule.tsv", "protein_half_lives_pulsed_silac.tsv",