-
Notifications
You must be signed in to change notification settings - Fork 10
monomer data updated to include protease contribution to degradation #1463
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
8755f8d
eb4cbbe
cea8eb9
3d3311c
483d634
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -129,13 +129,45 @@ def _build_monomer_data(self, raw_data, sim_data): | |
| for p in raw_data.protein_half_lives_pulsed_silac | ||
| } | ||
|
|
||
| # Get protease assignments and degradation contributions (in fraction) from Gupta et al. | ||
| protease_dict = { | ||
| p['id']: {'protease_assignment': p['protease_assignment'], | ||
| 'ClpP_fraction': p['ClpP'], | ||
| 'Lon_fraction': p['Lon'], | ||
| 'HslV_fraction': p['HslV'], | ||
| 'Unexplained_fraction': p['Unexplained'] | ||
| } | ||
| for p in raw_data.priority_protease_assignments_1 | ||
| } | ||
|
|
||
| deg_rate = np.zeros(len(all_proteins)) | ||
| deg_rate_source_id = np.full(len(all_proteins), None) | ||
| protease_assignment = np.full(len(all_proteins), None) | ||
| ClpP_contribution = np.full(len(all_proteins), None) | ||
| Lon_contribution = np.full(len(all_proteins), None) | ||
| HslV_contribution = np.full(len(all_proteins), None) | ||
| Unexplained_contribution = np.full(len(all_proteins), None) | ||
|
|
||
| for i, protein in enumerate(all_proteins): | ||
| # Use measured degradation rates if available | ||
| if protein['id'] in measured_deg_rates: | ||
| deg_rate[i] = measured_deg_rates[protein['id']] | ||
| deg_rate_source_id[i] = 'CL_measured_deg_rates_2020' | ||
| if protein['id'] in protease_dict.keys(): | ||
| protease_assignment[i] = protease_dict[protein['id']]['protease_assignment'] | ||
| ClpP_contribution[i] = protease_dict[protein['id']]['ClpP_fraction'] | ||
| Lon_contribution[i] = protease_dict[protein['id']]['Lon_fraction'] | ||
| HslV_contribution[i] = protease_dict[protein['id']]['HslV_fraction'] | ||
| Unexplained_contribution[i] = protease_dict[protein['id']]['Unexplained_fraction'] | ||
|
Comment on lines
+156
to
+161
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since it seems like you are doing the same thing for all proteins (not just the ones with measured degradation rates), repeating the same code in lines 156-161, 165-170, and 181-186 is a bit redundant. I would instead move these 5 lines outside of the measured/pulsed/N-end branching, i.e. unindent lines 181-186 and add a new line before 181 |
||
| elif protein['id'] in pulsed_silac_deg_rates: | ||
| deg_rate[i] = pulsed_silac_deg_rates[protein['id']] | ||
| deg_rate_source_id[i] = 'Nagar_et_al_2021' | ||
| if protein['id'] in protease_dict.keys(): | ||
| protease_assignment[i] = protease_dict[protein['id']]['protease_assignment'] | ||
| ClpP_contribution[i] = protease_dict[protein['id']]['ClpP_fraction'] | ||
| Lon_contribution[i] = protease_dict[protein['id']]['Lon_fraction'] | ||
| HslV_contribution[i] = protease_dict[protein['id']]['HslV_fraction'] | ||
| Unexplained_contribution[i] = protease_dict[protein['id']]['Unexplained_fraction'] | ||
| # If measured rates are unavailable, use N-end rule | ||
| else: | ||
| seq = protein['seq'] | ||
|
|
@@ -145,17 +177,35 @@ def _build_monomer_data(self, raw_data, sim_data): | |
| # is cleaved | ||
| n_end_residue = seq[protein['cleavage_of_initial_methionine']] | ||
| deg_rate[i] = n_end_rule_deg_rates[n_end_residue] | ||
| deg_rate_source_id[i] = 'N_end_rule' | ||
| if protein['id'] in protease_dict.keys(): | ||
| protease_assignment[i] = protease_dict[protein['id']]['protease_assignment'] | ||
| ClpP_contribution[i] = protease_dict[protein['id']]['ClpP_fraction'] | ||
| Lon_contribution[i] = protease_dict[protein['id']]['Lon_fraction'] | ||
| HslV_contribution[i] = protease_dict[protein['id']]['HslV_fraction'] | ||
| Unexplained_contribution[i] = protease_dict[protein['id']]['Unexplained_fraction'] | ||
|
|
||
| max_protein_id_length = max( | ||
| len(protein_id) for protein_id in protein_ids_with_compartments) | ||
| max_cistron_id_length = max( | ||
| len(cistron_id) for cistron_id in cistron_ids) | ||
| max_deg_source_id_length = max( | ||
| len(source_id) for source_id in deg_rate_source_id) | ||
| max_protease_length = max( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd rename this as |
||
| len(protease_id) for protease_id in protease_assignment if protease_id is not None) | ||
|
|
||
| monomer_data = np.zeros( | ||
| n_proteins, | ||
| dtype = [ | ||
| ('id', 'U{}'.format(max_protein_id_length)), | ||
| ('cistron_id', 'U{}'.format(max_cistron_id_length)), | ||
| ('deg_rate', 'f8'), | ||
| ('deg_rate_source', 'U{}'.format(max_deg_source_id_length)), | ||
| ('protease_assignment', 'U{}'.format(max_protease_length)), | ||
| ('ClpP_fraction', 'f8'), | ||
| ('Lon_fraction', 'f8'), | ||
| ('HslV_fraction', 'f8'), | ||
| ('Unexplained_fraction', 'f8'), | ||
| ('length', 'i8'), | ||
| ('aa_counts', '{}i8'.format(n_amino_acids)), | ||
| ('mw', 'f8'), | ||
|
|
@@ -165,6 +215,12 @@ def _build_monomer_data(self, raw_data, sim_data): | |
| monomer_data['id'] = protein_ids_with_compartments | ||
| monomer_data['cistron_id'] = cistron_ids | ||
| monomer_data['deg_rate'] = deg_rate | ||
| monomer_data['deg_rate_source'] = deg_rate_source_id | ||
| monomer_data['protease_assignment'] = protease_assignment | ||
| monomer_data['ClpP_fraction'] = ClpP_contribution | ||
| monomer_data['Lon_fraction'] = Lon_contribution | ||
| monomer_data['HslV_fraction'] = HslV_contribution | ||
| monomer_data['Unexplained_fraction'] = Unexplained_contribution | ||
| monomer_data['length'] = lengths | ||
| monomer_data['aa_counts'] = aa_counts | ||
| monomer_data['mw'] = mws | ||
|
|
@@ -173,6 +229,12 @@ def _build_monomer_data(self, raw_data, sim_data): | |
| 'id': None, | ||
| 'cistron_id': None, | ||
| 'deg_rate': deg_rate_units, | ||
| 'deg_rate_source': None, | ||
| 'protease_assignment': None, | ||
| 'ClpP_fraction': None, | ||
| 'Lon_fraction': None, | ||
| 'HslV_fraction': None, | ||
| 'Unexplained_fraction': None, | ||
| 'length': units.aa, | ||
| 'aa_counts': units.aa, | ||
| 'mw': units.g / units.mol, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,3 @@ | ||
| # Adjustments to get protein expression for certain enzymes required for metabolism | ||
| "name" "value" "units" "_source" "_comments" | ||
| "ADENYLATECYC-MONOMER[c]" "2.0/600" "fit_sim_data_1.py" "CyaA, adenylate cyclase; convert from 2 min to 10 hr half life to get expression in acetate condition (required for cAMP)" | ||
| "SPOT-MONOMER[c]" "2.0/600" "fit_sim_data_1.py" "SpoT, ppGpp phosphatase; convert from 2 min to 10 hr half life to better match expected protein counts" | ||
| "EG12298-MONOMER[p]" 0.1 "fit_sim_data_1.py" "yibQ, Predicted polysaccharide deacetylase; This protein is fit for the anaerobic condition" | ||
| "EG12298-MONOMER[c]" 0.1 "fit_sim_data_1.py" "yibQ, Predicted polysaccharide deacetylase; This protein is fit for the anaerobic condition" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,84 @@ | ||
| # Generated by /Users/noravivancogonzalez/code/wcEcoli/reconstruction/ecoli/scripts/protein_half_lives/convert_to_flat_Clim_protease_assignments.py on Fri Jan 24 13:17:32 2025 | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should this script also be added to the pull request? |
||
| "id" "protease_assignment" "ClpP" "Lon" "HslV" "Unexplained" | ||
| "EG10156-MONOMER" "ClpP only" 1.0 0.0 0.0 0.0 | ||
| "MDLB-MONOMER" "ClpP only" 1.0 0.0 0.0 0.0 | ||
| "EG10927-MONOMER" "ClpP only" 1.0 0.0 0.0 0.0 | ||
| "EG10618-MONOMER" "ClpP only" 1.0 0.0 0.0 0.0 | ||
| "AERGLYC3PDEHYDROG-MONOMER" "ClpP only" 0.9937311622959765 0.0 0.0 0.0 | ||
| "EG10823-MONOMER" "ClpP only" 0.9659422539717736 0.0 0.0 0.0 | ||
| "EG10690-MONOMER" "ClpP only" 0.9657849990624624 0.0 0.0 0.0 | ||
| "EG10159-MONOMER" "ClpP only" 0.9332863287710182 0.0 0.0 0.002226409812859236 | ||
| "EG11415-MONOMER" "ClpP only" 0.922363377760449 0.0005893978041944058 0.07704722443535654 0.0 | ||
| "DIOHBUTANONEPSYN-MONOMER" "ClpP only" 0.9177149685544131 0.0 0.0 0.03080823441384567 | ||
| "G6894-MONOMER" "ClpP only" 0.9107693760253536 0.04671986243041855 0.0 0.04251076154422791 | ||
| "EG11783-MONOMER" "ClpP only" 0.9042073852846886 0.0 0.0 0.04529315476535885 | ||
| "PUTA-MONOMER" "ClpP only" 0.8871341718156508 0.1044186480620873 0.0 0.00844718012226184 | ||
| "G6523-MONOMER" "ClpP only" 0.8812505962987498 0.003450591289189 0.08948491166159973 0.01740777605986891 | ||
| "L-LACTDEHYDROGFMN-MONOMER" "ClpP only" 0.8492170014759718 0.0 0.1507829985240282 0.0 | ||
| "EG10236-MONOMER" "ClpP only" 0.8486929501138306 0.1439037976463487 0.007403252239820689 0.0 | ||
| "G7596-MONOMER" "ClpP only" 0.8447298689611913 0.0 0.0 0.0436535558551446 | ||
| "ACYLCOADEHYDROG-MONOMER" "ClpP only" 0.8392680900877766 0.1572280914339504 0.003503818478273008 0.0 | ||
| "EG10230-MONOMER" "ClpP only" 0.8377173065892295 0.1622826934107704 0.0 0.0 | ||
| "EG10651-MONOMER" "ClpP only" 0.8170292992029067 0.0 0.1752663277407187 0.007704373056374535 | ||
| "EG10347-MONOMER" "ClpP only" 0.8078909105784067 0.1045677715676293 0.04688910375167573 0.04065221410228815 | ||
| "ZNUC-MONOMER" "ClpP only" 0.7914905910036573 0.0 0.1582851092504883 0.05022429974585437 | ||
| "EG10241-MONOMER" "ClpP only" 0.7870329116821451 0.2129670883178549 0.0 0.0 | ||
| "RPOC-MONOMER" "ClpP only" 0.7846968320571005 0.1823434125346945 0.0329597554082049 0.0 | ||
| "YHES-MONOMER" "ClpP only" 0.7805642482198102 0.1353550155769331 0.0 0.0 | ||
| "PD00214" "ClpP only" 0.7795353683613133 0.03610032375508478 0.1843643078836019 0.0 | ||
| "RPOS-MONOMER" "ClpP only" 0.7357291473762771 0.226135438282211 0.0 0.03813541434151197 | ||
| "RPOB-MONOMER" "ClpP only" 0.7309776222084695 0.2690223777915304 0.0 0.0 | ||
| "G6569-MONOMER" "ClpP only" 0.7281643947294957 0.1817684516842942 0.0 0.04850711108196488 | ||
| "PD03270" "ClpP only" 0.7227684806789549 0.2447732088266698 0.0 0.03245831049437534 | ||
| "EG11064-MONOMER" "ClpP only" 0.7123202686805574 0.272512936642312 0.01516679467713069 0.0 | ||
| "EG11734-MONOMER" "ClpP only" 0.711802083069992 0.0 0.0 0.004316674368707835 | ||
| "G7656-MONOMER" "ClpP only" 0.7082784410977435 0.03479064480700757 0.0 0.03302909287877121 | ||
| "CYSD-MONOMER" "ClpP only" 0.688662917546952 0.1115797554856638 0.1927477493909631 0.007009577576421178 | ||
| "EG10625-MONOMER" "ClpP only" 0.6841540907534327 0.1464216490037121 0.0 0.02721687308469743 | ||
| "EG10686-MONOMER" "ClpP only" 0.6700160697874652 0.2007026163492992 0.1060950222390016 0.02318629162423417 | ||
| "EG10900-MONOMER" "ClpP only" 0.6682086471822161 0.258233226826272 0.0735581259915118 0.0 | ||
| "EG11440-MONOMER" "ClpP only" 0.6672590150979288 0.2766356738246408 0.03814846359807092 0.01795684747935953 | ||
| "G7715-MONOMER" "ClpP only" 0.6286097880340189 0.0 0.1028724158166868 0.0 | ||
| "G7214-MONOMER" "ClpP only" 0.6243641277512176 0.2361253543139149 0.09549104127295707 0.04401947666191024 | ||
| "G6890-MONOMER" "Lon only" 0.2261716322246332 0.7239754464173729 0.0 0.04985292135799384 | ||
| "PD03938" "Lon only" 0.2740979583819161 0.6914877822544573 0.0 0.03441425936362659 | ||
| "G6737-MONOMER" "Lon only" 0.3092136466178453 0.6681864447006194 0.0 0.0225999086815353 | ||
| "RPOD-MONOMER" "Lon only" 0.2230379815549332 0.6600080251012231 0.1148842845538507 0.002069708789992873 | ||
| "PD02936" "Lon only" 0.3539811062862402 0.6460188937137596 0.0 0.0 | ||
| "RED-THIOREDOXIN2-MONOMER" "Lon only" 0.2375501665183758 0.6324608004447548 0.04892339319781415 0.08106563983905513 | ||
| "UHPA-MONOMER" "HslV only" 0.08064451280400928 0.1069606013447386 0.6398231926635632 0.1725716931876888 | ||
| "EG12402-MONOMER" "Additive: ClpP, Lon, HslV" 0.2320819185431029 0.3377665653021024 0.4301515161547947 0.0 | ||
| "G6472-MONOMER" "Additive: ClpP, Lon, HslV" 0.3856805416024634 0.2210715452608285 0.3932479131367082 0.0 | ||
| "EG11830-MONOMER" "Additive: ClpP, Lon, HslV" 0.4305894409043098 0.2988507994073917 0.2705597596882984 0.0 | ||
| "EG11249-MONOMER" "Additive: ClpP, Lon, HslV" 0.3333333333333333 0.3333333333333333 0.3333333333333333 0.0 | ||
| "EG12352-MONOMER" "Additive: ClpP, Lon, HslV" 0.186928038870193 0.5917709288562806 0.2213010322735264 0.0 | ||
| "EG11534-MONOMER" "Additive: ClpP, Lon" 0.5296343929777583 0.4703656070222417 0.0 0.0 | ||
| "EG10426-MONOMER" "Additive: ClpP, Lon, HslV" 0.4077213064750552 0.3705897358338249 0.2216889576911199 0.0 | ||
| "EG10534-MONOMER" "Additive: ClpP, Lon, HslV" 0.5211809250433617 0.2679823803062132 0.2108366946504252 0.0 | ||
| "G7532-MONOMER" "Additive: ClpP, Lon, HslV" 0.3314420580080509 0.3408207405900246 0.3277372014019244 0.0 | ||
| "EG10844-MONOMER" "Additive: ClpP, Lon, HslV" 0.4641646680853521 0.1982252055938427 0.3376101263208054 0.0 | ||
| "EG11100-MONOMER" "Additive: ClpP, Lon, HslV" 0.3834618395223929 0.4187882865673612 0.1977498739102457 0.0 | ||
| "MONOMER0-741" "Additive: ClpP, Lon, HslV" 0.2255737593700952 0.4391041354051837 0.3353221052247211 0.0 | ||
| "G7395-MONOMER" "Additive: ClpP, Lon, HslV" 0.3064778311322938 0.3488833520416983 0.3446388168260079 0.0 | ||
| "EG50003-MONOMER" "Additive: ClpP, Lon, HslV" 0.3384821868859559 0.3434217240948128 0.3180960890192313 0.0 | ||
| "EG11874-MONOMER" "Additive: ClpP, Lon, HslV" 0.42127961222654 0.3246575119991103 0.2540628757743498 0.0 | ||
| "EG12866-MONOMER" "Additive: ClpP, Lon, HslV" 0.2909542232976917 0.5723306340023894 0.1367151426999189 0.0 | ||
| "GLND-MONOMER" "Additive: ClpP, Lon, HslV" 0.5316359967808061 0.3805469956499178 0.08247835860958329 0.005338648959692819 | ||
| "EG10776-MONOMER" "Additive: ClpP, Lon, HslV" 0.5461372414440729 0.3357934868701596 0.1051035201725958 0.01296575151317166 | ||
| "G7326-MONOMER" "Additive: ClpP, Lon, HslV" 0.5510491078882992 0.2270753205452031 0.2055586875236938 0.01631688404280404 | ||
| "EG10687-MONOMER" "Additive: ClpP, Lon, HslV" 0.4557810755518802 0.1734706554945651 0.3511420348708367 0.01960623408271815 | ||
| "EG12308-MONOMER" "Additive: ClpP, Lon, HslV" 0.5604658979126905 0.3948864591542473 0.01806362826155732 0.02658401467150481 | ||
| "G7057-MONOMER" "Additive: ClpP, Lon, HslV" 0.2432732282010168 0.2817675496854042 0.4475986837846367 0.02736053832894218 | ||
| "EG11187-MONOMER" "Additive: ClpP, Lon, HslV" 0.5364887775582662 0.1841433392989469 0.2488544653771275 0.03051341776565957 | ||
| "EG12332-MONOMER" "Additive: ClpP, Lon, HslV" 0.480731838615018 0.3768468634634417 0.111832163755529 0.03058913416601136 | ||
| "EG10598-MONOMER" "Additive: ClpP, Lon, HslV" 0.3091086079177796 0.06515504837649867 0.5883054330852214 0.03743091062050029 | ||
| "EG11410-MONOMER" "Additive: ClpP, Lon, HslV" 0.5487222182238243 0.2477210061293566 0.1650716058273788 0.03848516981944023 | ||
| "EG11784-MONOMER" "Additive: ClpP, Lon" 0.5612420121591221 0.3933877171339142 0.0 0.04537027070696378 | ||
| "EG12386-MONOMER" "Additive: ClpP, Lon, HslV" 0.3532386803630403 0.5281476337180628 0.06633685728686786 0.05227682863202891 | ||
| "EG10975-MONOMER" "Additive: ClpP, Lon, HslV" 0.4234175529995474 0.3247293027557136 0.1964805599793681 0.05537258426537083 | ||
| "THI-P-KIN-MONOMER" "Additive: ClpP, Lon, HslV" 0.2322266738247203 0.4390768368112499 0.2713839554040631 0.05731253395996667 | ||
| "EG12690-MONOMER" "Additive: ClpP, Lon, HslV" 0.2779719382201942 0.277364353473086 0.378556090604755 0.066107617701965 | ||
| "PD03831" "Additive: ClpP, Lon" 0.5199578915782244 0.4031223236631027 0.0 0.07691978475867288 | ||
| "AROK-MONOMER" "Additive: ClpP, Lon, HslV" 0.3194359283352617 0.4295110879755015 0.1671518602623916 0.0839011234268451 | ||
| "G7263-MONOMER" "Additive: ClpP, Lon, HslV" 0.1835257388193141 0.2060544659018618 0.5209322548615479 0.0894875404172763 | ||
| "EG12289-MONOMER" "Additive: ClpP, Lon, HslV" 0.3525269917056569 0.3211387340856484 0.2269161276492065 0.09941814655948829 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -57,6 +57,7 @@ | |
| "ppgpp_regulation.tsv", | ||
| "ppgpp_regulation_added.tsv", | ||
| "ppgpp_regulation_removed.tsv", | ||
| "priority_protease_assignments_1.tsv", | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Minor detail - could you update the description for the PR to include the _1 in the filename? That way, people will search for the correct name if they come across this PR later |
||
| "protein_half_lives_measured.tsv", | ||
| "protein_half_lives_n_end_rule.tsv", | ||
| "protein_half_lives_pulsed_silac.tsv", | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Style suggestion: I understand this follows the usual capitalization convention for ClpP and the other proteases, but in Python, names that start with a capital letter are usually reserved for classes, and GitHub is coloring them as such. Could we change these instead to something like
contribution_ClpP (if we want to keep the ClpP capitalization) to avoid confusion? The refactor->rename feature on PyCharm should help update all occurrences.