Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions reconstruction/ecoli/dataclasses/process/translation.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,13 +129,45 @@ def _build_monomer_data(self, raw_data, sim_data):
for p in raw_data.protein_half_lives_pulsed_silac
}

# Get protease assignments and degradation contributions (in fraction) from Gupta et al.
protease_dict = {
p['id']: {'protease_assignment': p['protease_assignment'],
'ClpP_fraction': p['ClpP'],
'Lon_fraction': p['Lon'],
'HslV_fraction': p['HslV'],
'Unexplained_fraction': p['Unexplained']
}
for p in raw_data.priority_protease_assignments_1
}

deg_rate = np.zeros(len(all_proteins))
deg_rate_source_id = np.full(len(all_proteins), None)
protease_assignment = np.full(len(all_proteins), None)
ClpP_contribution = np.full(len(all_proteins), None)
Lon_contribution = np.full(len(all_proteins), None)
HslV_contribution = np.full(len(all_proteins), None)
Unexplained_contribution = np.full(len(all_proteins), None)
Comment on lines +146 to +149
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Style suggestion: I understand that the capital letters follow the usual naming convention for ClpP and the other proteases, but in Python, names that start with a capital letter are conventionally reserved for classes, and GitHub is highlighting these variables as such. Could we rename them to something like contribution_ClpP (if we want to keep the ClpP capitalization) to avoid confusion? The Refactor -> Rename feature in PyCharm should help update all occurrences.


for i, protein in enumerate(all_proteins):
# Use measured degradation rates if available
if protein['id'] in measured_deg_rates:
deg_rate[i] = measured_deg_rates[protein['id']]
deg_rate_source_id[i] = 'CL_measured_deg_rates_2020'
if protein['id'] in protease_dict.keys():
protease_assignment[i] = protease_dict[protein['id']]['protease_assignment']
ClpP_contribution[i] = protease_dict[protein['id']]['ClpP_fraction']
Lon_contribution[i] = protease_dict[protein['id']]['Lon_fraction']
HslV_contribution[i] = protease_dict[protein['id']]['HslV_fraction']
Unexplained_contribution[i] = protease_dict[protein['id']]['Unexplained_fraction']
Comment on lines +156 to +161
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since it seems like you are doing the same thing for all proteins (not just the ones with measured degradation rates), repeating the same code in lines 156-161, 165-170, and 181-186 is a bit redundant. I would instead move this block outside of the measured/pulsed/N-end branching, i.e. delete the copies at lines 156-161 and 165-170, unindent lines 181-186 so that they run once per protein after the if/elif/else, and add a blank line before line 181.

elif protein['id'] in pulsed_silac_deg_rates:
deg_rate[i] = pulsed_silac_deg_rates[protein['id']]
deg_rate_source_id[i] = 'Nagar_et_al_2021'
if protein['id'] in protease_dict.keys():
protease_assignment[i] = protease_dict[protein['id']]['protease_assignment']
ClpP_contribution[i] = protease_dict[protein['id']]['ClpP_fraction']
Lon_contribution[i] = protease_dict[protein['id']]['Lon_fraction']
HslV_contribution[i] = protease_dict[protein['id']]['HslV_fraction']
Unexplained_contribution[i] = protease_dict[protein['id']]['Unexplained_fraction']
# If measured rates are unavailable, use N-end rule
else:
seq = protein['seq']
Expand All @@ -145,17 +177,35 @@ def _build_monomer_data(self, raw_data, sim_data):
# is cleaved
n_end_residue = seq[protein['cleavage_of_initial_methionine']]
deg_rate[i] = n_end_rule_deg_rates[n_end_residue]
deg_rate_source_id[i] = 'N_end_rule'
if protein['id'] in protease_dict.keys():
protease_assignment[i] = protease_dict[protein['id']]['protease_assignment']
ClpP_contribution[i] = protease_dict[protein['id']]['ClpP_fraction']
Lon_contribution[i] = protease_dict[protein['id']]['Lon_fraction']
HslV_contribution[i] = protease_dict[protein['id']]['HslV_fraction']
Unexplained_contribution[i] = protease_dict[protein['id']]['Unexplained_fraction']

max_protein_id_length = max(
len(protein_id) for protein_id in protein_ids_with_compartments)
max_cistron_id_length = max(
len(cistron_id) for cistron_id in cistron_ids)
max_deg_source_id_length = max(
len(source_id) for source_id in deg_rate_source_id)
max_protease_length = max(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd rename this to max_protease_id_length for clarity and for consistency with max_protein_id_length, max_cistron_id_length, and max_deg_source_id_length above.

len(protease_id) for protease_id in protease_assignment if protease_id is not None)

monomer_data = np.zeros(
n_proteins,
dtype = [
('id', 'U{}'.format(max_protein_id_length)),
('cistron_id', 'U{}'.format(max_cistron_id_length)),
('deg_rate', 'f8'),
('deg_rate_source', 'U{}'.format(max_deg_source_id_length)),
('protease_assignment', 'U{}'.format(max_protease_length)),
('ClpP_fraction', 'f8'),
('Lon_fraction', 'f8'),
('HslV_fraction', 'f8'),
('Unexplained_fraction', 'f8'),
('length', 'i8'),
('aa_counts', '{}i8'.format(n_amino_acids)),
('mw', 'f8'),
Expand All @@ -165,6 +215,12 @@ def _build_monomer_data(self, raw_data, sim_data):
monomer_data['id'] = protein_ids_with_compartments
monomer_data['cistron_id'] = cistron_ids
monomer_data['deg_rate'] = deg_rate
monomer_data['deg_rate_source'] = deg_rate_source_id
monomer_data['protease_assignment'] = protease_assignment
monomer_data['ClpP_fraction'] = ClpP_contribution
monomer_data['Lon_fraction'] = Lon_contribution
monomer_data['HslV_fraction'] = HslV_contribution
monomer_data['Unexplained_fraction'] = Unexplained_contribution
monomer_data['length'] = lengths
monomer_data['aa_counts'] = aa_counts
monomer_data['mw'] = mws
Expand All @@ -173,6 +229,12 @@ def _build_monomer_data(self, raw_data, sim_data):
'id': None,
'cistron_id': None,
'deg_rate': deg_rate_units,
'deg_rate_source': None,
'protease_assignment': None,
'ClpP_fraction': None,
'Lon_fraction': None,
'HslV_fraction': None,
'Unexplained_fraction': None,
'length': units.aa,
'aa_counts': units.aa,
'mw': units.g / units.mol,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# Adjustments to get protein expression for certain enzymes required for metabolism
"name" "value" "units" "_source" "_comments"
"ADENYLATECYC-MONOMER[c]" "2.0/600" "fit_sim_data_1.py" "CyaA, adenylate cyclase; convert from 2 min to 10 hr half life to get expression in acetate condition (required for cAMP)"
"SPOT-MONOMER[c]" "2.0/600" "fit_sim_data_1.py" "SpoT, ppGpp phosphatase; convert from 2 min to 10 hr half life to better match expected protein counts"
"EG12298-MONOMER[p]" 0.1 "fit_sim_data_1.py" "yibQ, Predicted polysaccharide deacetylase; This protein is fit for the anaerobic condition"
"EG12298-MONOMER[c]" 0.1 "fit_sim_data_1.py" "yibQ, Predicted polysaccharide deacetylase; This protein is fit for the anaerobic condition"
84 changes: 84 additions & 0 deletions reconstruction/ecoli/flat/priority_protease_assignments_1.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Generated by /Users/noravivancogonzalez/code/wcEcoli/reconstruction/ecoli/scripts/protein_half_lives/convert_to_flat_Clim_protease_assignments.py on Fri Jan 24 13:17:32 2025
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this script also be added to the pull request?

"id" "protease_assignment" "ClpP" "Lon" "HslV" "Unexplained"
"EG10156-MONOMER" "ClpP only" 1.0 0.0 0.0 0.0
"MDLB-MONOMER" "ClpP only" 1.0 0.0 0.0 0.0
"EG10927-MONOMER" "ClpP only" 1.0 0.0 0.0 0.0
"EG10618-MONOMER" "ClpP only" 1.0 0.0 0.0 0.0
"AERGLYC3PDEHYDROG-MONOMER" "ClpP only" 0.9937311622959765 0.0 0.0 0.0
"EG10823-MONOMER" "ClpP only" 0.9659422539717736 0.0 0.0 0.0
"EG10690-MONOMER" "ClpP only" 0.9657849990624624 0.0 0.0 0.0
"EG10159-MONOMER" "ClpP only" 0.9332863287710182 0.0 0.0 0.002226409812859236
"EG11415-MONOMER" "ClpP only" 0.922363377760449 0.0005893978041944058 0.07704722443535654 0.0
"DIOHBUTANONEPSYN-MONOMER" "ClpP only" 0.9177149685544131 0.0 0.0 0.03080823441384567
"G6894-MONOMER" "ClpP only" 0.9107693760253536 0.04671986243041855 0.0 0.04251076154422791
"EG11783-MONOMER" "ClpP only" 0.9042073852846886 0.0 0.0 0.04529315476535885
"PUTA-MONOMER" "ClpP only" 0.8871341718156508 0.1044186480620873 0.0 0.00844718012226184
"G6523-MONOMER" "ClpP only" 0.8812505962987498 0.003450591289189 0.08948491166159973 0.01740777605986891
"L-LACTDEHYDROGFMN-MONOMER" "ClpP only" 0.8492170014759718 0.0 0.1507829985240282 0.0
"EG10236-MONOMER" "ClpP only" 0.8486929501138306 0.1439037976463487 0.007403252239820689 0.0
"G7596-MONOMER" "ClpP only" 0.8447298689611913 0.0 0.0 0.0436535558551446
"ACYLCOADEHYDROG-MONOMER" "ClpP only" 0.8392680900877766 0.1572280914339504 0.003503818478273008 0.0
"EG10230-MONOMER" "ClpP only" 0.8377173065892295 0.1622826934107704 0.0 0.0
"EG10651-MONOMER" "ClpP only" 0.8170292992029067 0.0 0.1752663277407187 0.007704373056374535
"EG10347-MONOMER" "ClpP only" 0.8078909105784067 0.1045677715676293 0.04688910375167573 0.04065221410228815
"ZNUC-MONOMER" "ClpP only" 0.7914905910036573 0.0 0.1582851092504883 0.05022429974585437
"EG10241-MONOMER" "ClpP only" 0.7870329116821451 0.2129670883178549 0.0 0.0
"RPOC-MONOMER" "ClpP only" 0.7846968320571005 0.1823434125346945 0.0329597554082049 0.0
"YHES-MONOMER" "ClpP only" 0.7805642482198102 0.1353550155769331 0.0 0.0
"PD00214" "ClpP only" 0.7795353683613133 0.03610032375508478 0.1843643078836019 0.0
"RPOS-MONOMER" "ClpP only" 0.7357291473762771 0.226135438282211 0.0 0.03813541434151197
"RPOB-MONOMER" "ClpP only" 0.7309776222084695 0.2690223777915304 0.0 0.0
"G6569-MONOMER" "ClpP only" 0.7281643947294957 0.1817684516842942 0.0 0.04850711108196488
"PD03270" "ClpP only" 0.7227684806789549 0.2447732088266698 0.0 0.03245831049437534
"EG11064-MONOMER" "ClpP only" 0.7123202686805574 0.272512936642312 0.01516679467713069 0.0
"EG11734-MONOMER" "ClpP only" 0.711802083069992 0.0 0.0 0.004316674368707835
"G7656-MONOMER" "ClpP only" 0.7082784410977435 0.03479064480700757 0.0 0.03302909287877121
"CYSD-MONOMER" "ClpP only" 0.688662917546952 0.1115797554856638 0.1927477493909631 0.007009577576421178
"EG10625-MONOMER" "ClpP only" 0.6841540907534327 0.1464216490037121 0.0 0.02721687308469743
"EG10686-MONOMER" "ClpP only" 0.6700160697874652 0.2007026163492992 0.1060950222390016 0.02318629162423417
"EG10900-MONOMER" "ClpP only" 0.6682086471822161 0.258233226826272 0.0735581259915118 0.0
"EG11440-MONOMER" "ClpP only" 0.6672590150979288 0.2766356738246408 0.03814846359807092 0.01795684747935953
"G7715-MONOMER" "ClpP only" 0.6286097880340189 0.0 0.1028724158166868 0.0
"G7214-MONOMER" "ClpP only" 0.6243641277512176 0.2361253543139149 0.09549104127295707 0.04401947666191024
"G6890-MONOMER" "Lon only" 0.2261716322246332 0.7239754464173729 0.0 0.04985292135799384
"PD03938" "Lon only" 0.2740979583819161 0.6914877822544573 0.0 0.03441425936362659
"G6737-MONOMER" "Lon only" 0.3092136466178453 0.6681864447006194 0.0 0.0225999086815353
"RPOD-MONOMER" "Lon only" 0.2230379815549332 0.6600080251012231 0.1148842845538507 0.002069708789992873
"PD02936" "Lon only" 0.3539811062862402 0.6460188937137596 0.0 0.0
"RED-THIOREDOXIN2-MONOMER" "Lon only" 0.2375501665183758 0.6324608004447548 0.04892339319781415 0.08106563983905513
"UHPA-MONOMER" "HslV only" 0.08064451280400928 0.1069606013447386 0.6398231926635632 0.1725716931876888
"EG12402-MONOMER" "Additive: ClpP, Lon, HslV" 0.2320819185431029 0.3377665653021024 0.4301515161547947 0.0
"G6472-MONOMER" "Additive: ClpP, Lon, HslV" 0.3856805416024634 0.2210715452608285 0.3932479131367082 0.0
"EG11830-MONOMER" "Additive: ClpP, Lon, HslV" 0.4305894409043098 0.2988507994073917 0.2705597596882984 0.0
"EG11249-MONOMER" "Additive: ClpP, Lon, HslV" 0.3333333333333333 0.3333333333333333 0.3333333333333333 0.0
"EG12352-MONOMER" "Additive: ClpP, Lon, HslV" 0.186928038870193 0.5917709288562806 0.2213010322735264 0.0
"EG11534-MONOMER" "Additive: ClpP, Lon" 0.5296343929777583 0.4703656070222417 0.0 0.0
"EG10426-MONOMER" "Additive: ClpP, Lon, HslV" 0.4077213064750552 0.3705897358338249 0.2216889576911199 0.0
"EG10534-MONOMER" "Additive: ClpP, Lon, HslV" 0.5211809250433617 0.2679823803062132 0.2108366946504252 0.0
"G7532-MONOMER" "Additive: ClpP, Lon, HslV" 0.3314420580080509 0.3408207405900246 0.3277372014019244 0.0
"EG10844-MONOMER" "Additive: ClpP, Lon, HslV" 0.4641646680853521 0.1982252055938427 0.3376101263208054 0.0
"EG11100-MONOMER" "Additive: ClpP, Lon, HslV" 0.3834618395223929 0.4187882865673612 0.1977498739102457 0.0
"MONOMER0-741" "Additive: ClpP, Lon, HslV" 0.2255737593700952 0.4391041354051837 0.3353221052247211 0.0
"G7395-MONOMER" "Additive: ClpP, Lon, HslV" 0.3064778311322938 0.3488833520416983 0.3446388168260079 0.0
"EG50003-MONOMER" "Additive: ClpP, Lon, HslV" 0.3384821868859559 0.3434217240948128 0.3180960890192313 0.0
"EG11874-MONOMER" "Additive: ClpP, Lon, HslV" 0.42127961222654 0.3246575119991103 0.2540628757743498 0.0
"EG12866-MONOMER" "Additive: ClpP, Lon, HslV" 0.2909542232976917 0.5723306340023894 0.1367151426999189 0.0
"GLND-MONOMER" "Additive: ClpP, Lon, HslV" 0.5316359967808061 0.3805469956499178 0.08247835860958329 0.005338648959692819
"EG10776-MONOMER" "Additive: ClpP, Lon, HslV" 0.5461372414440729 0.3357934868701596 0.1051035201725958 0.01296575151317166
"G7326-MONOMER" "Additive: ClpP, Lon, HslV" 0.5510491078882992 0.2270753205452031 0.2055586875236938 0.01631688404280404
"EG10687-MONOMER" "Additive: ClpP, Lon, HslV" 0.4557810755518802 0.1734706554945651 0.3511420348708367 0.01960623408271815
"EG12308-MONOMER" "Additive: ClpP, Lon, HslV" 0.5604658979126905 0.3948864591542473 0.01806362826155732 0.02658401467150481
"G7057-MONOMER" "Additive: ClpP, Lon, HslV" 0.2432732282010168 0.2817675496854042 0.4475986837846367 0.02736053832894218
"EG11187-MONOMER" "Additive: ClpP, Lon, HslV" 0.5364887775582662 0.1841433392989469 0.2488544653771275 0.03051341776565957
"EG12332-MONOMER" "Additive: ClpP, Lon, HslV" 0.480731838615018 0.3768468634634417 0.111832163755529 0.03058913416601136
"EG10598-MONOMER" "Additive: ClpP, Lon, HslV" 0.3091086079177796 0.06515504837649867 0.5883054330852214 0.03743091062050029
"EG11410-MONOMER" "Additive: ClpP, Lon, HslV" 0.5487222182238243 0.2477210061293566 0.1650716058273788 0.03848516981944023
"EG11784-MONOMER" "Additive: ClpP, Lon" 0.5612420121591221 0.3933877171339142 0.0 0.04537027070696378
"EG12386-MONOMER" "Additive: ClpP, Lon, HslV" 0.3532386803630403 0.5281476337180628 0.06633685728686786 0.05227682863202891
"EG10975-MONOMER" "Additive: ClpP, Lon, HslV" 0.4234175529995474 0.3247293027557136 0.1964805599793681 0.05537258426537083
"THI-P-KIN-MONOMER" "Additive: ClpP, Lon, HslV" 0.2322266738247203 0.4390768368112499 0.2713839554040631 0.05731253395996667
"EG12690-MONOMER" "Additive: ClpP, Lon, HslV" 0.2779719382201942 0.277364353473086 0.378556090604755 0.066107617701965
"PD03831" "Additive: ClpP, Lon" 0.5199578915782244 0.4031223236631027 0.0 0.07691978475867288
"AROK-MONOMER" "Additive: ClpP, Lon, HslV" 0.3194359283352617 0.4295110879755015 0.1671518602623916 0.0839011234268451
"G7263-MONOMER" "Additive: ClpP, Lon, HslV" 0.1835257388193141 0.2060544659018618 0.5209322548615479 0.0894875404172763
"EG12289-MONOMER" "Additive: ClpP, Lon, HslV" 0.3525269917056569 0.3211387340856484 0.2269161276492065 0.09941814655948829
1 change: 1 addition & 0 deletions reconstruction/ecoli/knowledge_base_raw.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
"ppgpp_regulation.tsv",
"ppgpp_regulation_added.tsv",
"ppgpp_regulation_removed.tsv",
"priority_protease_assignments_1.tsv",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor detail - could you update the PR description to include the _1 in the filename? That way, people will search for the correct name if they come across this PR later.

"protein_half_lives_measured.tsv",
"protein_half_lives_n_end_rule.tsv",
"protein_half_lives_pulsed_silac.tsv",
Expand Down