Skip to content

Commit 7cec084

Browse files
authored
Merge pull request #133 from singjc/feature/aripf
Feature: Alignment Scoring and Across Run IPF
2 parents 3404961 + c7e4a9b commit 7cec084

20 files changed

+432
-199
lines changed

pyprophet/data_handling.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ def prepare_data_table(table,
143143
decoy_name="decoy",
144144
main_score_name=None,
145145
score_columns=None,
146+
level=None
146147
):
147148
N = len(table)
148149
if not N:
@@ -219,7 +220,7 @@ def prepare_data_table(table,
219220

220221
tg_ids = table[tg_id_name]
221222

222-
if not check_for_unique_blocks(tg_ids):
223+
if not check_for_unique_blocks(tg_ids) and level != 'alignment':
223224
raise click.ClickException("" + tg_id_name + " values do not form unique blocks in input file(s).")
224225

225226
tg_map = dict()
@@ -285,6 +286,8 @@ def update_chosen_main_score_in_table(train, score_columns, use_as_main_score):
285286
train.df.insert(5, temp_col.name, temp_col)
286287
click.echo(f"Info: Updated main score column from {old_main_score_column} to {use_as_main_score}...")
287288
return train, tuple(updated_score_columns)
289+
290+
288291
class Experiment(object):
289292

290293
@profile

pyprophet/ipf.py

Lines changed: 186 additions & 72 deletions
Large diffs are not rendered by default.

pyprophet/main.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,8 @@ def type_cast_value(self, ctx, value):
7878
@click.option('--lfdr_adj', default=1.5, show_default=True, type=float, help='Numeric value that is applied as a multiple of the smoothing bandwidth used in the density estimation.')
7979
@click.option('--lfdr_eps', default=np.power(10.0,-8), show_default=True, type=float, help='Numeric value that is threshold for the tails of the empirical p-value distribution.')
8080
# OpenSWATH options
81-
@click.option('--level', default='ms2', show_default=True, type=click.Choice(['ms1', 'ms2', 'ms1ms2', 'transition']), help='Either "ms1", "ms2", "ms1ms2" or "transition"; the data level selected for scoring. "ms1ms2 integrates both MS1- and MS2-level scores and can be used instead of "ms2"-level results."')
81+
@click.option('--level', default='ms2', show_default=True, type=click.Choice(['ms1', 'ms2', 'ms1ms2', 'transition', 'alignment']), help='Either "ms1", "ms2", "ms1ms2", "transition", or "alignment"; the data level selected for scoring. "ms1ms2 integrates both MS1- and MS2-level scores and can be used instead of "ms2"-level results."')
82+
@click.option('--add_alignment_features/--no-add_alignment_features', default=False, show_default=True, help='Add alignment features to scoring.')
8283
# IPF options
8384
@click.option('--ipf_max_peakgroup_rank', default=1, show_default=True, type=int, help='Assess transitions only for candidate peak groups until maximum peak group rank.')
8485
@click.option('--ipf_max_peakgroup_pep', default=0.7, show_default=True, type=float, help='Assess transitions only for candidate peak groups until maximum posterior error probability.')
@@ -92,7 +93,7 @@ def type_cast_value(self, ctx, value):
9293
# Processing
9394
@click.option('--threads', default=1, show_default=True, type=int, help='Number of threads used for semi-supervised learning. -1 means all available CPUs.', callback=transform_threads)
9495
@click.option('--test/--no-test', default=False, show_default=True, help='Run in test mode with fixed seed.')
95-
def score(infile, outfile, classifier, xgb_autotune, apply_weights, xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, ss_num_iter, ss_main_score, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, level, ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_max_transition_isotope_overlap, ipf_min_transition_sn, tric_chromprob, threads, test, ss_score_filter, color_palette, main_score_selection_report):
96+
def score(infile, outfile, classifier, xgb_autotune, apply_weights, xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, ss_num_iter, ss_main_score, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, level, add_alignment_features, ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_max_transition_isotope_overlap, ipf_min_transition_sn, tric_chromprob, threads, test, ss_score_filter, color_palette, main_score_selection_report):
9697
"""
9798
Conduct semi-supervised learning and error-rate estimation for MS1, MS2, transition-level and alignment-level data.
9899
"""
@@ -112,10 +113,10 @@ def score(infile, outfile, classifier, xgb_autotune, apply_weights, xeval_fracti
112113
xgb_params_space = {'eta': hp.uniform('eta', 0.0, 0.3), 'gamma': hp.uniform('gamma', 0.0, 0.5), 'max_depth': hp.quniform('max_depth', 2, 8, 1), 'min_child_weight': hp.quniform('min_child_weight', 1, 5, 1), 'subsample': 1, 'colsample_bytree': 1, 'colsample_bylevel': 1, 'colsample_bynode': 1, 'lambda': hp.uniform('lambda', 0.0, 1.0), 'alpha': hp.uniform('alpha', 0.0, 1.0), 'scale_pos_weight': 1.0, 'verbosity': 0, 'objective': 'binary:logitraw', 'nthread': 1, 'eval_metric': 'auc'}
113114

114115
if not apply_weights:
115-
PyProphetLearner(infile, outfile, classifier, xgb_hyperparams, xgb_params, xgb_params_space, xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, ss_num_iter, ss_main_score, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, level, ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_max_transition_isotope_overlap, ipf_min_transition_sn, tric_chromprob, threads, test, ss_score_filter, color_palette, main_score_selection_report).run()
116+
PyProphetLearner(infile, outfile, classifier, xgb_hyperparams, xgb_params, xgb_params_space, xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, ss_num_iter, ss_main_score, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, level, add_alignment_features, ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_max_transition_isotope_overlap, ipf_min_transition_sn, tric_chromprob, threads, test, ss_score_filter, color_palette, main_score_selection_report).run()
116117
else:
117118

118-
PyProphetWeightApplier(infile, outfile, classifier, xgb_hyperparams, xgb_params, xgb_params_space, xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, ss_num_iter, ss_main_score, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, level, ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_max_transition_isotope_overlap, ipf_min_transition_sn, tric_chromprob, threads, test, apply_weights, ss_score_filter, color_palette, main_score_selection_report).run()
119+
PyProphetWeightApplier(infile, outfile, classifier, xgb_hyperparams, xgb_params, xgb_params_space, xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, ss_num_iter, ss_main_score, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, level, add_alignment_features, ipf_max_peakgroup_rank, ipf_max_peakgroup_pep, ipf_max_transition_isotope_overlap, ipf_min_transition_sn, tric_chromprob, threads, test, apply_weights, ss_score_filter, color_palette, main_score_selection_report).run()
119120

120121

121122
# IPF
@@ -132,7 +133,10 @@ def score(infile, outfile, classifier, xgb_autotune, apply_weights, xeval_fracti
132133
@click.option('--ipf_max_peakgroup_pep', default=0.7, show_default=True, type=float, help='Maximum PEP to consider scored peak groups in IPF.')
133134
@click.option('--ipf_max_precursor_peakgroup_pep', default=0.4, show_default=True, type=float, help='Maximum BHM layer 1 integrated precursor peakgroup PEP to consider in IPF.')
134135
@click.option('--ipf_max_transition_pep', default=0.6, show_default=True, type=float, help='Maximum PEP to consider scored transitions in IPF.')
135-
def ipf(infile, outfile, ipf_ms1_scoring, ipf_ms2_scoring, ipf_h0, ipf_grouped_fdr, ipf_max_precursor_pep, ipf_max_peakgroup_pep, ipf_max_precursor_peakgroup_pep, ipf_max_transition_pep):
136+
@click.option('--propagate_signal_across_runs/--no-propagate_signal_across_runs', default=False, show_default=True, help='Propagate signal across runs (requires running alignment).')
137+
@click.option('--ipf_max_alignment_pep', default=1.0, show_default=True, type=float, help='Maximum PEP to consider for good alignments.')
138+
@click.option('--across_run_confidence_threshold', default=0.5, show_default=True, type=float, help='Maximum PEP to consider for propagating signal across runs for aligned features.')
139+
def ipf(infile, outfile, ipf_ms1_scoring, ipf_ms2_scoring, ipf_h0, ipf_grouped_fdr, ipf_max_precursor_pep, ipf_max_peakgroup_pep, ipf_max_precursor_peakgroup_pep, ipf_max_transition_pep, propagate_signal_across_runs, ipf_max_alignment_pep, across_run_confidence_threshold):
136140
"""
137141
Infer peptidoforms after scoring of MS1, MS2 and transition-level data.
138142
"""
@@ -142,7 +146,7 @@ def ipf(infile, outfile, ipf_ms1_scoring, ipf_ms2_scoring, ipf_h0, ipf_grouped_f
142146
else:
143147
outfile = outfile
144148

145-
infer_peptidoforms(infile, outfile, ipf_ms1_scoring, ipf_ms2_scoring, ipf_h0, ipf_grouped_fdr, ipf_max_precursor_pep, ipf_max_peakgroup_pep, ipf_max_precursor_peakgroup_pep, ipf_max_transition_pep)
149+
infer_peptidoforms(infile, outfile, ipf_ms1_scoring, ipf_ms2_scoring, ipf_h0, ipf_grouped_fdr, ipf_max_precursor_pep, ipf_max_peakgroup_pep, ipf_max_precursor_peakgroup_pep, ipf_max_transition_pep, propagate_signal_across_runs, ipf_max_alignment_pep, across_run_confidence_threshold)
146150

147151

148152
# Peptide-level inference

pyprophet/pyprophet.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def calculate_params_for_d_score(classifier, experiment):
7171

7272
class Scorer(object):
7373

74-
def __init__(self, classifier, score_columns, experiment, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, tric_chromprob, ss_score_filter, color_palette):
74+
def __init__(self, classifier, score_columns, experiment, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, tric_chromprob, ss_score_filter, color_palette, level):
7575

7676
self.classifier = classifier
7777
self.score_columns = score_columns
@@ -95,6 +95,7 @@ def __init__(self, classifier, score_columns, experiment, group_id, parametric,
9595
self.tric_chromprob = tric_chromprob
9696
self.ss_score_filter = ss_score_filter
9797
self.color_palette = color_palette
98+
self.level = level
9899

99100
target_scores = experiment.get_top_target_peaks()["d_score"]
100101
decoy_scores = experiment.get_top_decoy_peaks()["d_score"]
@@ -122,7 +123,7 @@ def __init__(self, classifier, score_columns, experiment, group_id, parametric,
122123

123124
def score(self, table):
124125

125-
prepared_table, __ = prepare_data_table(table, self.ss_score_filter, tg_id_name=self.group_id, score_columns=self.score_columns)
126+
prepared_table, __ = prepare_data_table(table, self.ss_score_filter, tg_id_name=self.group_id, score_columns=self.score_columns, level=self.level)
126127
texp = Experiment(prepared_table)
127128
score = self.classifier.score(texp, True)
128129
texp["r_score"] = score
@@ -180,7 +181,7 @@ class HolyGostQuery(object):
180181
See below how PyProphet parameterises this class.
181182
"""
182183

183-
def __init__(self, semi_supervised_learner, classifier, ss_num_iter, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, tric_chromprob, threads, test, ss_score_filter, color_palette):
184+
def __init__(self, semi_supervised_learner, classifier, ss_num_iter, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, tric_chromprob, threads, test, ss_score_filter, color_palette, level):
184185
assert isinstance(semi_supervised_learner,
185186
AbstractSemiSupervisedLearner)
186187
self.semi_supervised_learner = semi_supervised_learner
@@ -203,9 +204,10 @@ def __init__(self, semi_supervised_learner, classifier, ss_num_iter, group_id, p
203204
self.test = test
204205
self.ss_score_filter = ss_score_filter
205206
self.color_palette = color_palette
207+
self.level = level
206208

207209
def _setup_experiment(self, table):
208-
prepared_table, score_columns = prepare_data_table(table, self.ss_score_filter, tg_id_name=self.group_id)
210+
prepared_table, score_columns = prepare_data_table(table, self.ss_score_filter, tg_id_name=self.group_id, level=self.level)
209211
experiment = Experiment(prepared_table)
210212
experiment.log_summary()
211213
return experiment, score_columns
@@ -339,7 +341,7 @@ def _build_result(self, table, final_classifier, score_columns, experiment):
339341
for key, value in reversed(sorted(mapped.items(), key=operator.itemgetter(1))):
340342
click.echo("Info: Importance of %s: %s" % (key, value))
341343

342-
scorer = Scorer(final_classifier, score_columns, experiment, self.group_id, self.parametric, self.pfdr, self.pi0_lambda, self.pi0_method, self.pi0_smooth_df, self.pi0_smooth_log_pi0, self.lfdr_truncate, self.lfdr_monotone, self.lfdr_transformation, self.lfdr_adj, self.lfdr_eps, self.tric_chromprob, self.ss_score_filter, self.color_palette)
344+
scorer = Scorer(final_classifier, score_columns, experiment, self.group_id, self.parametric, self.pfdr, self.pi0_lambda, self.pi0_method, self.pi0_smooth_df, self.pi0_smooth_log_pi0, self.lfdr_truncate, self.lfdr_monotone, self.lfdr_transformation, self.lfdr_adj, self.lfdr_eps, self.tric_chromprob, self.ss_score_filter, self.color_palette, self.level)
343345

344346
scored_table = scorer.score(table)
345347

@@ -354,8 +356,8 @@ def _build_result(self, table, final_classifier, score_columns, experiment):
354356
@profile
355357
def PyProphet(classifier, xgb_hyperparams, xgb_params, xgb_params_space, xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, ss_num_iter, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, tric_chromprob, threads, test, ss_score_filter, color_palette, main_score_selection_report, outfile, level, ss_use_dynamic_main_score):
356358
if classifier == "LDA":
357-
return HolyGostQuery(StandardSemiSupervisedLearner(LDALearner(), xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, test, main_score_selection_report, outfile, level, ss_use_dynamic_main_score), classifier, ss_num_iter, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, tric_chromprob, threads, test, ss_score_filter, color_palette)
359+
return HolyGostQuery(StandardSemiSupervisedLearner(LDALearner(), xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, test, main_score_selection_report, outfile, level, ss_use_dynamic_main_score), classifier, ss_num_iter, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, tric_chromprob, threads, test, ss_score_filter, color_palette, level)
358360
elif classifier == "XGBoost":
359-
return HolyGostQuery(StandardSemiSupervisedLearner(XGBLearner(xgb_hyperparams, xgb_params, xgb_params_space, threads), xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, test, main_score_selection_report, outfile, level, ss_use_dynamic_main_score), classifier, ss_num_iter, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, tric_chromprob, threads, test, ss_score_filter, color_palette)
361+
return HolyGostQuery(StandardSemiSupervisedLearner(XGBLearner(xgb_hyperparams, xgb_params, xgb_params_space, threads), xeval_fraction, xeval_num_iter, ss_initial_fdr, ss_iteration_fdr, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, test, main_score_selection_report, outfile, level, ss_use_dynamic_main_score), classifier, ss_num_iter, group_id, parametric, pfdr, pi0_lambda, pi0_method, pi0_smooth_df, pi0_smooth_log_pi0, lfdr_truncate, lfdr_monotone, lfdr_transformation, lfdr_adj, lfdr_eps, tric_chromprob, threads, test, ss_score_filter, color_palette, level)
360362
else:
361363
raise click.ClickException("Classifier not supported.")

0 commit comments

Comments
 (0)