Skip to content

Commit ba3cb43

Browse files
A script to generate the peptide_table.tsv for Modifying peptides
1 parent 0ddf476 commit ba3cb43

File tree

1 file changed

+88
-0
lines changed

1 file changed

+88
-0
lines changed

scripts/modify_peptides.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import argparse
2+
from itertools import product, chain
3+
import pandas as pd
4+
5+
6+
# ---- PARSE ARGUMENTS -------------------------------------------------------
7+
# Parses command line arguments
8+
# Enables user help
9+
def parse_arguments():
    """Parse the command line options of the peptide-table generator.

    Options:
        -n  Maximum length of the modification added to either end of a
            peptide. Converted to ``int``; defaults to 3 when omitted.
        -m  Path to the CSV file holding the peptide names and sequences.
            Required.

    Returns:
        argparse.Namespace: ``n`` (int) and ``m`` (str).

    Raises:
        SystemExit: raised by argparse when ``-m`` is missing, ``-n`` is not
            an integer, or ``-h`` is requested.
    """
    parser = argparse.ArgumentParser(description='Create peptide_table.tsv needed for modifying peptide sequences for pVACbind')

    # Without type=int argparse returns the raw string, which cannot serve as
    # an upper bound for range().
    parser.add_argument('-n',
                        type=int,
                        default=3,
                        help='The maximum number of mofifying peptides to add to the begining or end')
    # The input file is mandatory: fail with a usage message instead of a
    # pandas error further down the line.
    parser.add_argument('-m',
                        required=True,
                        help='A csv file containing the name/indentifer for the sequence which does NOT have to be unique')
    return parser.parse_args()
18+
19+
def generate_modifed_peptides(n, name, base_sequence):
    """Build every K/R-extended variant of one peptide.

    For each string of 1..n residues drawn from ``K`` and ``R``, two rows are
    produced: one with the string prepended (N-terminal) and one with it
    appended (C-terminal) to ``base_sequence``.

    Args:
        n: Maximum length of the added modification. Integer-like strings
            are accepted as well. Values below 1 give an empty result.
        name: Identifier of the peptide, used as prefix of every row name.
        base_sequence: The unmodified peptide sequence.

    Returns:
        list[dict]: Rows with the keys ``sequence_name``, ``sequence`` and
        ``parsed_sequence`` (modification and base separated by ``|``).
        Rows are ordered by modification length, then K before R, with the
        N-terminal row directly followed by its C-terminal counterpart.
    """
    characters = ['K', 'R']
    # Accept values that arrive as text, e.g. straight from the command line.
    n = int(n)

    # product() over distinct residues never yields the same string twice, so
    # no de-duplication is needed. A list (unlike a set) keeps the row order
    # identical from run to run regardless of string-hash randomisation.
    possible_modifications = [
        ''.join(combination)
        for length in range(1, n + 1)
        for combination in product(characters, repeat=length)
    ]

    peptide_table = []
    for modification in possible_modifications:
        peptide_table.append({
            'sequence_name': name + "." + "n-term" + "-" + modification,
            'sequence': modification + base_sequence,
            'parsed_sequence': modification + '|' + base_sequence,
        })
        peptide_table.append({
            'sequence_name': name + "." + "c-term" + "-" + modification,
            'sequence': base_sequence + modification,
            'parsed_sequence': base_sequence + '|' + modification,
        })

    return peptide_table
49+
50+
51+
def assign_unique_numbers(df, column_name):
    """Make repeated values in one column unique by appending ``.<k>``.

    Values that occur once are left untouched. Each occurrence of a repeated
    value is renamed, in row order, to ``<value>.1``, ``<value>.2`` and so
    on. A suffix is skipped when the resulting name is already present in the
    column, so the renamed entries never clash with existing ones.

    Args:
        df: DataFrame to modify. It is changed in place.
        column_name: Name of the column whose values are made unique.

    Returns:
        The same ``df`` object, for call chaining.
    """
    counts = df[column_name].value_counts()
    duplicated_values = counts[counts > 1].index

    # Every name currently in the column; extended as new names are issued so
    # that a generated name can never duplicate an existing one.
    names_in_use = set(df[column_name])

    for value in duplicated_values:
        indices = df.index[df[column_name] == value]
        suffix = 0
        for index in indices:
            suffix += 1
            candidate = f"{value}.{suffix}"
            # e.g. "A", "A", "A.1": the literal "A.1" row must keep its name.
            while candidate in names_in_use:
                suffix += 1
                candidate = f"{value}.{suffix}"
            df.at[index, column_name] = candidate
            names_in_use.add(candidate)

    return df
61+
62+
def main():
    """Read the peptide CSV and write ``peptide_table.tsv``.

    Steps:
        1. Parse the command line (``-n`` maximum modification length,
           ``-m`` input CSV path).
        2. Load the CSV as two columns, ``Name`` and ``Sequence``; the first
           row of the file is discarded as a header.
        3. Make repeated names unique.
        4. Generate the modified variants of every peptide.
        5. Write all rows, tab separated and without header or index, to
           ``peptide_table.tsv`` in the current working directory.
    """
    args = parse_arguments()

    # Use the length requested with -n; fall back to 3 when it was not given.
    # int() also covers a value that is delivered as text.
    max_length = 3 if args.n is None else int(args.n)

    peptides = pd.read_csv(args.m, names=["Name", "Sequence"], header=None)
    # Drop the header row and detach from the freshly read frame, because the
    # names are rewritten in place by the next step.
    peptides = peptides.iloc[1:].copy()
    peptides = assign_unique_numbers(peptides, "Name")

    peptide_rows = []
    for _, row in peptides.iterrows():
        peptide_rows.extend(
            generate_modifed_peptides(max_length, row['Name'], row['Sequence'])
        )

    df = pd.DataFrame(peptide_rows)
    df.to_csv('peptide_table.tsv', sep="\t", index=False, header=False)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)