Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions .github/workflows/predictor_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ jobs:
sudo apt-get install -y ghostscript
sudo apt-get install -y gcc
sudo apt-get install -y pandoc
sudo apt-get install -y mafft
sudo apt-get install -y build-essential
pip install pypandoc==1.7.2
pip install coverage
pip install coveralls
Expand All @@ -47,6 +49,32 @@ jobs:
pip install git+https://github.com/griffithlab/deepimmuno.git#egg=deepimmuno
pip install -e .
mhcflurry-downloads fetch
# Check out the GfellerLab/MixMHCpred repository into ./MixMHCpred.
- name: Install MixMHCpred
  # Pinned to a release tag instead of the floating `master` branch ref so
  # the workflow does not change behavior when the action's branch moves.
  uses: actions/checkout@v4
  with:
    repository: GfellerLab/MixMHCpred
    path: ./MixMHCpred
    token: ${{ secrets.GITHUB_TOKEN }}
# Install MixMHCpred's Python requirements, make its launcher executable and
# add the checkout directory to PATH for the following steps.
- name: Set up MixMHCpred
  run: |
    cd MixMHCpred
    pip install -r code/setup_pythonLibrary.txt
    chmod +x MixMHCpred
    # Quote the target so a path containing spaces cannot split the redirect.
    echo "$(pwd -P)" >> "$GITHUB_PATH"
# Check out the GfellerLab/PRIME repository into ./PRIME.
- name: Install PRIME
  # Pinned to a release tag instead of the floating `master` branch ref so
  # the workflow does not change behavior when the action's branch moves.
  uses: actions/checkout@v4
  with:
    repository: GfellerLab/PRIME
    path: ./PRIME
    token: ${{ secrets.GITHUB_TOKEN }}
# Add the checkout directory to PATH for the following steps and compile
# lib/PRIME.cc into lib/PRIME.x.
- name: Set up PRIME
  run: |
    cd PRIME
    # Quote the target so a path containing spaces cannot split the redirect.
    echo "$(pwd -P)" >> "$GITHUB_PATH"
    cd lib
    g++ -O3 PRIME.cc -o PRIME.x
- name: List installed packages
run: |
pip list
Expand Down
40 changes: 37 additions & 3 deletions predictor_tests/test_call_iedb.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,40 @@ def test_deepimmuno_method_generates_expected_files(self):
actual_df = pd.read_csv(call_iedb_output_file.name, sep="\t", index_col=[0,3,4])
pd.testing.assert_frame_equal(expected_df, actual_df, check_like=True, check_exact=False)

def test_mixmhcpred_method_generates_expected_files(self):
    """Run call_iedb with the MixMHCpred method and compare against the stored TSV.

    The temporary output file and scratch directory are managed by context
    managers so they are removed deterministically, even when an assertion
    fails, instead of whenever the garbage collector finalizes them.
    """
    with tempfile.NamedTemporaryFile() as call_iedb_output_file, \
            tempfile.TemporaryDirectory() as tmp_call_iedb_output_dir:
        pvactools.lib.call_iedb.main([
            self.input_file,
            call_iedb_output_file.name,
            'MixMHCpred',
            self.allele,
            '-l', str(self.epitope_length),
            '--tmp-dir', tmp_call_iedb_output_dir,
        ])
        expected_output_file = os.path.join(self.test_data_dir, 'output_mixmhcpred.tsv')
        # Both tables are indexed by the same columns (positions 0, 6 and 7)
        # before being compared.
        expected_df = pd.read_csv(expected_output_file, sep="\t", index_col=[0,6,7])
        actual_df = pd.read_csv(call_iedb_output_file.name, sep="\t", index_col=[0,6,7])
        # check_like ignores row/column order; check_exact=False tolerates
        # small floating-point differences.
        pd.testing.assert_frame_equal(expected_df, actual_df, check_like=True, check_exact=False)

def test_prime_method_generates_expected_files(self):
    """Run call_iedb with the PRIME method and compare against the stored TSV.

    The temporary output file and scratch directory are managed by context
    managers so they are removed deterministically, even when an assertion
    fails, instead of whenever the garbage collector finalizes them.
    """
    with tempfile.NamedTemporaryFile() as call_iedb_output_file, \
            tempfile.TemporaryDirectory() as tmp_call_iedb_output_dir:
        pvactools.lib.call_iedb.main([
            self.input_file,
            call_iedb_output_file.name,
            'PRIME',
            self.allele,
            '-l', str(self.epitope_length),
            '--tmp-dir', tmp_call_iedb_output_dir,
        ])
        expected_output_file = os.path.join(self.test_data_dir, 'output_prime.tsv')
        # Both tables are indexed by the same columns (positions 0, 8 and 9)
        # before being compared.
        expected_df = pd.read_csv(expected_output_file, sep="\t", index_col=[0,8,9])
        actual_df = pd.read_csv(call_iedb_output_file.name, sep="\t", index_col=[0,8,9])
        # check_like ignores row/column order; check_exact=False tolerates
        # small floating-point differences.
        pd.testing.assert_frame_equal(expected_df, actual_df, check_like=True, check_exact=False)

class CallIEDBClassIITests(CallIEDBTests):
@classmethod
def additional_setup(cls):
Expand Down Expand Up @@ -230,7 +264,7 @@ def test_netmhciipan_method_with_version(self):
temp_dir.name,
log_dir.name
)

with tempfile.NamedTemporaryFile(mode='w+', delete=False) as output_file:
output_file.write(response_text)
output_file.seek(0)
Expand All @@ -240,7 +274,7 @@ def test_netmhciipan_method_with_version(self):
expected_df = pd.read_csv(expected_output_file, sep="\t", index_col=[0,2,3])

pd.testing.assert_frame_equal(expected_df, actual_df, check_like=True, check_exact=False)

def test_netmhciipan_el_method_with_version(self):
temp_dir = tempfile.TemporaryDirectory()
log_dir = tempfile.TemporaryDirectory()
Expand All @@ -258,7 +292,7 @@ def test_netmhciipan_el_method_with_version(self):
temp_dir.name,
log_dir.name
)

with tempfile.NamedTemporaryFile(mode='w+', delete=False) as output_file:
output_file.write(response_text)
output_file.seek(0)
Expand Down
2,787 changes: 2,787 additions & 0 deletions predictor_tests/test_data/output_mixmhcpred.tsv

Large diffs are not rendered by default.

2,787 changes: 2,787 additions & 0 deletions predictor_tests/test_data/output_prime.tsv

Large diffs are not rendered by default.

739 changes: 399 additions & 340 deletions pvactools/lib/output_parser.py

Large diffs are not rendered by default.

137 changes: 137 additions & 0 deletions pvactools/lib/prediction_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,23 +252,33 @@ def allele_to_species_map(self):
'DP' : 'human',
'DQ' : 'human',
'DR' : 'human',
'Aotr': 'three-striped night monkey',
'Atbe': 'white-fronted spider monkey',
'Atfu': 'black-headed spider monkey',
'Bogr': 'domestic yak',
'BoLA': 'cow',
'Caja': 'common marmoset',
'Ceat': 'sooty mangabey',
'Cemi': 'blue monkey',
'Chae': 'grivet',
'Chsa': 'green monkey',
'Chpy': 'vervet monkey',
'DLA' : 'dog',
'Eqca': 'horse',
'Gaga': 'chicken',
'Gobe': 'eastern gorilla',
'Gogo': 'gorilla',
'H-2' : 'mouse',
'H2' : 'mouse',
'Hyla': 'lar gibbon',
'Lero': 'golden lion tamarin',
'Maar': 'stump-tailed macaque',
'Maas': 'assam macaque',
'Mafa': 'crab-eating macaque',
'Malo': 'northern pig-tailed macaque',
'Mamu': 'rhesus macaque',
'Mane': 'southern pig-tailed macaque',
'Math': 'tibetan macaque',
'Onmy': 'rainbow trout',
'Ovar': 'sheep',
'Paan': 'olive baboon',
Expand All @@ -277,9 +287,12 @@ def allele_to_species_map(self):
'Papa': 'bonobo',
'Patr': 'chimpanzee',
'Pipi': 'white-faced saki',
'Poab': 'sumatran orangutan',
'Popy': 'bornean orangutan',
'Rano': 'norway rat',
'Safu': 'brown-mantled tamarin',
'Sage': "Geoffroy's tamarin",
'Sala': 'white-lipped tamarin',
'Samy': 'moustached tamarin',
'Saoe': 'cottontop tamarin',
'Sasa': 'atlantic salmon',
Expand Down Expand Up @@ -538,6 +551,130 @@ def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb
class MHCflurryEL(MHCflurry):
    """Subclass of MHCflurry that adds no attributes or methods of its own."""

class MixMHCpred(MHCI):
    """Class I predictor that shells out to the ``MixMHCpred`` executable.

    The executable is invoked by bare name, so it must be resolvable on PATH.
    """

    def valid_allele_names(self):
        """Return the allele names listed, one per line, in ``MixMHCpred.txt``.

        The file is read from ``tools/pvacseq/iedb_alleles/class_i`` under the
        parent of this module's directory. Empty lines are dropped.
        """
        base_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
        alleles_dir = os.path.join(base_dir, 'tools', 'pvacseq', 'iedb_alleles', 'class_i')
        alleles_file_name = os.path.join(alleles_dir, "MixMHCpred.txt")
        with open(alleles_file_name, 'r') as fh:
            return list(filter(None, fh.read().split('\n')))

    def check_length_valid_for_allele(self, length, allele):
        """Return True for every length/allele combination."""
        # NOTE(review): this accepts lengths outside valid_lengths_for_allele();
        # confirm that is intended.
        return True

    def valid_lengths_for_allele(self, allele):
        """Return the epitope lengths offered for ``allele`` (8 through 14)."""
        return [8,9,10,11,12,13,14]

    def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=None, log_dir=None):
        """Score every epitope of ``epitope_length`` found in ``input_file``.

        Args:
            input_file: FASTA input handed to ``SeqIO.parse``.
            allele: Allele name passed to the executable via ``-a``.
            epitope_length: Length handed to ``determine_neoepitopes``.
            iedb_executable_path: Not used by this method.
            iedb_retries: Not used by this method.
            tmp_dir: Directory for scratch files; ``None`` uses the
                ``tempfile`` default location.
            log_dir: Not used by this method.

        Returns:
            A ``(results, 'pandas')`` tuple. ``results`` holds one block of
            rows per (sequence, start position) pair, with ``seq_num``,
            ``start`` and ``allele`` columns appended to the tool's output.
            It is an empty DataFrame when the input yields no epitopes.

        Raises:
            Exception: if running the executable fails; the message carries
                whatever the tool wrote to stderr.
        """
        # Parse the FASTA once and remember each record's {start: epitope}
        # mapping, so the same data drives both the tool input and the
        # per-record result assembly.
        records = [
            (record.id, pvactools.lib.run_utils.determine_neoepitopes(str(record.seq), epitope_length))
            for record in SeqIO.parse(input_file, "fasta")
        ]
        # De-duplicate so each epitope is scored once; sort so the file given
        # to the tool is deterministic.
        unique_epitopes = sorted({
            epitope
            for _, epitopes in records
            for epitope in epitopes.values()
        })
        if not unique_epitopes:
            return (pd.DataFrame(), 'pandas')

        df = self._run_mixmhcpred(unique_epitopes, allele, tmp_dir)
        df.rename(columns={
            'Score_bestAllele': 'score',
            '%Rank_bestAllele': 'percentile',
            'Peptide': 'peptide',
        }, inplace=True)

        # assign() returns a new frame, so no column is ever written onto a
        # filtered view of df, and a single concat replaces the quadratic
        # concat-inside-the-loop.
        frames = [
            df[df['peptide'] == epitope].assign(seq_num=seq_num, start=start, allele=allele)
            for seq_num, epitopes in records
            for start, epitope in epitopes.items()
        ]
        return (pd.concat(frames, axis=0), 'pandas')

    def _run_mixmhcpred(self, epitopes, allele, tmp_dir):
        """Run MixMHCpred on ``epitopes`` and return its output as a DataFrame.

        Every scratch file created here is removed before returning, whether
        the run succeeds or fails.
        """
        scratch_paths = []
        try:
            with tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False) as tmp_input_file:
                scratch_paths.append(tmp_input_file.name)
                tmp_input_file.writelines("{}\n".format(epitope) for epitope in epitopes)
            # Only the path is needed; the executable writes the file itself.
            with tempfile.NamedTemporaryFile('r', dir=tmp_dir, delete=False) as tmp_output_file:
                scratch_paths.append(tmp_output_file.name)
            arguments = ["MixMHCpred", "-i", tmp_input_file.name, "-o", tmp_output_file.name, "-a", allele]
            with tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False) as stderr_fh:
                scratch_paths.append(stderr_fh.name)
                try:
                    run(arguments, check=True, stdout=DEVNULL, stderr=stderr_fh)
                except Exception as err:
                    # Close before re-reading so the captured stderr is
                    # complete on disk.
                    stderr_fh.close()
                    with open(stderr_fh.name, 'r') as fh:
                        message = fh.read()
                    raise Exception("An error occurred while calling MixMHCpred:\n{}".format(message)) from err
            # The first 11 lines of the output are skipped before the header
            # row. NOTE(review): presumably the tool's banner; confirm against
            # the installed MixMHCpred version.
            return pd.read_csv(tmp_output_file.name, sep="\t", skiprows=11)
        finally:
            for path in scratch_paths:
                try:
                    os.unlink(path)
                except OSError:
                    # Best-effort cleanup: never mask the real error.
                    pass

class PRIME(MHCI):
    """Class I predictor that shells out to the ``PRIME`` executable.

    The executable is invoked by bare name, so it must be resolvable on PATH.
    """

    def valid_allele_names(self):
        """Return the allele names listed, one per line, in ``PRIME.txt``.

        The file is read from ``tools/pvacseq/iedb_alleles/class_i`` under the
        parent of this module's directory. Empty lines are dropped.
        """
        base_dir = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
        alleles_dir = os.path.join(base_dir, 'tools', 'pvacseq', 'iedb_alleles', 'class_i')
        alleles_file_name = os.path.join(alleles_dir, "PRIME.txt")
        with open(alleles_file_name, 'r') as fh:
            return list(filter(None, fh.read().split('\n')))

    def check_length_valid_for_allele(self, length, allele):
        """Return True for every length/allele combination."""
        # NOTE(review): this accepts lengths outside valid_lengths_for_allele();
        # confirm that is intended.
        return True

    def valid_lengths_for_allele(self, allele):
        """Return the epitope lengths offered for ``allele`` (8 through 14)."""
        return [8,9,10,11,12,13,14]

    def predict(self, input_file, allele, epitope_length, iedb_executable_path, iedb_retries, tmp_dir=None, log_dir=None):
        """Score every epitope of ``epitope_length`` found in ``input_file``.

        Args:
            input_file: FASTA input handed to ``SeqIO.parse``.
            allele: Allele name passed to the executable via ``-a``.
            epitope_length: Length handed to ``determine_neoepitopes``.
            iedb_executable_path: Not used by this method.
            iedb_retries: Not used by this method.
            tmp_dir: Directory for scratch files; ``None`` uses the
                ``tempfile`` default location.
            log_dir: Not used by this method.

        Returns:
            A ``(results, 'pandas')`` tuple. ``results`` holds one block of
            rows per (sequence, start position) pair, with ``seq_num``,
            ``start`` and ``allele`` columns appended to the tool's output.
            It is an empty DataFrame when the input yields no epitopes.

        Raises:
            Exception: if running the executable fails; the message carries
                whatever the tool wrote to stderr.
        """
        # Parse the FASTA once and remember each record's {start: epitope}
        # mapping, so the same data drives both the tool input and the
        # per-record result assembly.
        records = [
            (record.id, pvactools.lib.run_utils.determine_neoepitopes(str(record.seq), epitope_length))
            for record in SeqIO.parse(input_file, "fasta")
        ]
        # De-duplicate so each epitope is scored once; sort so the file given
        # to the tool is deterministic.
        unique_epitopes = sorted({
            epitope
            for _, epitopes in records
            for epitope in epitopes.values()
        })
        if not unique_epitopes:
            return (pd.DataFrame(), 'pandas')

        df = self._run_prime(unique_epitopes, allele, tmp_dir)
        df.rename(columns={
            'Score_bestAllele': 'score',
            '%Rank_bestAllele': 'percentile',
            'Peptide': 'peptide',
        }, inplace=True)

        # assign() returns a new frame, so no column is ever written onto a
        # filtered view of df, and a single concat replaces the quadratic
        # concat-inside-the-loop.
        frames = [
            df[df['peptide'] == epitope].assign(seq_num=seq_num, start=start, allele=allele)
            for seq_num, epitopes in records
            for start, epitope in epitopes.items()
        ]
        return (pd.concat(frames, axis=0), 'pandas')

    def _run_prime(self, epitopes, allele, tmp_dir):
        """Run PRIME on ``epitopes`` and return its output as a DataFrame.

        Every scratch file created here is removed before returning, whether
        the run succeeds or fails.
        """
        scratch_paths = []
        try:
            with tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False) as tmp_input_file:
                scratch_paths.append(tmp_input_file.name)
                tmp_input_file.writelines("{}\n".format(epitope) for epitope in epitopes)
            # Only the path is needed; the executable writes the file itself.
            with tempfile.NamedTemporaryFile('r', dir=tmp_dir, delete=False) as tmp_output_file:
                scratch_paths.append(tmp_output_file.name)
            arguments = ["PRIME", "-i", tmp_input_file.name, "-o", tmp_output_file.name, "-a", allele]
            with tempfile.NamedTemporaryFile('w', dir=tmp_dir, delete=False) as stderr_fh:
                scratch_paths.append(stderr_fh.name)
                try:
                    run(arguments, check=True, stdout=DEVNULL, stderr=stderr_fh)
                except Exception as err:
                    # Close before re-reading so the captured stderr is
                    # complete on disk.
                    stderr_fh.close()
                    with open(stderr_fh.name, 'r') as fh:
                        message = fh.read()
                    raise Exception("An error occurred while calling PRIME:\n{}".format(message)) from err
            # The first 11 lines of the output are skipped before the header
            # row. NOTE(review): presumably the tool's banner; confirm against
            # the installed PRIME version.
            return pd.read_csv(tmp_output_file.name, sep="\t", skiprows=11)
        finally:
            for path in scratch_paths:
                try:
                    os.unlink(path)
                except OSError:
                    # Best-effort cleanup: never mask the real error.
                    pass

class MHCnuggetsI(MHCI, MHCnuggets):
def valid_allele_names(self):
return self.valid_allele_names_for_class('class_i')
Expand Down
Loading