Skip to content

Commit 2a49bd0

Browse files
authored
Merge pull request #11 from bbi-lab/hgvs
adding support for HGVS p. strings
2 parents 93acd5c + 7229421 commit 2a49bd0

File tree

3 files changed

+14
-6
lines changed

3 files changed

+14
-6
lines changed

bin/getVariantAnnotations

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def annotateWithVEP(args, invcf):
5757
subprocess.run(["vep", "--force_overwrite", "--pick",
5858
"--cache", "--dir_cache",
5959
"/net/bbi/vol1/nobackup/external/vep/GRCh38",
60-
"--no_stats",
60+
"--no_stats", "--hgvs", "--mane_select",
6161
"-i", invcf,
6262
"-o", outfile])
6363
return outfile

bin/scoreSNVs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ def getAnnots(args, vepdir, targetlist):
101101
vepdf["amino_acid_change"] = vepdf.apply(sge_util.makeAAsub, axis=1)
102102
vepdf[["chrom", "pos"]] = vepdf["Location"].str.split(":", expand=True)
103103
vepdf["pos_id"] = vepdf["pos"] + ":" + vepdf["allele"]
104-
annotdf = pd.concat([annotdf, vepdf[["chrom", "pos", "allele", "pos_id", "amino_acid_change", "Consequence"]]])
104+
annotdf = pd.concat([annotdf, vepdf[["chrom", "pos", "allele", "pos_id", "amino_acid_change", "Consequence", "hgvs_p"]]])
105105
annotdf = annotdf.drop_duplicates()
106106
return annotdf
107107

@@ -478,7 +478,7 @@ def main():
478478
scoredf["95_ci_upper"] = scoredf["score"] + (1.96 * scoredf["standard_error"])
479479
scoredf["95_ci_lower"] = scoredf["score"] - (1.96 * scoredf["standard_error"])
480480
scoredf = scoredf.merge(annotdf[["pos_id", "Consequence",
481-
"amino_acid_change"]], on="pos_id")
481+
"amino_acid_change", "hgvs_p"]], on="pos_id")
482482

483483
scoredf["simplified_consequence"] = scoredf["Consequence"].apply(get_simplified_consequence, ensemblfile=args.ensemblfile)
484484
scoredf = scoredf.drop(columns=["Consequence"]).rename(columns={'simplified_consequence': 'consequence',
@@ -518,7 +518,7 @@ def main():
518518
scoredf = scoredf[[
519519
"chrom", "pos", "ref", "alt", "exon", "target",
520520
"consequence", "score", "standard_error", "95_ci_upper", "95_ci_lower",
521-
"amino_acid_change", "functional_consequence",
521+
"amino_acid_change", "hgvs_p", "functional_consequence",
522522
"functional_consequence_zscore", "variant_qc_flag",
523523
"snvlib_lib1", "snvlib_lib2",
524524
"D05_R1_lib1", "D05_R1_lib2",

lib/sge_util.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,23 +54,31 @@ def calcMeanPearsonR(targetfile, targetname, countsdir):
5454
return mean_corrs
5555

5656

57+
def getHGVSp(vepstring):
    '''extracts the HGVS protein (p.) string from the Extra column of a
    VEP output row

    The Extra column is a ";"-separated list of key=value pairs, and
    which pairs are present varies from row to row, so the HGVSp entry
    is looked up by its key rather than by its position in the list.

    vepstring: contents of the Extra column for one row

    returns the HGVSp value with the URL-encoded "%3D" restored to "=",
    or "" if there is no HGVSp entry or vepstring is not a string
    (e.g. a NaN from an empty cell)
    '''
    if not isinstance(vepstring, str):
        return ""
    for field in vepstring.split(";"):
        key, sep, value = field.partition("=")
        if sep and key == "HGVSp":
            # VEP escapes "=" inside values (synonymous changes end in "=")
            return value.replace("%3D", "=")
    return ""
62+
5763

5864
def _countVEPHeaderLines(vepfile, default=45):
    '''counts the leading "##" meta lines of a VEP output file so they
    can be skipped no matter which VEP flags produced the file

    falls back to default (the count that used to be hard-coded) when
    vepfile cannot be read as a plain-text path, e.g. an open file
    handle or a compressed file
    '''
    try:
        with open(vepfile) as infile:
            nlines = 0
            for line in infile:
                if not line.startswith("##"):
                    break
                nlines += 1
            return nlines
    except (TypeError, UnicodeDecodeError):
        return default


def getVEPdf(vepfile, type="snv"):
    '''reads the output of Variant Effect Predictor files, converts to
    pandas df, and returns it

    vepfile: tab-delimited VEP output; the "##" meta lines at the top
        are skipped and the following line is used as the column header
    type: "snv" or "del"
        "snv" - renames Allele to allele and splits Location
                ("chrom:pos") into chrom and an integer pos
        "del" - splits Location ("chrom:start-end") into chrom, coords,
                and integer start / end

    Both types also get an hgvs_p column parsed from the Extra column.

    returns the dataframe, or None for any other type
    '''
    if type not in ("snv", "del"):
        return None

    # the number of meta lines changes with the VEP flags in use, so it
    # is counted here instead of being hard-coded
    vepdf = pd.read_csv(vepfile, sep="\t",
                        skiprows=_countVEPHeaderLines(vepfile))

    if type == "snv":
        vepdf = vepdf.rename(columns={'Allele': 'allele'})
        vepdf[["chrom", "pos"]] = vepdf["Location"].str.split(":", expand=True)
        vepdf["pos"] = vepdf["pos"].astype(int)
    else:
        vepdf[["chrom", "coords"]] = vepdf["Location"].str.split(":", expand=True)
        vepdf[["start", "end"]] = vepdf["coords"].str.split("-", expand=True)
        vepdf["start"] = vepdf["start"].astype(int)
        vepdf["end"] = vepdf["end"].astype(int)

    vepdf["hgvs_p"] = vepdf["Extra"].apply(getHGVSp)
    return vepdf

0 commit comments

Comments
 (0)