Skip to content

Commit 896ee94

Browse files
authored
Merge pull request #127 from griffithlab/improved_compare_junctions
Improved compare junctions
2 parents 1cce7e7 + 86da3d4 commit 896ee94

10 files changed

+652
-14
lines changed

.DS_Store

-2 KB
Binary file not shown.

Dockerfile

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,31 @@
11
################################################################################
22
##################### Set Inital Image to work from ############################
3-
FROM ubuntu:16.04
3+
4+
# work from latest LTS ubuntu release
5+
FROM ubuntu:18.04
6+
7+
# set variables
8+
ENV r_version 3.6.0
9+
10+
# run update
11+
RUN apt-get update -y && apt-get install -y \
12+
gfortran \
13+
libreadline-dev \
14+
libpcre3-dev \
15+
libcurl4-openssl-dev \
16+
build-essential \
17+
zlib1g-dev \
18+
libbz2-dev \
19+
liblzma-dev \
20+
openjdk-8-jdk \
21+
wget \
22+
libssl-dev \
23+
libxml2-dev \
24+
libnss-sss \
25+
git \
26+
build-essential \
27+
cmake \
28+
python3
429

530
################################################################################
631
##################### Add Container Labels #####################################
@@ -9,14 +34,6 @@ LABEL "Description"="Software package which integrate DNA-seq and RNA-seq data\
934
to help interpret mutations in a regulatory and splicing\
1035
context."
1136

12-
################################################################################
13-
##################### Install System Dependencies ##############################
14-
RUN apt-get update -y && apt-get install -y \
15-
git \
16-
build-essential \
17-
cmake \
18-
zlib1g-dev
19-
2037
################################################################################
2138
##################### Install Regtools #########################################
2239

@@ -36,3 +53,20 @@ RUN cd /regtools/build && make
3653

3754
# add regtools executable to path
3855
ENV PATH="/regtools/build:${PATH}"
56+
57+
################################################################################
58+
####################### Install R ##############################################
59+
60+
# change working dir
61+
WORKDIR /usr/local/bin
62+
63+
# install R
64+
RUN wget https://cran.r-project.org/src/base/R-3/R-${r_version}.tar.gz
65+
RUN tar -zxvf R-${r_version}.tar.gz
66+
WORKDIR /usr/local/bin/R-${r_version}
67+
RUN ./configure --prefix=/usr/local/ --with-x=no
68+
RUN make
69+
RUN make install
70+
71+
# install R packages
72+
RUN R --vanilla -e 'install.packages(c("data.table", "plyr", "tidyverse"), repos = "http://cran.us.r-project.org")'

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ in a regulatory and splicing context.
1616

1717
## Installation
1818

19-
Clone and install regtools by running:
19+
Clone and install regtools by running the following:
2020
```
2121
git clone https://github.com/griffithlab/regtools
2222
cd regtools/

mkdocs.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
site_name: regtools
1+
site_name: RegTools
22
pages:
33
- Home: 'index.md'
44
- Commands:
@@ -9,5 +9,6 @@ pages:
99
- 'junctions extract': 'commands/junctions-extract.md'
1010
- 'junctions annotate': 'commands/junctions-annotate.md'
1111
- 'variants annotate': 'commands/variants-annotate.md'
12+
- Example Workflow: 'workflow.md'
1213
- About: 'about.md'
1314
theme: mkdocs

scripts/annotate_spliceai_gtex.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
import csv
2+
import fnmatch
3+
4+
regtools_file = '/Users/kcotto/Downloads/junction_pvalues_significant_0.05_filtered_BH_default.tsv'
5+
spliceAI_file = '/Users/kcotto/Downloads/all_variants_sorted_spliceAI.vcf'
6+
gtex_file = '/Users/kcotto/Projects/regtools/TCGA_paper/GTEx_matrices_avg/Liver.tsv'
7+
base_file = regtools_file.split('.')[0]
8+
tmp_file = f'{base_file}_tmp.tsv'
9+
final_output_file = f'{base_file}_gtex_spliceai.tsv'
10+
11+
12+
def annotate_GTEx(regtools_input_file, gtex_file):
13+
with open(regtools_input_file, 'r') as regtools, open(gtex_file, 'r') as gtex:
14+
gtex_values = {}
15+
gtex_reader = csv.reader(gtex, delimiter='\t')
16+
regtools_reader = csv.reader(regtools, delimiter='\t')
17+
next(gtex_reader)
18+
header = next(regtools_reader)
19+
outputfile = tmp_file
20+
match_count = 0
21+
with open(outputfile, 'w') as outfile:
22+
reformatted_header = '\t'.join(header)
23+
outfile.write(f'{reformatted_header}\tGTEx_mean\tGTEx_sd\n')
24+
for line in gtex_reader:
25+
mean = line[2]
26+
stdev = line[3]
27+
gtex_key = line[0]
28+
gtex_values[gtex_key] = (mean, stdev)
29+
for line in regtools_reader:
30+
chrom = line[1]
31+
junction_start = str(int(line[2]) + 1)
32+
junction_end = str(int(line[3]) -1)
33+
regtools_key = f'{chrom}_{junction_start}_{junction_end}'
34+
output_line = '\t'.join(line)
35+
if regtools_key in gtex_values:
36+
match_count += 1
37+
print(f'match for {output_line}')
38+
outfile.write(f'{output_line}\t{gtex_values[regtools_key][0]}\t{gtex_values[regtools_key][1]}\n')
39+
else:
40+
outfile.write(f'{output_line}\tNA\tNA\n')
41+
print(match_count)
42+
43+
def annotate_spliceAI(regtools_input_file, spliceAI_file):
44+
with open(regtools_input_file, 'r') as regtools, open(spliceAI_file, 'r') as spliceAI:
45+
spliceAI_reader = csv.reader(spliceAI, delimiter='\t')
46+
regtools_reader = csv.reader(regtools, delimiter='\t')
47+
spliceAI_values = {}
48+
for line in spliceAI_reader:
49+
li = str(line[0]).strip()
50+
if not li.startswith('#'):
51+
info_field = line[7]
52+
if str(info_field).__contains__('SpliceAI'):
53+
pattern = 'SpliceAI*'
54+
extract_SF = line[7].split(';')
55+
matching = fnmatch.filter(extract_SF, pattern)
56+
spliceAI_variant_key = f'{line[0]}:{line[1]}'
57+
spliceAI_string = matching[0]
58+
if spliceAI_string.__contains__(','):
59+
split_values = spliceAI_string.split(',')
60+
spliceAI_values[spliceAI_variant_key] = split_values[0]
61+
else:
62+
spliceAI_values[spliceAI_variant_key] = spliceAI_string
63+
header = next(regtools_reader)
64+
outputfile = final_output_file
65+
with open (outputfile, 'w') as outfile:
66+
reformatted_header = '\t'.join(header)
67+
outfile.write(f'{reformatted_header}\tSpliceAI_raw\tSpliceAI_match\n')
68+
for line in regtools_reader:
69+
chrom = line[6].split(':')[0]
70+
variant = line[6].split('-')[-1]
71+
regtools_variant_key = f'{chrom}:{variant}'
72+
output_line = '\t'.join(line)
73+
if regtools_variant_key in spliceAI_values:
74+
junction_start_match = False
75+
junction_end_match = False
76+
junction_start = int(line[2])
77+
junction_end = int(line[3])
78+
spliceAI_info = spliceAI_values[regtools_variant_key]
79+
spliceAI_split = spliceAI_info.split('|')
80+
gene = spliceAI_split[1]
81+
positions = spliceAI_split[-4:]
82+
for position in positions:
83+
genomic_location = int(variant) + int(position)
84+
if junction_start == genomic_location:
85+
junction_start_match = True
86+
if junction_end == genomic_location:
87+
junction_end_match = True
88+
if junction_start_match and junction_end_match:
89+
outfile.write(f'{output_line}\t{spliceAI_info}\tjunction start and end match\n')
90+
elif junction_start_match:
91+
outfile.write(f'{output_line}\t{spliceAI_info}\tjunction start match\n')
92+
elif junction_end_match:
93+
outfile.write(f'{output_line}\t{spliceAI_info}\tjunction end match\n')
94+
else:
95+
outfile.write(f'{output_line}\t{spliceAI_info}\tNA\n')
96+
else:
97+
outfile.write(f'{output_line}\tNA\tNA\n')
98+
99+
100+
annotate_GTEx(regtools_file, gtex_file)
101+
annotate_spliceAI(tmp_file, spliceAI_file)

0 commit comments

Comments
 (0)