Skip to content

Commit 56e36e6

Browse files
authored
Merge pull request #1 from VariantEffect/bencap/upstream-integration
Refactor vrs_map to use mavehgvs
2 parents 374672a + c078256 commit 56e36e6

File tree

18 files changed

+3862
-235
lines changed

18 files changed

+3862
-235
lines changed

.github/workflows/checks.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ jobs:
66
runs-on: ubuntu-latest
77
strategy:
88
matrix:
9-
python-version: ["3.10", "3.11", "3.12"]
9+
python-version: ["3.11", "3.12"]
1010
steps:
1111
- uses: actions/checkout@v3
1212

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ cython_debug/
161161
.idea/
162162

163163
*.pickle
164+
.vscode
164165

165166
# mapping data/output
166167
notebooks/analysis/analysis_files

.python-version

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3.11.4

Dockerfile

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
FROM --platform=x86_64 python:3.11
2+
3+
RUN apt-get update
4+
# Install the tools needed to build samtools and htslib so we can configure FASTA files for the genomic assembly.
5+
RUN apt-get clean && apt-get update && apt-get install -y \
6+
postgresql-client \
7+
build-essential \
8+
curl \
9+
git \
10+
libbz2-dev \
11+
libcurl4-openssl-dev \
12+
libgsl0-dev \
13+
liblzma-dev \
14+
libncurses5-dev \
15+
libperl-dev \
16+
libssl-dev \
17+
zlib1g-dev \
18+
&& rm -rf /var/lib/apt/lists/*
19+
20+
# download and install blat executable
21+
WORKDIR /usr/bin
22+
RUN wget https://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/blat/blat
23+
RUN chmod +x blat
24+
25+
# set dcd_mapping resources directory and download reference file
26+
WORKDIR /home/.local/share/dcd_mapping
27+
ENV DCD_MAPPING_RESOURCES_DIR=/home/.local/share/dcd_mapping
28+
RUN curl -LJO https://hgdownload.cse.ucsc.edu/goldenpath/hg38/bigZips/hg38.2bit
29+
30+
# Install samtools and htslib.
31+
ARG htsversion=1.19
32+
RUN curl -L https://github.com/samtools/htslib/releases/download/${htsversion}/htslib-${htsversion}.tar.bz2 | tar xj && \
33+
(cd htslib-${htsversion} && ./configure --enable-plugins --with-plugin-path='$(libexecdir)/htslib:/usr/libexec/htslib' && make install) && \
34+
ldconfig && \
35+
curl -L https://github.com/samtools/samtools/releases/download/${htsversion}/samtools-${htsversion}.tar.bz2 | tar xj && \
36+
(cd samtools-${htsversion} && ./configure --with-htslib=system && make install) && \
37+
curl -L https://github.com/samtools/bcftools/releases/download/${htsversion}/bcftools-${htsversion}.tar.bz2 | tar xj && \
38+
(cd bcftools-${htsversion} && ./configure --enable-libgsl --enable-perl-filters --with-htslib=system && make install)
39+
40+
RUN mkdir /usr/src/app
41+
WORKDIR /usr/src/app
42+
COPY . .
43+
44+
RUN pip install --no-cache-dir -e '.[dev,tests]'
45+
# use polars-lts-cpu to avoid issues with x86 emulation on arm machine
46+
RUN pip install --no-cache-dir -U polars-lts-cpu
47+
# install gene normalizer with pg dependencies. TODO: can the pg dependencies be specified in pyproject.toml?
48+
#RUN pip install 'gene-normalizer[pg]'
49+
ENV PYTHONUNBUFFERED=1
50+
51+
ENV PYTHONPATH="${PYTHONPATH}:/usr/src/app/src"

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,14 @@ Use `dcd-map --help` to see other available options.
4444

4545
Notebooks for manuscript data analysis and figure generation are provided within `notebooks/analysis`. See [`notebooks/analysis/README.md`](notebooks/analysis/README.md) for more information.
4646

47+
Following the installation instructions for [CoolSeqTool](https://coolseqtool.readthedocs.io/latest/install.html) and [Gene Normalizer](https://gene-normalizer.readthedocs.io/latest/install.html) should take care of the external data dependencies.
48+
49+
Note that Gene Normalizer's `pg` dependency group must be installed to make use of the PostgreSQL-based backend:
50+
51+
```shell
52+
python3 -m pip install 'gene-normalizer[pg]'
53+
```
54+
4755
## Development
4856

4957
Clone the repo

docker-compose-dev.yml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
version: "3"
2+
3+
services:
4+
app:
5+
build: .
6+
command: bash -c "tail -f /dev/null"
7+
depends_on:
8+
- db
9+
- seqrepo
10+
env_file:
11+
- settings/.env.dev
12+
environment:
13+
DB_HOST: db
14+
DB_PORT: 5432
15+
ports:
16+
- "8002:8000"
17+
volumes:
18+
- .:/usr/src/app
19+
- vrs-mapping-seqrepo-dev:/usr/local/share/seqrepo
20+
21+
db:
22+
image: postgres:14
23+
env_file:
24+
- settings/.env.dev
25+
ports:
26+
- "5434:5432"
27+
expose:
28+
- 5432
29+
volumes:
30+
- vrs-mapping-data-dev:/var/lib/postgresql/data
31+
32+
seqrepo:
33+
image: biocommons/seqrepo:2021-01-29
34+
volumes:
35+
- vrs-mapping-seqrepo-dev:/usr/local/share/seqrepo
36+
37+
volumes:
38+
vrs-mapping-data-dev:
39+
vrs-mapping-seqrepo-dev:

0 commit comments

Comments
 (0)