Skip to content

Commit b40dbc8

Browse files
authored
Merge pull request #140 from VariantEffect/release-2023.5.0
Release 2023.5.0
2 parents 149c101 + fe4c9b6 commit b40dbc8

File tree

18 files changed

+436
-73
lines changed

18 files changed

+436
-73
lines changed
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Runs the pytest suite on every push, once per supported Python version.
name: Run Tests (On Push)
on:
  push:

jobs:
  run-tests-3_9:
    # The ubuntu-20.04 hosted runner image has been retired by GitHub, so this job is pinned to the
    # next LTS image instead. If branch protection requires this check by its display name, update
    # the required-check name to match.
    runs-on: ubuntu-22.04
    name: Pytest on Python 3.9 / Ubuntu 22.04
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps the version as a string.
          python-version: "3.9"
          cache: 'pip'
      - run: pip install --upgrade pip
      # The extras spec is quoted so the shell never treats the brackets as a glob pattern.
      - run: pip install ".[dev,server]"
      - run: pytest tests/

  run-tests-3_10:
    runs-on: ubuntu-latest
    name: Pytest on Python 3.10
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          # Must stay quoted: an unquoted 3.10 would be parsed as the number 3.1.
          python-version: "3.10"
          cache: 'pip'
      - run: pip install --upgrade pip
      - run: pip install ".[dev,server]"
      - run: pytest tests/

  run-tests-3_11:
    runs-on: ubuntu-latest
    name: Pytest on Python 3.11
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
          cache: 'pip'
      - run: pip install --upgrade pip
      - run: pip install ".[dev,server]"
      - run: pytest tests/
44+

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ celerybeat.pid
105105
.env
106106
.env.*
107107
!.env.template
108+
!.env.dev
108109
.venv
109110
env/
110111
venv/

Dockerfile

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,40 @@
1+
FROM python:3.9 AS downloader

# Make a RUN step fail when any command in a pipeline fails, not only the last one. Without this, a
# failed download piped into tar or bgzip still exits 0 and leaves an empty or truncated file behind.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

WORKDIR /data

# Install the tools and headers needed to build htslib, samtools and bcftools, which are used below
# to compress and index the FASTA files for the genomic assemblies.
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    git \
    libbz2-dev \
    libcurl4-openssl-dev \
    libgsl0-dev \
    liblzma-dev \
    libncurses5-dev \
    libperl-dev \
    libssl-dev \
    zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Build and install htslib, samtools and bcftools, all fetched at the same ${htsversion} release.
# curl -f turns an HTTP error into a non-zero exit instead of piping an error page into tar.
# The unpacked source trees are removed in the same layer: they live under /data, and the final
# stage copies /data wholesale, so leaving them in place would ship build trees in the runtime image.
ARG htsversion=1.19
RUN curl -fL https://github.com/samtools/htslib/releases/download/${htsversion}/htslib-${htsversion}.tar.bz2 | tar xj && \
    (cd htslib-${htsversion} && ./configure --enable-plugins --with-plugin-path='$(libexecdir)/htslib:/usr/libexec/htslib' && make install) && \
    ldconfig && \
    curl -fL https://github.com/samtools/samtools/releases/download/${htsversion}/samtools-${htsversion}.tar.bz2 | tar xj && \
    (cd samtools-${htsversion} && ./configure --with-htslib=system && make install) && \
    curl -fL https://github.com/samtools/bcftools/releases/download/${htsversion}/bcftools-${htsversion}.tar.bz2 | tar xj && \
    (cd bcftools-${htsversion} && ./configure --enable-libgsl --enable-perl-filters --with-htslib=system && make install) && \
    rm -rf htslib-${htsversion} samtools-${htsversion} bcftools-${htsversion}

# Fetch and index GRCh37 and GRCh38 assemblies. These will augment seqrepo transcript sequences.
# Each download is decompressed and re-compressed with bgzip so that samtools faidx can index it.
RUN wget -O - https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_000001405.25_GRCh37.p13/GCF_000001405.25_GRCh37.p13_genomic.fna.gz | gzip -d | bgzip > GCF_000001405.25_GRCh37.p13_genomic.fna.gz
RUN wget -O - https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/all_assembly_versions/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_genomic.fna.gz | gzip -d | bgzip > GCF_000001405.39_GRCh38.p13_genomic.fna.gz
RUN samtools faidx GCF_000001405.25_GRCh37.p13_genomic.fna.gz
RUN samtools faidx GCF_000001405.39_GRCh38.p13_genomic.fna.gz
35+
136
FROM python:3.9
37+
COPY --from=downloader /data /data
238

339
WORKDIR /code
440

pyproject.toml

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,10 @@ dependencies = [
2525
"eutils~=0.6.0",
2626
"hgvs~=1.5.4",
2727
"biocommons~=0.0.0",
28-
"numpy~=1.22.3",
28+
"cdot~=0.2.21",
29+
"numpy~=1.22",
2930
"pandas~=1.4.1",
30-
"pydantic~=1.9.1",
31+
"pydantic~=1.10",
3132
"python-dotenv~=0.20.0",
3233
"SQLAlchemy~=1.4.31",
3334
]
@@ -39,20 +40,21 @@ dev = [
3940
"pre-commit",
4041
"pytest~=7.0.1",
4142
"jsonschema",
43+
"httpx~=0.26.0",
4244
]
4345
server = [
4446
"alembic~=1.7.6",
4547
"authlib~=0.15.5",
46-
"cryptography~=37.0.4",
48+
"cryptography~=41.0.6",
4749
"celery~=5.2.3",
48-
"fastapi~=0.71.0",
50+
"fastapi~=0.95.0",
4951
"orcid~=1.0.3",
5052
"psycopg2~=2.9.3",
5153
"python-jose[cryptography]~=3.3.0",
5254
"python-multipart~=0.0.5",
53-
"requests~=2.28.1",
55+
"requests~=2.31.0",
5456
"slack-sdk~=3.21.3",
55-
"starlette~=0.17.1",
57+
"starlette~=0.27.0",
5658
"uvicorn[standard]",
5759
]
5860

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
alembic~=1.7.6
22
authlib~=0.15.5
33
biocommons~=0.0.0
4+
cdot~=0.2.21
45
celery~=5.2.3
56
cryptography~=37.0.4
67
eutils~=0.6.0

settings/.env.dev

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
####################################################################################################
2+
# Environment variables for mavedb-api
3+
####################################################################################################
4+
5+
DB_DATABASE_NAME=mavedb
6+
DB_USERNAME=postgres
7+
DB_PASSWORD=postgres
8+
NCBI_API_KEY=abc
9+
10+
####################################################################################################
11+
# Environment variables for postgres
12+
####################################################################################################
13+
14+
POSTGRES_DB=mavedb
15+
POSTGRES_USER=postgres
16+
POSTGRES_PASSWORD=postgres
17+
18+
####################################################################################################
19+
# Environment variables for Slack
20+
####################################################################################################
21+
22+
SLACK_WEBHOOK_URL=

src/mavedb/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
__project__ = "mavedb-api"
2-
__version__ = "2023.4.2"
2+
__version__ = "2023.5.0"

src/mavedb/deps.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import sys
33
from typing import Generator
44

5+
from cdot.hgvs.dataproviders import RESTDataProvider, ChainedSeqFetcher, FastaSeqFetcher, SeqFetcher
56
from sqlalchemy.dialects.postgresql import JSONB as POSTGRES_JSONB
67
from sqlalchemy.types import JSON
78

@@ -17,6 +18,29 @@ def get_db() -> Generator:
1718
db.close()
1819

1920

21+
def hgvs_data_provider() -> RESTDataProvider:
    """
    Build the HGVS data provider used to resolve sequences.

    The returned RESTDataProvider is backed by a ChainedSeqFetcher that starts with a default
    SeqFetcher and is then extended, in order, with a FASTA fetcher for GRCh38 and one for GRCh37.
    Lookup priority is therefore SeqRepo first, then GRCh38, then GRCh37.

    :return: A RESTDataProvider wired to the chained sequence fetcher.
    """
    # Ordered by priority: GRCh38 is consulted before GRCh37.
    assembly_fasta_paths = (
        "/data/GCF_000001405.39_GRCh38.p13_genomic.fna.gz",
        "/data/GCF_000001405.25_GRCh37.p13_genomic.fna.gz",
    )

    chained_fetcher = ChainedSeqFetcher(SeqFetcher())

    for fasta_path in assembly_fasta_paths:
        # A missing or unreadable FASTA file is deliberately tolerated. We keep whatever fetchers
        # could be built, so sequence resolution still partly works and the test suite can run
        # without access to the large genomic files.
        try:
            chained_fetcher.seq_fetchers.append(FastaSeqFetcher(fasta_path))
        except OSError:
            continue

    return RESTDataProvider(seqfetcher=chained_fetcher)
42+
43+
2044
# if 'PYTEST_RUN_CONFIG' in os.environ:
2145
if "pytest" in sys.modules:
2246
JSONB = JSON

src/mavedb/lib/identifiers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ def reference_html(self) -> str:
149149
else:
150150
doi_str = "" if not self.preprint_doi else self.preprint_doi
151151
title = "(None)" if not self.title else self.title.strip(".")
152-
journal = "(None)" if not self.publication_journal else self.publication_journal.strip(".")
152+
journal = "(None)" if not (hasattr(self, "publication_journal") and self.publication_journal) else self.publication_journal.strip(".")
153153
year = "(Unknown year)" if not self.preprint_date else self.preprint_date.year
154154

155155
# We don't receive these fields from rxiv platforms

src/mavedb/lib/score_sets.py

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,9 @@
22

33
import numpy as np
44
import pandas as pd
5-
import json
65
from pandas.testing import assert_index_equal
7-
from sqlalchemy import func, or_, and_
8-
from sqlalchemy.orm import Session, aliased
6+
from sqlalchemy import func, or_
7+
from sqlalchemy.orm import aliased, contains_eager, joinedload, selectinload, Session
98

109
from mavedb.lib.array_comparison import assert_array_equal
1110
from mavedb.lib.exceptions import ValidationError
@@ -17,15 +16,20 @@
1716
VARIANT_SCORE_DATA,
1817
)
1918
from mavedb.lib.mave.utils import is_csv_null
19+
from mavedb.models.ensembl_offset import EnsemblOffset
2020
from mavedb.models.experiment import Experiment
21+
from mavedb.models.experiment_publication_identifier import ExperimentPublicationIdentifierAssociation
2122
from mavedb.models.experiment_set import ExperimentSet
2223
from mavedb.models.keyword import Keyword
2324
from mavedb.models.publication_identifier import PublicationIdentifier
25+
from mavedb.models.score_set_publication_identifier import ScoreSetPublicationIdentifierAssociation
2426
from mavedb.models.reference_genome import ReferenceGenome
27+
from mavedb.models.refseq_offset import RefseqOffset
2528
from mavedb.models.score_set import ScoreSet
2629
from mavedb.models.target_accession import TargetAccession
2730
from mavedb.models.target_gene import TargetGene
2831
from mavedb.models.target_sequence import TargetSequence
32+
from mavedb.models.uniprot_offset import UniprotOffset
2933
from mavedb.models.user import User
3034
from mavedb.view_models.search import ScoreSetsSearch
3135

@@ -127,7 +131,50 @@ def search_score_sets(db: Session, owner: Optional[User], search: ScoreSetsSearc
127131
)
128132

129133
score_sets: list[ScoreSet] = (
130-
query.join(ScoreSet.experiment).join(ScoreSet.target_genes).order_by(Experiment.title).all()
134+
query.join(ScoreSet.experiment)
135+
.options(
136+
contains_eager(ScoreSet.experiment).options(
137+
joinedload(Experiment.experiment_set),
138+
joinedload(Experiment.keyword_objs),
139+
joinedload(Experiment.created_by),
140+
joinedload(Experiment.modified_by),
141+
joinedload(Experiment.keyword_objs),
142+
joinedload(Experiment.doi_identifiers),
143+
joinedload(Experiment.publication_identifier_associations).joinedload(
144+
ExperimentPublicationIdentifierAssociation.publication
145+
),
146+
joinedload(Experiment.raw_read_identifiers),
147+
selectinload(Experiment.score_sets).options(
148+
joinedload(ScoreSet.keyword_objs),
149+
joinedload(ScoreSet.doi_identifiers),
150+
joinedload(ScoreSet.publication_identifier_associations).joinedload(
151+
ScoreSetPublicationIdentifierAssociation.publication
152+
),
153+
joinedload(ScoreSet.target_genes).options(
154+
joinedload(TargetGene.ensembl_offset).joinedload(EnsemblOffset.identifier),
155+
joinedload(TargetGene.refseq_offset).joinedload(RefseqOffset.identifier),
156+
joinedload(TargetGene.uniprot_offset).joinedload(UniprotOffset.identifier),
157+
joinedload(TargetGene.target_sequence).joinedload(TargetSequence.reference),
158+
joinedload(TargetGene.target_accession),
159+
),
160+
),
161+
),
162+
joinedload(ScoreSet.keyword_objs),
163+
joinedload(ScoreSet.license),
164+
joinedload(ScoreSet.doi_identifiers),
165+
joinedload(ScoreSet.publication_identifier_associations).joinedload(
166+
ScoreSetPublicationIdentifierAssociation.publication
167+
),
168+
joinedload(ScoreSet.target_genes).options(
169+
joinedload(TargetGene.ensembl_offset).joinedload(EnsemblOffset.identifier),
170+
joinedload(TargetGene.refseq_offset).joinedload(RefseqOffset.identifier),
171+
joinedload(TargetGene.uniprot_offset).joinedload(UniprotOffset.identifier),
172+
joinedload(TargetGene.target_sequence).joinedload(TargetSequence.reference),
173+
joinedload(TargetGene.target_accession),
174+
),
175+
)
176+
.order_by(Experiment.title)
177+
.all()
131178
)
132179
if not score_sets:
133180
score_sets = []

0 commit comments

Comments
 (0)