Skip to content

Commit 8faeffc

Browse files
authored
Merge pull request #81 from VariantEffect/bencap/feature/60/genomic-coordinate-score-sets
Add Support for Genomic Coordinate Score Sets
2 parents 803598f + c82a66e commit 8faeffc

34 files changed

+1559
-544
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""Rename Wild Type Sequence
2+
3+
Revision ID: 194cfebabe32
4+
Revises: 44d5c568f64b
5+
Create Date: 2023-08-29 12:48:18.390567
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
11+
12+
# revision identifiers, used by Alembic.
13+
revision = "194cfebabe32"
14+
down_revision = "44d5c568f64b"
15+
branch_labels = None
16+
depends_on = None
17+
18+
19+
def upgrade():
    """Switch the wild-type sequence naming over to target-sequence naming.

    Renames the ``wild_type_sequences`` table to ``target_sequences`` and the
    ``target_genes.wt_sequence_id`` column to ``target_sequence_id``, then
    renames the id sequence, the primary-key index and the id index so that
    their names match the new table name.
    """
    op.rename_table("wild_type_sequences", "target_sequences")
    op.alter_column("target_genes", "wt_sequence_id", new_column_name="target_sequence_id")

    # The sequence and both indexes still carry the old table name at this
    # point; each one is renamed with its own raw SQL statement.
    follow_up_statements = (
        "ALTER SEQUENCE wild_type_sequences_id_seq RENAME TO target_sequences_id_seq",
        "ALTER INDEX wild_type_sequences_pkey RENAME TO target_sequences_pkey",
        "ALTER INDEX ix_wild_type_sequences_id RENAME TO ix_target_sequences_id",
    )
    for statement in follow_up_statements:
        op.execute(statement)
25+
26+
27+
def downgrade():
    """Restore the wild-type sequence naming.

    Exact inverse of ``upgrade``: renames ``target_sequences`` back to
    ``wild_type_sequences``, ``target_genes.target_sequence_id`` back to
    ``wt_sequence_id``, and gives the id sequence and both indexes their
    previous names again.
    """
    op.rename_table("target_sequences", "wild_type_sequences")
    op.alter_column("target_genes", "target_sequence_id", new_column_name="wt_sequence_id")

    # Same three dependent objects as in upgrade(), renamed in the same order.
    follow_up_statements = (
        "ALTER SEQUENCE target_sequences_id_seq RENAME TO wild_type_sequences_id_seq",
        "ALTER INDEX target_sequences_pkey RENAME TO wild_type_sequences_pkey",
        "ALTER INDEX ix_target_sequences_id RENAME TO ix_wild_type_sequences_id",
    )
    for statement in follow_up_statements:
        op.execute(statement)
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
"""Simplify Reference Genome Target Structure
2+
3+
Revision ID: 44d5c568f64b
4+
Revises: 90e7860964a2
5+
Create Date: 2023-08-24 15:20:01.208691
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
11+
12+
# revision identifiers, used by Alembic.
13+
revision = "44d5c568f64b"
14+
down_revision = "90e7860964a2"
15+
branch_labels = None
16+
depends_on = None
17+
18+
19+
def upgrade():
    """Move the reference genome link onto ``wild_type_sequences`` and drop ``reference_maps``.

    Steps:
      1. Add a nullable ``reference_id`` foreign key (to ``reference_genomes.id``)
         on ``wild_type_sequences``.
      2. Backfill it from ``reference_maps`` by way of the target gene that owns
         each sequence.
      3. Drop the now-unused ``reference_maps`` table.
    """
    op.add_column(
        "wild_type_sequences",
        sa.Column("reference_id", sa.Integer, sa.ForeignKey("reference_genomes.id"), nullable=True),
    )

    # A sequence can be reachable through several reference_maps rows. With
    # UPDATE ... FROM, PostgreSQL then uses an unpredictable one of them, so
    # exactly one candidate per sequence is selected up front: the primary map
    # if there is one, otherwise the map with the lowest id. Only the two
    # columns that are actually needed are selected.
    op.execute(
        """
        UPDATE wild_type_sequences w
        SET reference_id = l.genome_id
        FROM (
            SELECT DISTINCT ON (target_genes.wt_sequence_id)
                target_genes.wt_sequence_id,
                reference_maps.genome_id
            FROM reference_maps
            INNER JOIN target_genes ON target_genes.id = reference_maps.target_id
            WHERE target_genes.wt_sequence_id IS NOT NULL
            ORDER BY target_genes.wt_sequence_id, reference_maps.is_primary DESC, reference_maps.id
        ) AS l
        WHERE w.id = l.wt_sequence_id
        """
    )

    # reference_id is created nullable above and stays nullable, so no further
    # ALTER COLUMN is required before the old table is removed.
    op.drop_table("reference_maps")
36+
37+
38+
def downgrade():
    """Recreate ``reference_maps`` and move the reference genome link back onto it.

    Steps:
      1. Recreate the ``reference_maps`` table.
      2. Delete every target gene that has no wild-type sequence.
      3. Insert one reference map per (target gene, sequence) pair whose
         sequence has a ``reference_id``.
      4. Drop ``wild_type_sequences.reference_id``.

    WARNING: step 2 is destructive; the deleted target genes are not restored
    by running ``upgrade`` again.
    """
    op.create_table(
        "reference_maps",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("is_primary", sa.Boolean(), nullable=False),
        sa.Column("genome_id", sa.Integer(), sa.ForeignKey("reference_genomes.id"), nullable=False),
        sa.Column("target_id", sa.Integer(), sa.ForeignKey("target_genes.id"), nullable=False),
        sa.Column("creation_date", sa.Date(), nullable=False),
        sa.Column("modification_date", sa.Date(), nullable=False),
        sa.PrimaryKeyConstraint("id"),
    )

    # NOTE(review): presumably these rows are removed because the preceding
    # revision's downgrade makes wt_sequence_id NOT NULL again - confirm.
    op.execute("delete from target_genes where wt_sequence_id is NULL")

    # NOTE(review): every restored map is written with is_primary = false and
    # with today's date for both timestamps; the original values are not
    # recoverable from the simplified schema. Confirm that `false` is intended.
    op.execute(
        """
        INSERT INTO reference_maps (is_primary, genome_id, target_id, creation_date, modification_date)
        SELECT false, wild_type_sequences.reference_id, target_genes.id, current_date, current_date
        FROM target_genes JOIN wild_type_sequences on target_genes.wt_sequence_id = wild_type_sequences.id
        WHERE wild_type_sequences.reference_id is not null
        """
    )

    # No clean-up of reference_maps is needed here: target genes with a NULL
    # wt_sequence_id were deleted above, and the inner join in the INSERT
    # cannot produce a row for such a gene anyway.
    op.drop_column("wild_type_sequences", "reference_id")
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
"""Add Target Sequence Label
2+
3+
Revision ID: 60103ad1cb5b
4+
Revises: 194cfebabe32
5+
Create Date: 2023-08-29 16:04:44.620385
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
11+
12+
# revision identifiers, used by Alembic.
13+
revision = "60103ad1cb5b"
14+
down_revision = "194cfebabe32"
15+
branch_labels = None
16+
depends_on = None
17+
18+
19+
def upgrade():
    """Add an optional string ``label`` column to ``target_sequences``."""
    label_column = sa.Column("label", sa.String(), nullable=True)
    op.add_column("target_sequences", label_column)
21+
22+
23+
def downgrade():
    """Remove the ``label`` column from ``target_sequences``."""
    op.drop_column(table_name="target_sequences", column_name="label")
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
"""Add Target Accession
2+
3+
Revision ID: 90e7860964a2
4+
Revises: 22e2d92d602e
5+
Create Date: 2023-08-04 11:40:37.434740
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
11+
12+
# revision identifiers, used by Alembic.
13+
revision = "90e7860964a2"
14+
down_revision = "22e2d92d602e"
15+
branch_labels = None
16+
depends_on = None
17+
18+
19+
def upgrade():
    """Introduce ``target_accessions`` and let target genes reference one.

    Creates the ``target_accessions`` table, adds a nullable
    ``target_genes.accession_id`` foreign key pointing at it, and makes
    ``target_genes.wt_sequence_id`` nullable.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    accession_table_items = [
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("assembly", sa.String(), nullable=False),
        sa.Column("accession", sa.String(), nullable=False),
        sa.Column("creation_date", sa.Date(), nullable=False),
        sa.Column("modification_date", sa.Date(), nullable=False),
        sa.PrimaryKeyConstraint("id"),
    ]
    op.create_table("target_accessions", *accession_table_items)

    accession_fk_column = sa.Column(
        "accession_id",
        sa.Integer,
        sa.ForeignKey("target_accessions.id"),
        nullable=True,
    )
    op.add_column("target_genes", accession_fk_column)

    op.alter_column("target_genes", "wt_sequence_id", nullable=True)
    # ### end Alembic commands ###
35+
36+
37+
def downgrade():
    """Remove target accessions and make ``wt_sequence_id`` mandatory again.

    Drops ``target_genes.accession_id`` first, then the ``target_accessions``
    table, and finally marks ``target_genes.wt_sequence_id`` as NOT NULL.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column(table_name="target_genes", column_name="accession_id")
    op.drop_table("target_accessions")
    # NOTE(review): this presumably fails if any target gene still has a NULL
    # wt_sequence_id when the downgrade runs - confirm such rows are gone.
    op.alter_column(table_name="target_genes", column_name="wt_sequence_id", nullable=False)
    # ### end Alembic commands ###
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
"""Add gene name column to target accession definition
2+
3+
Revision ID: c6154dd7d9b9
4+
Revises: 60103ad1cb5b
5+
Create Date: 2023-10-04 17:00:42.960917
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
11+
12+
# revision identifiers, used by Alembic.
13+
revision = "c6154dd7d9b9"
14+
down_revision = "60103ad1cb5b"
15+
branch_labels = None
16+
depends_on = None
17+
18+
19+
def upgrade():
    """Add an optional ``gene`` column to ``target_accessions`` and make ``assembly`` nullable."""
    # ### commands auto generated by Alembic - please adjust! ###
    gene_column = sa.Column("gene", sa.String(), nullable=True)
    op.add_column("target_accessions", gene_column)
    op.alter_column(table_name="target_accessions", column_name="assembly", nullable=True)
    # ### end Alembic commands ###
24+
25+
26+
def downgrade():
    """Drop ``target_accessions.gene`` and make ``assembly`` NOT NULL again."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_column(table_name="target_accessions", column_name="gene")
    # NOTE(review): presumably fails if any accession row has a NULL assembly
    # at this point - confirm before running against populated data.
    op.alter_column(table_name="target_accessions", column_name="assembly", nullable=False)
    # ### end Alembic commands ###

docker-compose-dev.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ services:
88
command: bash -c "uvicorn mavedb.server_main:app --host 0.0.0.0 --port 8000 --reload"
99
depends_on:
1010
- db
11+
- seqrepo
1112
env_file:
1213
- settings/.env.dev
1314
environment:
@@ -17,6 +18,7 @@ services:
1718
- "8002:8000"
1819
volumes:
1920
- .:/code
21+
- mavedb-seqrepo-dev:/usr/local/share/seqrepo
2022

2123
db:
2224
image: postgres:14
@@ -27,10 +29,16 @@ services:
2729
volumes:
2830
- mavedb-data-dev:/var/lib/postgresql/data
2931

32+
seqrepo:
33+
image: biocommons/seqrepo:latest
34+
volumes:
35+
- mavedb-seqrepo-dev:/usr/local/share/seqrepo
36+
3037
# rabbitmq:
3138
# image: rabbitmq:3.8.3
3239
# ports:
3340
# - "5673:5672"
3441

3542
volumes:
3643
mavedb-data-dev:
44+
mavedb-seqrepo-dev:

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ dependencies = [
2323
"IDUtils~=1.2.0",
2424
"mavehgvs~=0.6.0",
2525
"eutils~=0.6.0",
26+
"hgvs~=1.5.4",
27+
"biocommons~=0.0.0",
2628
"numpy~=1.22.3",
2729
"pandas~=1.4.1",
2830
"pydantic~=1.9.1",

requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
alembic~=1.7.6
22
authlib~=0.15.5
3+
biocommons~=0.0.0
34
celery~=5.2.3
45
cryptography~=37.0.4
56
eutils~=0.6.0
67
fastapi~=0.71.0
78
# fastapi-oidc~=0.0.9
89
fqfa~=1.2.3
910
IDUtils~=1.2.0
11+
hgvs~=1.5.4
1012
jsonschema~=4.17.3
1113
mavehgvs~=0.6.0
1214
numpy~=1.22.3

src/mavedb/lib/exceptions.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,3 +148,9 @@ class NonexistentIdentifierError(ValueError):
148148
"""Raised when a user tries to create a publication with a non-existent identifier"""
149149

150150
pass
151+
152+
153+
class MixedTargetError(ValueError):
    """Signal that a score set was requested against a mix of target kinds.

    Raised when a user tries to create a score set whose targets are mixed.
    """

src/mavedb/lib/score_sets.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222
from mavedb.models.keyword import Keyword
2323
from mavedb.models.publication_identifier import PublicationIdentifier
2424
from mavedb.models.reference_genome import ReferenceGenome
25-
from mavedb.models.reference_map import ReferenceMap
2625
from mavedb.models.score_set import ScoreSet
26+
from mavedb.models.target_accession import TargetAccession
2727
from mavedb.models.target_gene import TargetGene
2828
from mavedb.models.user import User
2929
from mavedb.view_models.search import ScoreSetsSearch
@@ -52,8 +52,8 @@ def search_score_sets(db: Session, owner: Optional[User], search: ScoreSetsSearc
5252
ScoreSet.title.contains(lower_search_text),
5353
ScoreSet.short_description.contains(lower_search_text),
5454
ScoreSet.abstract_text.contains(lower_search_text),
55-
ScoreSet.target_gene.has(func.lower(TargetGene.name).contains(lower_search_text)),
56-
ScoreSet.target_gene.has(func.lower(TargetGene.category).contains(lower_search_text)),
55+
ScoreSet.target_genes.any(func.lower(TargetGene.name).contains(lower_search_text)),
56+
ScoreSet.target_genes.any(func.lower(TargetGene.category).contains(lower_search_text)),
5757
ScoreSet.keyword_objs.any(func.lower(Keyword.text).contains(lower_search_text)),
5858
# TODO Add: ORGANISM_NAME, UNIPROT, ENSEMBL, REFSEQ, LICENSE, plus TAX_ID if numeric
5959
ScoreSet.publication_identifiers.any(
@@ -77,19 +77,17 @@ def search_score_sets(db: Session, owner: Optional[User], search: ScoreSetsSearc
7777
)
7878

7979
if search.targets:
80-
query = query.filter(ScoreSet.target_gene.has(TargetGene.name.in_(search.targets)))
80+
query = query.filter(ScoreSet.target_genes.any(TargetGene.name.in_(search.targets)))
8181

8282
if search.target_organism_names:
8383
query = query.filter(
84-
ScoreSet.target_gene.has(
85-
TargetGene.reference_maps.any(
86-
ReferenceMap.genome.has(ReferenceGenome.organism_name.in_(search.target_organism_names))
87-
)
84+
ScoreSet.target_genes.any(
85+
TargetGene.reference.any(ReferenceGenome.organism_name.in_(search.target_organism_names))
8886
)
8987
)
9088

9189
if search.target_types:
92-
query = query.filter(ScoreSet.target_gene.has(TargetGene.category.in_(search.target_types)))
90+
query = query.filter(ScoreSet.target_genes.any(TargetGene.category.in_(search.target_types)))
9391

9492
if search.publication_identifiers:
9593
query = query.filter(
@@ -111,8 +109,15 @@ def search_score_sets(db: Session, owner: Optional[User], search: ScoreSetsSearc
111109
)
112110
)
113111

112+
if search.target_accessions:
113+
query = query.filter(
114+
ScoreSet.target_genes.any(
115+
TargetGene.target_accession.has(TargetAccession.accession.in_(search.target_accessions))
116+
)
117+
)
118+
114119
score_sets: list[ScoreSet] = (
115-
query.join(ScoreSet.experiment).join(ScoreSet.target_gene).order_by(Experiment.title).all()
120+
query.join(ScoreSet.experiment).join(ScoreSet.target_genes).order_by(Experiment.title).all()
116121
)
117122
if not score_sets:
118123
score_sets = []

0 commit comments

Comments
 (0)