Skip to content

Commit 44a25e5

Browse files
committed
Use published variants materialized view for statistics dashboard
1 parent 4f53e68 commit 44a25e5

File tree

3 files changed: 19 additions (+19) and 14 deletions (-14).

src/mavedb/routers/statistics.py

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,8 @@
1919
)
2020
from mavedb.models.experiment_controlled_keyword import ExperimentControlledKeywordAssociation
2121
from mavedb.models.experiment_publication_identifier import ExperimentPublicationIdentifierAssociation
22-
from mavedb.models.mapped_variant import MappedVariant
2322
from mavedb.models.publication_identifier import PublicationIdentifier
23+
from mavedb.models.published_variant import PublishedVariantsMV
2424
from mavedb.models.raw_read_identifier import RawReadIdentifier
2525
from mavedb.models.refseq_identifier import RefseqIdentifier
2626
from mavedb.models.refseq_offset import RefseqOffset
@@ -37,7 +37,6 @@
3737
from mavedb.models.uniprot_identifier import UniprotIdentifier
3838
from mavedb.models.uniprot_offset import UniprotOffset
3939
from mavedb.models.user import User
40-
from mavedb.models.variant import Variant
4140

4241
router = APIRouter(
4342
prefix="/api/v1/statistics",
@@ -484,9 +483,12 @@ def variant_counts(group: Optional[GroupBy] = None, db: Session = Depends(get_db
484483
Returns a dictionary of counts for the number of published and distinct variants in the database.
485484
Optionally, group the counts by the day on which the score set (and by extension, the variant) was published.
486485
"""
487-
query = _join_model_and_filter_unpublished(select(ScoreSet.published_date, func.count(Variant.id)), ScoreSet)
486+
variants = db.execute(
487+
select(PublishedVariantsMV.published_date, func.count(PublishedVariantsMV.variant_id))
488+
.group_by(PublishedVariantsMV.published_date)
489+
.order_by(PublishedVariantsMV.published_date)
490+
).all()
488491

489-
variants = db.execute(query.group_by(ScoreSet.published_date).order_by(ScoreSet.published_date)).all()
490492
if group == GroupBy.month:
491493
grouped = {k: sum(c for _, c in g) for k, g in itertools.groupby(variants, lambda t: t[0].strftime("%Y-%m"))}
492494
elif group == GroupBy.year:
@@ -506,17 +508,15 @@ def mapped_variant_counts(
506508
Optionally, group the counts by the day on which the score set (and by extension, the variant) was published.
507509
Optionally, return the count of all mapped variants, not just the current/most up to date ones.
508510
"""
509-
query = _join_model_and_filter_unpublished(
510-
select(ScoreSet.published_date, func.count(MappedVariant.id)).join(
511-
Variant, Variant.id == MappedVariant.variant_id
512-
),
513-
ScoreSet,
514-
)
511+
query = select(PublishedVariantsMV.published_date, func.count(PublishedVariantsMV.mapped_variant_id))
515512

516513
if onlyCurrent:
517-
query = query.where(MappedVariant.current.is_(True))
514+
query = query.where(PublishedVariantsMV.current_mapped_variant.is_(True))
515+
516+
variants = db.execute(
517+
query.group_by(PublishedVariantsMV.published_date).order_by(PublishedVariantsMV.published_date)
518+
).all()
518519

519-
variants = db.execute(query.group_by(ScoreSet.published_date).order_by(ScoreSet.published_date)).all()
520520
if group == GroupBy.month:
521521
grouped = {k: sum(c for _, c in g) for k, g in itertools.groupby(variants, lambda t: t[0].strftime("%Y-%m"))}
522522
elif group == GroupBy.year:

tests/conftest.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import os
2-
import logging
2+
import logging # noqa: F401
33
import sys
44
from concurrent import futures
55
from inspect import getsourcefile
@@ -44,7 +44,7 @@
4444
@pytest.fixture()
4545
def session(postgresql):
4646
# Un-comment this line to log all database queries:
47-
logging.getLogger("sqlalchemy.engine").setLevel(logging.INFO)
47+
# logging.getLogger("sqlalchemy.engine").setLevel(logging.INFO)
4848

4949
connection = (
5050
f"postgresql+psycopg2://{postgresql.info.user}:"

tests/routers/conftest.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
from mavedb.models.controlled_keyword import ControlledKeyword
99
from mavedb.models.contributor import Contributor
1010
from mavedb.models.enums.user_role import UserRole
11+
from mavedb.models.published_variant import PublishedVariantsMV
1112
from mavedb.models.license import License
1213
from mavedb.models.role import Role
1314
from mavedb.models.taxonomy import Taxonomy
@@ -80,6 +81,10 @@ def setup_seq_scoreset(setup_router_db, session, data_provider, client, data_fil
8081
create_mapped_variants_for_score_set(session, score_set["urn"])
8182
publish_score_set(client, score_set["urn"])
8283

84+
# Note that we have not created indexes for this view when it is generated via metadata. This differs
85+
# from the database created via alembic, which does create indexes.
86+
PublishedVariantsMV.refresh(session, False)
87+
8388

8489
@pytest.fixture
8590
def mock_publication_fetch(request, requests_mock):

0 commit comments

Comments
 (0)