Skip to content

Commit cb5717c

Browse files
committed
fixed: variant count endpoint was returning a count including non-distinct variants
- Clarifies that the variants and mapped variants endpoints count distinct variant IDs
- Adds a distinct fast-path query for both endpoints when grouping is not requested
1 parent a150ebd commit cb5717c

File tree

1 file changed

+34
-14
lines changed

1 file changed

+34
-14
lines changed

src/mavedb/routers/statistics.py

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -515,18 +515,24 @@ def variant_counts(group: Optional[GroupBy] = None, db: Session = Depends(get_db
515515
Returns a dictionary of counts for the number of published and distinct variants in the database.
516516
Optionally, group the counts by the day on which the score set (and by extension, the variant) was published.
517517
"""
518-
variants = db.execute(
519-
select(PublishedVariantsMV.published_date, func.count(PublishedVariantsMV.variant_id))
518+
# Fast path: total distinct variants without per-date aggregation.
519+
if group is None:
520+
total = db.execute(select(func.count(func.distinct(PublishedVariantsMV.variant_id)))).scalar_one() # type: ignore
521+
return OrderedDict([("count", total)])
522+
523+
# Grouped path: materialize distinct counts per published_date, then roll up.
524+
per_date = db.execute(
525+
select(PublishedVariantsMV.published_date, func.count(func.distinct(PublishedVariantsMV.variant_id)))
520526
.group_by(PublishedVariantsMV.published_date)
521527
.order_by(PublishedVariantsMV.published_date)
522528
).all()
523529

524530
if group == GroupBy.month:
525-
grouped = {k: sum(c for _, c in g) for k, g in itertools.groupby(variants, lambda t: t[0].strftime("%Y-%m"))}
531+
grouped = {k: sum(c for _, c in g) for k, g in itertools.groupby(per_date, lambda t: t[0].strftime("%Y-%m"))}
526532
elif group == GroupBy.year:
527-
grouped = {k: sum(c for _, c in g) for k, g in itertools.groupby(variants, lambda t: t[0].strftime("%Y"))}
528-
else:
529-
grouped = {"count": sum(count for _, count in variants)}
533+
grouped = {k: sum(c for _, c in g) for k, g in itertools.groupby(per_date, lambda t: t[0].strftime("%Y"))}
534+
else: # Defensive fallback.
535+
grouped = {"count": sum(c for _, c in per_date)}
530536

531537
return OrderedDict(sorted(grouped.items()))
532538

@@ -540,20 +546,34 @@ def mapped_variant_counts(
540546
Optionally, group the counts by the day on which the score set (and by extension, the variant) was published.
541547
Optionally, return the count of all mapped variants, not just the current/most up to date ones.
542548
"""
543-
query = select(PublishedVariantsMV.published_date, func.count(PublishedVariantsMV.mapped_variant_id))
549+
# Fast path: total distinct mapped variants (optionally only current) without per-date aggregation.
550+
if group is None:
551+
total_stmt = select(func.count(func.distinct(PublishedVariantsMV.mapped_variant_id)))
552+
553+
if onlyCurrent:
554+
total_stmt = total_stmt.where(PublishedVariantsMV.current_mapped_variant.is_(True))
555+
556+
total = db.execute(total_stmt).scalar_one() # type: ignore
557+
return OrderedDict([("count", total)])
558+
559+
# Grouped path: materialize distinct counts per published_date, then roll up.
560+
per_date_stmt = select(
561+
PublishedVariantsMV.published_date,
562+
func.count(func.distinct(PublishedVariantsMV.mapped_variant_id)),
563+
)
544564

545565
if onlyCurrent:
546-
query = query.where(PublishedVariantsMV.current_mapped_variant.is_(True))
566+
per_date_stmt = per_date_stmt.where(PublishedVariantsMV.current_mapped_variant.is_(True))
547567

548-
variants = db.execute(
549-
query.group_by(PublishedVariantsMV.published_date).order_by(PublishedVariantsMV.published_date)
568+
per_date = db.execute(
569+
per_date_stmt.group_by(PublishedVariantsMV.published_date).order_by(PublishedVariantsMV.published_date)
550570
).all()
551571

552572
if group == GroupBy.month:
553-
grouped = {k: sum(c for _, c in g) for k, g in itertools.groupby(variants, lambda t: t[0].strftime("%Y-%m"))}
573+
grouped = {k: sum(c for _, c in g) for k, g in itertools.groupby(per_date, lambda t: t[0].strftime("%Y-%m"))}
554574
elif group == GroupBy.year:
555-
grouped = {k: sum(c for _, c in g) for k, g in itertools.groupby(variants, lambda t: t[0].strftime("%Y"))}
556-
else:
557-
grouped = {"count": sum(count for _, count in variants)}
575+
grouped = {k: sum(c for _, c in g) for k, g in itertools.groupby(per_date, lambda t: t[0].strftime("%Y"))}
576+
else: # Defensive fallback.
577+
grouped = {"count": sum(c for _, c in per_date)}
558578

559579
return OrderedDict(sorted(grouped.items()))

0 commit comments

Comments
 (0)