Skip to content

Commit 31477a5

Browse files
authored
Merge pull request #13 from gavinelder/ge/feat/daily-stats
2 parents 175cdc9 + 8ff884c commit 31477a5

File tree

2 files changed

+127
-4
lines changed

2 files changed

+127
-4
lines changed

app/db.py

Lines changed: 96 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import pandas as pd
99

10+
from sqlalchemy import text
1011
from sqlmodel import create_engine, Field, select, Session, SQLModel
1112

1213
sql_url = os.getenv("DATABASE_URL")
@@ -38,6 +39,27 @@ class VisitStats(SQLModel, table=True):
3839
count: int
3940

4041

42+
class VisitDailyStats(SQLModel, table=True):
    """
    Visit counts aggregated per calendar day.

    Rows are written by a daily task that rolls up the records stored in
    multiqc_api_visits_stats.
    """

    __tablename__ = "multiqc_api_visits_daily_stats"

    # Composite primary key: the day plus every dimension the counts are
    # grouped by. One row exists per unique combination of these values.
    day: datetime.date = Field(primary_key=True)
    version_multiqc: str = Field(primary_key=True)
    version_python: str = Field(primary_key=True)
    operating_system: str = Field(primary_key=True)
    is_docker: bool = Field(primary_key=True)
    is_singularity: bool = Field(primary_key=True)
    is_conda: bool = Field(primary_key=True)
    is_ci: bool = Field(primary_key=True)
    is_uv: bool = Field(primary_key=True)

    # Number of visits recorded for this key combination.
    count: int
61+
62+
4163
class DownloadStats(SQLModel, table=True):
4264
"""
4365
Daily download statistics.
@@ -97,12 +119,12 @@ def get_download_stats(
97119
with Session(engine) as session:
98120
statement = select(DownloadStats)
99121
if start:
100-
statement.where(DownloadStats.date >= start) # type: ignore
122+
statement = statement.where(DownloadStats.date >= start) # type: ignore
101123
if end:
102-
statement.where(DownloadStats.date <= end) # type: ignore
124+
statement = statement.where(DownloadStats.date <= end) # type: ignore
103125
if limit:
104-
statement.limit(limit)
105-
statement.order_by(DownloadStats.date.desc()) # type: ignore
126+
statement = statement.limit(limit)
127+
statement = statement.order_by(DownloadStats.date.desc()) # type: ignore
106128
return session.exec(statement).all()
107129

108130

@@ -114,6 +136,76 @@ def insert_visit_stats(visit_stats: pd.DataFrame):
114136
session.commit()
115137

116138

139+
def has_daily_stats_for_date(day: datetime.date) -> bool:
    """Return True when at least one daily-stats row is stored for ``day``."""
    # A single row is enough to answer the question, hence LIMIT 1.
    lookup = select(VisitDailyStats).where(VisitDailyStats.day == day).limit(1)
    with Session(engine) as session:
        first_row = session.exec(lookup).first()
    return first_row is not None
146+
147+
148+
def aggregate_visits_for_date(target_date: datetime.date) -> int:
    """
    Aggregate visit stats for a specific date and upsert into daily stats table.

    Sums ``count`` from multiqc_api_visits_stats for all rows whose ``start``
    falls within ``[target_date, target_date + 1 day)``, grouped by every
    dimension column, and writes the result into
    multiqc_api_visits_daily_stats. If a row for the same key already exists,
    its ``count`` is overwritten with the freshly computed value.

    Args:
        target_date: The calendar day to aggregate.

    Returns:
        The row count reported by the database driver for the upsert
        statement (rows inserted or updated).
    """
    # The bound parameter is explicitly cast to DATE so that both the value
    # stored in ``day`` and the ``+ INTERVAL`` arithmetic are typed by the
    # statement itself rather than by driver-side parameter type inference.
    query = text("""
        INSERT INTO multiqc_api_visits_daily_stats (
            day,
            version_multiqc,
            version_python,
            operating_system,
            is_docker,
            is_singularity,
            is_conda,
            is_ci,
            is_uv,
            count
        )
        SELECT
            CAST(:target_date AS DATE) AS day,
            version_multiqc,
            version_python,
            operating_system,
            is_docker,
            is_singularity,
            is_conda,
            is_ci,
            is_uv,
            SUM(count) AS count
        FROM multiqc_api_visits_stats
        WHERE start >= CAST(:target_date AS DATE)
          AND start < CAST(:target_date AS DATE) + INTERVAL '1 day'
        GROUP BY
            version_multiqc,
            version_python,
            operating_system,
            is_docker,
            is_singularity,
            is_conda,
            is_ci,
            is_uv
        ON CONFLICT (
            day,
            version_multiqc,
            version_python,
            operating_system,
            is_docker,
            is_singularity,
            is_conda,
            is_ci,
            is_uv
        )
        DO UPDATE SET count = EXCLUDED.count
    """)

    # engine.begin() commits when the block exits normally and rolls back if
    # the statement raises, so no explicit commit call is needed.
    with engine.begin() as conn:
        result = conn.execute(query, {"target_date": target_date})
        # Read the row count while the connection is still open.
        affected_rows = result.rowcount
    return affected_rows
207+
208+
117209
def insert_download_stats(df: pd.DataFrame) -> pd.DataFrame:
118210
# df has "date" as an index. Re-adding it as a separate field with a type datetime
119211
df["date"] = pd.to_datetime(df.index)

app/main.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
from app import __version__, db, models
2626
from app.downloads import daily
27+
from app.db import aggregate_visits_for_date, has_daily_stats_for_date
2728

2829

2930
logger = logging.getLogger("multiqc_api")
@@ -346,6 +347,36 @@ async def update_downloads():
346347
_update_download_stats()
347348

348349

350+
def _aggregate_daily_visits():
    """
    Aggregate yesterday's visit stats into the daily stats table.

    Skips if yesterday's data has already been aggregated. A failure of the
    aggregation itself is logged (with traceback) and not re-raised.
    """
    # NOTE(review): "yesterday" is derived from the process-local date;
    # confirm this matches the timezone of the stored visit timestamps.
    yesterday = datetime.date.today() - datetime.timedelta(days=1)
    if has_daily_stats_for_date(yesterday):
        logger.info(f"Daily stats for {yesterday} already exist, skipping")
        return

    logger.info(f"Aggregating visit stats for {yesterday}...")
    try:
        rows = aggregate_visits_for_date(yesterday)
    except Exception as e:
        # logger.exception keeps the original message but also records the
        # traceback, which a plain logger.error call would discard.
        logger.exception(f"Failed to aggregate visit stats: {e}")
    else:
        logger.info(f"Successfully aggregated {rows} rows for {yesterday}")
365+
366+
367+
@app.on_event("startup")
@repeat_every(
    seconds=24 * 60 * 60,  # one day
    wait_first=True,
    logger=logger,
)
async def aggregate_daily_visits():
    """
    Scheduled task, registered at application startup, that rolls
    yesterday's visits up into the daily stats table once every 24 hours.
    """
    _aggregate_daily_visits()
378+
379+
349380
@app.post("/persist_visits")
350381
async def persist_visits_endpoint():
351382
try:

0 commit comments

Comments
 (0)