77
88import pandas as pd
99
10+ from sqlalchemy import text
1011from sqlmodel import create_engine , Field , select , Session , SQLModel
1112
1213sql_url = os .getenv ("DATABASE_URL" )
@@ -38,6 +39,27 @@ class VisitStats(SQLModel, table=True):
3839 count : int
3940
4041
class VisitDailyStats(SQLModel, table=True):
    """
    Daily aggregated visit statistics.

    Populated by a daily task that rolls up from multiqc_api_visits_stats.

    Every column except ``count`` is part of the composite primary key, so
    a row is identified by its day plus the full set of version and
    environment attributes. ``count`` holds the value stored for that
    combination.
    """

    __tablename__ = "multiqc_api_visits_daily_stats"

    # Composite primary key: the calendar day followed by each dimension.
    day: datetime.date = Field(primary_key=True)
    version_multiqc: str = Field(primary_key=True)
    version_python: str = Field(primary_key=True)
    operating_system: str = Field(primary_key=True)
    is_docker: bool = Field(primary_key=True)
    is_singularity: bool = Field(primary_key=True)
    is_conda: bool = Field(primary_key=True)
    is_ci: bool = Field(primary_key=True)
    is_uv: bool = Field(primary_key=True)
    # The only non-key column: the value recorded for the key above.
    count: int
61+
62+
4163class DownloadStats (SQLModel , table = True ):
4264 """
4365 Daily download statistics.
@@ -97,12 +119,12 @@ def get_download_stats(
97119 with Session (engine ) as session :
98120 statement = select (DownloadStats )
99121 if start :
100- statement .where (DownloadStats .date >= start ) # type: ignore
122+ statement = statement .where (DownloadStats .date >= start ) # type: ignore
101123 if end :
102- statement .where (DownloadStats .date <= end ) # type: ignore
124+ statement = statement .where (DownloadStats .date <= end ) # type: ignore
103125 if limit :
104- statement .limit (limit )
105- statement .order_by (DownloadStats .date .desc ()) # type: ignore
126+ statement = statement .limit (limit )
127+ statement = statement .order_by (DownloadStats .date .desc ()) # type: ignore
106128 return session .exec (statement ).all ()
107129
108130
@@ -114,6 +136,76 @@ def insert_visit_stats(visit_stats: pd.DataFrame):
114136 session .commit ()
115137
116138
def has_daily_stats_for_date(day: datetime.date) -> bool:
    """
    Report whether the daily stats table holds any row for ``day``.

    Returns True when at least one VisitDailyStats row has a matching
    ``day`` value, otherwise False.
    """
    # One matching row is enough to answer the question, so cap the query.
    probe = select(VisitDailyStats).where(VisitDailyStats.day == day).limit(1)

    with Session(engine) as session:
        first_match = session.exec(probe).first()

    return first_match is not None
146+
147+
def aggregate_visits_for_date(target_date: datetime.date) -> int:
    """
    Roll up one day of visit stats and upsert the result into daily stats.

    Rows of multiqc_api_visits_stats whose ``start`` falls in the half-open
    range [target_date, target_date + 1 day) are grouped by version and
    environment columns, and their ``count`` values are summed. Each group
    is inserted into multiqc_api_visits_daily_stats; when a row with the
    same key already exists, its ``count`` is overwritten with the new sum.

    Returns the number of rows inserted/updated, as reported by the
    statement's rowcount.
    """
    # NOTE(review): INTERVAL '1 day' looks like PostgreSQL syntax - confirm
    # the target database before reusing this statement elsewhere.
    upsert_statement = text("""
        INSERT INTO multiqc_api_visits_daily_stats (
            day,
            version_multiqc,
            version_python,
            operating_system,
            is_docker,
            is_singularity,
            is_conda,
            is_ci,
            is_uv,
            count
        )
        SELECT
            :target_date AS day,
            version_multiqc,
            version_python,
            operating_system,
            is_docker,
            is_singularity,
            is_conda,
            is_ci,
            is_uv,
            SUM(count) AS count
        FROM multiqc_api_visits_stats
        WHERE start >= :target_date AND start < :target_date + INTERVAL '1 day'
        GROUP BY
            version_multiqc,
            version_python,
            operating_system,
            is_docker,
            is_singularity,
            is_conda,
            is_ci,
            is_uv
        ON CONFLICT (
            day,
            version_multiqc,
            version_python,
            operating_system,
            is_docker,
            is_singularity,
            is_conda,
            is_ci,
            is_uv
        )
        DO UPDATE SET count = EXCLUDED.count
    """)
    bind_values = {"target_date": target_date}

    with engine.connect() as connection:
        outcome = connection.execute(upsert_statement, bind_values)
        # Commit explicitly before leaving the connection block.
        connection.commit()
        affected_rows = outcome.rowcount

    return affected_rows
207+
208+
117209def insert_download_stats (df : pd .DataFrame ) -> pd .DataFrame :
118210 # df has "date" as an index. Re-adding it as a separate field with a type datetime
119211 df ["date" ] = pd .to_datetime (df .index )
0 commit comments