Skip to content

Commit 544b130

Browse files
psincraian and claude authored
feat: group python version by minor (#24)
* feat: Group Python versions by minor version in dashboard

  Modified Python version analytics to group by minor version (e.g., 3.14) instead of patch version (e.g., 3.14.1, 3.14.2) for cleaner dashboard metrics.

  Changes:
  - Updated get_python_version_distribution() to extract and group by major.minor
  - Updated get_unique_python_versions_count() to count unique minor versions
  - Updated get_unique_users_by_dimension() to handle python_version dimension with minor version grouping

  Uses PostgreSQL regexp_replace to extract "3.14" from versions like "3.14.1", "3.14.2", etc.

* feat: Add database-agnostic Python version grouping with tests

  Added support for both PostgreSQL and SQLite databases in the logic that extracts the Python minor version. This allows tests to run on SQLite while production uses PostgreSQL.

  Changes:
  - Added _extract_minor_version() helper method in AnalyticsEventRepository
    - PostgreSQL: Uses regexp_replace for version extraction
    - SQLite: Uses substr/instr for version extraction
  - Updated get_python_version_distribution() to use helper method
  - Updated get_unique_python_versions_count() to use helper method
  - Updated get_unique_users_by_dimension() to use helper method
  - Added comprehensive test suite in test_python_version_grouping.py
    - Tests version distribution grouping
    - Tests unique version counting
    - Tests user grouping by version
    - Tests edge case of already-minor versions

  All tests pass, linting clean, type checking passes.

---------

Co-authored-by: Claude <noreply@anthropic.com>
1 parent 49d4f63 commit 544b130

File tree

2 files changed

+355
-9
lines changed

2 files changed

+355
-9
lines changed

app/src/repositories/analytics_event_repository.py

Lines changed: 56 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
from datetime import datetime
22
from typing import List, Optional, Dict, Any
3-
from sqlalchemy import select, func, desc, and_
3+
from sqlalchemy import select, func, desc, and_, case
44
from sqlalchemy.ext.asyncio import AsyncSession
55

66
from src.models.analytics_event import AnalyticsEvent
@@ -24,6 +24,43 @@ class AnalyticsEventRepository(BaseRepository[AnalyticsEvent]):
2424
def __init__(self, db: AsyncSession):
"""Set up the repository by passing ``db`` and the AnalyticsEvent model to the base class."""
2525
super().__init__(db, AnalyticsEvent)
2626

27+
def _extract_minor_version(self, version_column):
28+
"""
29+
Build a SQL expression that reduces a version string to major.minor (e.g., "3.14" from "3.14.1").
30+
31+
Uses regexp_replace when the session's dialect is 'postgresql' (also assumed when the
32+
session has no bind); every other dialect gets the substr/instr approach written for SQLite.
33+
"""
34+
# Detect database type from the session's bind; an unbound session is treated as PostgreSQL
35+
dialect_name = self.db.bind.dialect.name if self.db.bind else 'postgresql'
36+
37+
if dialect_name == 'postgresql':
38+
# PostgreSQL: replace the whole string with its leading "digits.digits" capture group
39+
return func.regexp_replace(
40+
version_column,
41+
r'^(\d+\.\d+).*$',
42+
r'\1'
43+
)
44+
else:
45+
# Other dialects (written for SQLite): use substr and instr to extract major.minor
46+
# Locate the second dot as a 1-based position in the full string, then keep what precedes it
47+
# E.g., "3.14.1" -> second "." at position 5 -> substr(version, 1, 4) -> "3.14"
48+
second_dot_pos = func.instr(
49+
func.substr(version_column, func.instr(version_column, '.') + 1),
50+
'.'
51+
) + func.instr(version_column, '.')
52+
53+
return func.substr(
54+
version_column,
55+
1,
56+
case(
57+
# A second dot exists (its position exceeds the first dot's): keep the characters before it
58+
(second_dot_pos > func.instr(version_column, '.'), second_dot_pos - 1),
59+
# Otherwise keep the whole string (already major.minor, or no dot at all)
60+
else_=func.length(version_column)
61+
)
62+
)
63+
2764
async def get_by_api_key(self, api_key: str, limit: Optional[int] = None) -> List[AnalyticsEvent]:
2865
"""Get events by API key."""
2966
query = select(AnalyticsEvent).filter(AnalyticsEvent.api_key == api_key)
@@ -57,13 +94,16 @@ async def get_stats_for_api_key(self, api_key: str, start_date: datetime,
5794
"active_days": int(stats.active_days or 0) # type: ignore[possibly-missing-attribute]
5895
}
5996

60-
async def get_python_version_distribution(self, api_keys: List[str],
61-
start_date: datetime,
97+
async def get_python_version_distribution(self, api_keys: List[str],
98+
start_date: datetime,
6299
end_date: datetime) -> List[Dict[str, Any]]:
63100
"""Get Python version distribution for given API keys."""
101+
# Extract minor version (e.g., "3.14" from "3.14.1")
102+
minor_version = self._extract_minor_version(AnalyticsEvent.python_version).label("minor_version")
103+
64104
python_stats_query = (
65105
select(
66-
AnalyticsEvent.python_version,
106+
minor_version,
67107
func.count(AnalyticsEvent.id).label("total_events"),
68108
func.count(func.distinct(AnalyticsEvent.session_id)).label("total_sessions"),
69109
)
@@ -74,14 +114,14 @@ async def get_python_version_distribution(self, api_keys: List[str],
74114
AnalyticsEvent.event_timestamp <= end_date,
75115
)
76116
)
77-
.group_by(AnalyticsEvent.python_version)
117+
.group_by(minor_version)
78118
.order_by(desc("total_events"))
79119
)
80120

81121
result = await self.db.execute(python_stats_query)
82122
return [
83123
{
84-
"python_version": row.python_version,
124+
"python_version": row.minor_version,
85125
"total_events": row.total_events,
86126
"total_sessions": row.total_sessions
87127
}
@@ -210,11 +250,14 @@ async def get_events_in_date_range_count(self, api_keys: List[str],
210250
)
211251
return result.scalar()
212252

213-
async def get_unique_python_versions_count(self, api_key: str,
253+
async def get_unique_python_versions_count(self, api_key: str,
214254
start_date: datetime) -> int:
215-
"""Get count of unique Python versions for an API key."""
255+
"""Get count of unique Python versions (by minor version) for an API key."""
256+
# Extract minor version (e.g., "3.14" from "3.14.1")
257+
minor_version = self._extract_minor_version(AnalyticsEvent.python_version)
258+
216259
result = await self.db.execute(
217-
select(func.count(func.distinct(AnalyticsEvent.python_version))).filter(
260+
select(func.count(func.distinct(minor_version))).filter(
218261
and_(
219262
AnalyticsEvent.api_key == api_key,
220263
AnalyticsEvent.event_timestamp >= start_date,
@@ -446,6 +489,10 @@ async def get_unique_users_by_dimension(
446489
# Map dimension field to model attribute
447490
dimension_column = getattr(AnalyticsEvent, dimension_field)
448491

492+
# Special handling for python_version: group by minor version
493+
if dimension_field == "python_version":
494+
dimension_column = self._extract_minor_version(dimension_column)
495+
449496
query = (
450497
select(
451498
dimension_column.label("dimension_name"),

0 commit comments

Comments
 (0)