Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 4e34041

Browse files
authored
Add a new version of the R30 phone-home metric, which removes a false impression of retention given by the old R30 metric (#10332)
Signed-off-by: Olivier Wilkinson (reivilibre) <[email protected]>
1 parent 95e47b2 commit 4e34041

File tree

6 files changed

+416
-5
lines changed

6 files changed

+416
-5
lines changed

changelog.d/10332.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add a new version of the R30 phone-home metric, which removes a false impression of retention given by the old R30 metric.

synapse/app/phone_stats_home.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,10 @@ async def phone_stats_home(hs, stats, stats_process=_stats_process):
107107
for name, count in r30_results.items():
108108
stats["r30_users_" + name] = count
109109

110+
r30v2_results = await hs.get_datastore().count_r30_users()
111+
for name, count in r30v2_results.items():
112+
stats["r30v2_users_" + name] = count
113+
110114
stats["cache_factor"] = hs.config.caches.global_factor
111115
stats["event_cache_size"] = hs.config.caches.event_cache_size
112116

synapse/storage/databases/main/metrics.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,135 @@ def _count_r30_users(txn):
316316

317317
return await self.db_pool.runInteraction("count_r30_users", _count_r30_users)
318318

319+
async def count_r30v2_users(self) -> Dict[str, int]:
    """
    Counts the number of 30 day retained users, defined as users that:
     - Appear more than once in the past 60 days
     - Have more than 30 days between the most and least recent appearances that
       occurred in the past 60 days.

    (This is the second version of this metric, hence R30'v2')

    The per-client counts apply that test to each (user, client type) pair
    separately, so a user whose qualifying visits are split across two
    clients is counted under "all" but under neither client. Visits whose
    user agent is classified as 'unknown' only contribute to "all".

    Returns:
         A mapping from client type to the number of 30-day retained users for that client.

         The dict keys are:
          - "all" (a combined number of users across any and all clients)
          - "android" (Element Android)
          - "ios" (Element iOS)
          - "electron" (Element Desktop)
          - "web" (any web application -- it's not possible to distinguish Element Web here)
    """

    def _count_r30v2_users(txn):
        one_day_in_secs = 86400
        thirty_days_in_secs = one_day_in_secs * 30
        now = int(self._clock.time())
        sixty_days_ago_in_secs = now - 2 * thirty_days_in_secs
        one_day_from_now_in_secs = now + one_day_in_secs

        # Both queries take the same three bind parameters, scaled from
        # seconds to the milliseconds that `timestamp` is compared against:
        # (window start, window end, minimum first-to-last visit span).
        query_args = (
            sixty_days_ago_in_secs * 1000,
            one_day_from_now_in_secs * 1000,
            thirty_days_in_secs * 1000,
        )

        # This is the 'per-platform' count.
        sql = """
            SELECT
                client_type,
                count(client_type)
            FROM
                (
                    SELECT
                        user_id,
                        CASE
                            WHEN
                                LOWER(user_agent) LIKE '%%riot%%' OR
                                LOWER(user_agent) LIKE '%%element%%'
                                THEN CASE
                                    WHEN
                                        LOWER(user_agent) LIKE '%%electron%%'
                                        THEN 'electron'
                                    WHEN
                                        LOWER(user_agent) LIKE '%%android%%'
                                        THEN 'android'
                                    WHEN
                                        LOWER(user_agent) LIKE '%%ios%%'
                                        THEN 'ios'
                                    ELSE 'unknown'
                                END
                            WHEN
                                LOWER(user_agent) LIKE '%%mozilla%%' OR
                                LOWER(user_agent) LIKE '%%gecko%%'
                                THEN 'web'
                            ELSE 'unknown'
                        END as client_type
                    FROM
                        user_daily_visits
                    WHERE
                        timestamp > ?
                        AND
                        timestamp < ?
                    GROUP BY
                        user_id,
                        client_type
                    HAVING
                        max(timestamp) - min(timestamp) > ?
                ) AS temp
            GROUP BY
                client_type
            ;
        """

        # We initialise all the client types to zero, so we get an explicit
        # zero if they don't appear in the query results
        results = {"ios": 0, "android": 0, "web": 0, "electron": 0}
        txn.execute(sql, query_args)

        for row in txn:
            if row[0] == "unknown":
                continue
            results[row[0]] = row[1]

        # This is the 'all users' count.
        sql = """
            SELECT COUNT(*) FROM (
                SELECT
                    1
                FROM
                    user_daily_visits
                WHERE
                    timestamp > ?
                    AND
                    timestamp < ?
                GROUP BY
                    user_id
                HAVING
                    max(timestamp) - min(timestamp) > ?
            ) AS r30_users
        """

        txn.execute(sql, query_args)
        row = txn.fetchone()
        # Defensive: treat a missing row as a count of zero.
        if row is None:
            results["all"] = 0
        else:
            results["all"] = row[0]

        return results

    return await self.db_pool.runInteraction(
        "count_r30v2_users", _count_r30v2_users
    )
447+
319448
def _get_start_of_day(self):
320449
"""
321450
Returns millisecond unixtime for start of UTC day.

0 commit comments

Comments
 (0)