Skip to content

Commit a65df9b

Browse files
committed
Rework NR verification command
1 parent 0867265 commit a65df9b

File tree

1 file changed

+50
-28
lines changed

1 file changed

+50
-28
lines changed
Lines changed: 50 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,64 +1,84 @@
11
import time
2+
23
from django.core.management.base import BaseCommand
34
from django.db.models import Count
45
from django.db import connection
6+
57
from osf.models import NotificationSubscription, NotificationSubscriptionLegacy
68

79

810
class Command(BaseCommand):
9-
help = 'Verify notification migration integrity (duplicates, invalid frequencies, counts, distribution)'
10-
'''
11+
"""
1112
Usage example:
1213
python manage.py migrate_notifications_verification
13-
python manage.py migrate_notifications_verification --duplicates --counts
14-
'''
14+
python manage.py migrate_notifications_verification --duplicates --distribution
15+
python manage.py migrate_notifications_verification --duplicates --unique-digest --output-size=100
16+
"""
17+
18+
help = 'Verify notification migration integrity (duplicates, invalid frequencies, counts and distribution)'
1519

1620
def add_arguments(self, parser):
    """Register the command-line switches that select which checks run.

    Check selectors (``--duplicates``, ``--frequencies``, ``--counts``,
    ``--distribution``) are plain boolean flags; ``--all`` enables every
    check. ``--unique-digest`` and ``--output-size`` only tune how the
    selected checks behave.

    Args:
        parser: The argparse-compatible parser supplied by Django.
    """
    # Local import: the module does not import argparse at file level.
    import argparse

    def non_negative_int(raw):
        """Parse *raw* as an int and reject negative values."""
        try:
            value = int(raw)
        except ValueError:
            raise argparse.ArgumentTypeError(f'invalid int value: {raw!r}') from None
        if value < 0:
            # handle() uses this value as a queryset slice bound, and Django
            # querysets do not support negative indexing, so fail early with
            # a clear message instead of an ORM error mid-run.
            raise argparse.ArgumentTypeError(f'must be zero or greater, got {value}')
        return value

    parser.add_argument('--all', action='store_true', default=False, help='Run all checks')
    parser.add_argument('--duplicates', action='store_true', help='Check for duplicate NotificationSubscription entries')
    parser.add_argument('--frequencies', action='store_true', help='Check message_frequency values for invalid ones')
    parser.add_argument('--counts', action='store_true', help='Compare legacy M2M total with migrated count')
    parser.add_argument('--distribution', action='store_true', help='Print breakdown summary')
    parser.add_argument('--unique-digest', action='store_true', default=False, help='Used along with --duplicates to include _is_digest field in unique_together')
    parser.add_argument('--output-size', type=non_negative_int, default=10, help='Used along with other options to set the maximum number of rows printed per check (must be zero or greater)')
2228

2329
def handle(self, *args, **options):
30+
2431
start = time.time()
2532
flags = {k for k, v in options.items() if v and k in ['duplicates', 'frequencies', 'counts', 'distribution']}
33+
run_all = options['all']
34+
output_size = options['output_size']
2635

27-
run_all = options['all'] or not flags
2836
print('\n================ Notification Migration Verification ================\n')
2937

38+
if not run_all and not flags:
39+
print('\n⚠ No options selected, command will exit ... \n')
40+
41+
# 1. Detect duplicates
3042
if run_all or 'duplicates' in flags:
31-
# 1. Detect duplicates
32-
print('1) Checking duplicate NotificationSubscription entries...')
33-
duplicates = (
34-
NotificationSubscription.objects.values(
35-
'user_id', 'content_type_id', 'object_id', 'notification_type_id'
43+
# Read the flag into a local first: reusing the enclosing quote character
# inside an f-string replacement field is a SyntaxError before Python 3.12.
unique_digest = options['unique_digest']
print(f'1) Checking duplicate NotificationSubscription entries (unique-digest:{unique_digest})...')
44+
if options['unique_digest']:
45+
duplicates = (
46+
NotificationSubscription.objects.values(
47+
'user_id', 'content_type_id', 'object_id', 'notification_type_id', '_is_digest',
48+
)
49+
.annotate(count=Count('id'))
50+
.filter(count__gt=1)
3651
)
37-
.annotate(count=Count('id'))
38-
.filter(count__gt=1)
39-
)
40-
print(f" → Duplicates found: {duplicates.count()}")
52+
else:
53+
duplicates = (
54+
NotificationSubscription.objects.values(
55+
'user_id', 'content_type_id', 'object_id', 'notification_type_id',
56+
)
57+
.annotate(count=Count('id'))
58+
.filter(count__gt=1)
59+
)
60+
print(f' → Duplicates found: {duplicates.count()}.')
4161
if duplicates.exists():
42-
print(' Sample (up to 10):')
43-
for d in duplicates[:10]:
62+
print(f' Sample (up to {output_size}):')
63+
for d in duplicates.order_by('-count')[:output_size]:
4464
print(' ', d)
4565
print(' ✔ OK' if not duplicates.exists() else ' ⚠ Needs review')
4666

67+
# 2. Invalid frequencies
4768
if run_all or 'frequencies' in flags:
48-
# 2. Invalid frequencies
4969
print('\n2) Checking invalid message_frequency values...')
5070
valid = {'none', 'daily', 'instantly'}
5171
invalid_freq = NotificationSubscription.objects.exclude(message_frequency__in=valid)
5272

53-
print(f" → Invalid frequency rows: {invalid_freq.count()}")
73+
print(f' → Invalid frequency rows: {invalid_freq.count()}')
5474
if invalid_freq.exists():
5575
print(' Sample (id, freq):')
56-
for row in invalid_freq[:10]:
57-
print(f" {row.id}{row.message_frequency}")
76+
for row in invalid_freq[:output_size]:
77+
# Separate the two values so the output matches the "(id, freq)" header.
print(f' {row.id}, {row.message_frequency}')
5878
print(' ✔ OK' if not invalid_freq.exists() else ' ⚠ Needs cleanup')
5979

80+
# 3. Compare legacy frequency-based totals vs new subscription count
6081
if run_all or 'counts' in flags:
61-
# 3. Compare legacy frequency-based totals vs new subscription count
6282
print('\n3) Validating total count migrated...')
6383
valid_subscription_ids = NotificationSubscriptionLegacy.objects.filter(event_name__in=['global_reviews', 'global_file_updated', 'file_updated']).values_list('id', flat=True)
6484
with connection.cursor() as cursor:
@@ -72,26 +92,28 @@ def handle(self, *args, **options):
7292
legacy_total_expanded = none_count + digest_count + transactional_count
7393
new_total = NotificationSubscription.objects.count()
7494

75-
print(f" Legacy M2M total: {legacy_total_expanded}")
76-
print(f" New subscriptions: {new_total}")
95+
print(f' Legacy M2M total: {legacy_total_expanded}')
96+
print(f' New subscriptions: {new_total}')
7797

7898
if legacy_total_expanded == new_total:
7999
print(' ✔ Counts match')
80100
else:
81101
diff = new_total - legacy_total_expanded
82-
print(f" ⚠ Mismatch: difference = {diff} (possibly skipped or duplicates removed)")
102+
print(f' ⚠ Mismatch: difference = {diff} (possibly skipped, duplicates removed or newly created)')
103+
104+
# Counts drift as soon as users create subscriptions after the migration.
print(' ⚠ Note: this is accurate only right after migration and before any new subscriptions are created.')
83105

84106
if run_all or 'distribution' in flags:
85107
# 4. Distribution summary
86-
print('\n4) Subscription distribution breakdown (top 30):\n')
108+
print(f'\n4) Subscription distribution breakdown (top {output_size}):\n')
87109
dist = (
88110
NotificationSubscription.objects
89111
.values('notification_type_id', 'message_frequency')
90112
.annotate(total=Count('id'))
91-
.order_by('-total')[:30]
113+
.order_by('-total')[:output_size]
92114
)
93115
for row in dist:
94116
print(' ', row)
95117

96118
elapsed = time.time() - start
97-
print(f"\n================ Verification complete in {elapsed:.2f}s ================\n")
119+
print(f'\n================ Verification complete in {elapsed:.2f}s ================\n')

0 commit comments

Comments
 (0)