11import time
2+
23from django .core .management .base import BaseCommand
34from django .db .models import Count
45from django .db import connection
6+
57from osf .models import NotificationSubscription , NotificationSubscriptionLegacy
68
79
810class Command (BaseCommand ):
9- help = 'Verify notification migration integrity (duplicates, invalid frequencies, counts, distribution)'
10- '''
11+ """
1112 Usage example:
1213 python manage.py migrate_notifications_verification
13- python manage.py migrate_notifications_verification --duplicates --counts
14- '''
14+ python manage.py migrate_notifications_verification --duplicates --distribution
15+ python manage.py migrate_notifications_verification --duplicates --unique-digest --output-size=100
16+ """
17+
18+ help = 'Verify notification migration integrity (duplicates, invalid frequencies, counts and distribution)'
1519
def add_arguments(self, parser):
    """Register the command-line options for the verification command.

    ``--all`` or any of the four check flags (``--duplicates``,
    ``--frequencies``, ``--counts``, ``--distribution``) select which
    checks run. ``--unique-digest`` and ``--output-size`` adjust how the
    selected checks behave.

    Args:
        parser: The ``argparse`` parser instance handed in by Django.
    """

    def non_negative_int(raw):
        # ``handle`` uses this value as the upper bound of queryset slices;
        # a negative bound is rejected by Django at query time, so refuse it
        # here and let argparse report a normal usage error instead.
        value = int(raw)
        if value < 0:
            raise ValueError(f'expected a non-negative integer, got {raw!r}')
        return value

    # Check selection.
    parser.add_argument('--all', action='store_true', default=False, help='Run all checks')
    parser.add_argument('--duplicates', action='store_true', help='Check for duplicate NotificationSubscription entries')
    parser.add_argument('--frequencies', action='store_true', help='Check message_frequency values for invalid ones')
    parser.add_argument('--counts', action='store_true', help='Compare legacy M2M total with migrated count')
    parser.add_argument('--distribution', action='store_true', help='Print breakdown summary')

    # Modifiers for the selected checks.
    parser.add_argument(
        '--unique-digest',
        action='store_true',
        default=False,
        help='Used along with --duplicates to include _is_digest field in unique_together',
    )
    parser.add_argument(
        '--output-size',
        type=non_negative_int,
        default=10,
        help='Used along with other options to set the number of found duplicates for output',
    )
2228
2329 def handle (self , * args , ** options ):
30+
2431 start = time .time ()
2532 flags = {k for k , v in options .items () if v and k in ['duplicates' , 'frequencies' , 'counts' , 'distribution' ]}
33+ run_all = options ['all' ]
34+ output_size = options ['output_size' ]
2635
27- run_all = options ['all' ] or not flags
2836 print ('\n ================ Notification Migration Verification ================\n ' )
2937
38+ if not run_all and not flags :
39+ print ('\n ⚠ No options selected, command will exit ... \n ' )
40+
41+ # 1. Detect duplicates
3042 if run_all or 'duplicates' in flags :
31- # 1. Detect duplicates
32- print ('1) Checking duplicate NotificationSubscription entries...' )
33- duplicates = (
34- NotificationSubscription .objects .values (
35- 'user_id' , 'content_type_id' , 'object_id' , 'notification_type_id'
43+ print (f'1) Checking duplicate NotificationSubscription entries (unique-digest:{ options ['unique_digest' ]} )...' )
44+ if options ['unique_digest' ]:
45+ duplicates = (
46+ NotificationSubscription .objects .values (
47+ 'user_id' , 'content_type_id' , 'object_id' , 'notification_type_id' , '_is_digest' ,
48+ )
49+ .annotate (count = Count ('id' ))
50+ .filter (count__gt = 1 )
3651 )
37- .annotate (count = Count ('id' ))
38- .filter (count__gt = 1 )
39- )
40- print (f" → Duplicates found: { duplicates .count ()} " )
52+ else :
53+ duplicates = (
54+ NotificationSubscription .objects .values (
55+ 'user_id' , 'content_type_id' , 'object_id' , 'notification_type_id' ,
56+ )
57+ .annotate (count = Count ('id' ))
58+ .filter (count__gt = 1 )
59+ )
60+ print (f' → Duplicates found: { duplicates .count ()} .' )
4161 if duplicates .exists ():
42- print (' Sample (up to 10 ):' )
43- for d in duplicates [: 10 ]:
62+ print (f ' Sample (up to { output_size } ):' )
63+ for d in duplicates . order_by ( '-count' )[: output_size ]:
4464 print (' ' , d )
4565 print (' ✔ OK' if not duplicates .exists () else ' ⚠ Needs review' )
4666
67+ # 2. Invalid frequencies
4768 if run_all or 'frequencies' in flags :
48- # 2. Invalid frequencies
4969 print ('\n 2) Checking invalid message_frequency values...' )
5070 valid = {'none' , 'daily' , 'instantly' }
5171 invalid_freq = NotificationSubscription .objects .exclude (message_frequency__in = valid )
5272
53- print (f" → Invalid frequency rows: { invalid_freq .count ()} " )
73+ print (f' → Invalid frequency rows: { invalid_freq .count ()} ' )
5474 if invalid_freq .exists ():
5575 print (' Sample (id, freq):' )
56- for row in invalid_freq [:10 ]:
57- print (f" { row .id } → { row .message_frequency } " )
76+ for row in invalid_freq [:output_size ]:
77+ print (f' { row .id } → { row .message_frequency } ' )
5878 print (' ✔ OK' if not invalid_freq .exists () else ' ⚠ Needs cleanup' )
5979
80+ # 3. Compare legacy frequency-based totals vs new subscription count
6081 if run_all or 'counts' in flags :
61- # 3. Compare legacy frequency-based totals vs new subscription count
6282 print ('\n 3) Validating total count migrated...' )
6383 valid_subscription_ids = NotificationSubscriptionLegacy .objects .filter (event_name__in = ['global_reviews' , 'global_file_updated' , 'file_updated' ]).values_list ('id' , flat = True )
6484 with connection .cursor () as cursor :
@@ -72,26 +92,28 @@ def handle(self, *args, **options):
7292 legacy_total_expanded = none_count + digest_count + transactional_count
7393 new_total = NotificationSubscription .objects .count ()
7494
75- print (f" Legacy M2M total: { legacy_total_expanded } " )
76- print (f" New subscriptions: { new_total } " )
95+ print (f' Legacy M2M total: { legacy_total_expanded } ' )
96+ print (f' New subscriptions: { new_total } ' )
7797
7898 if legacy_total_expanded == new_total :
7999 print (' ✔ Counts match' )
80100 else :
81101 diff = new_total - legacy_total_expanded
82- print (f" ⚠ Mismatch: difference = { diff } (possibly skipped or duplicates removed)" )
102+ print (f' ⚠ Mismatch: difference = { diff } (possibly skipped, duplicates removed or newly created)' )
103+
104+ print (' ⚠ Note: this is accurate only right after migration and before any new subscriptions are created.)' )
83105
84106 if run_all or 'distribution' in flags :
85107 # 4. Distribution summary
86- print ('\n 4) Subscription distribution breakdown (top 30 ):\n ' )
108+ print (f '\n 4) Subscription distribution breakdown (top { output_size } ):\n ' )
87109 dist = (
88110 NotificationSubscription .objects
89111 .values ('notification_type_id' , 'message_frequency' )
90112 .annotate (total = Count ('id' ))
91- .order_by ('-total' )[:30 ]
113+ .order_by ('-total' )[:output_size ]
92114 )
93115 for row in dist :
94116 print (' ' , row )
95117
96118 elapsed = time .time () - start
97- print (f" \n ================ Verification complete in { elapsed :.2f} s ================\n " )
119+ print (f' \n ================ Verification complete in { elapsed :.2f} s ================\n ' )
0 commit comments