11import time
2+
23from django .core .management .base import BaseCommand
34from django .db .models import Count
45from django .db import connection
6+
57from osf .models import NotificationSubscription , NotificationSubscriptionLegacy
68
79
class Command(BaseCommand):
    """Verify the integrity of migrated notification subscriptions.

    The individual checks (duplicates, invalid frequencies, counts and
    distribution) are selected through command-line flags.

    Usage example:
        python manage.py migrate_notifications_verification
        python manage.py migrate_notifications_verification --duplicates --distribution
        python manage.py migrate_notifications_verification --duplicates --unique-digest --output-size=100
    """

    # Short description stored on the command class; kept separate from the
    # docstring above, which holds the usage examples.
    help = 'Verify notification migration integrity (duplicates, invalid frequencies, counts and distribution)'
1518
1619 def add_arguments (self , parser ):
20+ parser .add_argument ('--all' , action = 'store_true' , default = False , help = 'Run all checks' )
1721 parser .add_argument ('--duplicates' , action = 'store_true' , help = 'Check for duplicate NotificationSubscription entries' )
1822 parser .add_argument ('--frequencies' , action = 'store_true' , help = 'Check message_frequency values for invalid ones' )
1923 parser .add_argument ('--counts' , action = 'store_true' , help = 'Compare legacy M2M total with migrated count' )
2024 parser .add_argument ('--distribution' , action = 'store_true' , help = 'Print breakdown summary' )
21- parser .add_argument ('--all' , action = 'store_true' , help = 'Run all checks' )
25+ parser .add_argument ('--unique-digest' , action = 'store_true' , default = False , help = 'Used along with --duplicates to include _is_digest field in unique_together' )
26+ parser .add_argument ('--output-size' , type = int , default = 10 , help = 'Used along with other options to set the number of found duplicates for output' )
2227
2328 def handle (self , * args , ** options ):
29+
2430 start = time .time ()
2531 flags = {k for k , v in options .items () if v and k in ['duplicates' , 'frequencies' , 'counts' , 'distribution' ]}
32+ run_all = options ['all' ]
33+ output_size = options ['output_size' ]
2634
27- run_all = options ['all' ] or not flags
2835 print ('\n ================ Notification Migration Verification ================\n ' )
2936
37+ if not run_all and not flags :
38+ print ('\n ⚠ No options selected, command will exit ... \n ' )
39+
40+ # 1. Detect duplicates
3041 if run_all or 'duplicates' in flags :
31- # 1. Detect duplicates
32- print ('1) Checking duplicate NotificationSubscription entries...' )
33- duplicates = (
34- NotificationSubscription .objects .values (
35- 'user_id' , 'content_type_id' , 'object_id' , 'notification_type_id'
42+ print (f"1) Checking duplicate NotificationSubscription entries (unique-digest:{ options ["unique_digest" ]} )..." )
43+ if options ['unique_digest' ]:
44+ duplicates = (
45+ NotificationSubscription .objects .values (
46+ 'user_id' , 'content_type_id' , 'object_id' , 'notification_type_id' , '_is_digest' ,
47+ )
48+ .annotate (count = Count ('id' ))
49+ .filter (count__gt = 1 )
3650 )
37- .annotate (count = Count ('id' ))
38- .filter (count__gt = 1 )
39- )
40- print (f" → Duplicates found: { duplicates .count ()} " )
51+ else :
52+ duplicates = (
53+ NotificationSubscription .objects .values (
54+ 'user_id' , 'content_type_id' , 'object_id' , 'notification_type_id' ,
55+ )
56+ .annotate (count = Count ('id' ))
57+ .filter (count__gt = 1 )
58+ )
59+ print (f" → Duplicates found: { duplicates .count ()} ." )
4160 if duplicates .exists ():
42- print (' Sample (up to 10):' )
43- for d in duplicates [: 10 ]:
61+ print (f" Sample (up to { output_size } ):" )
62+ for d in duplicates . order_by ( '-count' )[: output_size ]:
4463 print (' ' , d )
4564 print (' ✔ OK' if not duplicates .exists () else ' ⚠ Needs review' )
4665
66+ # 2. Invalid frequencies
4767 if run_all or 'frequencies' in flags :
48- # 2. Invalid frequencies
4968 print ('\n 2) Checking invalid message_frequency values...' )
5069 valid = {'none' , 'daily' , 'instantly' }
5170 invalid_freq = NotificationSubscription .objects .exclude (message_frequency__in = valid )
5271
5372 print (f" → Invalid frequency rows: { invalid_freq .count ()} " )
5473 if invalid_freq .exists ():
5574 print (' Sample (id, freq):' )
56- for row in invalid_freq [:10 ]:
75+ for row in invalid_freq [:output_size ]:
5776 print (f" { row .id } → { row .message_frequency } " )
5877 print (' ✔ OK' if not invalid_freq .exists () else ' ⚠ Needs cleanup' )
5978
79+ # 3. Compare legacy frequency-based totals vs new subscription count
6080 if run_all or 'counts' in flags :
61- # 3. Compare legacy frequency-based totals vs new subscription count
6281 print ('\n 3) Validating total count migrated...' )
6382 valid_subscription_ids = NotificationSubscriptionLegacy .objects .filter (event_name__in = ['global_reviews' , 'global_file_updated' , 'file_updated' ]).values_list ('id' , flat = True )
6483 with connection .cursor () as cursor :
@@ -79,16 +98,18 @@ def handle(self, *args, **options):
7998 print (' ✔ Counts match' )
8099 else :
81100 diff = new_total - legacy_total_expanded
82- print (f" ⚠ Mismatch: difference = { diff } (possibly skipped or duplicates removed)" )
101+ print (f" ⚠ Mismatch: difference = { diff } (possibly skipped, duplicates removed or newly created)" )
102+
103+ print (' ⚠ Note: this is accurate only right after migration and before any new subscriptions are created.)' )
83104
84105 if run_all or 'distribution' in flags :
85106 # 4. Distribution summary
86- print (' \n 4) Subscription distribution breakdown (top 30 ):\n ' )
107+ print (f" \n 4) Subscription distribution breakdown (top { output_size } ):\n " )
87108 dist = (
88109 NotificationSubscription .objects
89110 .values ('notification_type_id' , 'message_frequency' )
90111 .annotate (total = Count ('id' ))
91- .order_by ('-total' )[:30 ]
112+ .order_by ('-total' )[:output_size ]
92113 )
93114 for row in dist :
94115 print (' ' , row )
0 commit comments