11"""Command to dedupe accounts."""
22
3- from difflib import SequenceMatcher
43import logging
5- from typing import Any
64
7- from django .contrib .auth import get_user_model
85from django .core .management .base import BaseCommand
9- from django .db import transaction
10- from django .db .models import Count
116
7+ from marsha .account .utils .dedupe_accounts import dedupe_accounts
128
13- logger = logging .getLogger (__name__ )
14-
15-
# pylint: disable=too-many-locals,too-many-branches
def dedupe_accounts(
    options: dict[str, Any],
) -> tuple[list[Any], dict[Any, Any], dict[Any, Any], list[Any], list[Any]]:
    """Merge user accounts that share the same email address.

    For every duplicated email, the oldest user (by ``date_joined``) is kept.
    Each later user whose social-auth uid carries the same account part has
    its social account and playlists moved to the kept user and is then
    deleted. Pairs whose account parts differ are reported as skipped.

    Args:
        options: Command options. ``options["email"]`` restricts the run to
            one address when truthy; ``options["dry_run"]`` collects the
            report without writing to the database.

    Returns:
        A tuple ``(accounts_to_delete, duped_users, organizations,
        skipped_accounts, users_to_delete)``:

        * ``accounts_to_delete``: uids of the replaced social accounts.
        * ``duped_users``: email -> list of uids, original first.
        * ``organizations``: original organization part -> new ones seen.
        * ``skipped_accounts``: ``[email, [old_uid, new_uid,
          organization_ratio, account_ratio]]`` entries.
        * ``users_to_delete``: emails of the removed duplicate users.
    """
    # pylint: disable=invalid-name
    User = get_user_model()

    if options["email"]:
        duplicates = [{"email": options["email"]}]
    else:
        duplicates = (
            User.objects.values("email")
            .annotate(count=Count("id"))
            .filter(count__gt=1)
            .order_by("email")
        )

    accounts_to_delete = []
    duped_users = {}
    organizations = {}
    skipped_accounts = []
    users_to_delete = []
    for dup in duplicates:
        email = dup["email"]
        if not email:
            continue

        logger.info("Deduping %s", email)

        users = list(User.objects.filter(email=email).order_by("date_joined"))
        # An explicit --email may match zero or one user; unpacking an empty
        # list would raise ValueError, and a single user has nothing to merge.
        if len(users) < 2:
            logger.info("No duplicate found for %s", email)
            continue

        original_user, *duplicate_users = users
        original_social = original_user.social_auth.first()
        if original_social is None:
            # Without a social account on the kept user there is no uid to
            # compare against; skip instead of failing on `.uid`.
            logger.warning("Original user for %s has no social account", email)
            continue

        for duplicate_user in duplicate_users:
            new_social = duplicate_user.social_auth.first()
            if not new_social:
                continue

            # uids are presumably "<organization>:<account>" (TODO confirm);
            # only the first two colon-separated parts are used.
            old_parts = original_social.uid.split(":")
            new_parts = new_social.uid.split(":")
            if len(old_parts) < 2 or len(new_parts) < 2:
                logger.warning(
                    "Unexpected uid format for %s: %s / %s",
                    email,
                    original_social.uid,
                    new_social.uid,
                )
                continue

            old_organization_uid, old_account_email = old_parts[:2]
            new_organization_uid, new_account_email = new_parts[:2]

            if old_account_email != new_account_email:
                # The similarity ratios are only reported for skipped pairs,
                # so they are computed here rather than for every pair.
                account_email_ratio = SequenceMatcher(
                    None, old_account_email, new_account_email
                ).ratio()
                organization_ratio = SequenceMatcher(
                    None, old_organization_uid, new_organization_uid
                ).ratio()
                skipped_accounts.append(
                    [
                        email,
                        [
                            original_social.uid,
                            new_social.uid,
                            str(organization_ratio),
                            str(account_email_ratio),
                        ],
                    ]
                )
                continue

            new_organizations = organizations.setdefault(old_organization_uid, [])
            if new_organization_uid not in new_organizations:
                new_organizations.append(new_organization_uid)

            duped_users.setdefault(original_user.email, [original_social.uid]).append(
                new_social.uid
            )
            users_to_delete.append(duplicate_user.email)
            # NOTE(review): with several duplicates for one email the original
            # uid is reported once per merged duplicate — confirm intended.
            accounts_to_delete.append(original_social.uid)

            if not options["dry_run"]:
                with transaction.atomic():
                    # Re-query so the social account currently attached to the
                    # kept user is the one removed.
                    original_user.social_auth.first().delete()
                    original_user.social_auth.set([new_social])
                    already_linked = original_user.playlists.values_list(
                        "id", flat=True
                    )
                    for playlist in duplicate_user.playlists.exclude(
                        id__in=already_linked
                    ):
                        original_user.playlists.add(playlist)
                    duplicate_user.delete()

    return (
        accounts_to_delete,
        duped_users,
        organizations,
        skipped_accounts,
        users_to_delete,
    )
10+ logger = logging .getLogger (__name__ )
11211
11312
11413class Command (BaseCommand ):
@@ -118,52 +17,13 @@ class Command(BaseCommand):
11817
def add_arguments(self, parser):
    """Register the command-line options.

    ``--email`` limits the run to a single email address; ``--dry-run``
    is a flag that defaults to ``False``. Each option is registered exactly
    once: argparse raises a conflict error on a repeated option string.
    """
    parser.add_argument("--email", type=str, help="Email to dedupe")
    parser.add_argument("--dry-run", action="store_true")
12522
def handle(self, *args, **options):
    """Run the deduplication and log a report of what was (or would be) done.

    Reads ``options["dry_run"]`` once, passes the full options dict to
    ``dedupe_accounts`` and logs the five collections it returns: skipped
    accounts, impacted organizations, impacted users and a final summary.
    """
    dry_run = options["dry_run"]
    if dry_run:
        logger.info("[DRY-RUN] No changes will be made.")

    (
        accounts_to_delete,
        duped_users,
        organizations,
        skipped_accounts,
        users_to_delete,
    ) = dedupe_accounts(options)

    section_rule = "- " * 40

    logger.info("-" * 80)
    logger.info(
        "Deduping complete. %d SSO accounts deleted, %d users deleted",
        len(accounts_to_delete),
        len(users_to_delete),
    )
    logger.info(section_rule)

    logger.info("%d accounts skipped:", len(skipped_accounts))
    for email, accounts in skipped_accounts:
        logger.info(" - %s | %s", email, " | ".join(accounts))
    logger.info(section_rule)

    logger.info("%d organizations impacted:", len(organizations))
    for org_id, new_orgs in organizations.items():
        logger.info(" - %s -> %s", org_id, " -> ".join(new_orgs))
    logger.info(section_rule)

    logger.info("%d users impacted:", len(duped_users))
    for email, accounts in duped_users.items():
        logger.info(" - %s -> %s", email, " -> ".join(accounts))
    logger.info(section_rule)

    logger.info("Summary:")
    logger.info(" %d organizations impacted", len(organizations))
    logger.info(" %d users processed", len(duped_users))
    logger.info(" %d users deleted", len(users_to_delete))
    logger.info(" %d SSO accounts deleted", len(accounts_to_delete))

    if dry_run:
        logger.info("[DRY-RUN] No changes made.")
0 commit comments