Skip to content

Commit 6d3d8a3

Browse files
committed
✨(backend) add dedupe_accounts management command
When an organization changes its UID, and the user logs in, a new account is made. This command should delete duplicated accounts.
1 parent 6031fea commit 6d3d8a3

File tree

4 files changed

+437
-0
lines changed

4 files changed

+437
-0
lines changed

src/backend/marsha/account/management/__init__.py

Whitespace-only changes.

src/backend/marsha/account/management/commands/__init__.py

Whitespace-only changes.
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
"""Command to dedupe accounts."""
2+
3+
from difflib import SequenceMatcher
4+
import logging
5+
from typing import Any
6+
7+
from django.contrib.auth import get_user_model
8+
from django.core.management.base import BaseCommand
9+
from django.db import transaction
10+
from django.db.models import Count
11+
12+
13+
logger = logging.getLogger(__name__)
14+
15+
16+
# pylint: disable=too-many-locals
17+
def dedupe_accounts(
    options: dict[str, Any],
) -> tuple[list[Any], dict[Any, Any], dict[Any, Any], list[Any], list[Any]]:
    """Merge users that share an email into the oldest one.

    For each duplicated email (or only ``options["email"]`` when it is set), the
    users are ordered by ``date_joined``; the first is kept and every later user
    is compared to it through the first social-auth entry of each. Social-auth
    uids are read as ``"<organization uid>:<account email>"``.

    - When the account-email parts differ, the pair is reported in the skipped
      list and nothing is changed.
    - When they match, the pair is recorded and, unless ``options["dry_run"]``
      is truthy, the kept user's first social-auth entry is deleted, the
      duplicate's entry is attached to the kept user, the duplicate's playlists
      that the kept user lacks are added to it, and the duplicate is deleted.

    Emails that cannot be processed (fewer than two users, kept user without a
    social-auth entry, uid without a ``":"``) are logged and left untouched.

    Args:
        options: command options; reads the ``"email"`` and ``"dry_run"`` keys.

    Returns:
        A 5-tuple ``(accounts_to_delete, duped_users, organizations,
        skipped_accounts, users_to_delete)``:

        - ``accounts_to_delete``: kept-user social uid, once per merged pair.
        - ``duped_users``: email -> ``[kept uid, merged uid, ...]``.
        - ``organizations``: old organization uid -> list of new ones.
        - ``skipped_accounts``: ``[email, [kept uid, other uid,
          organization ratio, account-email ratio]]`` entries (ratios as str).
        - ``users_to_delete``: email of each merged duplicate user.
    """
    # pylint: disable=invalid-name
    User = get_user_model()

    if options["email"]:
        duplicates = [{"email": options["email"]}]
    else:
        duplicates = (
            User.objects.values("email")
            .annotate(count=Count("id"))
            .filter(count__gt=1)
            .order_by("email")
        )

    accounts_to_delete = []
    duped_users = {}
    organizations = {}
    skipped_accounts = []
    users_to_delete = []
    for dup in duplicates:
        email = dup["email"]
        if not email:
            continue

        logger.info("Deduping %s", email)

        users = list(User.objects.filter(email=email).order_by("date_joined"))
        if len(users) < 2:
            # Reachable through --email with an unknown or unique address;
            # unpacking an empty list below would raise ValueError.
            logger.info("No duplicated user found for %s", email)
            continue

        original_user, *duplicate_users = users
        original_social = original_user.social_auth.first()
        if original_social is None:
            # Nothing to compare the duplicates against.
            logger.warning(
                "Skipping %s: the oldest user has no SSO account", email
            )
            continue

        # The kept account's uid does not change inside the loop: parse it once.
        original_parts = _split_uid(original_social.uid)
        if original_parts is None:
            logger.warning(
                "Skipping %s: unexpected SSO uid format %s",
                email,
                original_social.uid,
            )
            continue
        old_organization_uid, old_account_email = original_parts

        # NOTE(review): `original_social` is not refreshed after a merge, so with
        # more than two users per email later pairs are still compared to (and
        # reported against) the first uid - confirm this is intended.
        for duplicate_user in duplicate_users:
            new_social = duplicate_user.social_auth.first()
            if not new_social:
                continue

            new_parts = _split_uid(new_social.uid)
            if new_parts is None:
                logger.warning(
                    "Skipping %s: unexpected SSO uid format %s",
                    email,
                    new_social.uid,
                )
                continue
            new_organization_uid, new_account_email = new_parts

            if old_account_email != new_account_email:
                # The similarity ratios are only reported for skipped pairs,
                # so they are only computed here.
                account_email_ratio = SequenceMatcher(
                    None, old_account_email, new_account_email
                ).ratio()
                organization_ratio = SequenceMatcher(
                    None, old_organization_uid, new_organization_uid
                ).ratio()
                skipped_accounts.append(
                    [
                        email,
                        [
                            original_social.uid,
                            new_social.uid,
                            str(organization_ratio),
                            str(account_email_ratio),
                        ],
                    ]
                )
                continue

            new_organizations = organizations.setdefault(old_organization_uid, [])
            if new_organization_uid not in new_organizations:
                new_organizations.append(new_organization_uid)

            duped_users.setdefault(
                original_user.email, [original_social.uid]
            ).append(new_social.uid)
            users_to_delete.append(duplicate_user.email)
            accounts_to_delete.append(original_social.uid)

            if not options["dry_run"]:
                # All-or-nothing per merged pair.
                with transaction.atomic():
                    original_user.social_auth.first().delete()
                    original_user.social_auth.set([new_social])
                    for playlist in duplicate_user.playlists.exclude(
                        id__in=original_user.playlists.values_list("id", flat=True)
                    ):
                        original_user.playlists.add(playlist)
                    duplicate_user.delete()

    return (
        accounts_to_delete,
        duped_users,
        organizations,
        skipped_accounts,
        users_to_delete,
    )


def _split_uid(uid: str):
    """Return ``(organization_uid, account_email)`` from a social-auth uid.

    The two values are the first and second ``":"``-separated fields of ``uid``.
    Returns ``None`` when ``uid`` contains no ``":"``.
    """
    parts = uid.split(":")
    if len(parts) < 2:
        return None
    return parts[0], parts[1]
112+
113+
114+
class Command(BaseCommand):
    """Management command that runs the account deduplication and logs a report."""

    help = "Merge duplicate SAML accounts created with the same email"

    def add_arguments(self, parser):
        """Declare the ``--dry-run`` flag and the optional ``--email`` option."""
        parser.add_argument("--dry-run", action="store_true")
        parser.add_argument(
            "--email",
            type=str,
            help="Email to dedupe (for testing purposes)",
        )

    def handle(self, *args, **options):
        """Call ``dedupe_accounts`` with the options and log what it returned."""
        dry_run = options["dry_run"]
        section_break = "- " * 40

        if dry_run:
            logger.info("[DRY-RUN] No changes will be made.")

        report = dedupe_accounts(options)
        deleted_sso, merged_users, moved_orgs, skipped, deleted_users = report

        deleted_sso_count = len(deleted_sso)
        deleted_user_count = len(deleted_users)
        org_count = len(moved_orgs)
        merged_count = len(merged_users)

        # Headline totals.
        logger.info("-" * 80)
        logger.info(
            "Deduping complete. %d SSO accounts deleted, %d users deleted",
            deleted_sso_count,
            deleted_user_count,
        )
        logger.info(section_break)

        # Pairs left untouched, with their uids and similarity ratios.
        logger.info("%d accounts skipped:", len(skipped))
        for address, details in skipped:
            logger.info(" - %s | %s", address, " | ".join(details))
        logger.info(section_break)

        # Old organization uid followed by the uids that replaced it.
        logger.info("%d organizations impacted:", org_count)
        for old_org, replacements in moved_orgs.items():
            logger.info(" - %s -> %s", old_org, " -> ".join(replacements))
        logger.info(section_break)

        # Per-email chain of SSO uids that were merged.
        logger.info("%d users impacted:", merged_count)
        for address, uid_chain in merged_users.items():
            logger.info(" - %s -> %s", address, " -> ".join(uid_chain))
        logger.info(section_break)

        logger.info("Summary:")
        logger.info(" %d organizations impacted", org_count)
        logger.info(" %d users processed", merged_count)
        logger.info(" %d users deleted", deleted_user_count)
        logger.info(" %d SSO accounts deleted", deleted_sso_count)

        if dry_run:
            logger.info("[DRY-RUN] No changes made.")

0 commit comments

Comments
 (0)