Skip to content

Commit 0d5b238

Browse files
committed
🐛(db) fix users duplicate
Some OIDC identity providers return a random value in the "sub" claim instead of a stable identifier, which created duplicate users in the database. This migration fixes the issue by re-pointing every reference from the duplicate users to a single kept user and then deleting the duplicates.
1 parent 39d0211 commit 0d5b238

File tree

2 files changed

+132
-0
lines changed

2 files changed

+132
-0
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@ and this project adheres to
99

1010
## [Unreleased]
1111

12+
## Fixed
13+
14+
- 🐛(db) fix users duplicate #316
15+
1216

1317
## [1.5.0] - 2024-10-09
1418

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# Generated by Django 5.1.1 on 2024-10-10 11:45
2+
3+
from django.db import migrations
4+
5+
# PL/pgSQL script (PostgreSQL only) that merges user accounts sharing the same
# e-mail address: references held by impress_document_access,
# impress_link_trace and impress_invitation are re-pointed to one kept account
# per e-mail, the other accounts are deleted, and rows that became duplicates
# on (document_id, user_id) are collapsed to a single row.
#
# NOTE(review): only the three tables above are re-pointed. Any other table
# holding a foreign key to impress_user is not handled here -- confirm none
# exists before running this against production data.
procedure = """
DO $$
DECLARE
    user_email TEXT;
    -- Account kept for the e-mail currently being processed.
    kept_user_id impress_user.id%TYPE;
BEGIN
    -- Step 1: Work on temporary copies (created without the unique
    -- constraints) so re-pointing references cannot violate a constraint
    -- half-way through.
    -- impress_document_access
    DROP TABLE IF EXISTS impress_document_access_tmp;
    CREATE TEMP TABLE impress_document_access_tmp AS
    SELECT * FROM impress_document_access;

    -- impress_link_trace
    DROP TABLE IF EXISTS impress_link_trace_tmp;
    CREATE TEMP TABLE impress_link_trace_tmp AS
    SELECT * FROM impress_link_trace;

    -- Step 2: Loop through each e-mail that appears more than once.
    -- NULL and blank e-mails are skipped: they do not identify a person, so
    -- accounts sharing them must not be merged together.
    FOR user_email IN
        SELECT email
        FROM impress_user
        WHERE email <> ''
        GROUP BY email
        HAVING COUNT(*) > 1
    LOOP
        -- Pick the kept account exactly once, with a deterministic order.
        -- Without ORDER BY, separate queries are free to return different
        -- rows, which could re-point references to an account that is
        -- deleted a few statements later.
        SELECT id
        INTO kept_user_id
        FROM impress_user
        WHERE email = user_email
        ORDER BY id
        LIMIT 1;

        -- Step 3: Re-point references from the duplicates to the kept account.
        -- For impress_document_access
        UPDATE impress_document_access_tmp
        SET user_id = kept_user_id
        WHERE user_id <> kept_user_id
          AND user_id IN (
              SELECT id
              FROM impress_user
              WHERE email = user_email
          );

        -- For impress_link_trace
        UPDATE impress_link_trace_tmp
        SET user_id = kept_user_id
        WHERE user_id <> kept_user_id
          AND user_id IN (
              SELECT id
              FROM impress_user
              WHERE email = user_email
          );

        -- For impress_invitation (updated in place, no temporary copy)
        UPDATE impress_invitation
        SET issuer_id = kept_user_id
        WHERE issuer_id <> kept_user_id
          AND issuer_id IN (
              SELECT id
              FROM impress_user
              WHERE email = user_email
          );

        -- Remove every account with this e-mail except the kept one.
        DELETE FROM impress_user
        WHERE email = user_email
          AND id <> kept_user_id;

        RAISE NOTICE 'Processed updates for email: %', user_email;
    END LOOP;

    -- Step 4: Remove duplicate rows from the temporary tables, keeping only
    -- one row per (document_id, user_id). Rows with a NULL user_id never
    -- compare equal and are therefore left untouched.
    -- NOTE(review): the surviving row is chosen by physical position (ctid),
    -- not by content; if duplicates differ in other columns, the values that
    -- survive are arbitrary.
    DELETE FROM impress_document_access_tmp a
    USING impress_document_access_tmp b
    WHERE a.ctid < b.ctid
      AND a.document_id = b.document_id
      AND a.user_id = b.user_id;

    DELETE FROM impress_link_trace_tmp a
    USING impress_link_trace_tmp b
    WHERE a.ctid < b.ctid
      AND a.document_id = b.document_id
      AND a.user_id = b.user_id;

    -- Step 5: Replace the content of the original tables with the cleaned-up
    -- copies. SELECT * is intentional: each copy was created from its
    -- original with SELECT *, so the column order is identical.
    TRUNCATE TABLE impress_document_access;
    INSERT INTO impress_document_access
    SELECT * FROM impress_document_access_tmp;

    TRUNCATE TABLE impress_link_trace;
    INSERT INTO impress_link_trace
    SELECT * FROM impress_link_trace_tmp;

    -- Step 6: Drop the working copies instead of leaving them around until
    -- the database session ends.
    DROP TABLE impress_document_access_tmp;
    DROP TABLE impress_link_trace_tmp;

    RAISE NOTICE 'Update and deduplication process completed.';
END $$;
"""
119+
120+
class Migration(migrations.Migration):
    """Data migration that runs the user-deduplication SQL in ``procedure``.

    The forward step deletes rows and cannot be undone, so the reverse step
    is declared as a no-op: rolling back past this migration stays possible,
    but it does not restore the removed duplicate users.
    """

    dependencies = [
        ('core', '0006_add_user_full_name_and_short_name'),
    ]

    operations = [
        # Without reverse_sql Django treats the operation as irreversible
        # and refuses to unapply it.
        migrations.RunSQL(procedure, reverse_sql=migrations.RunSQL.noop),
    ]

0 commit comments

Comments
 (0)