Skip to content

Commit d7f3054

Browse files
committed
syn2mas: Migrate threepids to MAS (#3878)
* Add a table to hold unsupported threepids * Migrate threepids from Synapse to MAS
1 parent b88da68 commit d7f3054

9 files changed

+374
-12
lines changed
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
-- Copyright 2025 New Vector Ltd.
2+
--
3+
-- SPDX-License-Identifier: AGPL-3.0-only
4+
-- Please see LICENSE in the repository root for full details.
5+
6+
7+
8+
-- Tracks third-party ID associations that have been verified but are
9+
-- not currently supported by MAS.
10+
-- This is currently used when importing third-party IDs from Synapse,
11+
-- which historically could verify at least phone numbers.
12+
-- E-mail associations will not be stored in this table because those are natively
13+
-- supported by MAS; see the `user_emails` table.
14+
15+
CREATE TABLE user_unsupported_third_party_ids(
16+
-- The owner of the third-party ID association
17+
user_id UUID NOT NULL
18+
REFERENCES users(user_id) ON DELETE CASCADE,
19+
20+
-- What type of association is this?
21+
medium TEXT NOT NULL,
22+
23+
-- The address of the associated ID, e.g. a phone number or other identifier.
24+
address TEXT NOT NULL,
25+
26+
-- When the association was created
27+
created_at TIMESTAMP WITH TIME ZONE NOT NULL,
28+
29+
PRIMARY KEY (user_id, medium, address)
30+
);

crates/syn2mas/.sqlx/query-b11590549fdd4cdcd36c937a353b5b37ab50db3505712c35610b822cda322b5b.json

Lines changed: 17 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/syn2mas/.sqlx/query-dfbd462f7874d3dae551f2a0328a853a8a7efccdc20b968d99d8c18deda8dd00.json

Lines changed: 17 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/syn2mas/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,14 @@ rand.workspace = true
2727
uuid = "1.10.0"
2828
ulid = { workspace = true, features = ["uuid"] }
2929

30+
mas-config.workspace = true
31+
3032
[dev-dependencies]
3133
mas-storage-pg.workspace = true
3234

3335
anyhow.workspace = true
3436
insta.workspace = true
3537
serde.workspace = true
3638

37-
mas-config.workspace = true
38-
3939
[lints]
4040
workspace = true

crates/syn2mas/src/mas_writer/mod.rs

Lines changed: 175 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -207,14 +207,33 @@ pub struct MasNewUserPassword {
207207
pub created_at: DateTime<Utc>,
208208
}
209209

210+
pub struct MasNewEmailThreepid {
211+
pub user_email_id: Uuid,
212+
pub user_id: Uuid,
213+
pub email: String,
214+
pub created_at: DateTime<Utc>,
215+
}
216+
217+
pub struct MasNewUnsupportedThreepid {
218+
pub user_id: Uuid,
219+
pub medium: String,
220+
pub address: String,
221+
pub created_at: DateTime<Utc>,
222+
}
223+
210224
/// The 'version' of the password hashing scheme used for passwords when they are
211225
/// migrated from Synapse to MAS.
212226
/// This is version 1, as in the previous syn2mas script.
213227
// TODO hardcoding version to `1` may not be correct long-term?
214228
pub const MIGRATED_PASSWORD_VERSION: u16 = 1;
215229

216230
/// List of all MAS tables that are written to by syn2mas.
217-
pub const MAS_TABLES_AFFECTED_BY_MIGRATION: &[&str] = &["users", "user_passwords"];
231+
pub const MAS_TABLES_AFFECTED_BY_MIGRATION: &[&str] = &[
232+
"users",
233+
"user_passwords",
234+
"user_emails",
235+
"user_unsupported_third_party_ids",
236+
];
218237

219238
/// Detect whether a syn2mas migration has started on the given database.
220239
///
@@ -563,11 +582,11 @@ impl<'conn> MasWriter<'conn> {
563582
&mut self,
564583
passwords: Vec<MasNewUserPassword>,
565584
) -> Result<(), Error> {
566-
self.writer_pool.spawn_with_connection(move |conn| Box::pin(async move {
567-
if passwords.is_empty() {
568-
return Ok(());
569-
}
585+
if passwords.is_empty() {
586+
return Ok(());
587+
}
570588

589+
self.writer_pool.spawn_with_connection(move |conn| Box::pin(async move {
571590
let mut user_password_ids: Vec<Uuid> = Vec::with_capacity(passwords.len());
572591
let mut user_ids: Vec<Uuid> = Vec::with_capacity(passwords.len());
573592
let mut hashed_passwords: Vec<String> = Vec::with_capacity(passwords.len());
@@ -603,6 +622,100 @@ impl<'conn> MasWriter<'conn> {
603622
Ok(())
604623
})).await
605624
}
625+
626+
#[tracing::instrument(skip_all, level = Level::DEBUG)]
627+
pub async fn write_email_threepids(
628+
&mut self,
629+
threepids: Vec<MasNewEmailThreepid>,
630+
) -> Result<(), Error> {
631+
if threepids.is_empty() {
632+
return Ok(());
633+
}
634+
self.writer_pool.spawn_with_connection(move |conn| {
635+
Box::pin(async move {
636+
let mut user_email_ids: Vec<Uuid> = Vec::with_capacity(threepids.len());
637+
let mut user_ids: Vec<Uuid> = Vec::with_capacity(threepids.len());
638+
let mut emails: Vec<String> = Vec::with_capacity(threepids.len());
639+
let mut created_ats: Vec<DateTime<Utc>> = Vec::with_capacity(threepids.len());
640+
641+
for MasNewEmailThreepid {
642+
user_email_id,
643+
user_id,
644+
email,
645+
created_at,
646+
} in threepids
647+
{
648+
user_email_ids.push(user_email_id);
649+
user_ids.push(user_id);
650+
emails.push(email);
651+
created_ats.push(created_at);
652+
}
653+
654+
// `confirmed_at` is going to get removed in a future MAS release,
655+
// so just populate with `created_at`
656+
sqlx::query!(
657+
r#"
658+
INSERT INTO syn2mas__user_emails
659+
(user_email_id, user_id, email, created_at, confirmed_at)
660+
SELECT * FROM UNNEST($1::UUID[], $2::UUID[], $3::TEXT[], $4::TIMESTAMP WITH TIME ZONE[], $4::TIMESTAMP WITH TIME ZONE[])
661+
"#,
662+
&user_email_ids[..],
663+
&user_ids[..],
664+
&emails[..],
665+
&created_ats[..],
666+
).execute(&mut *conn).await.into_database("writing emails to MAS")?;
667+
668+
Ok(())
669+
})
670+
}).await
671+
}
672+
673+
#[tracing::instrument(skip_all, level = Level::DEBUG)]
674+
pub async fn write_unsupported_threepids(
675+
&mut self,
676+
threepids: Vec<MasNewUnsupportedThreepid>,
677+
) -> Result<(), Error> {
678+
if threepids.is_empty() {
679+
return Ok(());
680+
}
681+
self.writer_pool.spawn_with_connection(move |conn| {
682+
Box::pin(async move {
683+
let mut user_ids: Vec<Uuid> = Vec::with_capacity(threepids.len());
684+
let mut mediums: Vec<String> = Vec::with_capacity(threepids.len());
685+
let mut addresses: Vec<String> = Vec::with_capacity(threepids.len());
686+
let mut created_ats: Vec<DateTime<Utc>> = Vec::with_capacity(threepids.len());
687+
688+
for MasNewUnsupportedThreepid {
689+
user_id,
690+
medium,
691+
address,
692+
created_at,
693+
} in threepids
694+
{
695+
user_ids.push(user_id);
696+
mediums.push(medium);
697+
addresses.push(address);
698+
created_ats.push(created_at);
699+
}
700+
701+
// Insert the whole batch in a single statement by unnesting the
702+
// parallel per-column arrays built above.
703+
sqlx::query!(
704+
r#"
705+
INSERT INTO syn2mas__user_unsupported_third_party_ids
706+
(user_id, medium, address, created_at)
707+
SELECT * FROM UNNEST($1::UUID[], $2::TEXT[], $3::TEXT[], $4::TIMESTAMP WITH TIME ZONE[])
708+
"#,
709+
&user_ids[..],
710+
&mediums[..],
711+
&addresses[..],
712+
&created_ats[..],
713+
).execute(&mut *conn).await.into_database("writing unsupported threepids to MAS")?;
714+
715+
Ok(())
716+
})
717+
}).await
718+
}
606719
}
607720

608721
// How many entries to buffer at once, before writing a batch of rows to the database.
@@ -670,6 +783,63 @@ impl<'writer, 'conn> MasUserWriteBuffer<'writer, 'conn> {
670783
}
671784
}
672785

786+
pub struct MasThreepidWriteBuffer<'writer, 'conn> {
787+
email: Vec<MasNewEmailThreepid>,
788+
unsupported: Vec<MasNewUnsupportedThreepid>,
789+
writer: &'writer mut MasWriter<'conn>,
790+
}
791+
792+
impl<'writer, 'conn> MasThreepidWriteBuffer<'writer, 'conn> {
793+
pub fn new(writer: &'writer mut MasWriter<'conn>) -> Self {
794+
MasThreepidWriteBuffer {
795+
email: Vec::with_capacity(WRITE_BUFFER_BATCH_SIZE),
796+
unsupported: Vec::with_capacity(WRITE_BUFFER_BATCH_SIZE),
797+
writer,
798+
}
799+
}
800+
801+
pub async fn finish(mut self) -> Result<(), Error> {
802+
self.flush_emails().await?;
803+
self.flush_unsupported().await?;
804+
Ok(())
805+
}
806+
807+
pub async fn flush_emails(&mut self) -> Result<(), Error> {
808+
self.writer
809+
.write_email_threepids(std::mem::take(&mut self.email))
810+
.await?;
811+
self.email.reserve_exact(WRITE_BUFFER_BATCH_SIZE);
812+
Ok(())
813+
}
814+
815+
pub async fn flush_unsupported(&mut self) -> Result<(), Error> {
816+
self.writer
817+
.write_unsupported_threepids(std::mem::take(&mut self.unsupported))
818+
.await?;
819+
self.unsupported.reserve_exact(WRITE_BUFFER_BATCH_SIZE);
820+
Ok(())
821+
}
822+
823+
pub async fn write_email(&mut self, user: MasNewEmailThreepid) -> Result<(), Error> {
824+
self.email.push(user);
825+
if self.email.len() >= WRITE_BUFFER_BATCH_SIZE {
826+
self.flush_emails().await?;
827+
}
828+
Ok(())
829+
}
830+
831+
pub async fn write_password(
832+
&mut self,
833+
unsupported: MasNewUnsupportedThreepid,
834+
) -> Result<(), Error> {
835+
self.unsupported.push(unsupported);
836+
if self.unsupported.len() >= WRITE_BUFFER_BATCH_SIZE {
837+
self.flush_unsupported().await?;
838+
}
839+
Ok(())
840+
}
841+
}
842+
673843
#[cfg(test)]
674844
mod test {
675845
use std::collections::{BTreeMap, BTreeSet};

crates/syn2mas/src/mas_writer/syn2mas_revert_temporary_tables.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,5 @@ DROP TABLE syn2mas_restore_indices;
1010

1111
ALTER TABLE syn2mas__users RENAME TO users;
1212
ALTER TABLE syn2mas__user_passwords RENAME TO user_passwords;
13+
ALTER TABLE syn2mas__user_emails RENAME TO user_emails;
14+
ALTER TABLE syn2mas__user_unsupported_third_party_ids RENAME TO user_unsupported_third_party_ids;

crates/syn2mas/src/mas_writer/syn2mas_temporary_tables.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,5 @@ CREATE TABLE syn2mas_restore_indices (
3939
-- Now we rename all tables that we touch during the migration.
4040
ALTER TABLE users RENAME TO syn2mas__users;
4141
ALTER TABLE user_passwords RENAME TO syn2mas__user_passwords;
42+
ALTER TABLE user_emails RENAME TO syn2mas__user_emails;
43+
ALTER TABLE user_unsupported_third_party_ids RENAME TO syn2mas__user_unsupported_third_party_ids;

0 commit comments

Comments
 (0)