Skip to content

Commit 64f3910

Browse files
reivilibre and sandhose
authored and committed
For performance, switch to a row count estimate for users and devices
1 parent 9c66326 commit 64f3910

File tree

2 files changed

+13
-8
lines changed

2 files changed

+13
-8
lines changed

crates/syn2mas/src/migration.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,10 @@ pub async fn migrate(
148148

149149
let state = MigrationState {
150150
server_name,
151-
users: HashMap::with_capacity(counts.users),
152-
devices_to_compat_sessions: HashMap::with_capacity(counts.devices),
151+
// We oversize the hashmaps, as the estimates are inaccurate, and we would like to avoid
152+
// reallocations.
153+
users: HashMap::with_capacity(counts.users * 9 / 8),
154+
devices_to_compat_sessions: HashMap::with_capacity(counts.devices * 9 / 8),
153155
provider_id_mapping,
154156
};
155157

crates/syn2mas/src/synapse_reader/mod.rs

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -338,28 +338,31 @@ impl<'conn> SynapseReader<'conn> {
338338
///
339339
/// - An underlying database error
340340
pub async fn count_rows(&mut self) -> Result<SynapseRowCounts, Error> {
341+
// We don't get to filter out application service users by using this estimate,
342+
// which is a shame, but on a large database this is way faster.
343+
// On matrix.org, counting users and devices properly takes around 1m10s,
344+
// which is unnecessary extra downtime during the migration, just to
345+
// show a more accurate progress bar and size a hash map accurately.
341346
let users: usize = sqlx::query_scalar::<_, i64>(
342347
"
343-
SELECT COUNT(1) FROM users
344-
WHERE appservice_id IS NULL
348+
SELECT reltuples::bigint AS estimate FROM pg_class WHERE oid = 'users'::regclass;
345349
",
346350
)
347351
.fetch_one(&mut *self.txn)
348352
.await
349-
.into_database("counting Synapse users")?
353+
.into_database("estimating count of users")?
350354
.max(0)
351355
.try_into()
352356
.unwrap_or(usize::MAX);
353357

354358
let devices = sqlx::query_scalar::<_, i64>(
355359
"
356-
SELECT COUNT(1) FROM devices
357-
WHERE NOT hidden
360+
SELECT reltuples::bigint AS estimate FROM pg_class WHERE oid = 'devices'::regclass;
358361
",
359362
)
360363
.fetch_one(&mut *self.txn)
361364
.await
362-
.into_database("counting Synapse devices")?
365+
.into_database("estimating count of devices")?
363366
.max(0)
364367
.try_into()
365368
.unwrap_or(usize::MAX);

0 commit comments

Comments
 (0)