Skip to content

Commit 95c1761

Browse files
reivilibre authored and sandhose committed
For performance, switch to a row count estimate for users and devices
1 parent 9ca7288 commit 95c1761

File tree

2 files changed: 17 additions (+17) and 12 deletions (-12)

crates/syn2mas/src/migration.rs

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -167,10 +167,11 @@ pub async fn migrate(
167167
// `(MAS user_id, device_id)` mapped to `compat_session` ULID
168168
let mut devices_to_compat_sessions: HashMap<(Uuid, CompactString), Uuid> =
169169
HashMap::with_capacity(
170-
counts
171-
.devices
172-
.try_into()
173-
.expect("More than usize::MAX devices — unable to handle this many!"),
170+
usize::try_from(counts.devices)
171+
.expect("More than usize::MAX devices — unable to handle this many!")
172+
// Oversize the capacity, because the count is only an estimate and
173+
// we would like to avoid a reallocation
174+
* 9 / 8,
174175
);
175176

176177
span.pb_set_message("migrating access tokens");
@@ -258,8 +259,9 @@ async fn migrate_users(
258259
let mut user_buffer = MasWriteBuffer::new(mas, MasWriter::write_users);
259260
let mut password_buffer = MasWriteBuffer::new(mas, MasWriter::write_passwords);
260261
let mut users_stream = pin!(synapse.read_users());
261-
// TODO is 1:1 capacity enough for a hashmap?
262-
let mut user_localparts_to_uuid = HashMap::with_capacity(user_count_hint);
262+
// Oversize the capacity, because the count is only an estimate and
263+
// we would like to avoid a reallocation
264+
let mut user_localparts_to_uuid = HashMap::with_capacity(user_count_hint * 9 / 8);
263265
let mut synapse_admins = HashSet::new();
264266

265267
while let Some(user_res) = users_stream.next().await {

crates/syn2mas/src/synapse_reader/mod.rs

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -338,25 +338,28 @@ impl<'conn> SynapseReader<'conn> {
338338
///
339339
/// - An underlying database error
340340
pub async fn count_rows(&mut self) -> Result<SynapseRowCounts, Error> {
341+
// We don't get to filter out application service users by using this estimate,
342+
// which is a shame, but on a large database this is way faster.
343+
// On matrix.org, counting users and devices properly takes around 1m10s,
344+
// which is unnecessary extra downtime during the migration, just to
345+
// show a more accurate progress bar and size a hash map accurately.
341346
let users: i64 = sqlx::query_scalar(
342347
"
343-
SELECT COUNT(1) FROM users
344-
WHERE appservice_id IS NULL
348+
SELECT reltuples::bigint AS estimate FROM pg_class WHERE oid = 'users'::regclass;
345349
",
346350
)
347351
.fetch_one(&mut *self.txn)
348352
.await
349-
.into_database("counting Synapse users")?;
353+
.into_database("estimating count of users")?;
350354

351355
let devices = sqlx::query_scalar(
352356
"
353-
SELECT COUNT(1) FROM devices
354-
WHERE NOT hidden
357+
SELECT reltuples::bigint AS estimate FROM pg_class WHERE oid = 'devices'::regclass;
355358
",
356359
)
357360
.fetch_one(&mut *self.txn)
358361
.await
359-
.into_database("counting Synapse devices")?;
362+
.into_database("estimating count of devices")?;
360363

361364
// For other rows, we don't particularly care about the number except for
362365
// progress bars, so retrieve a fast estimate from the statistics system

0 commit comments

Comments
 (0)