Skip to content

Commit d1a3ef9

Browse files
authored
syn2mas: better performance, output tweaks, tracing tweaks, access token fixes (#4175)
2 parents 7823f21 + dbb6753 commit d1a3ef9

File tree

10 files changed

+550
-328
lines changed

10 files changed

+550
-328
lines changed

Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/cli/src/commands/syn2mas.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ enum Subcommand {
8080
const NUM_WRITER_CONNECTIONS: usize = 8;
8181

8282
impl Options {
83+
#[tracing::instrument("cli.syn2mas.run", skip_all)]
8384
#[allow(clippy::too_many_lines)]
8485
pub async fn run(self, figment: &Figment) -> anyhow::Result<ExitCode> {
8586
warn!(
@@ -173,14 +174,14 @@ impl Options {
173174

174175
// Display errors and warnings
175176
if !check_errors.is_empty() {
176-
eprintln!("===== Errors =====");
177+
eprintln!("\n\n===== Errors =====");
177178
eprintln!("These issues prevent migrating from Synapse to MAS right now:\n");
178179
for error in &check_errors {
179180
eprintln!("• {error}\n");
180181
}
181182
}
182183
if !check_warnings.is_empty() {
183-
eprintln!("===== Warnings =====");
184+
eprintln!("\n\n===== Warnings =====");
184185
eprintln!(
185186
"These potential issues should be considered before migrating from Synapse to MAS right now:\n"
186187
);
@@ -220,6 +221,7 @@ impl Options {
220221

221222
// TODO how should we handle warnings at this stage?
222223

224+
// TODO this dry-run flag should be set to false in real circumstances !!!
223225
let reader = SynapseReader::new(&mut syn_conn, true).await?;
224226
let mut writer_mas_connections = Vec::with_capacity(NUM_WRITER_CONNECTIONS);
225227
for _ in 0..NUM_WRITER_CONNECTIONS {
@@ -234,6 +236,7 @@ impl Options {
234236

235237
// TODO progress reporting
236238
let mas_matrix = MatrixConfig::extract(figment)?;
239+
eprintln!("\n\n");
237240
syn2mas::migrate(
238241
reader,
239242
writer,

crates/syn2mas/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,16 @@ serde.workspace = true
1818
thiserror.workspace = true
1919
thiserror-ext.workspace = true
2020
tokio.workspace = true
21+
tokio-util.workspace = true
2122
sqlx.workspace = true
2223
chrono.workspace = true
2324
compact_str.workspace = true
2425
tracing.workspace = true
2526
futures-util = "0.3.31"
27+
rustc-hash = "2.1.1"
2628

2729
rand.workspace = true
30+
rand_chacha = "0.3.1"
2831
uuid = "1.15.1"
2932
ulid = { workspace = true, features = ["uuid"] }
3033

crates/syn2mas/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ mod synapse_reader;
88

99
mod migration;
1010

11+
type RandomState = rustc_hash::FxBuildHasher;
12+
type HashMap<K, V> = rustc_hash::FxHashMap<K, V>;
13+
1114
pub use self::{
1215
mas_writer::{MasWriter, checks::mas_pre_migration_checks, locking::LockedMasDatabase},
1316
migration::migrate,

crates/syn2mas/src/mas_writer/checks.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
1111
use thiserror::Error;
1212
use thiserror_ext::ContextInto;
13+
use tracing::Instrument as _;
1314

1415
use super::{MAS_TABLES_AFFECTED_BY_MIGRATION, is_syn2mas_in_progress, locking::LockedMasDatabase};
1516

@@ -46,7 +47,7 @@ pub enum Error {
4647
/// - If any MAS tables involved in the migration are not empty.
4748
/// - If we can't check whether syn2mas is already in progress on this database
4849
/// or not.
49-
#[tracing::instrument(skip_all)]
50+
#[tracing::instrument(name = "syn2mas.mas_pre_migration_checks", skip_all)]
5051
pub async fn mas_pre_migration_checks(mas_connection: &mut LockedMasDatabase) -> Result<(), Error> {
5152
if is_syn2mas_in_progress(mas_connection.as_mut())
5253
.await
@@ -60,8 +61,11 @@ pub async fn mas_pre_migration_checks(mas_connection: &mut LockedMasDatabase) ->
6061
// empty database.
6162

6263
for &table in MAS_TABLES_AFFECTED_BY_MIGRATION {
63-
let row_present = sqlx::query(&format!("SELECT 1 AS dummy FROM {table} LIMIT 1"))
64+
let query = format!("SELECT 1 AS dummy FROM {table} LIMIT 1");
65+
let span = tracing::info_span!("db.query", db.query.text = query);
66+
let row_present = sqlx::query(&query)
6467
.fetch_optional(mas_connection.as_mut())
68+
.instrument(span)
6569
.await
6670
.into_maybe_not_mas(table)?
6771
.is_some();

crates/syn2mas/src/mas_writer/constraint_pausing.rs

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33
// SPDX-License-Identifier: AGPL-3.0-only
44
// Please see LICENSE in the repository root for full details.
55

6+
use std::time::Instant;
7+
68
use sqlx::PgConnection;
7-
use tracing::debug;
9+
use tracing::{debug, info};
810

911
use super::{Error, IntoDatabase};
1012

@@ -109,15 +111,20 @@ pub async fn drop_index(conn: &mut PgConnection, index: &IndexDescription) -> Re
109111
/// Restores (recreates) a constraint.
110112
///
111113
/// The constraint must not exist prior to this call.
114+
#[tracing::instrument(name = "syn2mas.restore_constraint", skip_all, fields(constraint.name = constraint.name))]
112115
pub async fn restore_constraint(
113116
conn: &mut PgConnection,
114117
constraint: &ConstraintDescription,
115118
) -> Result<(), Error> {
119+
let start = Instant::now();
120+
116121
let ConstraintDescription {
117122
name,
118123
table_name,
119124
definition,
120125
} = &constraint;
126+
info!("rebuilding constraint {name}");
127+
121128
sqlx::query(&format!(
122129
"ALTER TABLE {table_name} ADD CONSTRAINT {name} {definition};"
123130
))
@@ -127,13 +134,21 @@ pub async fn restore_constraint(
127134
format!("failed to recreate constraint {name} on {table_name} with {definition}")
128135
})?;
129136

137+
info!(
138+
"constraint {name} rebuilt in {:.1}s",
139+
Instant::now().duration_since(start).as_secs_f64()
140+
);
141+
130142
Ok(())
131143
}
132144

133145
/// Restores (recreates) a index.
134146
///
135147
/// The index must not exist prior to this call.
148+
#[tracing::instrument(name = "syn2mas.restore_index", skip_all, fields(index.name = index.name))]
136149
pub async fn restore_index(conn: &mut PgConnection, index: &IndexDescription) -> Result<(), Error> {
150+
let start = Instant::now();
151+
137152
let IndexDescription {
138153
name,
139154
table_name,
@@ -147,5 +162,10 @@ pub async fn restore_index(conn: &mut PgConnection, index: &IndexDescription) ->
147162
format!("failed to recreate index {name} on {table_name} with {definition}")
148163
})?;
149164

165+
info!(
166+
"index {name} rebuilt in {:.1}s",
167+
Instant::now().duration_since(start).as_secs_f64()
168+
);
169+
150170
Ok(())
151171
}

crates/syn2mas/src/mas_writer/mod.rs

Lines changed: 75 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,22 @@
77
//!
88
//! This module is responsible for writing new records to MAS' database.
99
10-
use std::{fmt::Display, net::IpAddr};
10+
use std::{
11+
fmt::Display,
12+
net::IpAddr,
13+
sync::{
14+
Arc,
15+
atomic::{AtomicU32, Ordering},
16+
},
17+
};
1118

1219
use chrono::{DateTime, Utc};
1320
use futures_util::{FutureExt, TryStreamExt, future::BoxFuture};
1421
use sqlx::{Executor, PgConnection, query, query_as};
1522
use thiserror::Error;
1623
use thiserror_ext::{Construct, ContextInto};
1724
use tokio::sync::mpsc::{self, Receiver, Sender};
18-
use tracing::{Level, error, info, warn};
25+
use tracing::{Instrument, Level, error, info, warn};
1926
use uuid::{NonNilUuid, Uuid};
2027

2128
use self::{
@@ -44,6 +51,9 @@ pub enum Error {
4451
#[error("inconsistent database: {0}")]
4552
Inconsistent(String),
4653

54+
#[error("bug in syn2mas: write buffers not finished")]
55+
WriteBuffersNotFinished,
56+
4757
#[error("{0}")]
4858
Multiple(MultipleErrors),
4959
}
@@ -109,18 +119,21 @@ impl WriterConnectionPool {
109119
match self.connection_rx.recv().await {
110120
Some(Ok(mut connection)) => {
111121
let connection_tx = self.connection_tx.clone();
112-
tokio::task::spawn(async move {
113-
let to_return = match task(&mut connection).await {
114-
Ok(()) => Ok(connection),
115-
Err(error) => {
116-
error!("error in writer: {error}");
117-
Err(error)
118-
}
119-
};
120-
// This should always succeed in sending unless we're already shutting
121-
// down for some other reason.
122-
let _: Result<_, _> = connection_tx.send(to_return).await;
123-
});
122+
tokio::task::spawn(
123+
async move {
124+
let to_return = match task(&mut connection).await {
125+
Ok(()) => Ok(connection),
126+
Err(error) => {
127+
error!("error in writer: {error}");
128+
Err(error)
129+
}
130+
};
131+
// This should always succeed in sending unless we're already shutting
132+
// down for some other reason.
133+
let _: Result<_, _> = connection_tx.send(to_return).await;
134+
}
135+
.instrument(tracing::debug_span!("spawn_with_connection")),
136+
);
124137

125138
Ok(())
126139
}
@@ -188,12 +201,52 @@ impl WriterConnectionPool {
188201
}
189202
}
190203

204+
/// Small utility to make sure `finish()` is called on all write buffers
205+
/// before committing to the database.
206+
#[derive(Default)]
207+
struct FinishChecker {
208+
counter: Arc<AtomicU32>,
209+
}
210+
211+
struct FinishCheckerHandle {
212+
counter: Arc<AtomicU32>,
213+
}
214+
215+
impl FinishChecker {
216+
/// Acquire a new handle, for a task that should declare when it has
217+
/// finished.
218+
pub fn handle(&self) -> FinishCheckerHandle {
219+
self.counter.fetch_add(1, Ordering::SeqCst);
220+
FinishCheckerHandle {
221+
counter: Arc::clone(&self.counter),
222+
}
223+
}
224+
225+
/// Check that all handles have been declared as finished.
226+
pub fn check_all_finished(self) -> Result<(), Error> {
227+
if self.counter.load(Ordering::SeqCst) == 0 {
228+
Ok(())
229+
} else {
230+
Err(Error::WriteBuffersNotFinished)
231+
}
232+
}
233+
}
234+
235+
impl FinishCheckerHandle {
236+
/// Declare that the task this handle represents has been finished.
237+
pub fn declare_finished(self) {
238+
self.counter.fetch_sub(1, Ordering::SeqCst);
239+
}
240+
}
241+
191242
pub struct MasWriter {
192243
conn: LockedMasDatabase,
193244
writer_pool: WriterConnectionPool,
194245

195246
indices_to_restore: Vec<IndexDescription>,
196247
constraints_to_restore: Vec<ConstraintDescription>,
248+
249+
write_buffer_finish_checker: FinishChecker,
197250
}
198251

199252
pub struct MasNewUser {
@@ -337,7 +390,7 @@ impl MasWriter {
337390
///
338391
/// - If the database connection experiences an error.
339392
#[allow(clippy::missing_panics_doc)] // not real
340-
#[tracing::instrument(skip_all)]
393+
#[tracing::instrument(name = "syn2mas.mas_writer.new", skip_all)]
341394
pub async fn new(
342395
mut conn: LockedMasDatabase,
343396
mut writer_connections: Vec<PgConnection>,
@@ -454,6 +507,7 @@ impl MasWriter {
454507
writer_pool: WriterConnectionPool::new(writer_connections),
455508
indices_to_restore,
456509
constraints_to_restore,
510+
write_buffer_finish_checker: FinishChecker::default(),
457511
})
458512
}
459513

@@ -521,6 +575,8 @@ impl MasWriter {
521575
/// - If the database connection experiences an error.
522576
#[tracing::instrument(skip_all)]
523577
pub async fn finish(mut self) -> Result<PgConnection, Error> {
578+
self.write_buffer_finish_checker.check_all_finished()?;
579+
524580
// Commit all writer transactions to the database.
525581
self.writer_pool
526582
.finish()
@@ -1041,28 +1097,24 @@ type WriteBufferFlusher<T> =
10411097

10421098
/// A buffer for writing rows to the MAS database.
10431099
/// Generic over the type of rows.
1044-
///
1045-
/// # Panics
1046-
///
1047-
/// Panics if dropped before `finish()` has been called.
10481100
pub struct MasWriteBuffer<T> {
10491101
rows: Vec<T>,
10501102
flusher: WriteBufferFlusher<T>,
1051-
finished: bool,
1103+
finish_checker_handle: FinishCheckerHandle,
10521104
}
10531105

10541106
impl<T> MasWriteBuffer<T> {
1055-
pub fn new(flusher: WriteBufferFlusher<T>) -> Self {
1107+
pub fn new(writer: &MasWriter, flusher: WriteBufferFlusher<T>) -> Self {
10561108
MasWriteBuffer {
10571109
rows: Vec::with_capacity(WRITE_BUFFER_BATCH_SIZE),
10581110
flusher,
1059-
finished: false,
1111+
finish_checker_handle: writer.write_buffer_finish_checker.handle(),
10601112
}
10611113
}
10621114

10631115
pub async fn finish(mut self, writer: &mut MasWriter) -> Result<(), Error> {
1064-
self.finished = true;
10651116
self.flush(writer).await?;
1117+
self.finish_checker_handle.declare_finished();
10661118
Ok(())
10671119
}
10681120

@@ -1085,12 +1137,6 @@ impl<T> MasWriteBuffer<T> {
10851137
}
10861138
}
10871139

1088-
impl<T> Drop for MasWriteBuffer<T> {
1089-
fn drop(&mut self) {
1090-
assert!(self.finished, "MasWriteBuffer dropped but not finished!");
1091-
}
1092-
}
1093-
10941140
#[cfg(test)]
10951141
mod test {
10961142
use std::collections::{BTreeMap, BTreeSet};

0 commit comments

Comments
 (0)