Skip to content

Commit b482ccd

Browse files
committed
feat(sqlite): make sqlite's implementation of load_all_chunks_metadata even faster
See the updated code comment.
1 parent 165ec9d commit b482ccd

File tree

1 file changed

+52
-23
lines changed

1 file changed

+52
-23
lines changed

crates/matrix-sdk-sqlite/src/event_cache_store.rs

Lines changed: 52 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
//! An SQLite-based backend for the [`EventCacheStore`].
1616
17-
use std::{fmt, iter::once, path::Path, sync::Arc};
17+
use std::{collections::HashMap, fmt, iter::once, path::Path, sync::Arc};
1818

1919
use async_trait::async_trait;
2020
use deadpool_sqlite::{Object as SqliteAsyncConn, Pool as SqlitePool, Runtime};
@@ -882,35 +882,53 @@ impl EventCacheStore for SqliteEventCacheStore {
882882
self.read()
883883
.await?
884884
.with_transaction(move |txn| -> Result<_> {
885-
// This query will visit all chunks of a linked chunk with ID
886-
// `hashed_linked_chunk_id`. For each chunk, it collects its ID
887-
// (`ChunkIdentifier`), previous chunk, next chunk, and number of events
888-
// (`num_events`). If it's a gap, `num_events` is equal to 0, otherwise it
889-
// counts the number of events in `event_chunks` where `event_chunks.chunk_id =
890-
// linked_chunks.id`.
885+
// We want to collect the metadata about each chunk (id, next, previous), and
886+
// for event chunks, the number of events in it. For gaps, the
887+
// number of events is 0, by convention.
891888
//
892-
// Why not using a `(LEFT) JOIN` + `COUNT`? Because for gaps, the entire
893-
// `event_chunks` will be traversed every time. It's extremely inefficient. To
894-
// speed that up, we could use an `INDEX` but it will consume more storage
895-
space. This solution is a nice trade-off and offers great performance.
889+
// We've tried different strategies over time:
890+
// - use a `LEFT JOIN` + `COUNT`, which was extremely inefficient because it
891+
// caused a full table traversal for each chunk, including for gaps which
892+
// don't have any events. This happened in
893+
// https://github.com/matrix-org/matrix-rust-sdk/pull/5225.
894+
// - use a `CASE` statement on the chunk's type: if it's an event chunk, run an
895+
// additional `SELECT` query. It was an immense improvement, but still caused
896+
// one select query per event chunk. This happened in
897+
// https://github.com/matrix-org/matrix-rust-sdk/pull/5411.
896898
//
897-
// Also, use `ORDER BY id` to get a deterministic ordering for testing purposes.
899+
// The current solution is to run two queries:
900+
// - one to get each chunk and its number of events, by doing a single `SELECT`
901+
// query over the `event_chunks` table, grouping by chunk ids. This gives us a
902+
// list of `(chunk_id, num_events)` pairs, which can be transformed into a
903+
// hashmap.
904+
// - one to get each chunk's metadata (id, previous, next, type) from the
905+
// database with a `SELECT`, and then use the hashmap to get the number of
906+
// events.
907+
//
908+
// This strategy minimizes the number of queries to the database, and keeps them
909+
// super simple, while doing a bit more processing here, which is much faster.
910+
911+
let num_events_by_chunk_ids = txn
912+
.prepare(
913+
r#"
914+
SELECT ec.chunk_id, COUNT(ec.event_id)
915+
FROM event_chunks as ec
916+
WHERE ec.linked_chunk_id = ?
917+
GROUP BY ec.chunk_id
918+
"#,
919+
)?
920+
.query_map((&hashed_linked_chunk_id,), |row| {
921+
Ok((row.get::<_, u64>(0)?, row.get::<_, usize>(1)?))
922+
})?
923+
.collect::<Result<HashMap<_, _>, _>>()?;
898924

899925
txn.prepare(
900926
r#"
901927
SELECT
902928
lc.id,
903929
lc.previous,
904930
lc.next,
905-
CASE lc.type
906-
WHEN 'E' THEN (
907-
SELECT COUNT(ec.event_id)
908-
FROM event_chunks as ec
909-
WHERE ec.chunk_id = lc.id
910-
)
911-
ELSE
912-
0
913-
END as num_events
931+
lc.type
914932
FROM linked_chunks as lc
915933
WHERE lc.linked_chunk_id = ?
916934
ORDER BY lc.id"#,
@@ -920,11 +938,22 @@ impl EventCacheStore for SqliteEventCacheStore {
920938
row.get::<_, u64>(0)?,
921939
row.get::<_, Option<u64>>(1)?,
922940
row.get::<_, Option<u64>>(2)?,
923-
row.get::<_, usize>(3)?,
941+
row.get::<_, String>(3)?,
924942
))
925943
})?
926944
.map(|data| -> Result<_> {
927-
let (id, previous, next, num_items) = data?;
945+
let (id, previous, next, chunk_type) = data?;
946+
947+
// Note: since a gap has 0 events, an alternative could be to *not* retrieve
948+
// the chunk type, and just let the hashmap lookup fail for gaps. However,
949+
// benchmarking shows that this is slightly slower than matching the chunk
950+
// type (around 1%, so in the realm of noise), so we keep the explicit
951+
// check instead.
952+
let num_items = if chunk_type == CHUNK_TYPE_GAP_TYPE_STRING {
953+
0
954+
} else {
955+
num_events_by_chunk_ids.get(&id).copied().unwrap_or(0)
956+
};
928957

929958
Ok(ChunkMetadata {
930959
identifier: ChunkIdentifier::new(id),

0 commit comments

Comments
 (0)