14
14
15
15
//! An SQLite-based backend for the [`EventCacheStore`].
16
16
17
- use std:: { fmt, iter:: once, path:: Path , sync:: Arc } ;
17
+ use std:: { collections :: HashMap , fmt, iter:: once, path:: Path , sync:: Arc } ;
18
18
19
19
use async_trait:: async_trait;
20
20
use deadpool_sqlite:: { Object as SqliteAsyncConn , Pool as SqlitePool , Runtime } ;
@@ -882,35 +882,53 @@ impl EventCacheStore for SqliteEventCacheStore {
882
882
self . read ( )
883
883
. await ?
884
884
. with_transaction ( move |txn| -> Result < _ > {
885
- // This query will visit all chunks of a linked chunk with ID
886
- // `hashed_linked_chunk_id`. For each chunk, it collects its ID
887
- // (`ChunkIdentifier`), previous chunk, next chunk, and number of events
888
- // (`num_events`). If it's a gap, `num_events` is equal to 0, otherwise it
889
- // counts the number of events in `event_chunks` where `event_chunks.chunk_id =
890
- // linked_chunks.id`.
885
+ // We want to collect the metadata about each chunk (id, next, previous), and
886
+ // for event chunks, the number of events in it. For gaps, the
887
+ // number of events is 0, by convention.
891
888
//
892
- // Why not using a `(LEFT) JOIN` + `COUNT`? Because for gaps, the entire
893
- // `event_chunks` will be traversed every time. It's extremely inefficient. To
894
- // speed that up, we could use an `INDEX` but it will consume more storage
895
- // space. This solution is nice trade-off and offers great performance.
889
+ // We've tried different strategies over time:
890
+ // - use a `LEFT JOIN` + `COUNT`, which was extremely inefficient because it
891
+ // caused a full table traversal for each chunk, including for gaps which
892
+ // don't have any events. This happened in
893
+ // https://github.com/matrix-org/matrix-rust-sdk/pull/5225.
894
+ // - use a `CASE` statement on the chunk's type: if it's an event chunk, run an
895
+ // additional `SELECT` query. It was an immense improvement, but still caused
896
+ // one select query per event chunk. This happened in
897
+ // https://github.com/matrix-org/matrix-rust-sdk/pull/5411.
896
898
//
897
- // Also, use `ORDER BY id` to get a deterministic ordering for testing purposes.
899
+ // The current solution is to run two queries:
900
+ // - one to get each chunk and its number of events, by doing a single `SELECT`
901
+ // query over the `event_chunks` table, grouping by chunk ids. This gives us a
902
+ // list of `(chunk_id, num_events)` pairs, which can be transformed into a
903
+ // hashmap.
904
+ // - one to get each chunk's metadata (id, previous, next, type) from the
905
+ // database with a `SELECT`, and then use the hashmap to get the number of
906
+ // events.
907
+ //
908
+ // This strategy minimizes the number of queries to the database, and keeps them
909
+ // super simple, while doing a bit more processing here, which is much faster.
910
+
911
+ let num_events_by_chunk_ids = txn
912
+ . prepare (
913
+ r#"
914
+ SELECT ec.chunk_id, COUNT(ec.event_id)
915
+ FROM event_chunks as ec
916
+ WHERE ec.linked_chunk_id = ?
917
+ GROUP BY ec.chunk_id
918
+ "# ,
919
+ ) ?
920
+ . query_map ( ( & hashed_linked_chunk_id, ) , |row| {
921
+ Ok ( ( row. get :: < _ , u64 > ( 0 ) ?, row. get :: < _ , usize > ( 1 ) ?) )
922
+ } ) ?
923
+ . collect :: < Result < HashMap < _ , _ > , _ > > ( ) ?;
898
924
899
925
txn. prepare (
900
926
r#"
901
927
SELECT
902
928
lc.id,
903
929
lc.previous,
904
930
lc.next,
905
- CASE lc.type
906
- WHEN 'E' THEN (
907
- SELECT COUNT(ec.event_id)
908
- FROM event_chunks as ec
909
- WHERE ec.chunk_id = lc.id
910
- )
911
- ELSE
912
- 0
913
- END as num_events
931
+ lc.type
914
932
FROM linked_chunks as lc
915
933
WHERE lc.linked_chunk_id = ?
916
934
ORDER BY lc.id"# ,
@@ -920,11 +938,22 @@ impl EventCacheStore for SqliteEventCacheStore {
920
938
row. get :: < _ , u64 > ( 0 ) ?,
921
939
row. get :: < _ , Option < u64 > > ( 1 ) ?,
922
940
row. get :: < _ , Option < u64 > > ( 2 ) ?,
923
- row. get :: < _ , usize > ( 3 ) ?,
941
+ row. get :: < _ , String > ( 3 ) ?,
924
942
) )
925
943
} ) ?
926
944
. map ( |data| -> Result < _ > {
927
- let ( id, previous, next, num_items) = data?;
945
+ let ( id, previous, next, chunk_type) = data?;
946
+
947
+ // Note: since a gap has 0 events, an alternative could be to *not* retrieve
948
+ // the chunk type, and just let the hashmap lookup fail for gaps. However,
949
+ // benchmarking shows that this is slightly slower than matching the chunk
950
+ // type (around 1%, so in the realm of noise), so we keep the explicit
951
+ // check instead.
952
+ let num_items = if chunk_type == CHUNK_TYPE_GAP_TYPE_STRING {
953
+ 0
954
+ } else {
955
+ num_events_by_chunk_ids. get ( & id) . copied ( ) . unwrap_or ( 0 )
956
+ } ;
928
957
929
958
Ok ( ChunkMetadata {
930
959
identifier : ChunkIdentifier :: new ( id) ,
0 commit comments