Skip to content

Commit d771525

Browse files
author
dragonfly1033
committed
feat(sdk): Add creation of indexes and indexing of messages.
1 parent 669ebf2 commit d771525

File tree

8 files changed

+271
-7
lines changed

8 files changed

+271
-7
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/matrix-sdk-search/src/index.rs

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ mod tests {
193193
use std::{collections::HashSet, error::Error};
194194

195195
use matrix_sdk_test::event_factory::EventFactory;
196-
use ruma::{event_id, owned_event_id, room_id, user_id};
196+
use ruma::{event_id, room_id, user_id};
197197

198198
use crate::index::RoomIndex;
199199

@@ -227,10 +227,14 @@ mod tests {
227227
let mut index =
228228
RoomIndex::new_in_ram(room_id).expect("failed to make index in ram: {index:?}");
229229

230+
let event_id_1 = event_id!("$event_id_1:localhost");
231+
let event_id_2 = event_id!("$event_id_2:localhost");
232+
let event_id_3 = event_id!("$event_id_3:localhost");
233+
230234
index.add_event(
231235
EventFactory::new()
232236
.text_msg("This is a sentence")
233-
.event_id(event_id!("$event_id_1:localhost"))
237+
.event_id(event_id_1)
234238
.room(room_id)
235239
.sender(user_id!("@user_id:localhost"))
236240
.into_any_message_like_event(),
@@ -239,7 +243,7 @@ mod tests {
239243
index.add_event(
240244
EventFactory::new()
241245
.text_msg("All new words")
242-
.event_id(event_id!("$event_id_2:localhost"))
246+
.event_id(event_id_2)
243247
.room(room_id)
244248
.sender(user_id!("@user_id:localhost"))
245249
.into_any_message_like_event(),
@@ -248,7 +252,7 @@ mod tests {
248252
index.add_event(
249253
EventFactory::new()
250254
.text_msg("A similar sentence")
251-
.event_id(event_id!("$event_id_3:localhost"))
255+
.event_id(event_id_3)
252256
.room(room_id)
253257
.sender(user_id!("@user_id:localhost"))
254258
.into_any_message_like_event(),
@@ -259,8 +263,7 @@ mod tests {
259263
let result = index.search("sentence", 10).expect("search failed with: {result:?}");
260264
let result: HashSet<_> = result.iter().collect();
261265

262-
let true_value =
263-
[owned_event_id!("$event_id_1:localhost"), owned_event_id!("$event_id_3:localhost")];
266+
let true_value = [event_id_1.to_owned(), event_id_3.to_owned()];
264267
let true_value: HashSet<_> = true_value.iter().collect();
265268

266269
assert_eq!(result, true_value, "search result not correct: {result:?}");

crates/matrix-sdk/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ docsrs = ["e2e-encryption", "sqlite", "indexeddb", "sso-login", "qrcode"]
6262
# Add support for inline media galleries via msgtypes
6363
unstable-msc4274 = ["ruma/unstable-msc4274", "matrix-sdk-base/unstable-msc4274"]
6464

65+
experimental-search = ["matrix-sdk-search"]
66+
6567
[dependencies]
6668
anyhow = { workspace = true, optional = true }
6769
anymap2 = "0.13.0"
@@ -92,6 +94,7 @@ matrix-sdk-ffi-macros = { workspace = true, optional = true }
9294
matrix-sdk-indexeddb = { workspace = true, optional = true }
9395
matrix-sdk-sqlite = { workspace = true, optional = true }
9496
matrix-sdk-test = { workspace = true, optional = true }
97+
matrix-sdk-search = { workspace = true, optional = true }
9598
mime.workspace = true
9699
mime2ext = "0.1.53"
97100
oauth2.workspace = true

crates/matrix-sdk/src/client/builder/mod.rs

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,24 @@
1515

1616
mod homeserver_config;
1717

18+
#[cfg(feature = "experimental-search")]
19+
use std::collections::HashMap;
1820
#[cfg(feature = "sqlite")]
1921
use std::path::Path;
22+
#[cfg(any(feature = "experimental-search", feature = "sqlite"))]
23+
use std::path::PathBuf;
2024
use std::{collections::BTreeSet, fmt, sync::Arc};
2125

2226
use homeserver_config::*;
2327
#[cfg(feature = "e2e-encryption")]
2428
use matrix_sdk_base::crypto::DecryptionSettings;
2529
use matrix_sdk_base::{store::StoreConfig, BaseClient, ThreadingSupport};
30+
#[cfg(feature = "experimental-search")]
31+
use matrix_sdk_search::index::RoomIndex;
2632
#[cfg(feature = "sqlite")]
2733
use matrix_sdk_sqlite::SqliteStoreConfig;
34+
#[cfg(feature = "experimental-search")]
35+
use ruma::OwnedRoomId;
2836
use ruma::{
2937
api::{error::FromHttpResponseError, MatrixVersion, SupportedVersions},
3038
OwnedServerName, ServerName,
@@ -114,6 +122,8 @@ pub struct ClientBuilder {
114122
enable_share_history_on_invite: bool,
115123
cross_process_store_locks_holder_name: String,
116124
threading_support: ThreadingSupport,
125+
#[cfg(feature = "experimental-search")]
126+
index_base_dir: IndexBaseDir,
117127
}
118128

119129
impl ClientBuilder {
@@ -145,6 +155,8 @@ impl ClientBuilder {
145155
cross_process_store_locks_holder_name:
146156
Self::DEFAULT_CROSS_PROCESS_STORE_LOCKS_HOLDER_NAME.to_owned(),
147157
threading_support: ThreadingSupport::Disabled,
158+
#[cfg(feature = "experimental-search")]
159+
index_base_dir: IndexBaseDir::Ram,
148160
}
149161
}
150162

@@ -489,6 +501,13 @@ impl ClientBuilder {
489501
self
490502
}
491503

504+
/// Select where the per-room search indexes are stored.
///
/// The given [`IndexBaseDir`] is kept on the builder and handed to the
/// client when it is built; it replaces any previously configured value.
#[cfg(feature = "experimental-search")]
pub fn index_base_directory(self, path: IndexBaseDir) -> Self {
    let mut builder = self;
    builder.index_base_dir = path;
    builder
}
510+
492511
/// Create a [`Client`] with the options set on this builder.
493512
///
494513
/// # Errors
@@ -590,6 +609,10 @@ impl ClientBuilder {
590609
let event_cache = OnceCell::new();
591610
let latest_events = OnceCell::new();
592611

612+
#[cfg(feature = "experimental-search")]
613+
let room_indexes: Arc<Mutex<HashMap<OwnedRoomId, RoomIndex>>> =
614+
Arc::new(Mutex::new(HashMap::new()));
615+
593616
let inner = ClientInner::new(
594617
auth_ctx,
595618
server,
@@ -607,6 +630,10 @@ impl ClientBuilder {
607630
#[cfg(feature = "e2e-encryption")]
608631
self.enable_share_history_on_invite,
609632
self.cross_process_store_locks_holder_name,
633+
#[cfg(feature = "experimental-search")]
634+
room_indexes,
635+
#[cfg(feature = "experimental-search")]
636+
self.index_base_dir,
610637
)
611638
.await;
612639

@@ -717,6 +744,14 @@ async fn build_indexeddb_store_config(
717744
panic!("the IndexedDB is only available on the 'wasm32' arch")
718745
}
719746

747+
/// Storage location for the per-room search indexes.
///
/// The client builder starts out with [`IndexBaseDir::Ram`]; the value is
/// consulted whenever an index has to be created for a room.
#[cfg(feature = "experimental-search")]
748+
// NOTE(review): presumably needed because not every variant is constructed
// inside this crate yet — confirm before removing.
#[allow(dead_code)]
749+
#[derive(Clone, Debug)]
750+
pub enum IndexBaseDir {
751+
/// Base directory under which each room's index is opened or created.
Directory(PathBuf),
752+
/// Build each room's index in RAM instead of under a directory.
Ram,
753+
}
754+
720755
#[derive(Clone, Debug)]
721756
enum HttpConfig {
722757
#[cfg(not(target_family = "wasm"))]
@@ -755,7 +790,7 @@ enum BuilderStoreConfig {
755790
#[cfg(feature = "sqlite")]
756791
Sqlite {
757792
config: SqliteStoreConfig,
758-
cache_path: Option<std::path::PathBuf>,
793+
cache_path: Option<PathBuf>,
759794
},
760795
#[cfg(feature = "indexeddb")]
761796
IndexedDb {

crates/matrix-sdk/src/client/mod.rs

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
// See the License for the specific language governing permissions and
1515
// limitations under the License.
1616

17+
#[cfg(feature = "experimental-search")]
18+
use std::collections::HashMap;
1719
use std::{
1820
collections::{btree_map, BTreeMap, BTreeSet},
1921
fmt::{self, Debug},
@@ -38,6 +40,12 @@ use matrix_sdk_base::{
3840
StateStoreDataKey, StateStoreDataValue, SyncOutsideWasm, ThreadingSupport,
3941
};
4042
use matrix_sdk_common::ttl_cache::TtlCache;
43+
#[cfg(feature = "experimental-search")]
44+
use matrix_sdk_search::error::IndexError;
45+
#[cfg(feature = "experimental-search")]
46+
use matrix_sdk_search::index::RoomIndex;
47+
#[cfg(feature = "experimental-search")]
48+
use ruma::events::AnyMessageLikeEvent;
4149
#[cfg(feature = "e2e-encryption")]
4250
use ruma::events::{room::encryption::RoomEncryptionEventContent, InitialStateEvent};
4351
use ruma::{
@@ -72,11 +80,15 @@ use ruma::{
7280
RoomAliasId, RoomId, RoomOrAliasId, ServerName, UInt, UserId,
7381
};
7482
use serde::de::DeserializeOwned;
83+
#[cfg(feature = "experimental-search")]
84+
use tokio::sync::MutexGuard;
7585
use tokio::sync::{broadcast, Mutex, OnceCell, RwLock, RwLockReadGuard};
7686
use tracing::{debug, error, instrument, trace, warn, Instrument, Span};
7787
use url::Url;
7888

7989
use self::futures::SendRequest;
90+
#[cfg(feature = "experimental-search")]
91+
use crate::client::builder::IndexBaseDir;
8092
use crate::{
8193
authentication::{
8294
matrix::MatrixAuth, oauth::OAuth, AuthCtx, AuthData, ReloadSessionCallback,
@@ -350,6 +362,14 @@ pub(crate) struct ClientInner {
350362
///
351363
/// [`LatestEvent`]: crate::latest_event::LatestEvent
352364
latest_events: OnceCell<LatestEvents>,
365+
366+
/// HashMap that links each joined room to its RoomIndex
367+
#[cfg(feature = "experimental-search")]
368+
room_indexes: Arc<Mutex<HashMap<OwnedRoomId, RoomIndex>>>,
369+
370+
/// Base directory that stores the directories for each RoomIndex
371+
#[cfg(feature = "experimental-search")]
372+
index_base_dir: IndexBaseDir,
353373
}
354374

355375
impl ClientInner {
@@ -374,6 +394,10 @@ impl ClientInner {
374394
#[cfg(feature = "e2e-encryption")] encryption_settings: EncryptionSettings,
375395
#[cfg(feature = "e2e-encryption")] enable_share_history_on_invite: bool,
376396
cross_process_store_locks_holder_name: String,
397+
#[cfg(feature = "experimental-search")] room_indexes: Arc<
398+
Mutex<HashMap<OwnedRoomId, RoomIndex>>,
399+
>,
400+
#[cfg(feature = "experimental-search")] index_base_dir: IndexBaseDir,
377401
) -> Arc<Self> {
378402
let caches = ClientCaches {
379403
server_info: server_info.into(),
@@ -409,6 +433,10 @@ impl ClientInner {
409433
#[cfg(feature = "e2e-encryption")]
410434
enable_share_history_on_invite,
411435
server_max_upload_size: Mutex::new(OnceCell::new()),
436+
#[cfg(feature = "experimental-search")]
437+
room_indexes,
438+
#[cfg(feature = "experimental-search")]
439+
index_base_dir,
412440
};
413441

414442
#[allow(clippy::let_and_return)]
@@ -2713,6 +2741,10 @@ impl Client {
27132741
#[cfg(feature = "e2e-encryption")]
27142742
self.inner.enable_share_history_on_invite,
27152743
cross_process_store_locks_holder_name,
2744+
#[cfg(feature = "experimental-search")]
2745+
self.inner.room_indexes.clone(),
2746+
#[cfg(feature = "experimental-search")]
2747+
self.inner.index_base_dir.clone(),
27162748
)
27172749
.await,
27182750
};
@@ -2810,6 +2842,85 @@ impl Client {
28102842
&self.base_client().decryption_settings
28112843
}
28122844

2845+
/// Add an [`AnyMessageLikeEvent`] to the [`RoomIndex`] of the given
/// [`RoomId`], creating the index first if the room does not have one yet.
///
/// # Errors
///
/// Returns an [`IndexError`] only if the room's index had to be created
/// and that creation failed. Failures reported while adding the event
/// itself are logged (at `debug` level for events that cannot be indexed,
/// at `error` level otherwise) and are not returned to the caller.
#[cfg(feature = "experimental-search")]
pub async fn index_event(
    &self,
    event: AnyMessageLikeEvent,
    room_id: &RoomId,
) -> Result<(), IndexError> {
    let mut room_indexes = self.inner.room_indexes.lock().await;

    // Does nothing when the room already has an index, so it can be called
    // unconditionally before the lookup below.
    self.add_index_impl(room_id, &mut room_indexes)?;

    let outcome = room_indexes.get_mut(room_id).expect("key just added").add_event(event);

    match &outcome {
        Ok(_) => {}
        Err(
            IndexError::CannotIndexRedactedMessage
            | IndexError::EmptyMessage
            | IndexError::MessageTypeNotSupported,
        ) => {
            debug!("failed to parse event for indexing: {outcome:?}")
        }
        Err(IndexError::TantivyError(err)) => {
            error!("failed to add/commit event to index: {err:?}")
        }
        Err(_) => error!("unexpected error during indexing: {outcome:?}"),
    }

    Ok(())
}
2876+
2877+
/// Make sure a [`RoomIndex`] exists for the given [`RoomId`].
///
/// Takes the lock on the client's room index map and delegates to
/// `add_index_impl`, which leaves an already registered index untouched.
///
/// # Errors
///
/// Returns an [`IndexError`] if a new index had to be created and that
/// failed.
#[cfg(feature = "experimental-search")]
pub async fn add_index(&self, room_id: &RoomId) -> Result<(), IndexError> {
    let mut room_indexes = self.inner.room_indexes.lock().await;
    self.add_index_impl(room_id, &mut room_indexes)
}
2884+
2885+
/// Insert a [`RoomIndex`] for `room_id` into the already locked map, unless
/// one is present.
///
/// The index is opened or created under the configured base directory, or
/// built in RAM, depending on the client's [`IndexBaseDir`].
#[cfg(feature = "experimental-search")]
fn add_index_impl(
    &self,
    room_id: &RoomId,
    hash_map: &mut MutexGuard<'_, HashMap<OwnedRoomId, RoomIndex>>,
) -> Result<(), IndexError> {
    // Nothing to do when the room is already indexed.
    if hash_map.contains_key(room_id) {
        return Ok(());
    }

    let new_index = match &self.inner.index_base_dir {
        IndexBaseDir::Ram => RoomIndex::new_in_ram(room_id)?,
        IndexBaseDir::Directory(path) => RoomIndex::open_or_create(path, room_id)?,
    };
    hash_map.insert(room_id.to_owned(), new_index);

    Ok(())
}
2900+
2901+
/// Search the index of the given room for `query`, asking the index for at
/// most `max_number_of_results` results.
///
/// Returns `None` when the room has no index or when the search itself
/// fails; both cases are logged rather than surfaced as an error.
#[cfg(feature = "experimental-search")]
pub async fn search_index(
    &self,
    query: &str,
    max_number_of_results: usize,
    room_id: &RoomId,
) -> Option<Vec<OwnedEventId>> {
    let room_indexes = self.inner.room_indexes.lock().await;

    let Some(index) = room_indexes.get(room_id) else {
        warn!("Tried to search in a room with no index");
        return None;
    };

    match index.search(query, max_number_of_results) {
        Ok(event_ids) => Some(event_ids),
        Err(err) => {
            error!("error occured while searching index: {err:?}");
            None
        }
    }
}
2923+
28132924
/// Whether the client is configured to take thread subscriptions (MSC4306
28142925
/// and MSC4308) into account.
28152926
///

crates/matrix-sdk/src/event_cache/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ use matrix_sdk_base::{
4545
timer,
4646
};
4747
use matrix_sdk_common::executor::{spawn, JoinHandle};
48+
#[cfg(feature = "experimental-search")]
49+
use matrix_sdk_search::error::IndexError;
4850
use room::RoomEventCacheState;
4951
use ruma::{events::AnySyncEphemeralRoomEvent, serde::Raw, OwnedEventId, OwnedRoomId, RoomId};
5052
use tokio::sync::{
@@ -116,6 +118,11 @@ pub enum EventCacheError {
116118
/// A string containing details about the error.
117119
details: String,
118120
},
121+
122+
/// An error reported by the search index (`matrix-sdk-search`).
123+
#[cfg(feature = "experimental-search")]
124+
#[error(transparent)]
125+
IndexError(#[from] IndexError),
119126
}
120127

121128
/// A result using the [`EventCacheError`].

0 commit comments

Comments
 (0)