Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 0 additions & 13 deletions crates/matrix-sdk-search/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.

//! The event cache is an abstraction layer, sitting between the Rust SDK and a
//! final client, that acts as a global observer of all the rooms, gathering and
//! inferring some extra useful information about each room. In particular, this
//! doesn't require subscribing to a specific room to get access to this
//! information.
//!
//! It's intended to be fast, robust and easy to maintain, having learned from
//! previous endeavours at implementing middle to high level features elsewhere
//! in the SDK, notably in the UI's Timeline object.
//!
//! See the [github issue](https://github.com/matrix-org/matrix-rust-sdk/issues/3058) for more
//! details about the historical reasons that led us to start writing this.

use tantivy::{
directory::error::OpenDirectoryError as TantivyOpenDirectoryError,
query::QueryParserError as TantivyQueryParserError,
Expand Down
79 changes: 39 additions & 40 deletions crates/matrix-sdk-search/src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.

//! The event cache is an abstraction layer, sitting between the Rust SDK and a
//! final client, that acts as a global observer of all the rooms, gathering and
//! inferring some extra useful information about each room. In particular, this
//! doesn't require subscribing to a specific room to get access to this
//! information.
//!
//! It's intended to be fast, robust and easy to maintain, having learned from
//! previous endeavours at implementing middle to high level features elsewhere
//! in the SDK, notably in the UI's Timeline object.
//!
//! See the [github issue](https://github.com/matrix-org/matrix-rust-sdk/issues/3058) for more
//! details about the historical reasons that led us to start writing this.

use std::{fmt, fs, path::Path, sync::Arc};

use ruma::{OwnedEventId, OwnedRoomId, RoomId, events::AnyMessageLikeEvent};
use ruma::{OwnedEventId, OwnedRoomId, RoomId, events::AnySyncMessageLikeEvent};
use tantivy::{
Index, IndexReader, TantivyDocument,
collector::TopDocs,
Expand All @@ -44,6 +31,11 @@ use crate::{
writer::SearchIndexWriter,
};

/// An enum representing the operations that can be applied to a
/// [`RoomIndex`].
pub(crate) enum RoomIndexOperation {
/// Add the given document to the index.
Add(TantivyDocument),
}

/// A struct that holds all data pertaining to a particular room's
/// message index.
pub struct RoomIndex {
Expand Down Expand Up @@ -91,9 +83,9 @@ impl RoomIndex {
RoomIndex::new_with(index, schema, room_id)
}

/// Create new [`RoomIndex`] which stores the index in RAM.
/// Create new [`RoomIndex`] which stores the index in memory.
/// Intended for testing.
pub fn new_in_ram(room_id: &RoomId) -> Result<RoomIndex, IndexError> {
pub fn new_in_memory(room_id: &RoomId) -> Result<RoomIndex, IndexError> {
let schema = RoomMessageSchema::new();
let index = Index::create_in_ram(schema.as_tantivy_schema());
RoomIndex::new_with(index, schema, room_id)
Expand Down Expand Up @@ -130,10 +122,14 @@ impl RoomIndex {
RoomIndex::new_with(index, schema, room_id)
}

/// Add [`AnyMessageLikeEvent`] to [`RoomIndex`]
pub fn add_event(&mut self, event: AnyMessageLikeEvent) -> Result<(), IndexError> {
let doc = self.schema.make_doc(event)?;
self.writer.add_document(doc)?; // TODO: This is blocking. Handle it.
/// Handle [`AnySyncMessageLikeEvent`]
///
/// Depending on the event type, this will add, remove, or edit an event in
/// the index.
pub fn handle_event(&mut self, event: AnySyncMessageLikeEvent) -> Result<(), IndexError> {
match self.schema.handle_event(event)? {
RoomIndexOperation::Add(document) => self.writer.add_document(document)?,
};
Ok(())
}

Expand Down Expand Up @@ -193,74 +189,77 @@ mod tests {
use std::{collections::HashSet, error::Error};

use matrix_sdk_test::event_factory::EventFactory;
use ruma::{event_id, owned_event_id, room_id, user_id};
use ruma::{event_id, room_id, user_id};

use crate::index::RoomIndex;

#[test]
fn test_make_index_in_ram() {
fn test_make_index_in_memory() {
let room_id = room_id!("!room_id:localhost");
let index = RoomIndex::new_in_ram(room_id);
let index = RoomIndex::new_in_memory(room_id);

index.expect("failed to make index in ram: {index:?}");
}

#[test]
fn test_add_event() {
fn test_handle_event() {
let room_id = room_id!("!room_id:localhost");
let mut index =
RoomIndex::new_in_ram(room_id).expect("failed to make index in ram: {index:?}");
RoomIndex::new_in_memory(room_id).expect("failed to make index in ram: {index:?}");

let event = EventFactory::new()
.text_msg("event message")
.event_id(event_id!("$event_id:localhost"))
.room(room_id)
.sender(user_id!("@user_id:localhost"))
.into_any_message_like_event();
.into_any_sync_message_like_event();

index.add_event(event).expect("failed to add event: {res:?}");
index.handle_event(event).expect("failed to add event: {res:?}");
}

#[test]
fn test_search_populated_index() -> Result<(), Box<dyn Error>> {
let room_id = room_id!("!room_id:localhost");
let mut index =
RoomIndex::new_in_ram(room_id).expect("failed to make index in ram: {index:?}");
RoomIndex::new_in_memory(room_id).expect("failed to make index in ram: {index:?}");

let event_id_1 = event_id!("$event_id_1:localhost");
let event_id_2 = event_id!("$event_id_2:localhost");
let event_id_3 = event_id!("$event_id_3:localhost");

index.add_event(
index.handle_event(
EventFactory::new()
.text_msg("This is a sentence")
.event_id(event_id!("$event_id_1:localhost"))
.event_id(event_id_1)
.room(room_id)
.sender(user_id!("@user_id:localhost"))
.into_any_message_like_event(),
.into_any_sync_message_like_event(),
)?;

index.add_event(
index.handle_event(
EventFactory::new()
.text_msg("All new words")
.event_id(event_id!("$event_id_2:localhost"))
.event_id(event_id_2)
.room(room_id)
.sender(user_id!("@user_id:localhost"))
.into_any_message_like_event(),
.into_any_sync_message_like_event(),
)?;

index.add_event(
index.handle_event(
EventFactory::new()
.text_msg("A similar sentence")
.event_id(event_id!("$event_id_3:localhost"))
.event_id(event_id_3)
.room(room_id)
.sender(user_id!("@user_id:localhost"))
.into_any_message_like_event(),
.into_any_sync_message_like_event(),
)?;

index.commit_and_reload()?;

let result = index.search("sentence", 10).expect("search failed with: {result:?}");
let result: HashSet<_> = result.iter().collect();

let true_value =
[owned_event_id!("$event_id_1:localhost"), owned_event_id!("$event_id_3:localhost")];
let true_value = [event_id_1.to_owned(), event_id_3.to_owned()];
let true_value: HashSet<_> = true_value.iter().collect();

assert_eq!(result, true_value, "search result not correct: {result:?}");
Expand All @@ -272,7 +271,7 @@ mod tests {
fn test_search_empty_index() -> Result<(), Box<dyn Error>> {
let room_id = room_id!("!room_id:localhost");
let mut index =
RoomIndex::new_in_ram(room_id).expect("failed to make index in ram: {index:?}");
RoomIndex::new_in_memory(room_id).expect("failed to make index in ram: {index:?}");

index.commit_and_reload()?;

Expand Down
110 changes: 47 additions & 63 deletions crates/matrix-sdk-search/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,39 +12,29 @@
// See the License for the specific language governing permissions and
// limitations under the License.

//! The event cache is an abstraction layer, sitting between the Rust SDK and a
//! final client, that acts as a global observer of all the rooms, gathering and
//! inferring some extra useful information about each room. In particular, this
//! doesn't require subscribing to a specific room to get access to this
//! information.
//!
//! It's intended to be fast, robust and easy to maintain, having learned from
//! previous endeavours at implementing middle to high level features elsewhere
//! in the SDK, notably in the UI's Timeline object.
//!
//! See the [github issue](https://github.com/matrix-org/matrix-rust-sdk/issues/3058) for more
//! details about the historical reasons that led us to start writing this.

use ruma::{
MilliSecondsSinceUnixEpoch, OwnedEventId, OwnedUserId,
events::{
AnyMessageLikeEvent, MessageLikeEvent, MessageLikeEventContent, RedactContent,
RedactedMessageLikeEventContent, room::message::MessageType,
},
use ruma::events::{
AnySyncMessageLikeEvent, MessageLikeEventContent, RedactContent,
RedactedMessageLikeEventContent, SyncMessageLikeEvent, room::message::MessageType,
};
use tantivy::{
DateTime, TantivyDocument, doc,
schema::{DateOptions, DateTimePrecision, Field, INDEXED, STORED, STRING, Schema, TEXT},
};

use crate::error::{IndexError, IndexSchemaError};
use crate::{
error::{IndexError, IndexSchemaError},
index::RoomIndexOperation,
};

pub(crate) trait MatrixSearchIndexSchema {
fn new() -> Self;
fn default_search_fields(&self) -> Vec<Field>;
fn primary_key(&self) -> Field;
fn as_tantivy_schema(&self) -> Schema;
fn make_doc(&self, event: AnyMessageLikeEvent) -> Result<TantivyDocument, IndexError>;
fn handle_event(
&self,
event: AnySyncMessageLikeEvent,
) -> Result<RoomIndexOperation, IndexError>;
}

#[derive(Debug, Clone)]
Expand All @@ -58,48 +48,31 @@ pub(crate) struct RoomMessageSchema {
}

impl RoomMessageSchema {
fn parse_event<C: MessageLikeEventContent + RedactContent, F>(
/// Given a [`SyncMessageLikeEvent`] and a function to convert its content
/// into a String to be indexed, return a [`TantivyDocument`] to index.
fn make_doc<C: MessageLikeEventContent + RedactContent, F>(
&self,
event: MessageLikeEvent<C>,
get_body: F,
) -> Result<(OwnedEventId, String, MilliSecondsSinceUnixEpoch, OwnedUserId), IndexError>
event: SyncMessageLikeEvent<C>,
get_body_from_content: F,
) -> Result<TantivyDocument, IndexError>
where
<C as RedactContent>::Redacted: RedactedMessageLikeEventContent,
F: FnOnce(&C) -> Result<String, IndexError>,
{
let unredacted = event.as_original().ok_or(IndexError::CannotIndexRedactedMessage)?;

let body = get_body(&unredacted.content)?;
let body = get_body_from_content(&unredacted.content)?;

Ok((
unredacted.event_id.clone(),
body,
unredacted.origin_server_ts,
unredacted.sender.clone(),
Ok(doc!(
self.event_id_field => unredacted.event_id.to_string(),
self.body_field => body,
self.date_field =>
DateTime::from_timestamp_millis(
unredacted.origin_server_ts.get().into()),
self.sender_field => unredacted.sender.to_string(),
))
}

fn parse_any_event(
&self,
event: AnyMessageLikeEvent,
) -> Result<(OwnedEventId, String, MilliSecondsSinceUnixEpoch, OwnedUserId), IndexError> {
match event {
// old m.room.message behaviour
AnyMessageLikeEvent::RoomMessage(event) => {
self.parse_event(event, |content| match &content.msgtype {
MessageType::Text(content) => Ok(content.body.clone()),
_ => Err(IndexError::MessageTypeNotSupported),
})
}

// new m.message behaviour
AnyMessageLikeEvent::Message(event) => self.parse_event(event, |content| {
content.text.find_plain().ok_or(IndexError::EmptyMessage).map(|v| v.to_owned())
}),

_ => Err(IndexError::MessageTypeNotSupported),
}
}
}

impl MatrixSearchIndexSchema for RoomMessageSchema {
Expand Down Expand Up @@ -140,17 +113,28 @@ impl MatrixSearchIndexSchema for RoomMessageSchema {
self.inner.clone()
}

fn make_doc(&self, event: AnyMessageLikeEvent) -> Result<TantivyDocument, IndexError> {
let (event_id, body, timestamp, sender) = self.parse_any_event(event)?;
fn handle_event(
&self,
event: AnySyncMessageLikeEvent,
) -> Result<RoomIndexOperation, IndexError> {
match event {
// m.room.message behaviour
AnySyncMessageLikeEvent::RoomMessage(event) => self
.make_doc(event, |content| match &content.msgtype {
MessageType::Text(content) => Ok(content.body.clone()),
_ => Err(IndexError::MessageTypeNotSupported),
})
.map(RoomIndexOperation::Add),

Ok(doc!(
self.event_id_field => event_id.to_string(),
self.body_field => body,
self.date_field =>
DateTime::from_timestamp_millis(
timestamp.get().into()),
self.sender_field => sender.to_string(),
))
// new MSC-1767 m.message behaviour
AnySyncMessageLikeEvent::Message(event) => self
.make_doc(event, |content| {
content.text.find_plain().ok_or(IndexError::EmptyMessage).map(|v| v.to_owned())
})
.map(RoomIndexOperation::Add),

_ => Err(IndexError::MessageTypeNotSupported),
}
}
}

Expand Down
13 changes: 0 additions & 13 deletions crates/matrix-sdk-search/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.

//! The event cache is an abstraction layer, sitting between the Rust SDK and a
//! final client, that acts as a global observer of all the rooms, gathering and
//! inferring some extra useful information about each room. In particular, this
//! doesn't require subscribing to a specific room to get access to this
//! information.
//!
//! It's intended to be fast, robust and easy to maintain, having learned from
//! previous endeavours at implementing middle to high level features elsewhere
//! in the SDK, notably in the UI's Timeline object.
//!
//! See the [github issue](https://github.com/matrix-org/matrix-rust-sdk/issues/3058) for more
//! details about the historical reasons that led us to start writing this.

use tantivy::{IndexWriter, TantivyDocument, TantivyError};

use crate::{OpStamp, error::IndexError};
Expand Down
3 changes: 3 additions & 0 deletions crates/matrix-sdk/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ docsrs = ["e2e-encryption", "sqlite", "indexeddb", "sso-login", "qrcode"]
# Add support for inline media galleries via msgtypes
unstable-msc4274 = ["ruma/unstable-msc4274", "matrix-sdk-base/unstable-msc4274"]

experimental-search = ["matrix-sdk-search"]

[dependencies]
anyhow = { workspace = true, optional = true }
anymap2 = "0.13.0"
Expand Down Expand Up @@ -90,6 +92,7 @@ matrix-sdk-base.workspace = true
matrix-sdk-common.workspace = true
matrix-sdk-ffi-macros = { workspace = true, optional = true }
matrix-sdk-indexeddb = { workspace = true, optional = true }
matrix-sdk-search = { workspace = true, optional = true }
matrix-sdk-sqlite = { workspace = true, optional = true }
matrix-sdk-test = { workspace = true, optional = true }
mime.workspace = true
Expand Down
Loading
Loading