Skip to content

Commit b9b8775

Browse files
Add plain-text handling for rich-text topics as per MSC3765 (#18195)
This implements matrix-org/matrix-spec-proposals#3765 which is already merged and, therefore, can use stable identifiers. For `/publicRooms` and `/hierarchy`, the topic is read from the eponymous field of the `current_state_events` table. Rather than introduce further columns in this table, I changed the insertion / update logic to write the plain-text topic from the rich topic into the existing field. This will not take effect for existing rooms unless their topic is changed. However, existing rooms shouldn't have rich topics to begin with. Similarly, for server-side search, I changed the insertion logic of the `event_search` table to prefer the value from the rich topic. Again, existing events shouldn't have rich topics and, therefore, don't need to be migrated in the table. Spec doc: https://spec.matrix.org/v1.15/client-server-api/#mroomtopic Part of supporting Matrix v1.15: https://spec.matrix.org/v1.15/client-server-api/#mroomtopic Signed-off-by: Johannes Marbach <[email protected]> Co-authored-by: Eric Eastwood <[email protected]>
1 parent e1b429d commit b9b8775

File tree

13 files changed

+356
-53
lines changed

13 files changed

+356
-53
lines changed

changelog.d/18195.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add plain-text handling for rich-text topics as per [MSC3765](https://github.com/matrix-org/matrix-spec-proposals/pull/3765).

synapse/api/constants.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,11 @@ class EventContentFields:
262262

263263
TOMBSTONE_SUCCESSOR_ROOM: Final = "replacement_room"
264264

265+
# Used in m.room.topic events.
266+
TOPIC: Final = "topic"
267+
M_TOPIC: Final = "m.topic"
268+
M_TEXT: Final = "m.text"
269+
265270

266271
class EventUnsignedContentFields:
267272
"""Fields found inside the 'unsigned' data on events"""
@@ -270,6 +275,13 @@ class EventUnsignedContentFields:
270275
MEMBERSHIP: Final = "membership"
271276

272277

278+
class MTextFields:
    """Fields found inside m.text content blocks."""

    # Key holding the textual content of a representation.
    BODY: Final = "body"
    # Key holding the representation's mimetype.
    MIMETYPE: Final = "mimetype"
283+
284+
273285
class RoomTypes:
274286
"""Understood values of the room_type field of m.room.create events."""
275287

synapse/config/workers.py

Lines changed: 3 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,6 @@
2727
import attr
2828

2929
from synapse._pydantic_compat import (
30-
BaseModel,
31-
Extra,
3230
StrictBool,
3331
StrictInt,
3432
StrictStr,
@@ -47,6 +45,7 @@
4745
parse_listener_def,
4846
)
4947
from synapse.types import JsonDict
48+
from synapse.util.pydantic_models import ParseModel
5049

5150
_DEPRECATED_WORKER_DUTY_OPTION_USED = """
5251
The '%s' configuration option is deprecated and will be removed in a future
@@ -90,30 +89,7 @@ def _instance_to_list_converter(obj: Union[str, List[str]]) -> List[str]:
9089
return obj
9190

9291

93-
class ConfigModel(BaseModel):
94-
"""A custom version of Pydantic's BaseModel which
95-
96-
- ignores unknown fields and
97-
- does not allow fields to be overwritten after construction,
98-
99-
but otherwise uses Pydantic's default behaviour.
100-
101-
For now, ignore unknown fields. In the future, we could change this so that unknown
102-
config values cause a ValidationError, provided the error messages are meaningful to
103-
server operators.
104-
105-
Subclassing in this way is recommended by
106-
https://pydantic-docs.helpmanual.io/usage/model_config/#change-behaviour-globally
107-
"""
108-
109-
class Config:
110-
# By default, ignore fields that we don't recognise.
111-
extra = Extra.ignore
112-
# By default, don't allow fields to be reassigned after parsing.
113-
allow_mutation = False
114-
115-
116-
class InstanceTcpLocationConfig(ConfigModel):
92+
class InstanceTcpLocationConfig(ParseModel):
11793
"""The host and port to talk to an instance via HTTP replication."""
11894

11995
host: StrictStr
@@ -129,7 +105,7 @@ def netloc(self) -> str:
129105
return f"{self.host}:{self.port}"
130106

131107

132-
class InstanceUnixLocationConfig(ConfigModel):
108+
class InstanceUnixLocationConfig(ParseModel):
133109
"""The socket file to talk to an instance via HTTP replication."""
134110

135111
path: StrictStr

synapse/handlers/room.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
HistoryVisibility,
5252
JoinRules,
5353
Membership,
54+
MTextFields,
5455
RoomCreationPreset,
5556
RoomEncryptionAlgorithms,
5657
RoomTypes,
@@ -1303,7 +1304,13 @@ async def create_event(
13031304
topic = room_config["topic"]
13041305
topic_event, topic_context = await create_event(
13051306
EventTypes.Topic,
1306-
{"topic": topic},
1307+
{
1308+
EventContentFields.TOPIC: topic,
1309+
EventContentFields.M_TOPIC: {
1310+
# The mimetype property defaults to `text/plain` if omitted.
1311+
EventContentFields.M_TEXT: [{MTextFields.BODY: topic}]
1312+
},
1313+
},
13071314
True,
13081315
)
13091316
events_to_send.append((topic_event, topic_context))

synapse/handlers/stats.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from synapse.metrics.background_process_metrics import run_as_background_process
3737
from synapse.storage.databases.main.state_deltas import StateDelta
3838
from synapse.types import JsonDict
39+
from synapse.util.events import get_plain_text_topic_from_event_content
3940

4041
if TYPE_CHECKING:
4142
from synapse.server import HomeServer
@@ -299,7 +300,9 @@ async def _handle_deltas(
299300
elif delta.event_type == EventTypes.Name:
300301
room_state["name"] = event_content.get("name")
301302
elif delta.event_type == EventTypes.Topic:
302-
room_state["topic"] = event_content.get("topic")
303+
room_state["topic"] = get_plain_text_topic_from_event_content(
304+
event_content
305+
)
303306
elif delta.event_type == EventTypes.RoomAvatar:
304307
room_state["avatar"] = event_content.get("url")
305308
elif delta.event_type == EventTypes.CanonicalAlias:

synapse/storage/databases/main/events.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
from synapse.types.handlers import SLIDING_SYNC_DEFAULT_BUMP_EVENT_TYPES
7979
from synapse.types.state import StateFilter
8080
from synapse.util import json_encoder
81+
from synapse.util.events import get_plain_text_topic_from_event_content
8182
from synapse.util.iterutils import batch_iter, sorted_topologically
8283
from synapse.util.stringutils import non_null_str_or_none
8384

@@ -3102,7 +3103,10 @@ def _handle_redact_relations(
31023103
def _store_room_topic_txn(self, txn: LoggingTransaction, event: EventBase) -> None:
31033104
if isinstance(event.content.get("topic"), str):
31043105
self.store_event_search_txn(
3105-
txn, event, "content.topic", event.content["topic"]
3106+
txn,
3107+
event,
3108+
"content.topic",
3109+
get_plain_text_topic_from_event_content(event.content) or "",
31063110
)
31073111

31083112
def _store_room_name_txn(self, txn: LoggingTransaction, event: EventBase) -> None:

synapse/storage/databases/main/search.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
from synapse.storage.databases.main.events_worker import EventRedactBehaviour
5050
from synapse.storage.engines import PostgresEngine, Sqlite3Engine
5151
from synapse.types import JsonDict
52+
from synapse.util.events import get_plain_text_topic_from_event_content
5253

5354
if TYPE_CHECKING:
5455
from synapse.server import HomeServer
@@ -212,7 +213,9 @@ def reindex_search_txn(txn: LoggingTransaction) -> int:
212213
value = content["body"]
213214
elif etype == "m.room.topic":
214215
key = "content.topic"
215-
value = content["topic"]
216+
# Must be a plain `str` like the other branches; a trailing comma inside
# parentheses here would turn the value into a 1-tuple.
value = get_plain_text_topic_from_event_content(content) or ""
216219
elif etype == "m.room.name":
217220
key = "content.name"
218221
value = content["name"]

synapse/storage/databases/main/stats.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
from synapse.storage.databases.main.state_deltas import StateDeltasStore
4949
from synapse.types import JsonDict
5050
from synapse.util.caches.descriptors import cached
51+
from synapse.util.events import get_plain_text_topic_from_event_content
5152

5253
if TYPE_CHECKING:
5354
from synapse.server import HomeServer
@@ -611,7 +612,9 @@ def _fetch_current_state_stats(
611612
elif event.type == EventTypes.Name:
612613
room_state["name"] = event.content.get("name")
613614
elif event.type == EventTypes.Topic:
614-
room_state["topic"] = event.content.get("topic")
615+
room_state["topic"] = get_plain_text_topic_from_event_content(
616+
event.content
617+
)
615618
elif event.type == EventTypes.RoomAvatar:
616619
room_state["avatar"] = event.content.get("url")
617620
elif event.type == EventTypes.CanonicalAlias:

synapse/types/rest/__init__.py

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,26 +18,8 @@
1818
# [This file includes modifications made by New Vector Limited]
1919
#
2020
#
21-
from synapse._pydantic_compat import BaseModel, Extra
21+
from synapse.util.pydantic_models import ParseModel
2222

2323

24-
class RequestBodyModel(BaseModel):
25-
"""A custom version of Pydantic's BaseModel which
26-
27-
- ignores unknown fields and
28-
- does not allow fields to be overwritten after construction,
29-
30-
but otherwise uses Pydantic's default behaviour.
31-
32-
Ignoring unknown fields is a useful default. It means that clients can provide
33-
unstable field not known to the server without the request being refused outright.
34-
35-
Subclassing in this way is recommended by
36-
https://pydantic-docs.helpmanual.io/usage/model_config/#change-behaviour-globally
37-
"""
38-
39-
class Config:
40-
# By default, ignore fields that we don't recognise.
41-
extra = Extra.ignore
42-
# By default, don't allow fields to be reassigned after parsing.
43-
allow_mutation = False
24+
class RequestBodyModel(ParseModel):
    """Base class for models describing request bodies.

    Defines nothing of its own; all parsing behaviour is inherited from
    `ParseModel`.
    """

synapse/util/events.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,11 @@
1313
#
1414
#
1515

16+
from typing import Any, List, Optional
17+
18+
from synapse._pydantic_compat import Field, StrictStr, ValidationError, validator
19+
from synapse.types import JsonDict
20+
from synapse.util.pydantic_models import ParseModel
1621
from synapse.util.stringutils import random_string
1722

1823

@@ -27,3 +32,100 @@ def generate_fake_event_id() -> str:
2732
A string intended to look like an event ID, but with no actual meaning.
2833
"""
2934
return "$" + random_string(43)
35+
36+
37+
class MTextRepresentation(ParseModel):
    """
    A single entry of an `m.text` content block.

    See `TextualRepresentation` in the Matrix specification.
    """

    # The text of this representation.
    body: StrictStr
    # The mimetype of `body`. May be omitted; callers in this module treat a
    # missing mimetype as `text/plain`. The default is spelled out so that the
    # field stays optional without relying on implicit-`Optional` handling.
    mimetype: Optional[StrictStr] = None
44+
45+
46+
class MTopic(ParseModel):
    """
    `m.room.topic` -> `content` -> `m.topic`

    Textual representation of the room topic in different mimetypes. Added in
    Matrix v1.15.

    See `TopicContentBlock` in the Matrix specification.
    """

    m_text: Optional[List[MTextRepresentation]] = Field(default=None, alias="m.text")
    """
    An ordered array of textual representations in different mimetypes.
    """

    # Because "Receivers SHOULD use the first representation in the array that they
    # understand.", we ignore invalid representations in the `m.text` field and use
    # what we can.
    @validator("m_text", pre=True)
    def ignore_invalid_representations(
        cls, m_text: Any
    ) -> Optional[List[MTextRepresentation]]:
        """
        Parse each element of `m.text`, dropping the ones that fail validation.

        Raises:
            ValueError: if `m_text` is not a list at all.
        """
        if not isinstance(m_text, list):
            raise ValueError("m.text must be a list")
        representations = []
        for element in m_text:
            try:
                representations.append(MTextRepresentation.parse_obj(element))
            except ValidationError:
                # Skip just this entry; the remaining ones may still be usable.
                continue
        return representations
76+
77+
78+
class TopicContent(ParseModel):
    """
    Represents the `content` field of an `m.room.topic` event
    """

    topic: StrictStr
    """
    The topic in plain text.
    """

    m_topic: Optional[MTopic] = Field(default=None, alias="m.topic")
    """
    Textual representation of the room topic in different mimetypes.
    """

    # We ignore invalid `m.topic` fields as we can always fall back to the plain-text
    # `topic` field.
    @validator("m_topic", pre=True)
    def ignore_invalid_m_topic(cls, m_topic: Any) -> Optional[MTopic]:
        """
        Parse `m.topic` into an `MTopic`, returning `None` if it fails validation.
        """
        try:
            return MTopic.parse_obj(m_topic)
        except ValidationError:
            return None
101+
102+
103+
def get_plain_text_topic_from_event_content(content: JsonDict) -> Optional[str]:
    """
    Given the `content` of an `m.room.topic` event, returns the plain-text topic
    representation. Prefers pulling plain-text from the newer `m.topic` field if
    available with a fallback to `topic`.

    Args:
        content: The `content` field of an `m.room.topic` event.

    Returns:
        A string representing the plain text topic, or `None` if `content` does
        not validate as `TopicContent` (e.g. `topic` is missing or not a string).
    """

    try:
        topic_content = TopicContent.parse_obj(content)
    except ValidationError:
        return None

    # Find the first `text/plain` topic ("Receivers SHOULD use the first
    # representation in the array that they understand.")
    if topic_content.m_topic and topic_content.m_topic.m_text:
        for representation in topic_content.m_topic.m_text:
            # The mimetype property defaults to `text/plain` if omitted.
            if not representation.mimetype or representation.mimetype == "text/plain":
                return representation.body

    # Fallback to the plain-old `topic` field if there isn't any `text/plain` topic
    # representation available.
    return topic_content.topic

0 commit comments

Comments
 (0)