Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 6cba6a5

Browse files
committed
Merge branch 'master' into develop
2 parents 7469824 + 09d89dd commit 6cba6a5

File tree

5 files changed

+84
-25
lines changed

5 files changed

+84
-25
lines changed

CHANGES.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,24 @@
1+
Synapse 1.61.1 (2022-06-28)
2+
===========================
3+
4+
This patch release fixes a security issue regarding URL previews, affecting all prior versions of Synapse. Server administrators are encouraged to update Synapse as soon as possible. We are not aware of this vulnerability being exploited in the wild.
5+
6+
Server administrators who are unable to update Synapse may use the workarounds described in the linked GitHub Security Advisory below.
7+
8+
## Security advisory
9+
10+
The following issue is fixed in 1.61.1.
11+
12+
* [GHSA-22p3-qrh9-cx32](https://github.com/matrix-org/synapse/security/advisories/GHSA-22p3-qrh9-cx32) / [CVE-2022-31052](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-31052)
13+
14+
Synapse instances with the [`url_preview_enabled`](https://matrix-org.github.io/synapse/v1.61/usage/configuration/config_documentation.html#media-store) homeserver config option set to `true` are affected. URL previews of some web pages can lead to unbounded recursion, causing the request to fail or, in some cases, crashing the running Synapse process.
15+
16+
Requesting URL previews requires authentication. Nevertheless, the issue can be exploited either by malicious users on the homeserver, or by remote users who send URLs for which a local user's client may automatically request a URL preview.
17+
18+
Homeservers with the `url_preview_enabled` configuration option set to `false` (the default) are unaffected. Instances with the `enable_media_repo` configuration option set to `false` are also unaffected, as this also disables URL preview functionality.
19+
20+
Fixed by [fa1308061802ac7b7d20e954ba7372c5ac292333](https://github.com/matrix-org/synapse/commit/fa1308061802ac7b7d20e954ba7372c5ac292333).
21+
122
Synapse 1.61.0 (2022-06-14)
223
===========================
324

debian/changelog

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
matrix-synapse-py3 (1.61.1) stable; urgency=medium
2+
3+
* New Synapse release 1.61.1.
4+
5+
-- Synapse Packaging team <packages@matrix.org> Tue, 28 Jun 2022 14:33:46 +0100
6+
17
matrix-synapse-py3 (1.61.0) stable; urgency=medium
28

39
* New Synapse release 1.61.0.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ skip_gitignore = true
5454

5555
[tool.poetry]
5656
name = "matrix-synapse"
57-
version = "1.61.0"
57+
version = "1.61.1"
5858
description = "Homeserver for the Matrix decentralised comms protocol"
5959
authors = ["Matrix.org Team and Contributors <packages@matrix.org>"]
6060
license = "Apache-2.0"

synapse/rest/media/v1/preview_html.py

Lines changed: 39 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414
import codecs
15-
import itertools
1615
import logging
1716
import re
1817
from typing import (
@@ -21,7 +20,7 @@
2120
Dict,
2221
Generator,
2322
Iterable,
24-
Optional,
23+
List, Optional,
2524
Set,
2625
Union,
2726
)
@@ -354,7 +353,7 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]:
354353

355354
from lxml import etree
356355

357-
TAGS_TO_REMOVE = (
356+
TAGS_TO_REMOVE = {
358357
"header",
359358
"nav",
360359
"aside",
@@ -369,31 +368,42 @@ def parse_html_description(tree: "etree.Element") -> Optional[str]:
369368
"img",
370369
"picture",
371370
etree.Comment,
372-
)
371+
}
373372

374373
# Split all the text nodes into paragraphs (by splitting on new
375374
# lines)
376375
text_nodes = (
377376
re.sub(r"\s+", "\n", el).strip()
378-
for el in _iterate_over_text(tree.find("body"), *TAGS_TO_REMOVE)
377+
for el in _iterate_over_text(tree.find("body"), TAGS_TO_REMOVE)
379378
)
380379
return summarize_paragraphs(text_nodes)
381380

382381

383382
def _iterate_over_text(
384-
tree: "etree.Element", *tags_to_ignore: Union[str, "etree.Comment"]
383+
tree: Optional["etree.Element"],
384+
tags_to_ignore: Set[Union[str, "etree.Comment"]],
385+
stack_limit: int = 1024,
385386
) -> Generator[str, None, None]:
386387
"""Iterate over the tree returning text nodes in a depth first fashion,
387388
skipping text nodes inside certain tags.
389+
390+
Args:
391+
tree: The parent element to iterate. Can be None if there isn't one.
392+
tags_to_ignore: Set of tags to ignore
393+
stack_limit: Maximum stack size limit for depth-first traversal.
394+
Nodes will be dropped if this limit is hit, which may truncate the
395+
textual result.
396+
Intended to limit the maximum working memory when generating a preview.
388397
"""
389-
# This is basically a stack that we extend using itertools.chain.
390-
# This will either consist of an element to iterate over *or* a string
398+
399+
if tree is None:
400+
return
401+
402+
# This is a stack whose items are elements to iterate over *or* strings
391403
# to be returned.
392-
elements = iter([tree])
393-
while True:
394-
el = next(elements, None)
395-
if el is None:
396-
return
404+
elements: List[Union[str, "etree.Element"]] = [tree]
405+
while elements:
406+
el = elements.pop()
397407

398408
if isinstance(el, str):
399409
yield el
@@ -407,17 +417,22 @@ def _iterate_over_text(
407417
if el.text:
408418
yield el.text
409419

410-
# We add to the stack all the elements children, interspersed with
411-
# each child's tail text (if it exists). The tail text of a node
412-
# is text that comes *after* the node, so we always include it even
413-
# if we ignore the child node.
414-
elements = itertools.chain(
415-
itertools.chain.from_iterable( # Basically a flatmap
416-
[child, child.tail] if child.tail else [child]
417-
for child in el.iterchildren()
418-
),
419-
elements,
420-
)
420+
# We add to the stack all the element's children, interspersed with
421+
# each child's tail text (if it exists).
422+
#
423+
# We iterate in reverse order so that earlier pieces of text appear
424+
# closer to the top of the stack.
425+
for child in el.iterchildren(reversed=True):
426+
if len(elements) > stack_limit:
427+
# We've hit our limit for working memory
428+
break
429+
430+
if child.tail:
431+
# The tail text of a node is text that comes *after* the node,
432+
# so we always include it even if we ignore the child node.
433+
elements.append(child.tail)
434+
435+
elements.append(child)
421436

422437

423438
def summarize_paragraphs(

tests/rest/media/v1/test_html_preview.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,23 @@ def test_twitter_tag(self) -> None:
411411
},
412412
)
413413

414+
def test_nested_nodes(self) -> None:
415+
"""A body with some nested nodes. Tests that we iterate over children
416+
in the right order (and don't reverse the order of the text)."""
417+
html = b"""
418+
<a href="somewhere">Welcome <b>the bold <u>and underlined text <svg>
419+
with a cheeky SVG</svg></u> and <strong>some</strong> tail text</b></a>
420+
"""
421+
tree = decode_body(html, "http://example.com/test.html")
422+
og = parse_html_to_open_graph(tree)
423+
self.assertEqual(
424+
og,
425+
{
426+
"og:title": None,
427+
"og:description": "Welcome\n\nthe bold\n\nand underlined text\n\nand\n\nsome\n\ntail text",
428+
},
429+
)
430+
414431

415432
class MediaEncodingTestCase(unittest.TestCase):
416433
def test_meta_charset(self) -> None:

0 commit comments

Comments
 (0)