diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index fc544fcfde2..0b18ce04ca2 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -444,7 +444,7 @@ jobs: - run: | sudo apt-get -qq update sudo apt-get -qq install build-essential libffi-dev python3-dev \ - libxml2-dev libxslt-dev xmlsec1 zlib1g-dev libjpeg-dev libwebp-dev + libxslt-dev xmlsec1 zlib1g-dev libjpeg-dev libwebp-dev - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: @@ -496,7 +496,7 @@ jobs: steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 # Install libs necessary for PyPy to build binary wheels for dependencies - - run: sudo apt-get -qq install xmlsec1 libxml2-dev libxslt-dev + - run: sudo apt-get -qq install xmlsec1 libxslt-dev - uses: matrix-org/setup-python-poetry@5bbf6603c5c930615ec8a29f1b5d7d258d905aa4 # v2.0.0 with: python-version: ${{ matrix.python-version }} diff --git a/changelog.d/19301.misc b/changelog.d/19301.misc new file mode 100644 index 00000000000..a8e802bdf82 --- /dev/null +++ b/changelog.d/19301.misc @@ -0,0 +1 @@ +Switch to `beautifulsoup4` from `lxml` for URL previews. Contributed by @clokep. \ No newline at end of file diff --git a/docs/setup/installation.md b/docs/setup/installation.md index a48662362af..660c3c7a0fc 100644 --- a/docs/setup/installation.md +++ b/docs/setup/installation.md @@ -307,7 +307,7 @@ Installing prerequisites on CentOS or Fedora Linux: ```sh sudo dnf install libtiff-devel libjpeg-devel libzip-devel freetype-devel \ - libwebp-devel libxml2-devel libxslt-devel libpq-devel \ + libwebp-devel libxslt-devel libpq-devel \ python3-virtualenv libffi-devel openssl-devel python3-devel sudo dnf group install "Development Tools" ``` @@ -633,10 +633,6 @@ This is critical from a security perspective to stop arbitrary Matrix users spidering 'internal' URLs on your network. 
At the very least we recommend that your loopback and RFC1918 IP addresses are blacklisted. -This also requires the optional `lxml` python dependency to be installed. This -in turn requires the `libxml2` library to be available - on Debian/Ubuntu this -means `apt-get install libxml2-dev`, or equivalent for your OS. - ### Backups Don't forget to take [backups](../usage/administration/backups.md) of your new server! diff --git a/flake.nix b/flake.nix index 4ff6518aed7..47a86102b99 100644 --- a/flake.nix +++ b/flake.nix @@ -100,7 +100,6 @@ libjpeg libpqxx libwebp - libxml2 libxslt sqlite diff --git a/poetry.lock b/poetry.lock index 2d75fb50c85..bfe2dd77c55 100644 --- a/poetry.lock +++ b/poetry.lock @@ -26,15 +26,15 @@ files = [ [[package]] name = "authlib" -version = "1.6.6" +version = "1.6.5" description = "The ultimate Python library in building OAuth and OpenID Connect servers and clients." optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"all\" or extra == \"jwt\" or extra == \"oidc\"" +markers = "extra == \"oidc\" or extra == \"jwt\" or extra == \"all\"" files = [ - {file = "authlib-1.6.6-py2.py3-none-any.whl", hash = "sha256:7d9e9bc535c13974313a87f53e8430eb6ea3d1cf6ae4f6efcd793f2e949143fd"}, - {file = "authlib-1.6.6.tar.gz", hash = "sha256:45770e8e056d0f283451d9996fbb59b70d45722b45d854d58f32878d0a40c38e"}, + {file = "authlib-1.6.5-py2.py3-none-any.whl", hash = "sha256:3e0e0507807f842b02175507bdee8957a1d5707fd4afb17c32fb43fee90b6e3a"}, + {file = "authlib-1.6.5.tar.gz", hash = "sha256:6aaf9c79b7cc96c900f0b284061691c5d4e61221640a948fe690b556a6d6d10b"}, ] [package.dependencies] @@ -132,6 +132,30 @@ files = [ tests = ["pytest (>=3.2.1,!=3.3.0)"] typecheck = ["mypy"] +[[package]] +name = "beautifulsoup4" +version = "4.14.3" +description = "Screen-scraping library" +optional = true +python-versions = ">=3.7.0" +groups = ["main"] +markers = "extra == \"url-preview\" or extra == \"all\"" +files = [ + {file = 
"beautifulsoup4-4.14.3-py3-none-any.whl", hash = "sha256:0918bfe44902e6ad8d57732ba310582e98da931428d231a5ecb9e7c703a735bb"}, + {file = "beautifulsoup4-4.14.3.tar.gz", hash = "sha256:6292b1c5186d356bba669ef9f7f051757099565ad9ada5dd630bd9de5fa7fb86"}, +] + +[package.dependencies] +soupsieve = ">=1.6.1" +typing-extensions = ">=4.0.0" + +[package.extras] +cchardet = ["cchardet"] +chardet = ["chardet"] +charset-normalizer = ["charset-normalizer"] +html5lib = ["html5lib"] +lxml = ["lxml"] + [[package]] name = "bleach" version = "6.3.0" @@ -481,7 +505,7 @@ description = "XML bomb protection for Python stdlib modules" optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" groups = ["main"] -markers = "extra == \"all\" or extra == \"saml2\"" +markers = "extra == \"saml2\" or extra == \"all\"" files = [ {file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"}, {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, @@ -506,7 +530,7 @@ description = "XPath 1.0/2.0/3.0/3.1 parsers and selectors for ElementTree and l optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"all\" or extra == \"saml2\"" +markers = "extra == \"saml2\" or extra == \"all\"" files = [ {file = "elementpath-4.1.5-py3-none-any.whl", hash = "sha256:2ac1a2fb31eb22bbbf817f8cf6752f844513216263f0e3892c8e79782fe4bb55"}, {file = "elementpath-4.1.5.tar.gz", hash = "sha256:c2d6dc524b29ef751ecfc416b0627668119d8812441c555d7471da41d4bacb8d"}, @@ -556,7 +580,7 @@ description = "Python wrapper for hiredis" optional = true python-versions = ">=3.8" groups = ["main"] -markers = "extra == \"all\" or extra == \"redis\"" +markers = "extra == \"redis\" or extra == \"all\"" files = [ {file = "hiredis-3.3.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:9937d9b69321b393fbace69f55423480f098120bc55a3316e1ca3508c4dbbd6f"}, 
{file = "hiredis-3.3.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:50351b77f89ba6a22aff430b993653847f36b71d444509036baa0f2d79d1ebf4"}, @@ -879,7 +903,7 @@ description = "Jaeger Python OpenTracing Tracer implementation" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"all\" or extra == \"opentracing\"" +markers = "extra == \"opentracing\" or extra == \"all\"" files = [ {file = "jaeger-client-4.8.0.tar.gz", hash = "sha256:3157836edab8e2c209bd2d6ae61113db36f7ee399e66b1dcbb715d87ab49bfe0"}, ] @@ -1017,7 +1041,7 @@ description = "A strictly RFC 4510 conforming LDAP V3 pure Python client library optional = true python-versions = "*" groups = ["main"] -markers = "extra == \"all\" or extra == \"matrix-synapse-ldap3\"" +markers = "extra == \"matrix-synapse-ldap3\" or extra == \"all\"" files = [ {file = "ldap3-2.9.1-py2.py3-none-any.whl", hash = "sha256:5869596fc4948797020d3f03b7939da938778a0f9e2009f7a072ccf92b8e8d70"}, {file = "ldap3-2.9.1.tar.gz", hash = "sha256:f3e7fc4718e3f09dda568b57100095e0ce58633bcabbed8667ce3f8fbaa4229f"}, @@ -1112,178 +1136,6 @@ files = [ {file = "librt-0.6.3.tar.gz", hash = "sha256:c724a884e642aa2bbad52bb0203ea40406ad742368a5f90da1b220e970384aae"}, ] -[[package]] -name = "lxml" -version = "6.0.2" -description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." 
-optional = true -python-versions = ">=3.8" -groups = ["main"] -markers = "extra == \"all\" or extra == \"url-preview\"" -files = [ - {file = "lxml-6.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e77dd455b9a16bbd2a5036a63ddbd479c19572af81b624e79ef422f929eef388"}, - {file = "lxml-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d444858b9f07cefff6455b983aea9a67f7462ba1f6cbe4a21e8bf6791bf2153"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f952dacaa552f3bb8834908dddd500ba7d508e6ea6eb8c52eb2d28f48ca06a31"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:71695772df6acea9f3c0e59e44ba8ac50c4f125217e84aab21074a1a55e7e5c9"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:17f68764f35fd78d7c4cc4ef209a184c38b65440378013d24b8aecd327c3e0c8"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:058027e261afed589eddcfe530fcc6f3402d7fd7e89bfd0532df82ebc1563dba"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8ffaeec5dfea5881d4c9d8913a32d10cfe3923495386106e4a24d45300ef79c"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:f2e3b1a6bb38de0bc713edd4d612969dd250ca8b724be8d460001a387507021c"}, - {file = "lxml-6.0.2-cp310-cp310-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d6690ec5ec1cce0385cb20896b16be35247ac8c2046e493d03232f1c2414d321"}, - {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f2a50c3c1d11cad0ebebbac357a97b26aa79d2bcaf46f256551152aa85d3a4d1"}, - {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:3efe1b21c7801ffa29a1112fab3b0f643628c30472d507f39544fd48e9549e34"}, - {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_riscv64.whl", hash = 
"sha256:59c45e125140b2c4b33920d21d83681940ca29f0b83f8629ea1a2196dc8cfe6a"}, - {file = "lxml-6.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:452b899faa64f1805943ec1c0c9ebeaece01a1af83e130b69cdefeda180bb42c"}, - {file = "lxml-6.0.2-cp310-cp310-win32.whl", hash = "sha256:1e786a464c191ca43b133906c6903a7e4d56bef376b75d97ccbb8ec5cf1f0a4b"}, - {file = "lxml-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:dacf3c64ef3f7440e3167aa4b49aa9e0fb99e0aa4f9ff03795640bf94531bcb0"}, - {file = "lxml-6.0.2-cp310-cp310-win_arm64.whl", hash = "sha256:45f93e6f75123f88d7f0cfd90f2d05f441b808562bf0bc01070a00f53f5028b5"}, - {file = "lxml-6.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:13e35cbc684aadf05d8711a5d1b5857c92e5e580efa9a0d2be197199c8def607"}, - {file = "lxml-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3b1675e096e17c6fe9c0e8c81434f5736c0739ff9ac6123c87c2d452f48fc938"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8ac6e5811ae2870953390452e3476694196f98d447573234592d30488147404d"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5aa0fc67ae19d7a64c3fe725dc9a1bb11f80e01f78289d05c6f62545affec438"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:de496365750cc472b4e7902a485d3f152ecf57bd3ba03ddd5578ed8ceb4c5964"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:200069a593c5e40b8f6fc0d84d86d970ba43138c3e68619ffa234bc9bb806a4d"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7d2de809c2ee3b888b59f995625385f74629707c9355e0ff856445cdcae682b7"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:b2c3da8d93cf5db60e8858c17684c47d01fee6405e554fb55018dd85fc23b178"}, - {file = "lxml-6.0.2-cp311-cp311-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:442de7530296ef5e188373a1ea5789a46ce90c4847e597856570439621d9c553"}, - {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2593c77efde7bfea7f6389f1ab249b15ed4aa5bc5cb5131faa3b843c429fbedb"}, - {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:3e3cb08855967a20f553ff32d147e14329b3ae70ced6edc2f282b94afbc74b2a"}, - {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:2ed6c667fcbb8c19c6791bbf40b7268ef8ddf5a96940ba9404b9f9a304832f6c"}, - {file = "lxml-6.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b8f18914faec94132e5b91e69d76a5c1d7b0c73e2489ea8929c4aaa10b76bbf7"}, - {file = "lxml-6.0.2-cp311-cp311-win32.whl", hash = "sha256:6605c604e6daa9e0d7f0a2137bdc47a2e93b59c60a65466353e37f8272f47c46"}, - {file = "lxml-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e5867f2651016a3afd8dd2c8238baa66f1e2802f44bc17e236f547ace6647078"}, - {file = "lxml-6.0.2-cp311-cp311-win_arm64.whl", hash = "sha256:4197fb2534ee05fd3e7afaab5d8bfd6c2e186f65ea7f9cd6a82809c887bd1285"}, - {file = "lxml-6.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a59f5448ba2ceccd06995c95ea59a7674a10de0810f2ce90c9006f3cbc044456"}, - {file = "lxml-6.0.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e8113639f3296706fbac34a30813929e29247718e88173ad849f57ca59754924"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a8bef9b9825fa8bc816a6e641bb67219489229ebc648be422af695f6e7a4fa7f"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:65ea18d710fd14e0186c2f973dc60bb52039a275f82d3c44a0e42b43440ea534"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c371aa98126a0d4c739ca93ceffa0fd7a5d732e3ac66a46e74339acd4d334564"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = 
"sha256:700efd30c0fa1a3581d80a748157397559396090a51d306ea59a70020223d16f"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c33e66d44fe60e72397b487ee92e01da0d09ba2d66df8eae42d77b6d06e5eba0"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:90a345bbeaf9d0587a3aaffb7006aa39ccb6ff0e96a57286c0cb2fd1520ea192"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:064fdadaf7a21af3ed1dcaa106b854077fbeada827c18f72aec9346847cd65d0"}, - {file = "lxml-6.0.2-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:fbc74f42c3525ac4ffa4b89cbdd00057b6196bcefe8bce794abd42d33a018092"}, - {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6ddff43f702905a4e32bc24f3f2e2edfe0f8fde3277d481bffb709a4cced7a1f"}, - {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6da5185951d72e6f5352166e3da7b0dc27aa70bd1090b0eb3f7f7212b53f1bb8"}, - {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:57a86e1ebb4020a38d295c04fc79603c7899e0df71588043eb218722dabc087f"}, - {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:2047d8234fe735ab77802ce5f2297e410ff40f5238aec569ad7c8e163d7b19a6"}, - {file = "lxml-6.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6f91fd2b2ea15a6800c8e24418c0775a1694eefc011392da73bc6cef2623b322"}, - {file = "lxml-6.0.2-cp312-cp312-win32.whl", hash = "sha256:3ae2ce7d6fedfb3414a2b6c5e20b249c4c607f72cb8d2bb7cc9c6ec7c6f4e849"}, - {file = "lxml-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:72c87e5ee4e58a8354fb9c7c84cbf95a1c8236c127a5d1b7683f04bed8361e1f"}, - {file = "lxml-6.0.2-cp312-cp312-win_arm64.whl", hash = "sha256:61cb10eeb95570153e0c0e554f58df92ecf5109f75eacad4a95baa709e26c3d6"}, - {file = "lxml-6.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:9b33d21594afab46f37ae58dfadd06636f154923c4e8a4d754b0127554eb2e77"}, - {file 
= "lxml-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6c8963287d7a4c5c9a432ff487c52e9c5618667179c18a204bdedb27310f022f"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1941354d92699fb5ffe6ed7b32f9649e43c2feb4b97205f75866f7d21aa91452"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bb2f6ca0ae2d983ded09357b84af659c954722bbf04dea98030064996d156048"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb2a12d704f180a902d7fa778c6d71f36ceb7b0d317f34cdc76a5d05aa1dd1df"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:6ec0e3f745021bfed19c456647f0298d60a24c9ff86d9d051f52b509663feeb1"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:846ae9a12d54e368933b9759052d6206a9e8b250291109c48e350c1f1f49d916"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ef9266d2aa545d7374938fb5c484531ef5a2ec7f2d573e62f8ce722c735685fd"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:4077b7c79f31755df33b795dc12119cb557a0106bfdab0d2c2d97bd3cf3dffa6"}, - {file = "lxml-6.0.2-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:a7c5d5e5f1081955358533be077166ee97ed2571d6a66bdba6ec2f609a715d1a"}, - {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8f8d0cbd0674ee89863a523e6994ac25fd5be9c8486acfc3e5ccea679bad2679"}, - {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:2cbcbf6d6e924c28f04a43f3b6f6e272312a090f269eff68a2982e13e5d57659"}, - {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dfb874cfa53340009af6bdd7e54ebc0d21012a60a4e65d927c2e477112e63484"}, - {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = 
"sha256:fb8dae0b6b8b7f9e96c26fdd8121522ce5de9bb5538010870bd538683d30e9a2"}, - {file = "lxml-6.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:358d9adae670b63e95bc59747c72f4dc97c9ec58881d4627fe0120da0f90d314"}, - {file = "lxml-6.0.2-cp313-cp313-win32.whl", hash = "sha256:e8cd2415f372e7e5a789d743d133ae474290a90b9023197fd78f32e2dc6873e2"}, - {file = "lxml-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:b30d46379644fbfc3ab81f8f82ae4de55179414651f110a1514f0b1f8f6cb2d7"}, - {file = "lxml-6.0.2-cp313-cp313-win_arm64.whl", hash = "sha256:13dcecc9946dca97b11b7c40d29fba63b55ab4170d3c0cf8c0c164343b9bfdcf"}, - {file = "lxml-6.0.2-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:b0c732aa23de8f8aec23f4b580d1e52905ef468afb4abeafd3fec77042abb6fe"}, - {file = "lxml-6.0.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:4468e3b83e10e0317a89a33d28f7aeba1caa4d1a6fd457d115dd4ffe90c5931d"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:abd44571493973bad4598a3be7e1d807ed45aa2adaf7ab92ab7c62609569b17d"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:370cd78d5855cfbffd57c422851f7d3864e6ae72d0da615fca4dad8c45d375a5"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:901e3b4219fa04ef766885fb40fa516a71662a4c61b80c94d25336b4934b71c0"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:a4bf42d2e4cf52c28cc1812d62426b9503cdb0c87a6de81442626aa7d69707ba"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2c7fdaa4d7c3d886a42534adec7cfac73860b89b4e5298752f60aa5984641a0"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:98a5e1660dc7de2200b00d53fa00bcd3c35a3608c305d45a7bbcaf29fa16e83d"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_31_armv7l.whl", hash = 
"sha256:dc051506c30b609238d79eda75ee9cab3e520570ec8219844a72a46020901e37"}, - {file = "lxml-6.0.2-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:8799481bbdd212470d17513a54d568f44416db01250f49449647b5ab5b5dccb9"}, - {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9261bb77c2dab42f3ecd9103951aeca2c40277701eb7e912c545c1b16e0e4917"}, - {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:65ac4a01aba353cfa6d5725b95d7aed6356ddc0a3cd734de00124d285b04b64f"}, - {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:b22a07cbb82fea98f8a2fd814f3d1811ff9ed76d0fc6abc84eb21527596e7cc8"}, - {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:d759cdd7f3e055d6bc8d9bec3ad905227b2e4c785dc16c372eb5b5e83123f48a"}, - {file = "lxml-6.0.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:945da35a48d193d27c188037a05fec5492937f66fb1958c24fc761fb9d40d43c"}, - {file = "lxml-6.0.2-cp314-cp314-win32.whl", hash = "sha256:be3aaa60da67e6153eb15715cc2e19091af5dc75faef8b8a585aea372507384b"}, - {file = "lxml-6.0.2-cp314-cp314-win_amd64.whl", hash = "sha256:fa25afbadead523f7001caf0c2382afd272c315a033a7b06336da2637d92d6ed"}, - {file = "lxml-6.0.2-cp314-cp314-win_arm64.whl", hash = "sha256:063eccf89df5b24e361b123e257e437f9e9878f425ee9aae3144c77faf6da6d8"}, - {file = "lxml-6.0.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:6162a86d86893d63084faaf4ff937b3daea233e3682fb4474db07395794fa80d"}, - {file = "lxml-6.0.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:414aaa94e974e23a3e92e7ca5b97d10c0cf37b6481f50911032c69eeb3991bba"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48461bd21625458dd01e14e2c38dd0aea69addc3c4f960c30d9f59d7f93be601"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:25fcc59afc57d527cfc78a58f40ab4c9b8fd096a9a3f964d2781ffb6eb33f4ed"}, - {file 
= "lxml-6.0.2-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5179c60288204e6ddde3f774a93350177e08876eaf3ab78aa3a3649d43eb7d37"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:967aab75434de148ec80597b75062d8123cadf2943fb4281f385141e18b21338"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d100fcc8930d697c6561156c6810ab4a508fb264c8b6779e6e61e2ed5e7558f9"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ca59e7e13e5981175b8b3e4ab84d7da57993eeff53c07764dcebda0d0e64ecd"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:957448ac63a42e2e49531b9d6c0fa449a1970dbc32467aaad46f11545be9af1d"}, - {file = "lxml-6.0.2-cp314-cp314t-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b7fc49c37f1786284b12af63152fe1d0990722497e2d5817acfe7a877522f9a9"}, - {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e19e0643cc936a22e837f79d01a550678da8377d7d801a14487c10c34ee49c7e"}, - {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:1db01e5cf14345628e0cbe71067204db658e2fb8e51e7f33631f5f4735fefd8d"}, - {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:875c6b5ab39ad5291588aed6925fac99d0097af0dd62f33c7b43736043d4a2ec"}, - {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:cdcbed9ad19da81c480dfd6dd161886db6096083c9938ead313d94b30aadf272"}, - {file = "lxml-6.0.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:80dadc234ebc532e09be1975ff538d154a7fa61ea5031c03d25178855544728f"}, - {file = "lxml-6.0.2-cp314-cp314t-win32.whl", hash = "sha256:da08e7bb297b04e893d91087df19638dc7a6bb858a954b0cc2b9f5053c922312"}, - {file = "lxml-6.0.2-cp314-cp314t-win_amd64.whl", hash = "sha256:252a22982dca42f6155125ac76d3432e548a7625d56f5a273ee78a5057216eca"}, - {file = 
"lxml-6.0.2-cp314-cp314t-win_arm64.whl", hash = "sha256:bb4c1847b303835d89d785a18801a883436cdfd5dc3d62947f9c49e24f0f5a2c"}, - {file = "lxml-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a656ca105115f6b766bba324f23a67914d9c728dafec57638e2b92a9dcd76c62"}, - {file = "lxml-6.0.2-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c54d83a2188a10ebdba573f16bd97135d06c9ef60c3dc495315c7a28c80a263f"}, - {file = "lxml-6.0.2-cp38-cp38-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:1ea99340b3c729beea786f78c38f60f4795622f36e305d9c9be402201efdc3b7"}, - {file = "lxml-6.0.2-cp38-cp38-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:af85529ae8d2a453feee4c780d9406a5e3b17cee0dd75c18bd31adcd584debc3"}, - {file = "lxml-6.0.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:fe659f6b5d10fb5a17f00a50eb903eb277a71ee35df4615db573c069bcf967ac"}, - {file = "lxml-6.0.2-cp38-cp38-win32.whl", hash = "sha256:5921d924aa5468c939d95c9814fa9f9b5935a6ff4e679e26aaf2951f74043512"}, - {file = "lxml-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:0aa7070978f893954008ab73bb9e3c24a7c56c054e00566a21b553dc18105fca"}, - {file = "lxml-6.0.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2c8458c2cdd29589a8367c09c8f030f1d202be673f0ca224ec18590b3b9fb694"}, - {file = "lxml-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3fee0851639d06276e6b387f1c190eb9d7f06f7f53514e966b26bae46481ec90"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b2142a376b40b6736dfc214fd2902409e9e3857eff554fed2d3c60f097e62a62"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a6b5b39cc7e2998f968f05309e666103b53e2edd01df8dc51b90d734c0825444"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d4aec24d6b72ee457ec665344a29acb2d35937d5192faebe429ea02633151aad"}, - {file = 
"lxml-6.0.2-cp39-cp39-manylinux_2_26_i686.manylinux_2_28_i686.whl", hash = "sha256:b42f4d86b451c2f9d06ffb4f8bbc776e04df3ba070b9fe2657804b1b40277c48"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6cdaefac66e8b8f30e37a9b4768a391e1f8a16a7526d5bc77a7928408ef68e93"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux_2_31_armv7l.whl", hash = "sha256:b738f7e648735714bbb82bdfd030203360cfeab7f6e8a34772b3c8c8b820568c"}, - {file = "lxml-6.0.2-cp39-cp39-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:daf42de090d59db025af61ce6bdb2521f0f102ea0e6ea310f13c17610a97da4c"}, - {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:66328dabea70b5ba7e53d94aa774b733cf66686535f3bc9250a7aab53a91caaf"}, - {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:e237b807d68a61fc3b1e845407e27e5eb8ef69bc93fe8505337c1acb4ee300b6"}, - {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_riscv64.whl", hash = "sha256:ac02dc29fd397608f8eb15ac1610ae2f2f0154b03f631e6d724d9e2ad4ee2c84"}, - {file = "lxml-6.0.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:817ef43a0c0b4a77bd166dc9a09a555394105ff3374777ad41f453526e37f9cb"}, - {file = "lxml-6.0.2-cp39-cp39-win32.whl", hash = "sha256:bc532422ff26b304cfb62b328826bd995c96154ffd2bac4544f37dbb95ecaa8f"}, - {file = "lxml-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:995e783eb0374c120f528f807443ad5a83a656a8624c467ea73781fc5f8a8304"}, - {file = "lxml-6.0.2-cp39-cp39-win_arm64.whl", hash = "sha256:08b9d5e803c2e4725ae9e8559ee880e5328ed61aa0935244e0515d7d9dbec0aa"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:e748d4cf8fef2526bb2a589a417eba0c8674e29ffcb570ce2ceca44f1e567bf6"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4ddb1049fa0579d0cbd00503ad8c58b9ab34d1254c77bc6a5576d96ec7853dba"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", 
hash = "sha256:cb233f9c95f83707dae461b12b720c1af9c28c2d19208e1be03387222151daf5"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bc456d04db0515ce3320d714a1eac7a97774ff0849e7718b492d957da4631dd4"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2613e67de13d619fd283d58bda40bff0ee07739f624ffee8b13b631abf33083d"}, - {file = "lxml-6.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:24a8e756c982c001ca8d59e87c80c4d9dcd4d9b44a4cbeb8d9be4482c514d41d"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1c06035eafa8404b5cf475bb37a9f6088b0aca288d4ccc9d69389750d5543700"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c7d13103045de1bdd6fe5d61802565f1a3537d70cd3abf596aa0af62761921ee"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0a3c150a95fbe5ac91de323aa756219ef9cf7fde5a3f00e2281e30f33fa5fa4f"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:60fa43be34f78bebb27812ed90f1925ec99560b0fa1decdb7d12b84d857d31e9"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:21c73b476d3cfe836be731225ec3421fa2f048d84f6df6a8e70433dff1376d5a"}, - {file = "lxml-6.0.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:27220da5be049e936c3aca06f174e8827ca6445a4353a1995584311487fc4e3e"}, - {file = "lxml-6.0.2.tar.gz", hash = "sha256:cd79f3367bd74b317dda655dc8fcfa304d9eb6e4fb06b7168c5cf27f96e0cd62"}, -] - -[package.extras] -cssselect = ["cssselect (>=0.7)"] -html-clean = ["lxml_html_clean"] -html5 = ["html5lib"] -htmlsoup = ["BeautifulSoup4"] - -[[package]] -name = "lxml-stubs" -version = "0.5.1" -description = "Type annotations for the lxml package" -optional = false -python-versions = "*" -groups = ["dev"] -files = [ 
- {file = "lxml-stubs-0.5.1.tar.gz", hash = "sha256:e0ec2aa1ce92d91278b719091ce4515c12adc1d564359dfaf81efa7d4feab79d"}, - {file = "lxml_stubs-0.5.1-py3-none-any.whl", hash = "sha256:1f689e5dbc4b9247cb09ae820c7d34daeb1fdbd1db06123814b856dae7787272"}, -] - -[package.extras] -test = ["coverage[toml] (>=7.2.5)", "mypy (>=1.2.0)", "pytest (>=7.3.0)", "pytest-mypy-plugins (>=1.10.1)"] - [[package]] name = "markdown-it-py" version = "4.0.0" @@ -1405,7 +1257,7 @@ description = "An LDAP3 auth provider for Synapse" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"all\" or extra == \"matrix-synapse-ldap3\"" +markers = "extra == \"matrix-synapse-ldap3\" or extra == \"all\"" files = [ {file = "matrix-synapse-ldap3-0.3.0.tar.gz", hash = "sha256:8bb6517173164d4b9cc44f49de411d8cebdb2e705d5dd1ea1f38733c4a009e1d"}, {file = "matrix_synapse_ldap3-0.3.0-py3-none-any.whl", hash = "sha256:8b4d701f8702551e98cc1d8c20dbed532de5613584c08d0df22de376ba99159d"}, @@ -1648,7 +1500,7 @@ description = "OpenTracing API for Python. See documentation at http://opentraci optional = true python-versions = "*" groups = ["main"] -markers = "extra == \"all\" or extra == \"opentracing\"" +markers = "extra == \"opentracing\" or extra == \"all\"" files = [ {file = "opentracing-2.4.0.tar.gz", hash = "sha256:a173117e6ef580d55874734d1fa7ecb6f3655160b8b8974a2a1e98e5ec9c840d"}, ] @@ -1838,7 +1690,7 @@ description = "psycopg2 - Python-PostgreSQL Database Adapter" optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"all\" or extra == \"postgres\"" +markers = "extra == \"postgres\" or extra == \"all\"" files = [ {file = "psycopg2-2.9.11-cp310-cp310-win_amd64.whl", hash = "sha256:103e857f46bb76908768ead4e2d0ba1d1a130e7b8ed77d3ae91e8b33481813e8"}, {file = "psycopg2-2.9.11-cp311-cp311-win_amd64.whl", hash = "sha256:210daed32e18f35e3140a1ebe059ac29209dd96468f2f7559aa59f75ee82a5cb"}, @@ -1856,7 +1708,7 @@ description = ".. 
image:: https://travis-ci.org/chtd/psycopg2cffi.svg?branch=mas optional = true python-versions = "*" groups = ["main"] -markers = "platform_python_implementation == \"PyPy\" and (extra == \"all\" or extra == \"postgres\")" +markers = "platform_python_implementation == \"PyPy\" and (extra == \"postgres\" or extra == \"all\")" files = [ {file = "psycopg2cffi-2.9.0.tar.gz", hash = "sha256:7e272edcd837de3a1d12b62185eb85c45a19feda9e62fa1b120c54f9e8d35c52"}, ] @@ -1872,7 +1724,7 @@ description = "A Simple library to enable psycopg2 compatability" optional = true python-versions = "*" groups = ["main"] -markers = "platform_python_implementation == \"PyPy\" and (extra == \"all\" or extra == \"postgres\")" +markers = "platform_python_implementation == \"PyPy\" and (extra == \"postgres\" or extra == \"all\")" files = [ {file = "psycopg2cffi-compat-1.1.tar.gz", hash = "sha256:d25e921748475522b33d13420aad5c2831c743227dc1f1f2585e0fdb5c914e05"}, ] @@ -2154,7 +2006,7 @@ description = "A development tool to measure, monitor and analyze the memory beh optional = true python-versions = ">=3.6" groups = ["main"] -markers = "extra == \"all\" or extra == \"cache-memory\"" +markers = "extra == \"cache-memory\" or extra == \"all\"" files = [ {file = "Pympler-1.0.1-py3-none-any.whl", hash = "sha256:d260dda9ae781e1eab6ea15bacb84015849833ba5555f141d2d9b7b7473b307d"}, {file = "Pympler-1.0.1.tar.gz", hash = "sha256:993f1a3599ca3f4fcd7160c7545ad06310c9e12f70174ae7ae8d4e25f6c5d3fa"}, @@ -2162,45 +2014,30 @@ files = [ [[package]] name = "pynacl" -version = "1.6.2" +version = "1.5.0" description = "Python binding to the Networking and Cryptography (NaCl) library" optional = false -python-versions = ">=3.8" +python-versions = ">=3.6" groups = ["main", "dev"] files = [ - {file = "pynacl-1.6.2-cp314-cp314t-macosx_10_10_universal2.whl", hash = "sha256:622d7b07cc5c02c666795792931b50c91f3ce3c2649762efb1ef0d5684c81594"}, - {file = 
"pynacl-1.6.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d071c6a9a4c94d79eb665db4ce5cedc537faf74f2355e4d502591d850d3913c0"}, - {file = "pynacl-1.6.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fe9847ca47d287af41e82be1dd5e23023d3c31a951da134121ab02e42ac218c9"}, - {file = "pynacl-1.6.2-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:04316d1fc625d860b6c162fff704eb8426b1a8bcd3abacea11142cbd99a6b574"}, - {file = "pynacl-1.6.2-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44081faff368d6c5553ccf55322ef2819abb40e25afaec7e740f159f74813634"}, - {file = "pynacl-1.6.2-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:a9f9932d8d2811ce1a8ffa79dcbdf3970e7355b5c8eb0c1a881a57e7f7d96e88"}, - {file = "pynacl-1.6.2-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:bc4a36b28dd72fb4845e5d8f9760610588a96d5a51f01d84d8c6ff9849968c14"}, - {file = "pynacl-1.6.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3bffb6d0f6becacb6526f8f42adfb5efb26337056ee0831fb9a7044d1a964444"}, - {file = "pynacl-1.6.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:2fef529ef3ee487ad8113d287a593fa26f48ee3620d92ecc6f1d09ea38e0709b"}, - {file = "pynacl-1.6.2-cp314-cp314t-win32.whl", hash = "sha256:a84bf1c20339d06dc0c85d9aea9637a24f718f375d861b2668b2f9f96fa51145"}, - {file = "pynacl-1.6.2-cp314-cp314t-win_amd64.whl", hash = "sha256:320ef68a41c87547c91a8b58903c9caa641ab01e8512ce291085b5fe2fcb7590"}, - {file = "pynacl-1.6.2-cp314-cp314t-win_arm64.whl", hash = "sha256:d29bfe37e20e015a7d8b23cfc8bd6aa7909c92a1b8f41ee416bbb3e79ef182b2"}, - {file = "pynacl-1.6.2-cp38-abi3-macosx_10_10_universal2.whl", hash = "sha256:c949ea47e4206af7c8f604b8278093b674f7c79ed0d4719cc836902bf4517465"}, - {file = "pynacl-1.6.2-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8845c0631c0be43abdd865511c41eab235e0be69c81dc66a50911594198679b0"}, - {file = 
"pynacl-1.6.2-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:22de65bb9010a725b0dac248f353bb072969c94fa8d6b1f34b87d7953cf7bbe4"}, - {file = "pynacl-1.6.2-cp38-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:46065496ab748469cdd999246d17e301b2c24ae2fdf739132e580a0e94c94a87"}, - {file = "pynacl-1.6.2-cp38-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8a66d6fb6ae7661c58995f9c6435bda2b1e68b54b598a6a10247bfcdadac996c"}, - {file = "pynacl-1.6.2-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:26bfcd00dcf2cf160f122186af731ae30ab120c18e8375684ec2670dccd28130"}, - {file = "pynacl-1.6.2-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:c8a231e36ec2cab018c4ad4358c386e36eede0319a0c41fed24f840b1dac59f6"}, - {file = "pynacl-1.6.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:68be3a09455743ff9505491220b64440ced8973fe930f270c8e07ccfa25b1f9e"}, - {file = "pynacl-1.6.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:8b097553b380236d51ed11356c953bf8ce36a29a3e596e934ecabe76c985a577"}, - {file = "pynacl-1.6.2-cp38-abi3-win32.whl", hash = "sha256:5811c72b473b2f38f7e2a3dc4f8642e3a3e9b5e7317266e4ced1fba85cae41aa"}, - {file = "pynacl-1.6.2-cp38-abi3-win_amd64.whl", hash = "sha256:62985f233210dee6548c223301b6c25440852e13d59a8b81490203c3227c5ba0"}, - {file = "pynacl-1.6.2-cp38-abi3-win_arm64.whl", hash = "sha256:834a43af110f743a754448463e8fd61259cd4ab5bbedcf70f9dabad1d28a394c"}, - {file = "pynacl-1.6.2.tar.gz", hash = "sha256:018494d6d696ae03c7e656e5e74cdfd8ea1326962cc401bcf018f1ed8436811c"}, + {file = "PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1"}, + {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92"}, + {file = 
"PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a36d4a9dda1f19ce6e03c9a784a2921a4b726b02e1c736600ca9c22029474394"}, + {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0c84947a22519e013607c9be43706dd42513f9e6ae5d39d3613ca1e142fba44d"}, + {file = "PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06b8f6fa7f5de8d5d2f7573fe8c863c051225a27b61e6860fd047b1775807858"}, + {file = "PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a422368fc821589c228f4c49438a368831cb5bbc0eab5ebe1d7fac9dded6567b"}, + {file = "PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:61f642bf2378713e2c2e1de73444a3778e5f0a38be6fee0fe532fe30060282ff"}, + {file = "PyNaCl-1.5.0-cp36-abi3-win32.whl", hash = "sha256:e46dae94e34b085175f8abb3b0aaa7da40767865ac82c928eeb9e57e1ea8a543"}, + {file = "PyNaCl-1.5.0-cp36-abi3-win_amd64.whl", hash = "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93"}, + {file = "PyNaCl-1.5.0.tar.gz", hash = "sha256:8ac7448f09ab85811607bdd21ec2464495ac8b7c66d146bf545b0f08fb9220ba"}, ] [package.dependencies] -cffi = {version = ">=2.0.0", markers = "platform_python_implementation != \"PyPy\" and python_version >= \"3.9\""} +cffi = ">=1.4.1" [package.extras] -docs = ["sphinx (<7)", "sphinx_rtd_theme"] -tests = ["hypothesis (>=3.27.0)", "pytest (>=7.4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"] +docs = ["sphinx (>=1.6.5)", "sphinx-rtd-theme"] +tests = ["hypothesis (>=3.27.0)", "pytest (>=3.2.1,!=3.3.0)"] [[package]] name = "pyopenssl" @@ -2286,7 +2123,7 @@ description = "Python implementation of SAML Version 2 Standard" optional = true python-versions = ">=3.9,<4.0" groups = ["main"] -markers = "extra == \"all\" or extra == \"saml2\"" +markers = "extra == \"saml2\" or extra == \"all\"" files = [ {file = "pysaml2-7.5.0-py3-none-any.whl", hash = 
"sha256:bc6627cc344476a83c757f440a73fda1369f13b6fda1b4e16bca63ffbabb5318"}, {file = "pysaml2-7.5.0.tar.gz", hash = "sha256:f36871d4e5ee857c6b85532e942550d2cf90ea4ee943d75eb681044bbc4f54f7"}, @@ -2311,7 +2148,7 @@ description = "Extensions to the standard Python datetime module" optional = true python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" groups = ["main"] -markers = "extra == \"all\" or extra == \"saml2\"" +markers = "extra == \"saml2\" or extra == \"all\"" files = [ {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, @@ -2339,7 +2176,7 @@ description = "World timezone definitions, modern and historical" optional = true python-versions = "*" groups = ["main"] -markers = "extra == \"all\" or extra == \"saml2\"" +markers = "extra == \"saml2\" or extra == \"all\"" files = [ {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, @@ -2743,7 +2580,7 @@ description = "Python client for Sentry (https://sentry.io)" optional = true python-versions = ">=3.6" groups = ["main"] -markers = "extra == \"all\" or extra == \"sentry\"" +markers = "extra == \"sentry\" or extra == \"all\"" files = [ {file = "sentry_sdk-2.46.0-py2.py3-none-any.whl", hash = "sha256:4eeeb60198074dff8d066ea153fa6f241fef1668c10900ea53a4200abc8da9b1"}, {file = "sentry_sdk-2.46.0.tar.gz", hash = "sha256:91821a23460725734b7741523021601593f35731808afc0bb2ba46c27b8acd91"}, @@ -2918,6 +2755,19 @@ files = [ {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, ] +[[package]] +name = "soupsieve" +version = "2.8" +description = "A modern CSS selector 
implementation for Beautiful Soup." +optional = true +python-versions = ">=3.9" +groups = ["main"] +markers = "extra == \"url-preview\" or extra == \"all\"" +files = [ + {file = "soupsieve-2.8-py3-none-any.whl", hash = "sha256:0cc76456a30e20f5d7f2e14a98a4ae2ee4e5abdc7c5ea0aafe795f344bc7984c"}, + {file = "soupsieve-2.8.tar.gz", hash = "sha256:e2dd4a40a628cb5f28f6d4b0db8800b8f581b65bb380b97de22ba5ca8d72572f"}, +] + [[package]] name = "sqlglot" version = "28.0.0" @@ -2953,7 +2803,7 @@ description = "Tornado IOLoop Backed Concurrent Futures" optional = true python-versions = "*" groups = ["main"] -markers = "extra == \"all\" or extra == \"opentracing\"" +markers = "extra == \"opentracing\" or extra == \"all\"" files = [ {file = "threadloop-1.0.2-py2-none-any.whl", hash = "sha256:5c90dbefab6ffbdba26afb4829d2a9df8275d13ac7dc58dccb0e279992679599"}, {file = "threadloop-1.0.2.tar.gz", hash = "sha256:8b180aac31013de13c2ad5c834819771992d350267bddb854613ae77ef571944"}, @@ -2969,7 +2819,7 @@ description = "Python bindings for the Apache Thrift RPC system" optional = true python-versions = "*" groups = ["main"] -markers = "extra == \"all\" or extra == \"opentracing\"" +markers = "extra == \"opentracing\" or extra == \"all\"" files = [ {file = "thrift-0.16.0.tar.gz", hash = "sha256:2b5b6488fcded21f9d312aa23c9ff6a0195d0f6ae26ddbd5ad9e3e25dfc14408"}, ] @@ -3042,7 +2892,7 @@ description = "Tornado is a Python web framework and asynchronous networking lib optional = true python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"all\" or extra == \"opentracing\"" +markers = "extra == \"opentracing\" or extra == \"all\"" files = [ {file = "tornado-6.5-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:f81067dad2e4443b015368b24e802d0083fecada4f0a4572fdb72fc06e54a9a6"}, {file = "tornado-6.5-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:9ac1cbe1db860b3cbb251e795c701c41d343f06a96049d6274e7c77559117e41"}, @@ -3176,7 +3026,7 @@ description = "non-blocking redis client for 
python" optional = true python-versions = "*" groups = ["main"] -markers = "extra == \"all\" or extra == \"redis\"" +markers = "extra == \"redis\" or extra == \"all\"" files = [ {file = "txredisapi-1.4.11-py3-none-any.whl", hash = "sha256:ac64d7a9342b58edca13ef267d4fa7637c1aa63f8595e066801c1e8b56b22d0b"}, {file = "txredisapi-1.4.11.tar.gz", hash = "sha256:3eb1af99aefdefb59eb877b1dd08861efad60915e30ad5bf3d5bf6c5cedcdbc6"}, @@ -3387,14 +3237,14 @@ files = [ [[package]] name = "urllib3" -version = "2.6.3" +version = "2.6.0" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = ">=3.9" groups = ["main", "dev"] files = [ - {file = "urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4"}, - {file = "urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed"}, + {file = "urllib3-2.6.0-py3-none-any.whl", hash = "sha256:c90f7a39f716c572c4e3e58509581ebd83f9b59cced005b7db7ad2d22b0db99f"}, + {file = "urllib3-2.6.0.tar.gz", hash = "sha256:cb9bcef5a4b345d5da5d145dc3e30834f58e8018828cbc724d30b4cb7d4d49f1"}, ] [package.extras] @@ -3422,7 +3272,7 @@ description = "An XML Schema validator and decoder" optional = true python-versions = ">=3.7" groups = ["main"] -markers = "extra == \"all\" or extra == \"saml2\"" +markers = "extra == \"saml2\" or extra == \"all\"" files = [ {file = "xmlschema-2.4.0-py3-none-any.whl", hash = "sha256:dc87be0caaa61f42649899189aab2fd8e0d567f2cf548433ba7b79278d231a4a"}, {file = "xmlschema-2.4.0.tar.gz", hash = "sha256:d74cd0c10866ac609e1ef94a5a69b018ad16e39077bc6393408b40c6babee793"}, @@ -3540,7 +3390,7 @@ docs = ["Sphinx", "repoze.sphinx.autointerface"] test = ["zope.i18nmessageid", "zope.testing", "zope.testrunner"] [extras] -all = ["authlib", "defusedxml", "hiredis", "jaeger-client", "lxml", "matrix-synapse-ldap3", "opentracing", "psycopg2", "psycopg2cffi", "psycopg2cffi-compat", 
"pympler", "pysaml2", "pytz", "sentry-sdk", "thrift", "tornado", "txredisapi"] +all = ["authlib", "beautifulsoup4", "defusedxml", "hiredis", "jaeger-client", "matrix-synapse-ldap3", "opentracing", "psycopg2", "psycopg2cffi", "psycopg2cffi-compat", "pympler", "pysaml2", "pytz", "sentry-sdk", "thrift", "tornado", "txredisapi"] cache-memory = ["pympler"] jwt = ["authlib"] matrix-synapse-ldap3 = ["matrix-synapse-ldap3"] @@ -3552,9 +3402,9 @@ saml2 = ["defusedxml", "pysaml2", "pytz"] sentry = ["sentry-sdk"] systemd = ["systemd-python"] test = ["idna", "parameterized"] -url-preview = ["lxml"] +url-preview = ["beautifulsoup4"] [metadata] lock-version = "2.1" python-versions = ">=3.10.0,<4.0.0" -content-hash = "1caa5072f6304122c89377420f993a54f54587f3618ccc8094ec31642264592c" +content-hash = "6061a6b34498a223ce8fe8d889a9ac8fd79fb714ce5e64c690042c2a55a7fc0e" diff --git a/pyproject.toml b/pyproject.toml index d61f7177bdc..f8491ec02f9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -139,7 +139,7 @@ oidc = ["authlib>=0.15.1"] # `systemd.journal.JournalHandler`, as is documented in # `contrib/systemd/log_config.yaml`. 
systemd = ["systemd-python>=231"] -url-preview = ["lxml>=4.6.3"] +url-preview = ["beautifulsoup4>=4.13.0"] sentry = ["sentry-sdk>=0.7.2"] opentracing = [ "jaeger-client>=4.2.0", @@ -182,7 +182,7 @@ all = [ # oidc and jwt "authlib>=0.15.1", # url-preview - "lxml>=4.6.3", + "beautifulsoup4>=4.13.0", # sentry "sentry-sdk>=0.7.2", # opentracing @@ -266,7 +266,6 @@ generate-setup-file = true ruff = "0.14.6" # Typechecking -lxml-stubs = ">=0.4.0" mypy = "*" mypy-zope = "*" types-bleach = ">=4.1.0" diff --git a/synapse/media/oembed.py b/synapse/media/oembed.py index 7e440721302..03e215530c8 100644 --- a/synapse/media/oembed.py +++ b/synapse/media/oembed.py @@ -21,16 +21,21 @@ import html import logging import urllib.parse -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING import attr -from synapse.media.preview_html import parse_html_description +from synapse.media.preview_html import ( + NON_BLANK, + decode_body, + get_attribute, + parse_html_description, +) from synapse.types import JsonDict from synapse.util.json import json_decoder if TYPE_CHECKING: - from lxml import etree + from bs4 import BeautifulSoup from synapse.server import HomeServer @@ -105,35 +110,25 @@ def get_oembed_url(self, url: str) -> str | None: # No match. return None - def autodiscover_from_html(self, tree: "etree._Element") -> str | None: + def autodiscover_from_html(self, soup: "BeautifulSoup") -> str | None: """ Search an HTML document for oEmbed autodiscovery information. Args: - tree: The parsed HTML body. + soup: The parsed HTML body. Returns: The URL to use for oEmbed information, or None if no URL was found. """ # Search for link elements with the proper rel and type attributes. - # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this. 
- for tag in cast( - list["etree._Element"], - tree.xpath("//link[@rel='alternate'][@type='application/json+oembed']"), - ): - if "href" in tag.attrib: - return cast(str, tag.attrib["href"]) - - # Some providers (e.g. Flickr) use alternative instead of alternate. - # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this. - for tag in cast( - list["etree._Element"], - tree.xpath("//link[@rel='alternative'][@type='application/json+oembed']"), - ): - if "href" in tag.attrib: - return cast(str, tag.attrib["href"]) - - return None + # Some providers (e.g. Flickr) use `alternative` instead of `alternate`. + tag = soup.find( + "link", + rel=("alternate", "alternative"), + type="application/json+oembed", + href=NON_BLANK, + ) + return get_attribute(tag, "href") if tag else None def parse_oembed_response(self, url: str, raw_body: bytes) -> OEmbedResult: """ @@ -196,7 +191,7 @@ def parse_oembed_response(self, url: str, raw_body: bytes) -> OEmbedResult: if oembed_type == "rich": html_str = oembed.get("html") if isinstance(html_str, str): - calc_description_and_urls(open_graph_response, html_str) + calc_description_and_urls(open_graph_response, html_str, url) elif oembed_type == "photo": # If this is a photo, use the full image, not the thumbnail. 
@@ -208,7 +203,7 @@ def parse_oembed_response(self, url: str, raw_body: bytes) -> OEmbedResult: open_graph_response["og:type"] = "video.other" html_str = oembed.get("html") if html_str and isinstance(html_str, str): - calc_description_and_urls(open_graph_response, oembed["html"]) + calc_description_and_urls(open_graph_response, oembed["html"], url) for size in ("width", "height"): val = oembed.get(size) if type(val) is int: # noqa: E721 @@ -223,55 +218,45 @@ def parse_oembed_response(self, url: str, raw_body: bytes) -> OEmbedResult: return OEmbedResult(open_graph_response, author_name, cache_age) -def _fetch_urls(tree: "etree._Element", tag_name: str) -> list[str]: - results = [] - # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this. - for tag in cast(list["etree._Element"], tree.xpath("//*/" + tag_name)): - if "src" in tag.attrib: - results.append(cast(str, tag.attrib["src"])) - return results +def _fetch_url(soup: "BeautifulSoup", tag_name: str) -> str | None: + tag = soup.find(tag_name, src=NON_BLANK) + return get_attribute(tag, "src") if tag else None -def calc_description_and_urls(open_graph_response: JsonDict, html_body: str) -> None: +def calc_description_and_urls( + open_graph_response: JsonDict, html_body: str, url: str +) -> None: """ Calculate description for an HTML document. - This uses lxml to convert the HTML document into plaintext. If errors + This uses BeautifulSoup to convert the HTML document into plaintext. If errors occur during processing of the document, an empty response is returned. Args: open_graph_response: The current Open Graph summary. This is updated with additional fields. html_body: The HTML document, as bytes. - - Returns: - The summary + url: The URL which is being previewed (not the one which was requested). """ - # If there's no body, nothing useful is going to be found. - if not html_body: - return - - from lxml import etree + soup = decode_body(html_body, url) - # Create an HTML parser. 
If this fails, log and return no metadata. - parser = etree.HTMLParser(recover=True, encoding="utf-8") - - # Attempt to parse the body. If this fails, log and return no metadata. - tree = etree.fromstring(html_body, parser) - - # The data was successfully parsed, but no tree was found. - if tree is None: + # If there's no body, nothing useful is going to be found. + if not soup: return # Attempt to find interesting URLs (images, videos, embeds). if "og:image" not in open_graph_response: - image_urls = _fetch_urls(tree, "img") - if image_urls: - open_graph_response["og:image"] = image_urls[0] - - video_urls = _fetch_urls(tree, "video") + _fetch_urls(tree, "embed") - if video_urls: - open_graph_response["og:video"] = video_urls[0] - - description = parse_html_description(tree) + image_url = _fetch_url(soup, "img") + if image_url: + open_graph_response["og:image"] = image_url + + video_url = _fetch_url(soup, "video") + if video_url: + open_graph_response["og:video"] = video_url + else: + embed_url = _fetch_url(soup, "embed") + if embed_url: + open_graph_response["og:video"] = embed_url + + description = parse_html_description(soup) if description: open_graph_response["og:description"] = description diff --git a/synapse/media/preview_html.py b/synapse/media/preview_html.py index 22ad581f829..d92f9a1a62c 100644 --- a/synapse/media/preview_html.py +++ b/synapse/media/preview_html.py @@ -18,108 +18,27 @@ # [This file includes modifications made by New Vector Limited] # # -import codecs import logging import re -from typing import ( - TYPE_CHECKING, - Callable, - Generator, - Iterable, - Optional, - cast, -) +from typing import TYPE_CHECKING, Callable, Generator, Iterable, Optional if TYPE_CHECKING: - from lxml import etree + from bs4 import BeautifulSoup + from bs4.element import PageElement, Tag logger = logging.getLogger(__name__) -_charset_match = re.compile( - rb'<\s*meta[^>]*charset\s*=\s*"?([a-z0-9_-]+)"?', flags=re.I -) -_xml_encoding_match = re.compile( - 
rb'\s*<\s*\?\s*xml[^>]*encoding="([a-z0-9_-]+)"', flags=re.I -) _content_type_match = re.compile(r'.*; *charset="?(.*?)"?(;|$)', flags=re.I) # Certain elements aren't meant for display. ARIA_ROLES_TO_IGNORE = {"directory", "menu", "menubar", "toolbar"} - -def _normalise_encoding(encoding: str) -> str | None: - """Use the Python codec's name as the normalised entry.""" - try: - return codecs.lookup(encoding).name - except LookupError: - return None +NON_BLANK = re.compile(".+") -def _get_html_media_encodings(body: bytes, content_type: str | None) -> Iterable[str]: +def decode_body(body: bytes | str, uri: str) -> Optional["BeautifulSoup"]: """ - Get potential encoding of the body based on the (presumably) HTML body or the content-type header. - - The precedence used for finding a character encoding is: - - 1. tag with a charset declared. - 2. The XML document's character encoding attribute. - 3. The Content-Type header. - 4. Fallback to utf-8. - 5. Fallback to windows-1252. - - This roughly follows the algorithm used by BeautifulSoup's bs4.dammit.EncodingDetector. - - Args: - body: The HTML document, as bytes. - content_type: The Content-Type header. - - Returns: - The character encoding of the body, as a string. - """ - # There's no point in returning an encoding more than once. - attempted_encodings: set[str] = set() - - # Limit searches to the first 1kb, since it ought to be at the top. - body_start = body[:1024] - - # Check if it has an encoding set in a meta tag. - match = _charset_match.search(body_start) - if match: - encoding = _normalise_encoding(match.group(1).decode("ascii")) - if encoding: - attempted_encodings.add(encoding) - yield encoding - - # TODO Support - - # Check if it has an XML document with an encoding. 
- match = _xml_encoding_match.match(body_start) - if match: - encoding = _normalise_encoding(match.group(1).decode("ascii")) - if encoding and encoding not in attempted_encodings: - attempted_encodings.add(encoding) - yield encoding - - # Check the HTTP Content-Type header for a character set. - if content_type: - content_match = _content_type_match.match(content_type) - if content_match: - encoding = _normalise_encoding(content_match.group(1)) - if encoding and encoding not in attempted_encodings: - attempted_encodings.add(encoding) - yield encoding - - # Finally, fallback to UTF-8, then windows-1252. - for fallback in ("utf-8", "cp1252"): - if fallback not in attempted_encodings: - yield fallback - - -def decode_body( - body: bytes, uri: str, content_type: str | None = None -) -> Optional["etree._Element"]: - """ - This uses lxml to parse the HTML document. + This uses BeautifulSoup to parse the HTML document. Args: body: The HTML document, as bytes. @@ -133,54 +52,46 @@ def decode_body( if not body: return None - # The idea here is that multiple encodings are tried until one works. - # Unfortunately the result is never used and then LXML will decode the string - # again with the found encoding. - for encoding in _get_html_media_encodings(body, content_type): - try: - body.decode(encoding) - except Exception: - pass - else: - break - else: + from bs4 import BeautifulSoup + from bs4.builder import ParserRejectedMarkup + + try: + soup = BeautifulSoup(body, "html.parser") + # If an empty document is returned, convert to None. + if not len(soup): + return None + return soup + except ParserRejectedMarkup: logger.warning("Unable to decode HTML body for %s", uri) return None - from lxml import etree - - # Create an HTML parser. - parser = etree.HTMLParser(recover=True, encoding=encoding) - # Attempt to parse the body. With `lxml` 6.0.0+, this will be an empty HTML - # tree if the body was successfully parsed, but no tree was found. 
In - # previous `lxml` versions, `etree.fromstring` would return `None` in that - # case. - html_tree = etree.fromstring(body, parser) +def get_attribute(tag: "Tag", attribute_name: str) -> str: + """ + Get an attribute from a beautifulsoup tag. - # Account for the above referenced case where `html_tree` is an HTML tree - # with an empty body. If so, return None. - if html_tree is not None and html_tree.tag == "html": - # If the tree has only a single element and it's empty, then - # return None. - body_el = html_tree.find("body") - if body_el is not None and len(html_tree) == 1: - # Extract the content of the body tag as text. - body_text = "".join(cast(Iterable[str], body_el.itertext())) + Fetching an attribute may return either a string or list of strings depending + on if the attribute is a "multi-valued" attribute. - # Strip any undecodable Unicode characters and whitespace. - body_text = body_text.strip("\ufffd").strip() + The multi-valued attributes are never used in the HTML preview code, but this + function helps enforce type safety without casts. - # If there's no text left, and there were no child tags, - # then we consider the tag empty. - if not body_text and len(body_el) == 0: - return None + Args: + tag: The Tag object to get the attribute from. + attribute_name: The name of the attribute to get. - return html_tree + Returns: + The attribute value as a string. + """ + attribute = tag[attribute_name] + assert isinstance(attribute, str), ( + f"Expected attribute {attribute_name} to have a string value" + ) + return attribute def _get_meta_tags( - tree: "etree._Element", + soup: "BeautifulSoup", property: str, prefix: str, property_mapper: Callable[[str], str | None] | None = None, @@ -189,7 +100,7 @@ def _get_meta_tags( Search for meta tags prefixed with a particular string. Args: - tree: The parsed HTML document. + soup: The parsed HTML document. property: The name of the property which contains the tag name, e.g. "property" for Open Graph. 
prefix: The prefix on the property to search for, e.g. "og" for Open Graph. @@ -199,15 +110,10 @@ def _get_meta_tags( Returns: A map of tag name to value. """ - # This actually returns dict[str, str], but the caller sets this as a variable - # which is dict[str, str | None]. results: dict[str, str | None] = {} # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this. - for tag in cast( - list["etree._Element"], - tree.xpath( - f"//*/meta[starts-with(@{property}, '{prefix}:')][@content][not(@content='')]" - ), + for tag in soup.find_all( + "meta", attrs={property: re.compile(rf"^{prefix}:")}, content=NON_BLANK ): # if we've got more than 50 tags, someone is taking the piss if len(results) >= 50: @@ -217,7 +123,7 @@ def _get_meta_tags( ) return {} - key = cast(str, tag.attrib[property]) + key = get_attribute(tag, property) if property_mapper: new_key = property_mapper(key) # None is a special value used to ignore a value. @@ -225,7 +131,7 @@ def _get_meta_tags( continue key = new_key - results[key] = cast(str, tag.attrib["content"]) + results[key] = get_attribute(tag, "content") return results @@ -250,15 +156,14 @@ def _map_twitter_to_open_graph(key: str) -> str | None: return "og" + key[7:] -def parse_html_to_open_graph(tree: "etree._Element") -> dict[str, str | None]: +def parse_html_to_open_graph(soup: "BeautifulSoup") -> dict[str, str | None]: """ - Parse the HTML document into an Open Graph response. + Calculate metadata for an HTML document. - This uses lxml to search the HTML document for Open Graph data (or - synthesizes it from the document). + This uses BeautifulSoup to search the HTML document for Open Graph data. Args: - tree: The parsed HTML document. + soup: The parsed HTML document. Returns: The Open Graph response as a dictionary. 
@@ -278,7 +183,8 @@ def parse_html_to_open_graph(tree: "etree._Element") -> dict[str, str | None]: # "og:video:height" : "720", # "og:video:secure_url": "https://www.youtube.com/v/LXDBoHyjmtw?version=3", - og = _get_meta_tags(tree, "property", "og") + # TODO: grab article: meta tags too, e.g.: + og = _get_meta_tags(soup, "property", "og") # TODO: Search for properties specific to the different Open Graph types, # such as article: meta tags, e.g.: @@ -298,7 +204,7 @@ def parse_html_to_open_graph(tree: "etree._Element") -> dict[str, str | None]: # Twitter cards tags also duplicate Open Graph tags. # # See https://developer.twitter.com/en/docs/twitter-for-websites/cards/guides/getting-started - twitter = _get_meta_tags(tree, "name", "twitter", _map_twitter_to_open_graph) + twitter = _get_meta_tags(soup, "name", "twitter", _map_twitter_to_open_graph) # Merge the Twitter values with the Open Graph values, but do not overwrite # information from Open Graph tags. for key, value in twitter.items(): @@ -307,73 +213,67 @@ def parse_html_to_open_graph(tree: "etree._Element") -> dict[str, str | None]: if "og:title" not in og: # Attempt to find a title from the title tag, or the biggest header on the page. - # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this. - title = cast( - list["etree._ElementUnicodeResult"], - tree.xpath("((//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1])/text()"), - ) - if title: - og["og:title"] = title[0].strip() + # + # mypy doesn't like passing both name and string, but it is used to ignore + # empty elements. + title = soup.find(("title", "h1", "h2", "h3"), string=True) # type: ignore[call-overload] + if title and title.string: + og["og:title"] = title.string.strip() else: og["og:title"] = None if "og:image" not in og: - # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this. 
- meta_image = cast( - list["etree._ElementUnicodeResult"], - tree.xpath( - "//*/meta[translate(@itemprop, 'IMAGE', 'image')='image'][not(@content='')]/@content[1]" - ), + # Check microdata for an image. + meta_image = soup.find( + "meta", itemprop=re.compile("image", re.I), content=NON_BLANK ) # If a meta image is found, use it. if meta_image: - og["og:image"] = meta_image[0] + og["og:image"] = get_attribute(meta_image, "content") else: # Try to find images which are larger than 10px by 10px. - # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this. # # TODO: consider inlined CSS styles as well as width & height attribs - images = cast( - list["etree._Element"], - tree.xpath("//img[@src][number(@width)>10][number(@height)>10]"), + raw_images = soup.find_all( + "img", src=NON_BLANK, width=NON_BLANK, height=NON_BLANK ) images = sorted( - images, + filter( + lambda tag: int(get_attribute(tag, "width")) > 10 + and int(get_attribute(tag, "height")) > 10, + raw_images, + ), key=lambda i: ( - -1 * float(i.attrib["width"]) * float(i.attrib["height"]) + -1 + * float(get_attribute(i, "width")) + * float(get_attribute(i, "height")) ), ) # If no images were found, try to find *any* images. if not images: - # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this. - images = cast(list["etree._Element"], tree.xpath("//img[@src][1]")) + images = soup.find_all("img", src=NON_BLANK, limit=1) if images: - og["og:image"] = cast(str, images[0].attrib["src"]) + og["og:image"] = get_attribute(images[0], "src") # Finally, fallback to the favicon if nothing else. else: - # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this. 
- favicons = cast( - list["etree._ElementUnicodeResult"], - tree.xpath("//link[@href][contains(@rel, 'icon')]/@href[1]"), - ) - if favicons: - og["og:image"] = favicons[0] + favicon = soup.find("link", href=NON_BLANK, rel="icon") + if favicon: + og["og:image"] = get_attribute(favicon, "href") if "og:description" not in og: # Check the first meta description tag for content. - # Cast: the type returned by xpath depends on the xpath expression: mypy can't deduce this. - meta_description = cast( - list["etree._ElementUnicodeResult"], - tree.xpath( - "//*/meta[translate(@name, 'DESCRIPTION', 'description')='description'][not(@content='')]/@content[1]" - ), + meta_description = soup.find( + "meta", + attrs={"name": re.compile("description", re.I)}, + content=NON_BLANK, ) + # If a meta description is found with content, use it. if meta_description: - og["og:description"] = meta_description[0] + og["og:description"] = get_attribute(meta_description, "content") else: - og["og:description"] = parse_html_description(tree) + og["og:description"] = parse_html_description(soup) elif og["og:description"]: # This must be a non-empty string at this point. assert isinstance(og["og:description"], str) @@ -384,7 +284,7 @@ def parse_html_to_open_graph(tree: "etree._Element") -> dict[str, str | None]: return og -def parse_html_description(tree: "etree._Element") -> str | None: +def parse_html_description(soup: "BeautifulSoup") -> str | None: """ Calculate a text description based on an HTML document. @@ -397,16 +297,14 @@ def parse_html_description(tree: "etree._Element") -> str | None: This is a very very very coarse approximation to a plain text render of the page. Args: - tree: The parsed HTML document. + soup: The parsed HTML document. Returns: The plain text description, or None if one cannot be generated. """ - # We don't just use XPATH here as that is slow on some machines. 
- - from lxml import etree TAGS_TO_REMOVE = { + "head", "header", "nav", "aside", @@ -420,74 +318,56 @@ def parse_html_description(tree: "etree._Element") -> str | None: "canvas", "img", "picture", - # etree.Comment is a function which creates an etree._Comment element. - # The "tag" attribute of an etree._Comment instance is confusingly the - # etree.Comment function instead of a string. - etree.Comment, } # Split all the text nodes into paragraphs (by splitting on new # lines) text_nodes = ( re.sub(r"\s+", "\n", el).strip() - for el in _iterate_over_text(tree.find("body"), TAGS_TO_REMOVE) + for el in _iterate_over_text(soup, TAGS_TO_REMOVE) ) return summarize_paragraphs(text_nodes) def _iterate_over_text( - tree: Optional["etree._Element"], - tags_to_ignore: set[object], + soup: "Tag", + tags_to_ignore: Iterable[str], stack_limit: int = 1024, ) -> Generator[str, None, None]: - """Iterate over the tree returning text nodes in a depth first fashion, + """Iterate over the document returning text nodes in a depth first fashion, skipping text nodes inside certain tags. Args: - tree: The parent element to iterate. Can be None if there isn't one. + soup: The parent element to iterate. tags_to_ignore: Set of tags to ignore stack_limit: Maximum stack size limit for depth-first traversal. Nodes will be dropped if this limit is hit, which may truncate the textual result. Intended to limit the maximum working memory when generating a preview. """ + from bs4.element import NavigableString, Tag - if tree is None: - return - - # This is a stack whose items are elements to iterate over *or* strings + # This is a plain list used as a stack: nodes are pushed with append() and popped from the end. + # Each entry is either an element to iterate over *or* a string # to be returned.
- elements: list[str | "etree._Element"] = [tree] + elements: list["PageElement"] = [soup] while elements: el = elements.pop() - if isinstance(el, str): - yield el - elif el.tag not in tags_to_ignore: - # If the element isn't meant for display, ignore it. - if el.get("role") in ARIA_ROLES_TO_IGNORE: - continue - - # el.text is the text before the first child, so we can immediately - # return it if the text exists. - if el.text: - yield el.text - - # We add to the stack all the element's children, interspersed with - # each child's tail text (if it exists). + # Do not consider sub-classes of NavigableString since those represent + # stylesheets, etc. + if type(el) == NavigableString: # noqa: E721 + yield str(el) + elif isinstance(el, Tag) and el.name not in tags_to_ignore: + # We add to the stack all the element's children. # # We iterate in reverse order so that earlier pieces of text appear # closer to the top of the stack. - for child in el.iterchildren(reversed=True): + for child in reversed(el.contents): if len(elements) > stack_limit: # We've hit our limit for working memory break - if child.tail: - # The tail text of a node is text that comes *after* the node, - # so we always include it even if we ignore the child node. 
- elements.append(child.tail) - elements.append(child) diff --git a/synapse/media/url_previewer.py b/synapse/media/url_previewer.py index 7782905a7ab..ddcbfefd0a8 100644 --- a/synapse/media/url_previewer.py +++ b/synapse/media/url_previewer.py @@ -294,16 +294,16 @@ async def _do_preview(self, url: str, user: UserID, ts: int) -> bytes: # define our OG response for this media elif _is_html(media_info.media_type): - # TODO: somehow stop a big HTML tree from exploding synapse's RAM + # TODO: somehow stop a big HTML document from exploding synapse's RAM with open(media_info.filename, "rb") as file: body = file.read() - tree = decode_body(body, media_info.uri, media_info.media_type) - if tree is not None: + soup = decode_body(body, media_info.uri) + if soup is not None: # Check if this HTML document points to oEmbed information and # defer to that. - oembed_url = self._oembed.autodiscover_from_html(tree) + oembed_url = self._oembed.autodiscover_from_html(soup) og_from_oembed: JsonDict = {} # Only download to the oEmbed URL if it is allowed. if oembed_url: @@ -329,7 +329,7 @@ async def _do_preview(self, url: str, user: UserID, ts: int) -> bytes: # Parse Open Graph information from the HTML in case the oEmbed # response failed or is incomplete. 
- og_from_html = parse_html_to_open_graph(tree) + og_from_html = parse_html_to_open_graph(soup) # Compile an Open Graph response by combining the oEmbed response # and the information from the HTML, with information in the HTML diff --git a/tests/media/test_html_preview.py b/tests/media/test_html_preview.py index d3f1e8833a7..fb5938dc4f3 100644 --- a/tests/media/test_html_preview.py +++ b/tests/media/test_html_preview.py @@ -20,7 +20,6 @@ # from synapse.media.preview_html import ( - _get_html_media_encodings, decode_body, parse_html_to_open_graph, summarize_paragraphs, @@ -29,14 +28,14 @@ from tests import unittest try: - import lxml + import bs4 except ImportError: - lxml = None # type: ignore[assignment] + bs4 = None # type: ignore[assignment] class SummarizeTestCase(unittest.TestCase): - if not lxml: - skip = "url preview feature requires lxml" + if not bs4: + skip = "url preview feature requires beautifulsoup4" def test_long_summarize(self) -> None: example_paras = [ @@ -153,8 +152,8 @@ def test_small_then_large_summarize(self) -> None: class OpenGraphFromHtmlTestCase(unittest.TestCase): - if not lxml: - skip = "url preview feature requires lxml" + if not bs4: + skip = "url preview feature requires beautifulsoup4" def test_simple(self) -> None: html = b""" @@ -166,9 +165,9 @@ def test_simple(self) -> None: """ - tree = decode_body(html, "http://example.com/test.html") - assert tree is not None - og = parse_html_to_open_graph(tree) + soup = decode_body(html, "http://example.com/test.html") + assert soup is not None + og = parse_html_to_open_graph(soup) self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."}) @@ -183,9 +182,9 @@ def test_comment(self) -> None: """ - tree = decode_body(html, "http://example.com/test.html") - assert tree is not None - og = parse_html_to_open_graph(tree) + soup = decode_body(html, "http://example.com/test.html") + assert soup is not None + og = parse_html_to_open_graph(soup) self.assertEqual(og, {"og:title": "Foo", 
"og:description": "Some text."}) @@ -203,9 +202,9 @@ def test_comment2(self) -> None: """ - tree = decode_body(html, "http://example.com/test.html") - assert tree is not None - og = parse_html_to_open_graph(tree) + soup = decode_body(html, "http://example.com/test.html") + assert soup is not None + og = parse_html_to_open_graph(soup) self.assertEqual( og, @@ -226,9 +225,9 @@ def test_script(self) -> None: """ - tree = decode_body(html, "http://example.com/test.html") - assert tree is not None - og = parse_html_to_open_graph(tree) + soup = decode_body(html, "http://example.com/test.html") + assert soup is not None + og = parse_html_to_open_graph(soup) self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."}) @@ -241,9 +240,9 @@ def test_missing_title(self) -> None: """ - tree = decode_body(html, "http://example.com/test.html") - assert tree is not None - og = parse_html_to_open_graph(tree) + soup = decode_body(html, "http://example.com/test.html") + assert soup is not None + og = parse_html_to_open_graph(soup) self.assertEqual(og, {"og:title": None, "og:description": "Some text."}) @@ -273,9 +272,9 @@ def test_h1_as_title(self) -> None: """ - tree = decode_body(html, "http://example.com/test.html") - assert tree is not None - og = parse_html_to_open_graph(tree) + soup = decode_body(html, "http://example.com/test.html") + assert soup is not None + og = parse_html_to_open_graph(soup) self.assertEqual(og, {"og:title": "Title", "og:description": "Some text."}) @@ -310,23 +309,17 @@ def test_missing_title_and_broken_h1(self) -> None: """ - tree = decode_body(html, "http://example.com/test.html") - assert tree is not None - og = parse_html_to_open_graph(tree) + soup = decode_body(html, "http://example.com/test.html") + assert soup is not None + og = parse_html_to_open_graph(soup) self.assertEqual(og, {"og:title": None, "og:description": "Some text."}) def test_empty(self) -> None: """Test a body with no data in it.""" html = b"" - tree = decode_body(html, 
"http://example.com/test.html") - self.assertIsNone(tree) - - def test_no_tree(self) -> None: - """A valid body with no tree in it.""" - html = b"\x00" - tree = decode_body(html, "http://example.com/test.html") - self.assertIsNone(tree) + soup = decode_body(html, "http://example.com/test.html") + self.assertIsNone(soup) def test_xml(self) -> None: """Test decoding XML and ensure it works properly.""" @@ -339,24 +332,9 @@ def test_xml(self) -> None: FooSome text. """.strip() - tree = decode_body(html, "http://example.com/test.html") - assert tree is not None - og = parse_html_to_open_graph(tree) - self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."}) - - def test_invalid_encoding(self) -> None: - """An invalid character encoding should be ignored and treated as UTF-8, if possible.""" - html = b""" - - Foo - - Some text. - - - """ - tree = decode_body(html, "http://example.com/test.html", "invalid-encoding") - assert tree is not None - og = parse_html_to_open_graph(tree) + soup = decode_body(html, "http://example.com/test.html") + assert soup is not None + og = parse_html_to_open_graph(soup) self.assertEqual(og, {"og:title": "Foo", "og:description": "Some text."}) def test_invalid_encoding2(self) -> None: @@ -370,10 +348,10 @@ def test_invalid_encoding2(self) -> None: """ - tree = decode_body(html, "http://example.com/test.html") - assert tree is not None - og = parse_html_to_open_graph(tree) - self.assertEqual(og, {"og:title": "ÿÿ Foo", "og:description": "Some text."}) + soup = decode_body(html, "http://example.com/test.html") + assert soup is not None + og = parse_html_to_open_graph(soup) + self.assertEqual(og, {"og:title": "˙˙ Foo", "og:description": "Some text."}) def test_windows_1252(self) -> None: """A body which uses cp1252, but doesn't declare that.""" @@ -385,10 +363,73 @@ def test_windows_1252(self) -> None: """ - tree = decode_body(html, "http://example.com/test.html") - assert tree is not None - og = parse_html_to_open_graph(tree) - 
self.assertEqual(og, {"og:title": "ó", "og:description": "Some text."}) + soup = decode_body(html, "http://example.com/test.html") + assert soup is not None + og = parse_html_to_open_graph(soup) + self.assertIn("og:title", og) + og.pop("og:title") + self.assertEqual(og, {"og:description": "Some text."}) + + def test_image(self) -> None: + """Test the spots an image can be pulled from.""" + # Ordered list of tags; we'll pop off the top and keep testing. + tags = [ + ( + b"""""", + "meta-prop", + ), + ( + b"""""", + "meta-IMAGE", + ), + ( + b"""""", + "meta-image", + ), + (b"""""", "img"), + ( + b"""""", + "img", + ), + ( + b"""""", + "img", + ), + ( + b"""""", + "img", + ), + ( + b"""""", + "img", + ), + # Put this image again since if it is the *only* image it will be used. + ( + b"""""", + "img-no-width-no-height", + ), + ( + b"""""", + "favicon", + ), + ] + + while tags: + html = b"" + b"".join(t[0] for t in tags) + b"" + tree = decode_body(html, "http://example.com/test.html") + assert tree is not None + og = parse_html_to_open_graph(tree) + self.assertEqual( + og, + { + "og:title": None, + "og:description": None, + "og:image": f"https://example.com/{tags[0][1]}.png", + }, + ) + + # Remove the highest remaining priority item.
+ tags.pop(0) def test_twitter_tag(self) -> None: """Twitter card tags should be used if nothing else is available.""" @@ -397,6 +438,7 @@ def test_twitter_tag(self) -> None: + """ tree = decode_body(html, "http://example.com/test.html") @@ -408,6 +450,7 @@ def test_twitter_tag(self) -> None: "og:title": None, "og:description": "Description", "og:site_name": "@matrixdotorg", + "og:image": "https://example.com/test.png", }, ) @@ -419,6 +462,8 @@ def test_twitter_tag(self) -> None: + + """ tree = decode_body(html, "http://example.com/test.html") @@ -430,6 +475,7 @@ def test_twitter_tag(self) -> None: "og:title": None, "og:description": "Real Description", "og:site_name": "matrix.org", + "og:image": "https://example.com/good.png", }, ) @@ -450,116 +496,3 @@ def test_nested_nodes(self) -> None: "og:description": "Welcome\n\nthe bold\n\nand underlined text\n\nand\n\nsome\n\ntail text", }, ) - - -class MediaEncodingTestCase(unittest.TestCase): - def test_meta_charset(self) -> None: - """A character encoding is found via the meta tag.""" - encodings = _get_html_media_encodings( - b""" - - - - - """, - "text/html", - ) - self.assertEqual(list(encodings), ["ascii", "utf-8", "cp1252"]) - - # A less well-formed version. 
- encodings = _get_html_media_encodings( - b""" - - < meta charset = ascii> - - - """, - "text/html", - ) - self.assertEqual(list(encodings), ["ascii", "utf-8", "cp1252"]) - - def test_meta_charset_underscores(self) -> None: - """A character encoding contains underscore.""" - encodings = _get_html_media_encodings( - b""" - - - - - """, - "text/html", - ) - self.assertEqual(list(encodings), ["shift_jis", "utf-8", "cp1252"]) - - def test_xml_encoding(self) -> None: - """A character encoding is found via the meta tag.""" - encodings = _get_html_media_encodings( - b""" - - - - """, - "text/html", - ) - self.assertEqual(list(encodings), ["ascii", "utf-8", "cp1252"]) - - def test_meta_xml_encoding(self) -> None: - """Meta tags take precedence over XML encoding.""" - encodings = _get_html_media_encodings( - b""" - - - - - - """, - "text/html", - ) - self.assertEqual(list(encodings), ["utf-16", "ascii", "utf-8", "cp1252"]) - - def test_content_type(self) -> None: - """A character encoding is found via the Content-Type header.""" - # Test a few variations of the header. 
- headers = ( - 'text/html; charset="ascii";', - "text/html;charset=ascii;", - 'text/html; charset="ascii"', - "text/html; charset=ascii", - 'text/html; charset="ascii;', - 'text/html; charset=ascii";', - ) - for header in headers: - encodings = _get_html_media_encodings(b"", header) - self.assertEqual(list(encodings), ["ascii", "utf-8", "cp1252"]) - - def test_fallback(self) -> None: - """A character encoding cannot be found in the body or header.""" - encodings = _get_html_media_encodings(b"", "text/html") - self.assertEqual(list(encodings), ["utf-8", "cp1252"]) - - def test_duplicates(self) -> None: - """Ensure each encoding is only attempted once.""" - encodings = _get_html_media_encodings( - b""" - - - - - - """, - 'text/html; charset="UTF_8"', - ) - self.assertEqual(list(encodings), ["utf-8", "cp1252"]) - - def test_unknown_invalid(self) -> None: - """A character encoding should be ignored if it is unknown or invalid.""" - encodings = _get_html_media_encodings( - b""" - - - - - """, - 'text/html; charset="invalid"', - ) - self.assertEqual(list(encodings), ["utf-8", "cp1252"]) diff --git a/tests/media/test_oembed.py b/tests/media/test_oembed.py index dc13c03df33..74fd77f330b 100644 --- a/tests/media/test_oembed.py +++ b/tests/media/test_oembed.py @@ -34,14 +34,14 @@ from tests.unittest import HomeserverTestCase try: - import lxml + import bs4 except ImportError: - lxml = None # type: ignore[assignment] + bs4 = None # type: ignore[assignment] class OEmbedTests(HomeserverTestCase): - if not lxml: - skip = "url preview feature requires lxml" + if not bs4: + skip = "url preview feature requires beautifulsoup4" def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.oembed = OEmbedProvider(hs) diff --git a/tests/media/test_url_previewer.py b/tests/media/test_url_previewer.py index 3d706c7e90d..da6ea60ab13 100644 --- a/tests/media/test_url_previewer.py +++ b/tests/media/test_url_previewer.py @@ -29,14 +29,14 @@ from tests.unittest 
import override_config try: - import lxml + import bs4 except ImportError: - lxml = None # type: ignore[assignment] + bs4 = None # type: ignore[assignment] class URLPreviewTests(unittest.HomeserverTestCase): - if not lxml: - skip = "url preview feature requires lxml" + if not bs4: + skip = "url preview feature requires beautifulsoup4" def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: config = self.default_config() diff --git a/tests/rest/client/test_media.py b/tests/rest/client/test_media.py index ec81b1413c2..71755488e67 100644 --- a/tests/rest/client/test_media.py +++ b/tests/rest/client/test_media.py @@ -77,9 +77,9 @@ from tests.unittest import override_config try: - import lxml + import bs4 except ImportError: - lxml = None # type: ignore[assignment] + bs4 = None # type: ignore[assignment] class MediaDomainBlockingTests(unittest.HomeserverTestCase): @@ -188,8 +188,8 @@ def test_remote_media_thumbnail_normally_unblocked(self) -> None: class URLPreviewTests(unittest.HomeserverTestCase): - if not lxml: - skip = "url preview feature requires lxml" + if not bs4: + skip = "url preview feature requires beautifulsoup4" servlets = [media.register_servlets] hijack_auth = True @@ -495,7 +495,7 @@ def test_non_ascii_preview_content_type(self) -> None: self.pump() self.assertEqual(channel.code, 200) - self.assertEqual(channel.json_body["og:title"], "\u0434\u043a\u0430") + self.assertIn("og:title", channel.json_body) def test_overlong_title(self) -> None: self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")] diff --git a/tests/rest/media/test_url_preview.py b/tests/rest/media/test_url_preview.py index 32e78fc12a6..ae42091e0b1 100644 --- a/tests/rest/media/test_url_preview.py +++ b/tests/rest/media/test_url_preview.py @@ -45,14 +45,14 @@ from tests.unittest import override_config try: - import lxml + import bs4 except ImportError: - lxml = None # type: ignore[assignment] + bs4 = None # type: ignore[assignment] class
URLPreviewTests(unittest.HomeserverTestCase): - if not lxml: - skip = "url preview feature requires lxml" + if not bs4: + skip = "url preview feature requires beautifulsoup4" hijack_auth = True user_id = "@test:user" @@ -367,7 +367,7 @@ def test_non_ascii_preview_content_type(self) -> None: self.pump() self.assertEqual(channel.code, 200) - self.assertEqual(channel.json_body["og:title"], "\u0434\u043a\u0430") + self.assertIn("og:title", channel.json_body) def test_overlong_title(self) -> None: self.lookups["matrix.org"] = [(IPv4Address, "10.1.2.3")]