Skip to content

Commit 040c2e5

Browse files
committed
fix(machinery): limit allowed URLs
Allow only public URLs by default to reduce risk of SSRF.
1 parent a7fd394 commit 040c2e5

File tree

20 files changed

+1054
-79
lines changed

20 files changed

+1054
-79
lines changed

docs/admin/config.rst

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,26 @@ This is currently used in the following places:
8181

8282
* :setting:`ALLOWED_ASSET_SIZE`
8383

84+
.. setting:: ALLOWED_MACHINERY_DOMAINS
85+
86+
ALLOWED_MACHINERY_DOMAINS
87+
-------------------------
88+
89+
Configures which custom machinery domains are explicitly allowed in project-level
90+
machine translation configuration.
91+
92+
This setting applies only to machinery services and does not affect
93+
:setting:`ALLOWED_ASSET_DOMAINS`.
94+
95+
It expects a list of host/domain names. You can use fully qualified names or
96+
prefix a name with a period (for example ``.example.com``) as a wildcard to match all of its subdomains.
97+
98+
Defaults to ``[]``.
99+
100+
The allowlist only affects configuration-time validation for project-managed
101+
machinery endpoints. Runtime checks still reject destinations that resolve to
102+
private or otherwise non-public addresses.
103+
84104
.. setting:: ALLOWED_ASSET_SIZE
85105

86106
ALLOWED_ASSET_SIZE

docs/changes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ Weblate 5.17
2020

2121
.. rubric:: Bug fixes
2222

23+
* Hardened project-level machine translation against SSRF by blocking private-network targets for untrusted endpoints and hiding untrusted remote error details.
2324
* Prevented removing the last team from a project token.
2425
* Batch automatic translation now uses project-level machinery configuration instead of only site-wide settings.
2526
* Fixed sorting by the **Unreviewed** column in listings.

weblate/api/tests.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3467,6 +3467,24 @@ def test_install_machinery(self) -> None:
34673467

34683468
self.assertEqual(new_config, response.data)
34693469

3470+
def test_install_machinery_blocks_private_project_target(self) -> None:
3471+
self.component.project.add_user(self.user, "Administration")
3472+
3473+
response = self.do_request(
3474+
"api:project-machinery-settings",
3475+
self.project_kwargs,
3476+
method="post",
3477+
code=400,
3478+
superuser=False,
3479+
request={
3480+
"service": "deepl",
3481+
"configuration": {"key": "x", "url": "http://127.0.0.1:11434/"},
3482+
},
3483+
format="json",
3484+
)
3485+
3486+
self.assertIn("URL domain is not allowed.", str(response.data))
3487+
34703488

34713489
class ComponentAPITest(APIBaseTest):
34723490
def setUp(self) -> None:

weblate/api/views.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1515,7 +1515,9 @@ def machinery_settings(self, request: Request, **kwargs):
15151515
raise ValidationError({"service": "Missing service name"}) from error
15161516

15171517
service, configuration, errors = validate_service_configuration(
1518-
service_name, request.data.get("configuration", "{}")
1518+
service_name,
1519+
request.data.get("configuration", "{}"),
1520+
allow_private_targets=False,
15191521
)
15201522

15211523
if service is None or errors:
@@ -1554,7 +1556,9 @@ def machinery_settings(self, request: Request, **kwargs):
15541556
valid_configurations: dict[str, dict] = {}
15551557
for service_name, configuration in request.data.items():
15561558
service, configuration, errors = validate_service_configuration(
1557-
service_name, configuration
1559+
service_name,
1560+
configuration,
1561+
allow_private_targets=False,
15581562
)
15591563

15601564
if service is None or errors:

weblate/machinery/anthropic.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from __future__ import annotations
66

7+
from typing import ClassVar
78
from urllib.parse import urljoin
89

910
from .base import MachineryRateLimitError
@@ -20,6 +21,7 @@ class AnthropicTranslation(BaseLLMTranslation):
2021
"""
2122

2223
name = "Anthropic"
24+
trusted_error_hosts: ClassVar[set[str]] = {"api.anthropic.com"}
2325
end_point = "/v1/messages"
2426
settings_form = AnthropicMachineryForm
2527
version_added = "5.16"

weblate/machinery/base.py

Lines changed: 108 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@
1515
from html import escape, unescape
1616
from itertools import chain
1717
from typing import TYPE_CHECKING, ClassVar
18-
from urllib.parse import quote
18+
from urllib.parse import quote, urlparse
1919

20+
from django.conf import settings
2021
from django.core.cache import cache
2122
from django.core.exceptions import ValidationError
2223
from django.utils.functional import cached_property
@@ -28,8 +29,10 @@
2829
from weblate.machinery.forms import BaseMachineryForm
2930
from weblate.utils.docs import DocVersionsMixin
3031
from weblate.utils.errors import report_error
32+
from weblate.utils.forms import WeblateServiceURLField
3133
from weblate.utils.hash import calculate_dict_hash, calculate_hash, hash_to_checksum
32-
from weblate.utils.requests import http_request
34+
from weblate.utils.outbound import is_allowlisted_hostname
35+
from weblate.utils.requests import http_request, validate_request_url
3336
from weblate.utils.similarity import Comparer
3437
from weblate.utils.site import get_site_url
3538

@@ -108,20 +111,21 @@ class BatchMachineTranslation(DocVersionsMixin):
108111

109112
validate_source_language = "en"
110113
validate_target_language = "de"
114+
trusted_error_hosts: ClassVar[set[str]] = set()
111115

112116
@classmethod
113117
def get_rank(cls):
114118
return cls.max_score + cls.rank_boost
115119

116-
def __init__(self, settings: SettingsDict) -> None:
120+
def __init__(self, configuration: SettingsDict) -> None:
117121
"""Create new machine translation object."""
118122
self.mtid = self.get_identifier()
119123
self.rate_limit_cache = f"{self.mtid}-rate-limit"
120124
self.languages_cache = f"{self.mtid}-languages"
121125
self.comparer = Comparer()
122126
self.supported_languages_error: Exception | None = None
123127
self.supported_languages_error_age: float = 0
124-
self.settings = settings
128+
self.settings = configuration
125129

126130
def delete_cache(self) -> None:
127131
cache.delete_many([self.rate_limit_cache, self.languages_cache])
@@ -187,29 +191,108 @@ def check_failure(self, response: Response) -> None:
187191
try:
188192
response.raise_for_status()
189193
except HTTPError as error:
190-
detail = response.text
191-
try:
192-
payload = response.json()
193-
except JSONDecodeError:
194-
pass
195-
else:
196-
if isinstance(payload, dict) and payload:
197-
if detail_error := payload.get("error"):
198-
if isinstance(detail_error, str):
199-
detail = detail_error
200-
elif isinstance(detail_error, dict):
201-
if "message" in detail_error:
202-
detail = detail_error["message"]
203-
else:
204-
detail = str(detail_error)
205-
else:
206-
detail = str(payload)
207-
208-
if detail:
194+
if detail := self.get_error_detail(response):
209195
message = f"{error.args[0]}: {detail[:200]}"
210196
raise HTTPError(message, response=response) from error
211197
raise
212198

199+
@property
200+
def allow_private_targets(self) -> bool:
201+
return "_project" not in self.settings
202+
203+
def validate_runtime_url(self, url: str) -> None:
204+
validate_request_url(url, allow_private_targets=self.allow_private_targets)
205+
206+
@staticmethod
207+
def get_host_from_setting(value: object) -> str | None:
208+
if not isinstance(value, str):
209+
return None
210+
if "://" in value:
211+
return urlparse(value).hostname
212+
return value or None
213+
214+
def get_trusted_error_hosts(self) -> set[str]:
215+
hosts = set(settings.ALLOWED_MACHINERY_DOMAINS)
216+
hosts.update(self.trusted_error_hosts)
217+
if self.allow_private_targets or self.settings_form is None:
218+
return hosts
219+
220+
form = self.settings_form(self.__class__)
221+
for field_name, field in form.fields.items():
222+
values: set[str] = set()
223+
if initial := getattr(field, "initial", None):
224+
values.add(initial)
225+
values.update(value for value, _label in getattr(field, "choices", ()))
226+
227+
current_value = self.settings.get(field_name)
228+
if current_value in values and (
229+
host := self.get_host_from_setting(current_value)
230+
):
231+
hosts.add(host)
232+
233+
if isinstance(field, WeblateServiceURLField):
234+
for value in values:
235+
if host := self.get_host_from_setting(value):
236+
hosts.add(host)
237+
return hosts
238+
239+
@classmethod
240+
def has_configurable_outbound_target(cls) -> bool:
241+
if cls.settings_form is None:
242+
return False
243+
244+
form = cls.settings_form(cls)
245+
for field_name, field in form.fields.items():
246+
if isinstance(field, WeblateServiceURLField):
247+
return True
248+
if field_name in form.network_host_fields:
249+
return True
250+
return False
251+
252+
def can_display_error_detail(self, response: Response) -> bool:
253+
if self.allow_private_targets:
254+
return True
255+
return self.is_trusted_error_host(response)
256+
257+
def is_trusted_error_host(self, response: Response) -> bool:
258+
if (
259+
self.settings_form is not None
260+
and not self.has_configurable_outbound_target()
261+
):
262+
return True
263+
hostname = urlparse(response.url).hostname or ""
264+
return is_allowlisted_hostname(hostname, list(self.get_trusted_error_hosts()))
265+
266+
def get_error_detail(self, response: Response) -> str | None:
267+
if not self.can_display_error_detail(response):
268+
return None
269+
trusted_host = self.is_trusted_error_host(response)
270+
271+
try:
272+
payload = response.json()
273+
except JSONDecodeError:
274+
if trusted_host:
275+
return response.text or None
276+
return None
277+
278+
if isinstance(payload, dict):
279+
if (message := payload.get("message")) and isinstance(message, str):
280+
return message
281+
if (detail := payload.get("detail")) and isinstance(detail, str):
282+
return detail
283+
if error := payload.get("error"):
284+
if isinstance(error, str):
285+
return error
286+
if isinstance(error, dict):
287+
detail_message = error.get("message")
288+
if isinstance(detail_message, str):
289+
return detail_message
290+
elif isinstance(payload, str) and trusted_host:
291+
return payload
292+
if trusted_host:
293+
return response.text or None
294+
return None
295+
213296
def request(self, method, url, skip_auth=False, **kwargs):
214297
"""Perform JSON request."""
215298
# Create custom headers
@@ -231,6 +314,8 @@ def request(self, method, url, skip_auth=False, **kwargs):
231314
timeout=self.request_timeout,
232315
auth=self.get_auth(),
233316
raise_for_status=False,
317+
validate_url=True,
318+
allow_private_targets=self.allow_private_targets,
234319
**kwargs,
235320
)
236321

weblate/machinery/deepl.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ class DeepLTranslation(
4646
target_language_map: ClassVar[dict[str, str]] = {
4747
"PT": "PT-PT",
4848
}
49+
trusted_error_hosts: ClassVar[set[str]] = {
50+
"api.deepl.com",
51+
"api-free.deepl.com",
52+
}
4953
highlight_syntax = True
5054
settings_form = DeepLMachineryForm
5155
glossary_count_limit = 1000

weblate/machinery/forms.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,14 @@
1212
from django.utils.translation import gettext, gettext_lazy, pgettext_lazy
1313

1414
from weblate.utils.forms import WeblateServiceURLField
15+
from weblate.utils.validators import validate_machinery_hostname, validate_machinery_url
1516

1617
from .types import SourceLanguageChoices
1718

1819

1920
class BaseMachineryForm(forms.Form):
21+
network_host_fields = frozenset({"base_url", "endpoint_url"})
22+
2023
source_language = forms.ChoiceField(
2124
label=pgettext_lazy(
2225
"Automatic suggestion service configuration", "Source language selection"
@@ -26,8 +29,11 @@ class BaseMachineryForm(forms.Form):
2629
required=False,
2730
)
2831

29-
def __init__(self, machinery, *args, **kwargs) -> None:
32+
def __init__(
33+
self, machinery, *args, allow_private_targets: bool = True, **kwargs
34+
) -> None:
3035
self.machinery = machinery
36+
self.allow_private_targets = allow_private_targets
3137
super().__init__(*args, **kwargs)
3238

3339
def serialize_form(self):
@@ -40,9 +46,25 @@ def clean(self) -> None:
4046
continue
4147
if field not in settings:
4248
return
49+
self.validate_endpoint_fields(settings)
50+
if not self.allow_private_targets:
51+
settings = {**settings, "_project": object()}
4352
machinery = self.machinery(settings)
4453
machinery.validate_settings()
4554

55+
def validate_endpoint_fields(self, settings) -> None:
56+
for field_name, field in self.fields.items():
57+
if (value := settings.get(field_name)) in {"", None}:
58+
continue
59+
if isinstance(field, WeblateServiceURLField):
60+
validate_machinery_url(
61+
value, allow_private_targets=self.allow_private_targets
62+
)
63+
elif field_name in self.network_host_fields and isinstance(value, str):
64+
validate_machinery_hostname(
65+
value, allow_private_targets=self.allow_private_targets
66+
)
67+
4668

4769
class KeyMachineryForm(BaseMachineryForm):
4870
key = forms.CharField(

weblate/machinery/libretranslate.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from __future__ import annotations
88

9-
from typing import TYPE_CHECKING
9+
from typing import TYPE_CHECKING, ClassVar
1010

1111
from .base import BatchMachineTranslation
1212
from .forms import LibreTranslateMachineryForm
@@ -22,6 +22,7 @@ class LibreTranslateTranslation(BatchMachineTranslation):
2222

2323
name = "LibreTranslate"
2424
max_score = 89
25+
trusted_error_hosts: ClassVar[set[str]] = {"libretranslate.com"}
2526
version_added = "4.7.1"
2627
settings_form = LibreTranslateMachineryForm
2728
request_timeout = 20

weblate/machinery/microsoft.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from datetime import timedelta
88
from typing import TYPE_CHECKING, ClassVar
9+
from urllib.parse import urlparse
910

1011
from django.utils import timezone
1112

@@ -57,6 +58,12 @@ class MicrosoftCognitiveTranslation(XMLMachineTranslationMixin, MachineTranslati
5758
def get_identifier(cls) -> str:
5859
return "microsoft-translator"
5960

61+
@classmethod
62+
def get_application_hosts(cls) -> set[str]:
63+
return {
64+
value for value, _label in cls.settings_form.base_fields["base_url"].choices
65+
}
66+
6067
def __init__(self, settings: SettingsDict) -> None:
6168
"""Check configuration."""
6269
super().__init__(settings)
@@ -108,10 +115,8 @@ def check_failure(self, response) -> None:
108115
# Microsoft tends to use utf-8-sig instead of plain utf-8
109116
response.encoding = response.apparent_encoding
110117
super().check_failure(response)
111-
if (
112-
response.url.startswith("https://api.cognitive.microsofttranslator.com/")
113-
and response.status_code == 200
114-
):
118+
hostname = urlparse(response.url).hostname
119+
if response.status_code == 200 and hostname in self.get_application_hosts():
115120
payload = response.json()
116121

117122
# We should get an object, string usually means an error

0 commit comments

Comments
 (0)