Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ Weblate 5.17
* Improved API access control for pending tasks.
* Faster category and project removals.
* Project backup restore no longer trusts repository-local VCS configuration and hooks from the uploaded archive.
* :ref:`check-punctuation-spacing` check no longer triggers false positives for placeholders.
* :doc:`/admin/machine` now falls back to the default API URL when base URL is empty.
* :ref:`mt-deepl` maps plain Portuguese to European Portuguese.
* Push branches are no longer updated with upstream-only commits in multi-branch workflows.
Expand Down
32 changes: 26 additions & 6 deletions weblate/checks/chars.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from weblate.checks.base import CountingCheck, TargetCheck, TargetCheckParametrized
from weblate.checks.markup import strip_entities
from weblate.checks.parser import single_value_flag
from weblate.checks.same import strip_format
from weblate.checks.utils import highlight_string

if TYPE_CHECKING:
from collections.abc import Iterable
Expand Down Expand Up @@ -533,15 +533,31 @@
return super().should_skip(unit)

def check_single(self, source: str, target: str, unit: Unit) -> bool:
# Remove possible markup
target = strip_format(target, unit.all_flags)
# Remove XML/HTML entities to simplify parsing
# Remove XML/HTML entities first (indices must match the string we iterate over)
target = strip_entities(target)

# Skip punctuation inside placeables (e.g. XLIFF equiv-text, RST)
highlighted_ranges = [
(start, end) for start, end, _ in highlight_string(target, unit)
]
highlighted_ranges.sort()
whitespace = {" ", "\u00a0", "\u202f", "\u2009"}

total = len(target)
range_index = 0
current_start, current_end = (
highlighted_ranges[0] if highlighted_ranges else (None, None)
)
for i, char in enumerate(target):
# Advance to the next highlighted range if we've passed the current one.
while current_start is not None and i >= current_end:

Check failure on line 551 in weblate/checks/chars.py

View workflow job for this annotation

GitHub Actions / mypy

Unsupported operand types for >= ("int" and "None")
range_index += 1
if range_index < len(highlighted_ranges):
current_start, current_end = highlighted_ranges[range_index]
else:
current_start = current_end = None
break
# Skip characters that fall inside a highlighted range.
if current_start is not None and current_start <= i < current_end:

Check failure on line 559 in weblate/checks/chars.py

View workflow job for this annotation

GitHub Actions / mypy

Unsupported operand types for < ("int" and "None")
continue
if char in FRENCH_PUNCTUATION:
if i == 0:
# Trigger if punctuation is at the beginning of the string
Expand All @@ -559,6 +575,10 @@
return False

def get_fixup(self, unit: Unit) -> Iterable[FixupType] | None:
# If there are placeables in target, skip Fix button and rely on save-time
# autofix which has position-aware checks.
if highlight_string(unit.target, unit):
return None
return [
# First fix possibly wrong whitespace
(
Expand Down
10 changes: 3 additions & 7 deletions weblate/checks/duplicate.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from django.utils.translation import gettext_lazy, ngettext

from weblate.checks.base import TargetCheck
from weblate.checks.same import replace_format_placeholder, strip_format
from weblate.checks.utils import placeholder_replacement, replace_highlighted
from weblate.utils.html import format_html_join_comma
from weblate.utils.unicodechars import NON_WORD_CHARS

Expand Down Expand Up @@ -74,15 +74,11 @@ def check_single(self, source: str, target: str, unit: Unit):
lang_code = unit.translation.language.base_code

source_groups, source_words = self.extract_groups(
strip_format(
source, unit.all_flags, replacement=replace_format_placeholder
),
replace_highlighted(source, unit, placeholder_replacement),
source_code,
)
target_groups, target_words = self.extract_groups(
strip_format(
target, unit.all_flags, replacement=replace_format_placeholder
),
replace_highlighted(target, unit, placeholder_replacement),
lang_code,
)

Expand Down
63 changes: 3 additions & 60 deletions weblate/checks/same.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,9 @@

from weblate.checks.base import TargetCheck
from weblate.checks.data import IGNORE_WORDS
from weblate.checks.format import FLAG_RULES, PERCENT_MATCH
from weblate.checks.markup import BBCODE_MATCH
from weblate.checks.qt import QT_FORMAT_MATCH, QT_PLURAL_MATCH
from weblate.checks.ruby import RUBY_FORMAT_MATCH
from weblate.checks.utils import replace_highlighted

if TYPE_CHECKING:
from collections.abc import Callable

from weblate.checks.flags import Flags
from weblate.trans.models import Unit

# Email address to ignore
Expand Down Expand Up @@ -49,8 +43,6 @@

TEMPLATE_RE = re.compile(r"{[a-z_-]+}|@[A-Z_]@", re.IGNORECASE)

RST_MATCH = re.compile(r"(:[a-z:]+:`[^`]+`|``[^`]+``)")

SPLIT_RE = re.compile(
r"(?:\&(?:nbsp|rsaquo|lt|gt|amp|ldquo|rdquo|times|quot);|"
r'[() ,.^`"\'\\/_<>!?;:|{}*@%#&~=+\r\n✓—‑…\[\]0-9-])+',
Expand All @@ -63,39 +55,6 @@
DB_TAGS = ("screen", "indexterm", "programlisting")


def replace_format_placeholder(match: re.Match) -> str:
    """Build a stand-in token for a matched format placeholder.

    The token embeds the offset at which the match starts, so placeholders
    found at different positions yield different tokens.
    """
    offset = match.start()
    return f"x-weblate-{offset}"


def strip_format(
    msg: str, flags: Flags, replacement: str | Callable[[re.Match], str] = ""
) -> str:
    """
    Remove format strings from the strings.

    These are quite often not changed by translators.

    The first entry of ``FLAG_RULES`` whose flag is present in ``flags`` is
    applied; otherwise the remaining format flags are tried in a fixed order.
    When none of the known format flags is set, ``msg`` is returned unchanged.

    :param msg: Text to process.
    :param flags: Flags of the checked unit; only membership tests are used.
    :param replacement: Substitute for each matched format string, either a
        literal string or a callable receiving the match object (it is passed
        straight to ``regex.sub``).
    :return: ``msg`` with every matched format string substituted.
    """
    for format_flag, (regex, _is_position_based, _extract_string) in FLAG_RULES.items():
        if format_flag in flags:
            # Honor the caller supplied replacement on this path as well; it
            # used to be dropped here, unlike in every other branch.
            return regex.sub(replacement, msg)

    # Formats without a FLAG_RULES entry, checked in priority order.
    fallback_rules = (
        ("qt-format", QT_FORMAT_MATCH),
        ("qt-plural-format", QT_PLURAL_MATCH),
        ("ruby-format", RUBY_FORMAT_MATCH),
        ("rst-text", RST_MATCH),
        ("percent-placeholders", PERCENT_MATCH),
        ("bbcode-text", BBCODE_MATCH),
    )
    for format_flag, regex in fallback_rules:
        if format_flag in flags:
            return regex.sub(replacement, msg)
    return msg


def strip_string(msg: str) -> str:
"""Strip (usually) untranslated parts from the string."""
# Strip HTML markup
Expand Down Expand Up @@ -133,17 +92,6 @@ def test_word(word, extra_ignore):
)


def strip_placeholders(msg: str, unit: Unit) -> str:
    """Remove every configured placeholder from ``msg``.

    Values of the ``placeholders`` flag are either plain strings, which are
    matched literally, or objects exposing ``pattern``, whose regular
    expression source is used as is. All of them are joined into a single
    alternation and each match is replaced by an empty string.
    """
    alternatives = []
    for param in unit.all_flags.get_value("placeholders"):
        if isinstance(param, str):
            alternatives.append(re.escape(param))
        else:
            alternatives.append(param.pattern)
    return re.sub("|".join(alternatives), "", msg)


class SameCheck(TargetCheck):
"""Check for untranslated entries."""

Expand All @@ -153,7 +101,6 @@ class SameCheck(TargetCheck):

def should_ignore(self, source: str, unit: Unit) -> bool:
"""Check whether given unit should be ignored."""
from weblate.checks.flags import TYPED_FLAGS
from weblate.glossary.models import get_glossary_terms

# Ignore some strings based on notes (typically from gettext PO file)
Expand All @@ -167,12 +114,8 @@ def should_ignore(self, source: str, unit: Unit) -> bool:
stripped = source
flags = unit.all_flags

# Strip format strings
stripped = strip_format(stripped, flags)

# Strip placeholder strings
if "placeholders" in TYPED_FLAGS and "placeholders" in flags:
stripped = strip_placeholders(stripped, unit)
# Strip all highlighted placeables and format spans.
stripped = replace_highlighted(stripped, unit)

if "strict-same" in flags:
return not stripped
Expand Down
24 changes: 24 additions & 0 deletions weblate/checks/tests/test_chars_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@

def setUp(self) -> None:
super().setUp()
self.test_single_good_matching = ("string\n\nstring", "string\n\nstring", "")

Check failure on line 307 in weblate/checks/tests/test_chars_checks.py

View workflow job for this annotation

GitHub Actions / mypy

Incompatible types in assignment (expression has type "tuple[str, str, str]", variable has type "Callable[[], None]")

Check failure on line 307 in weblate/checks/tests/test_chars_checks.py

View workflow job for this annotation

GitHub Actions / mypy

Cannot assign to a method
self.test_failure_1 = ("string\nstring", "string\n\n\nstring", "")
self.test_failure_2 = ("string\nstring\n\nstring", "string\nstring\nstring", "")

Expand Down Expand Up @@ -521,6 +521,30 @@
"fr",
)

def test_angular_fr_placeholders(self) -> None:
    """Punctuation inside an XLIFF placeholder is ignored, outside it is not."""
    # Placeholder regex covering the whole <x .../> element, equiv-text included
    flags = r'placeholders:r"<x\s[^>]*/>"'
    cases = (
        # The only colon lives in the placeholder equiv-text: check stays silent
        (
            False,
            'Orangutan has <x id="INTERPOLATION" equiv-text="{{ count | other: 0 }}"/> banana.\n',
            'Orangutan a <x id="INTERPOLATION" equiv-text="{{ count | other: 0 }}"/> banane.\n',
        ),
        # The colon is part of the translated text: check must fire
        (
            True,
            'Orangutan has: <x id="INTERPOLATION" equiv-text="{{ count }}"/> banana.\n',
            'Orangutan a: <x id="INTERPOLATION" equiv-text="{{ count }}"/> banane.\n',
        ),
    )
    for expected, source, target in cases:
        self.do_test(expected, (source, target, flags), "fr")

def test_cdata(self) -> None:
self.do_test(
False,
Expand Down
20 changes: 20 additions & 0 deletions weblate/checks/tests/test_duplicate_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,23 @@ def test_rst_markup(self) -> None:
),
{},
)

def test_xliff_placeholders(self) -> None:
    """Words repeated inside an XLIFF placeholder are not reported."""
    unit = MockUnit(code="fr", flags=r'placeholders:r"<x\s[^>]*/>"')
    inside = 'Limite <x id="INTERPOLATION" equiv-text="{{ quota quota }}"/> par jour'  # codespell:ignore
    outside = 'limite limite <x id="INTERPOLATION" equiv-text="{{ quota }}"/> par jour'  # codespell:ignore

    # Repetition hidden inside the placeholder must not trigger a warning
    self.assertFalse(self.check.check_single("", inside, unit))
    # Repetition in the surrounding text must trigger a warning
    self.assertTrue(self.check.check_single("", outside, unit))
8 changes: 8 additions & 0 deletions weblate/checks/tests/test_same_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
unit.is_source = True
self.assertTrue(self.check.should_skip(unit))
# Is same as source
unit.translation.template = False

Check failure on line 31 in weblate/checks/tests/test_same_checks.py

View workflow job for this annotation

GitHub Actions / mypy

"MockTranslation" has no attribute "template"; maybe "is_template"?
self.assertTrue(self.check.should_skip(unit))
# Interlingua special case
unit.translation.language.code = "ia"
Expand Down Expand Up @@ -244,6 +244,14 @@
r'placeholders:r"%\w+%",strict-same',
),
)
self.do_test(
False,
(
'<x id="INTERPOLATION" equiv-text="{{ count }}"/>',
'<x id="INTERPOLATION" equiv-text="{{ count }}"/>',
r'placeholders:r"<x\s[^>]*/>"',
),
)

def test_same_project(self) -> None:
self.do_test(False, ("MockProject", "MockProject", ""))
Expand Down
20 changes: 19 additions & 1 deletion weblate/checks/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from django.test import SimpleTestCase

from weblate.checks.tests.test_checks import MockUnit
from weblate.checks.utils import highlight_string
from weblate.checks.utils import highlight_string, replace_highlighted


class HighlightTestCase(SimpleTestCase):
Expand Down Expand Up @@ -75,3 +75,21 @@ def test_escaped_markup(self) -> None:
(44, 59, "&lt;/strong&gt;"),
],
)

def test_replace_highlighted(self) -> None:
    """Highlighted format strings are dropped or swapped for a token."""
    unit = MockUnit(
        source="simple {format} %d string",
        flags="python-brace-format, python-format",
    )
    # Only the highlighted spans are removed; the three separating spaces
    # around them stay, which is consistent with the offsets 7 and 16
    # asserted below.
    self.assertEqual(
        replace_highlighted(unit.source, unit),
        "simple   string",
    )
    # A callable replacement receives the start offset of each span.
    self.assertEqual(
        replace_highlighted(
            unit.source,
            unit,
            lambda start: f"x-weblate-{start}",
        ),
        "simple x-weblate-7 x-weblate-16 string",
    )
31 changes: 30 additions & 1 deletion weblate/checks/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from weblate.checks.models import CHECKS

if TYPE_CHECKING:
from collections.abc import Generator
from collections.abc import Callable, Generator

from weblate.trans.models import Unit

Expand Down Expand Up @@ -88,3 +88,32 @@ def highlight_string(
hl_idx_next += 1

return highlights


def replace_highlighted(
    source: str,
    unit: Unit,
    replacement: str | Callable[[int], str] = "",
) -> str:
    """
    Replace highlighted ranges in source string.

    :param source: Text to process.
    :param unit: Unit handed to ``highlight_string`` to locate the ranges.
    :param replacement: Literal substitute, or a callable receiving the start
        offset of the replaced range and returning the substitute.
    :return: ``source`` with each highlighted range substituted, or ``source``
        itself when nothing is highlighted.
    """
    highlights = highlight_string(source, unit)
    if not highlights:
        return source

    result: list[str] = []
    last_end = 0
    for start, end, _text in highlights:
        if start < last_end:
            # This range begins inside one that was already replaced. Emit no
            # second replacement, but extend the consumed region so the tail
            # of a partially overlapping range is not copied to the output.
            # Nested ranges end before last_end and are unaffected.
            # NOTE(review): whether highlight_string can actually return
            # partially overlapping ranges is unconfirmed; this is defensive.
            last_end = max(last_end, end)
            continue
        result.append(source[last_end:start])
        result.append(replacement(start) if callable(replacement) else replacement)
        last_end = end

    result.append(source[last_end:])
    return "".join(result)


def placeholder_replacement(start_index: int) -> str:
    """Return the stand-in token for a range starting at ``start_index``."""
    prefix = "x-weblate-"
    return f"{prefix}{start_index}"
Loading
Loading