Skip to content

Commit 8b86172

Browse files
committed
feat(unit): apply delta of stats upon edit
This avoids a possibly expensive full stats update on large components and applies only the delta that can be inferred from the current edit. The scope of delta handling is intentionally narrow: it covers only the simple cases in order to avoid code complexity. If the delta approach cannot be used, it falls back to the standard stats calculation.
1 parent 1ba072a commit 8b86172

File tree

5 files changed

+461
-47
lines changed

5 files changed

+461
-47
lines changed

weblate/trans/models/translation.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import codecs
88
import os
99
import tempfile
10-
from contextlib import suppress
10+
from contextlib import contextmanager, suppress
1111
from datetime import UTC
1212
from itertools import chain
1313
from pathlib import Path
@@ -234,6 +234,8 @@ def __init__(self, *args, **kwargs) -> None:
234234
self.addon_commit_files: list[str] = []
235235
self.reason = ""
236236
self._invalidate_scheduled = False
237+
self._suppress_cache_invalidation_depth = 0
238+
self._stats_delta_requires_full_rebuild = False
237239
self.update_changes: list[Change] = []
238240
self.pending_unit_changes: list[PendingUnitChange] = []
239241
# Project backup integration
@@ -1674,12 +1676,31 @@ def _invalidate_trigger(self) -> None:
16741676

16751677
def invalidate_cache(self) -> None:
    """
    Invalidate any cached stats.

    Does nothing while invalidation is suppressed (suppression depth is
    non-zero) or when an invalidation has already been scheduled.
    Otherwise marks the invalidation as scheduled and registers
    ``_invalidate_trigger`` to run when the current transaction commits.
    """
    # Caller explicitly asked to skip invalidation for now
    if self._suppress_cache_invalidation_depth:
        return
    # Invalidate summary stats
    if self._invalidate_scheduled:
        # Already queued, avoid registering the trigger twice
        return
    self._invalidate_scheduled = True
    transaction.on_commit(self._invalidate_trigger)
16821686

1687+
@contextmanager
def suppress_cache_invalidation(self):
    """
    Temporarily turn ``invalidate_cache`` into a no-op.

    The suppression is tracked as a depth counter, so the context manager
    can be nested; the counter is decremented again even when the body
    raises.
    """
    self._suppress_cache_invalidation_depth += 1
    try:
        yield
    finally:
        self._suppress_cache_invalidation_depth -= 1
1694+
1695+
def require_full_stats_rebuild(self) -> None:
    """Flag that a stats delta is not sufficient and a full rebuild is needed."""
    self._stats_delta_requires_full_rebuild = True
1697+
1698+
def consume_full_stats_rebuild_requirement(self) -> bool:
    """
    Report whether a full stats rebuild was requested and clear the request.

    Returns True exactly once per request; subsequent calls return False
    until the flag is raised again.
    """
    if not self._stats_delta_requires_full_rebuild:
        return False
    # Reset so that the next edit starts with a clean flag
    self._stats_delta_requires_full_rebuild = False
    return True
1703+
16831704
def detect_completed_translation(self, change: Change, old_translated: int) -> None:
16841705
translated = self.stats.translated
16851706
if old_translated < translated and translated == self.stats.all:

weblate/trans/models/unit.py

Lines changed: 113 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ def prefetch_full(self):
159159
.prefetch_source()
160160
.prefetch_related(
161161
"labels",
162+
"source_unit__labels",
162163
models.Prefetch(
163164
"suggestion_set",
164165
queryset=Suggestion.objects.order(),
@@ -1532,7 +1533,10 @@ def save_backend(
15321533

15331534
# Generate change and process it
15341535
change = self.post_save(
1535-
user or author, author, change_action, save=not self.is_batch_update
1536+
user or author,
1537+
author,
1538+
change_action,
1539+
save=not self.is_batch_update,
15361540
)
15371541
if self.is_batch_update:
15381542
self.translation.update_changes.append(change)
@@ -1563,7 +1567,6 @@ def post_save(
15631567
ActionEvents.BULK_EDIT,
15641568
}:
15651569
old_translated = self.translation.stats.translated
1566-
15671570
# Update translation stats
15681571
self.translation.invalidate_cache()
15691572

@@ -1592,46 +1595,19 @@ def update_source_units(
15921595
"""
15931596
with sentry_sdk.start_span(op="unit.update_source_units", name=f"{self.pk}"):
15941597
changes = []
1598+
translation_parent_stats = {}
1599+
delta_failed = False
1600+
translation_delta_data = {}
15951601

15961602
# Find relevant units
15971603
for unit in self.unit_set.exclude(id=self.id).prefetch().prefetch_bulk():
1598-
# Update source and number of words
1599-
unit.source = self.target
1600-
unit.num_words = self.num_words
1601-
# Find reverted units
1602-
if (
1603-
unit.state in FUZZY_STATES
1604-
and unit.previous_source == self.target
1605-
and unit.target
1606-
):
1607-
# Unset fuzzy on reverted
1608-
unit.original_state = unit.state = STATE_TRANSLATED
1609-
PendingUnitChange.store_unit_change(
1610-
unit=unit,
1611-
author=author,
1612-
)
1613-
unit.previous_source = ""
1614-
elif (
1615-
unit.original_state in FUZZY_STATES
1616-
and unit.previous_source == self.target
1617-
and unit.target
1604+
if not self.update_unit_from_source_change(
1605+
unit,
1606+
previous_source,
1607+
author,
1608+
translation_delta_data,
16181609
):
1619-
# Unset fuzzy on reverted
1620-
unit.original_state = STATE_TRANSLATED
1621-
unit.previous_source = ""
1622-
elif unit.state >= STATE_TRANSLATED and unit.target:
1623-
# Set fuzzy on changed
1624-
unit.original_state = STATE_NEEDS_REWRITING
1625-
if unit.state < STATE_READONLY:
1626-
unit.state = STATE_NEEDS_REWRITING
1627-
PendingUnitChange.store_unit_change(
1628-
unit=unit,
1629-
author=author,
1630-
)
1631-
unit.previous_source = previous_source
1632-
1633-
# Save unit
1634-
unit.save()
1610+
delta_failed = True
16351611
# Generate change
16361612
changes.append(
16371613
unit.generate_change(
@@ -1644,11 +1620,100 @@ def update_source_units(
16441620
save=False,
16451621
)
16461622
)
1623+
for stat in unit.translation.stats.get_update_objects(full=False):
1624+
translation_parent_stats[stat.cache_key] = stat
16471625
if changes:
16481626
# Bulk create changes
16491627
Change.objects.bulk_create(changes)
1650-
# Invalidate stats
1651-
self.translation.component.invalidate_cache()
1628+
if delta_failed:
1629+
self.translation.component.invalidate_cache()
1630+
return
1631+
1632+
def update_source_stats_on_commit() -> None:
1633+
for data in translation_delta_data.values():
1634+
stats = data["stats"]
1635+
if not stats.apply_source_delta(
1636+
data["base_stats_timestamp"], data["delta"]
1637+
):
1638+
stats.update_stats(update_parents=False)
1639+
for stat in translation_parent_stats.values():
1640+
stat.update_stats()
1641+
self.translation.component.stats.update_stats()
1642+
self.translation.component.stats.update_parents()
1643+
1644+
transaction.on_commit(update_source_stats_on_commit)
1645+
1646+
def update_source_unit_state(
    self, unit, previous_source: str, author: User | None
) -> None:
    """
    Apply a changed source string to a single unit, without saving it.

    The unit receives this unit's target as its new source together with
    this unit's word count. Its state is then adjusted:

    - A unit in a fuzzy state whose previous source equals the new source
      is treated as reverted: it becomes translated again and the change
      is stored as pending.
    - A unit whose original state is fuzzy under the same condition only
      gets its original state reset.
    - Any other translated unit is marked as needing rewriting and
      remembers ``previous_source``.

    Units matching none of these cases only get the new source and word
    count.
    """
    # Update source and number of words
    unit.source = self.target
    unit.num_words = self.num_words

    # All "reverted" cases share this precondition
    reverted = unit.previous_source == self.target and unit.target

    # Find reverted units
    if unit.state in FUZZY_STATES and reverted:
        # Unset fuzzy on reverted
        unit.original_state = unit.state = STATE_TRANSLATED
        PendingUnitChange.store_unit_change(unit=unit, author=author)
        unit.previous_source = ""
        return

    if unit.original_state in FUZZY_STATES and reverted:
        # Unset fuzzy on reverted
        unit.original_state = STATE_TRANSLATED
        unit.previous_source = ""
        return

    if unit.state >= STATE_TRANSLATED and unit.target:
        # Set fuzzy on changed
        unit.original_state = STATE_NEEDS_REWRITING
        # NOTE(review): nesting reconstructed from a whitespace-stripped
        # diff; confirm the pending change is stored only when the state
        # is actually changed, and previous_source is set for the whole
        # branch.
        if unit.state < STATE_READONLY:
            unit.state = STATE_NEEDS_REWRITING
            PendingUnitChange.store_unit_change(unit=unit, author=author)
        unit.previous_source = previous_source
1679+
1680+
def update_unit_from_source_change(
    self,
    unit,
    previous_source: str,
    author: User | None,
    translation_delta_data: dict,
) -> bool:
    """
    Update and save one unit after a source change, recording a stats delta.

    The unit is always updated and saved. When the translation stats
    support delta updates, the difference between the unit's stats bucket
    before and after the save is accumulated into
    ``translation_delta_data``, keyed by the unit's translation id.

    Returns True when the delta was recorded. Returns False when the
    caller has to fall back to a full stats recalculation, either because
    a full rebuild was requested during the save or because no snapshot
    could be taken beforehand.
    """
    stats = unit.translation.stats
    # Snapshot has to be taken before the unit is modified
    old_stats_snapshot = (
        stats.capture_unit_snapshot(unit) if stats.can_apply_delta() else None
    )

    self.update_source_unit_state(unit, previous_source, author)
    # Stats are handled here, so skip the invalidation triggered by save
    with unit.translation.suppress_cache_invalidation():
        unit.save()

    # Always consume the flag so it does not leak into the next unit
    if unit.translation.consume_full_stats_rebuild_requirement():
        return False
    if old_stats_snapshot is None:
        return False

    new_stats_snapshot = stats.capture_unit_snapshot(unit)
    # One accumulator per translation, created by the first unit seen
    entry = translation_delta_data.setdefault(
        unit.translation_id,
        {
            "stats": stats,
            "base_stats_timestamp": stats.stats_timestamp,
            "delta": {},
        },
    )
    old_bucket = stats.snapshot_to_bucket(old_stats_snapshot)
    new_bucket = stats.snapshot_to_bucket(new_stats_snapshot)
    accumulated = entry["delta"]
    for key in stats.UNIT_DELTA_KEYS:
        delta = new_bucket.get(key, 0) - old_bucket.get(key, 0)
        # Only non-zero changes are stored
        if delta:
            accumulated[key] = accumulated.get(key, 0) + delta
    return True
16521717

16531718
def generate_change(
16541719
self,
@@ -1759,6 +1824,11 @@ def unresolved_comments(self) -> list[Comment]:
17591824
if not comment.resolved and comment.unit_id == self.id
17601825
]
17611826

1827+
def get_label_count(self) -> int:
    """
    Return the number of labels attached to this unit.

    Uses the prefetched labels when they are available and falls back to
    ``labels.count()`` otherwise.
    """
    # The prefetch cache may be absent on instances that were never
    # prefetched, so do not assume the attribute exists.
    prefetched = getattr(self, "_prefetched_objects_cache", {})
    if "labels" in prefetched:
        return len(prefetched["labels"])
    return self.labels.count()
1831+
17621832
def run_checks( # noqa: C901
17631833
self, *, force_propagate: bool = False, skip_propagate: bool = False
17641834
) -> None:
@@ -1816,6 +1886,7 @@ def run_checks( # noqa: C901
18161886
# Skip disabled/removed checks
18171887
continue
18181888
if check_obj.propagates:
1889+
self.translation.require_full_stats_rebuild()
18191890
if check_obj.propagates == "source":
18201891
propagated_units = self.propagated_units
18211892
values = set(
@@ -1844,6 +1915,7 @@ def run_checks( # noqa: C901
18441915

18451916
# Propagate checks which need it (for example consistency)
18461917
if propagation:
1918+
self.translation.require_full_stats_rebuild()
18471919
querymap: dict[Literal["source", "target"], UnitQuerySet] = {
18481920
"source": self.propagated_units,
18491921
"target": Unit.objects.same_target(self),
@@ -2001,10 +2073,7 @@ def translate(
20012073
if new_state != STATE_READONLY:
20022074
self.original_state = self.state
20032075

2004-
if change_action == ActionEvents.AUTO:
2005-
self.automatically_translated = True
2006-
else:
2007-
self.automatically_translated = False
2076+
self.automatically_translated = change_action == ActionEvents.AUTO
20082077

20092078
# Save to the database
20102079
saved = self.save_backend(

weblate/trans/tests/test_edit.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import time
1010
from typing import TYPE_CHECKING, cast
11+
from unittest.mock import patch
1112

1213
from django.urls import reverse
1314

@@ -524,6 +525,90 @@ def test_edit(self) -> None:
524525
self.assertEqual(unit.state, STATE_TRANSLATED)
525526
self.assert_backend(4, "en")
526527

528+
def test_edit_does_not_rebuild_component_language_stats(self) -> None:
    """Editing a unit must not trigger component-wide cache invalidation."""
    # Make sure stats are calculated before the edit
    self.assertGreater(self.get_translation().stats.all, 0)
    with patch(
        "weblate.trans.models.component.Component.invalidate_cache",
        autospec=True,
    ) as invalidate_cache:
        self.edit_unit("Hello, world!\n", "Nazdar svete!\n", "en")
    invalidate_cache.assert_not_called()
536+
537+
def test_suppress_cache_invalidation_is_reentrant(self) -> None:
    """Nested suppression blocks invalidation until the outermost exit."""
    translation = self.get_translation()
    with patch(
        "weblate.trans.models.translation.transaction.on_commit"
    ) as on_commit:
        with translation.suppress_cache_invalidation():
            translation.invalidate_cache()
            with translation.suppress_cache_invalidation():
                translation.invalidate_cache()
            # Still suppressed by the outer context
            translation.invalidate_cache()
        on_commit.assert_not_called()

        # Suppression is over, repeated calls schedule only once
        translation.invalidate_cache()
        translation.invalidate_cache()

    on_commit.assert_called_once()
553+
554+
def test_source_edit_updates_translation_and_component_stats(self) -> None:
    """Source edit adjusts translation stats and keeps component in sync."""
    translation = self.get_translation()
    self.edit_unit("Hello, world!\n", "Nazdar svete!\n", "cs")

    # Reload to get fresh stats after translating the unit
    translation = Translation.objects.get(pk=translation.pk)
    component = Component.objects.get(pk=self.component.pk)
    unit_before = translation.unit_set.get(context="hello")
    all_chars_before = translation.stats.all_chars
    all_words_before = translation.stats.all_words
    translated_before = translation.stats.translated
    component_all_chars_before = component.stats.all_chars

    # Edit the source string
    self.edit_unit("Hello, world!\n", "Hello, universe!\n", "en")

    translation = Translation.objects.get(pk=translation.pk)
    component = Component.objects.get(pk=self.component.pk)
    unit_after = translation.unit_set.get(context="hello")
    all_chars_delta = len(unit_after.source) - len(unit_before.source)
    all_words_delta = unit_after.num_words - unit_before.num_words

    self.assertEqual(
        translation.stats.all_chars,
        all_chars_before + all_chars_delta,
    )
    self.assertEqual(
        translation.stats.all_words,
        all_words_before + all_words_delta,
    )
    # The translated unit is no longer counted as translated
    self.assertEqual(translation.stats.translated, translated_before - 1)
    self.assertNotEqual(component.stats.all_chars, component_all_chars_before)
    # Component stats have to match the sum over its translations
    self.assertEqual(
        component.stats.all_chars,
        sum(
            child.stats.all_chars
            for child in Component.objects.get(
                pk=self.component.pk
            ).translation_set.all()
        ),
    )
592+
593+
def test_source_edit_falls_back_to_full_recompute_on_nonlocal_checks(self) -> None:
    """Requesting a full rebuild during checks invalidates the component."""

    def fake_run_checks(unit, *args, **kwargs) -> None:
        # Stand-in for checks that require a full stats rebuild
        unit.translation.require_full_stats_rebuild()

    with (
        patch(
            "weblate.trans.models.unit.Unit.run_checks",
            autospec=True,
            side_effect=fake_run_checks,
        ),
        patch(
            "weblate.trans.models.component.Component.invalidate_cache",
            autospec=True,
        ) as invalidate_cache,
    ):
        self.edit_unit("Hello, world!\n", "Nazdar svete!\n", "en")

    invalidate_cache.assert_called()
611+
527612
def test_edit_revert(self) -> None:
528613
translation = self.get_translation()
529614
# Edit translation

0 commit comments

Comments
 (0)