Skip to content

Commit 2bfdeae

Browse files
committed
feat(unit): apply delta of stats upon edit
This avoids a possibly expensive full stats update on large components by applying only the delta that can be inferred from the current edit. The scope of delta handling is intentionally narrow: it covers only the simple cases in order to avoid code complexity. If the delta approach cannot be used, it falls back to the standard stats calculation.
1 parent 3bf186f commit 2bfdeae

File tree

4 files changed

+339
-46
lines changed

4 files changed

+339
-46
lines changed

weblate/trans/models/translation.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import codecs
88
import os
99
import tempfile
10-
from contextlib import suppress
10+
from contextlib import contextmanager, suppress
1111
from datetime import UTC
1212
from itertools import chain
1313
from pathlib import Path
@@ -230,6 +230,8 @@ def __init__(self, *args, **kwargs) -> None:
230230
self.addon_commit_files: list[str] = []
231231
self.reason = ""
232232
self._invalidate_scheduled = False
233+
self._suppress_cache_invalidation = False
234+
self._stats_delta_requires_full_rebuild = False
233235
self.update_changes: list[Change] = []
234236
self.pending_unit_changes: list[PendingUnitChange] = []
235237
# Project backup integration
@@ -1671,12 +1673,31 @@ def _invalidate_trigger(self) -> None:
16711673

16721674
def invalidate_cache(self) -> None:
    """Invalidate any cached stats.

    Registers the invalidation trigger to run on transaction commit. Does
    nothing while invalidation is suppressed or when the trigger has already
    been scheduled.
    """
    skip = self._suppress_cache_invalidation or self._invalidate_scheduled
    if skip:
        return
    # Invalidate summary stats
    self._invalidate_scheduled = True
    transaction.on_commit(self._invalidate_trigger)
16791683

1684+
@contextmanager
def suppress_cache_invalidation(self):
    """Temporarily turn ``invalidate_cache`` into a no-op.

    Sets ``_suppress_cache_invalidation`` for the duration of the ``with``
    block and restores the value it had on entry, even when the block
    raises. Restoring (rather than resetting to ``False``) keeps an outer
    suppression active when the context manager is nested.
    """
    previous = self._suppress_cache_invalidation
    self._suppress_cache_invalidation = True
    try:
        yield
    finally:
        # Restore instead of clearing so nested use does not end the outer
        # suppression early.
        self._suppress_cache_invalidation = previous
1691+
1692+
def require_full_stats_rebuild(self) -> None:
    """Flag that the pending stats delta must not be used.

    The flag stays set until ``consume_full_stats_rebuild_requirement``
    reads and clears it.
    """
    self._stats_delta_requires_full_rebuild = True
1694+
1695+
def consume_full_stats_rebuild_requirement(self) -> bool:
    """Report whether a full stats rebuild was requested, clearing the flag.

    Returns ``True`` exactly once per request; subsequent calls return
    ``False`` until the flag is set again.
    """
    rebuild_requested = bool(self._stats_delta_requires_full_rebuild)
    if rebuild_requested:
        # Read-and-reset so the request is acted upon only once
        self._stats_delta_requires_full_rebuild = False
    return rebuild_requested
1700+
16801701
def detect_completed_translation(self, change: Change, old_translated: int) -> None:
16811702
translated = self.stats.translated
16821703
if old_translated < translated and translated == self.stats.all:

weblate/trans/models/unit.py

Lines changed: 115 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import operator
88
import re
9+
import time
910
from functools import partial, reduce
1011
from typing import TYPE_CHECKING, Any, Literal, TypedDict
1112

@@ -158,6 +159,7 @@ def prefetch_full(self):
158159
.prefetch_source()
159160
.prefetch_related(
160161
"labels",
162+
"source_unit__labels",
161163
models.Prefetch(
162164
"suggestion_set",
163165
queryset=Suggestion.objects.order(),
@@ -1501,7 +1503,10 @@ def save_backend(
15011503

15021504
# Generate change and process it
15031505
change = self.post_save(
1504-
user or author, author, change_action, save=not self.is_batch_update
1506+
user or author,
1507+
author,
1508+
change_action,
1509+
save=not self.is_batch_update,
15051510
)
15061511
if self.is_batch_update:
15071512
self.translation.update_changes.append(change)
@@ -1532,7 +1537,6 @@ def post_save(
15321537
ActionEvents.BULK_EDIT,
15331538
}:
15341539
old_translated = self.translation.stats.translated
1535-
15361540
# Update translation stats
15371541
self.translation.invalidate_cache()
15381542

@@ -1561,46 +1565,19 @@ def update_source_units(
15611565
"""
15621566
with sentry_sdk.start_span(op="unit.update_source_units", name=f"{self.pk}"):
15631567
changes = []
1568+
translation_parent_stats = {}
1569+
delta_failed = False
1570+
translation_delta_data = {}
15641571

15651572
# Find relevant units
15661573
for unit in self.unit_set.exclude(id=self.id).prefetch().prefetch_bulk():
1567-
# Update source and number of words
1568-
unit.source = self.target
1569-
unit.num_words = self.num_words
1570-
# Find reverted units
1571-
if (
1572-
unit.state in FUZZY_STATES
1573-
and unit.previous_source == self.target
1574-
and unit.target
1575-
):
1576-
# Unset fuzzy on reverted
1577-
unit.original_state = unit.state = STATE_TRANSLATED
1578-
PendingUnitChange.store_unit_change(
1579-
unit=unit,
1580-
author=author,
1581-
)
1582-
unit.previous_source = ""
1583-
elif (
1584-
unit.original_state in FUZZY_STATES
1585-
and unit.previous_source == self.target
1586-
and unit.target
1574+
if not self.update_source_unit_stats(
1575+
unit,
1576+
previous_source,
1577+
author,
1578+
translation_delta_data,
15871579
):
1588-
# Unset fuzzy on reverted
1589-
unit.original_state = STATE_TRANSLATED
1590-
unit.previous_source = ""
1591-
elif unit.state >= STATE_TRANSLATED and unit.target:
1592-
# Set fuzzy on changed
1593-
unit.original_state = STATE_NEEDS_REWRITING
1594-
if unit.state < STATE_READONLY:
1595-
unit.state = STATE_NEEDS_REWRITING
1596-
PendingUnitChange.store_unit_change(
1597-
unit=unit,
1598-
author=author,
1599-
)
1600-
unit.previous_source = previous_source
1601-
1602-
# Save unit
1603-
unit.save()
1580+
delta_failed = True
16041581
# Generate change
16051582
changes.append(
16061583
unit.generate_change(
@@ -1613,11 +1590,101 @@ def update_source_units(
16131590
save=False,
16141591
)
16151592
)
1593+
for stat in unit.translation.stats.get_update_objects(full=False):
1594+
translation_parent_stats[stat.cache_key] = stat
16161595
if changes:
16171596
# Bulk create changes
16181597
Change.objects.bulk_create(changes)
1619-
# Invalidate stats
1620-
self.translation.component.invalidate_cache()
1598+
if delta_failed:
1599+
self.translation.component.invalidate_cache()
1600+
return
1601+
1602+
def update_source_stats_on_commit() -> None:
1603+
for data in translation_delta_data.values():
1604+
stats = data["stats"]
1605+
stats.apply_source_delta(data["base_data"], data["delta"])
1606+
stats.fetch_last_change()
1607+
stats.count_changes()
1608+
stats.store("stats_timestamp", time.time())
1609+
stats.save(update_parents=False)
1610+
for stat in translation_parent_stats.values():
1611+
stat.update_stats()
1612+
self.translation.component.stats.update_stats()
1613+
self.translation.component.stats.update_parents()
1614+
1615+
transaction.on_commit(update_source_stats_on_commit)
1616+
1617+
def update_source_unit_state(
1618+
self, unit, previous_source: str, author: User | None
1619+
) -> None:
# Sync ``unit`` with this unit's current target text and adjust its state.
# The unit is only mutated here; this method does not call ``unit.save()``.
# ``previous_source`` is recorded on the unit only in the last branch;
# ``author`` is passed through to PendingUnitChange.store_unit_change.
# NOTE(review): indentation is lost in this diff view, so the nesting of the
# statements following the STATE_READONLY check should be confirmed against
# the repository before relying on it.
1620+
# Update source and number of words
1621+
unit.source = self.target
1622+
unit.num_words = self.num_words
1623+
# Find reverted units
1624+
if (
1625+
unit.state in FUZZY_STATES
1626+
and unit.previous_source == self.target
1627+
and unit.target
1628+
):
1629+
# Unset fuzzy on reverted: the new text equals the previously stored
1629+
# source, so both the current and the original state become translated
1630+
unit.original_state = unit.state = STATE_TRANSLATED
1631+
PendingUnitChange.store_unit_change(unit=unit, author=author)
1632+
unit.previous_source = ""
1633+
return
1634+
if (
1635+
unit.original_state in FUZZY_STATES
1636+
and unit.previous_source == self.target
1637+
and unit.target
1638+
):
1639+
# Unset fuzzy on reverted: only original_state was fuzzy here, so the
1639+
# current state is left untouched and no pending change is stored
1640+
unit.original_state = STATE_TRANSLATED
1641+
unit.previous_source = ""
1642+
return
1643+
if unit.state >= STATE_TRANSLATED and unit.target:
1644+
# Set fuzzy on changed
1645+
unit.original_state = STATE_NEEDS_REWRITING
1646+
if unit.state < STATE_READONLY:
1647+
unit.state = STATE_NEEDS_REWRITING
1648+
PendingUnitChange.store_unit_change(unit=unit, author=author)
1649+
unit.previous_source = previous_source
1650+
1651+
def update_source_unit_stats(
    self,
    unit,
    previous_source: str,
    author: User | None,
    translation_delta_data: dict,
) -> bool:
    """Update and save ``unit``, accumulating its stats delta.

    Captures a stats snapshot of the unit before and after applying
    ``update_source_unit_state`` and saving it (with cache invalidation
    suppressed), then adds the per-key difference to the entry for the
    unit's translation in ``translation_delta_data``.

    Args:
        unit: Unit to update and save.
        previous_source: Forwarded to ``update_source_unit_state``.
        author: Forwarded to ``update_source_unit_state``.
        translation_delta_data: Mapping keyed by ``unit.translation_id``;
            each entry holds ``"stats"``, ``"base_data"`` and ``"delta"``.
            Mutated in place.

    Returns:
        ``True`` when the delta was recorded, ``False`` when a full stats
        rebuild was requested during the save or no snapshot could be taken
        beforehand. The unit is saved in either case.
    """
    translation = unit.translation
    stats = translation.stats
    old_stats_snapshot = (
        stats.capture_unit_snapshot(unit) if stats.can_apply_delta() else None
    )

    self.update_source_unit_state(unit, previous_source, author)
    with translation.suppress_cache_invalidation():
        unit.save()

    # Always consume the flag so a stale request does not leak into the
    # next unit handled for the same translation.
    if translation.consume_full_stats_rebuild_requirement():
        return False
    if old_stats_snapshot is None:
        return False

    new_stats_snapshot = stats.capture_unit_snapshot(unit)
    entry = translation_delta_data.get(unit.translation_id)
    if entry is None:
        # Build the entry lazily: dict.setdefault() would evaluate
        # get_data_copy() on every call even when the entry already exists.
        entry = translation_delta_data[unit.translation_id] = {
            "stats": stats,
            "base_data": stats.get_data_copy(),
            "delta": {},
        }
    old_bucket = stats.snapshot_to_bucket(old_stats_snapshot)
    new_bucket = stats.snapshot_to_bucket(new_stats_snapshot)
    accumulated = entry["delta"]
    for key in stats.UNIT_DELTA_KEYS:
        delta = new_bucket.get(key, 0) - old_bucket.get(key, 0)
        if delta:
            accumulated[key] = accumulated.get(key, 0) + delta
    return True
16211688

16221689
def generate_change(
16231690
self,
@@ -1728,6 +1795,11 @@ def unresolved_comments(self) -> list[Comment]:
17281795
if not comment.resolved and comment.unit_id == self.id
17291796
]
17301797

1798+
def get_label_count(self) -> int:
    """Return the number of labels attached to this unit.

    Uses the prefetched ``labels`` result when one is available and falls
    back to a ``COUNT`` query otherwise.
    """
    # Django only creates ``_prefetched_objects_cache`` on instances that
    # went through a prefetch, so the attribute may be missing entirely.
    prefetched = getattr(self, "_prefetched_objects_cache", {})
    if "labels" in prefetched:
        return len(prefetched["labels"])
    return self.labels.count()
1802+
17311803
def run_checks( # noqa: C901
17321804
self, *, force_propagate: bool = False, skip_propagate: bool = False
17331805
) -> None:
@@ -1785,6 +1857,7 @@ def run_checks( # noqa: C901
17851857
# Skip disabled/removed checks
17861858
continue
17871859
if check_obj.propagates:
1860+
self.translation.require_full_stats_rebuild()
17881861
if check_obj.propagates == "source":
17891862
propagated_units = self.propagated_units
17901863
values = set(
@@ -1813,6 +1886,7 @@ def run_checks( # noqa: C901
18131886

18141887
# Propagate checks which need it (for example consistency)
18151888
if propagation:
1889+
self.translation.require_full_stats_rebuild()
18161890
querymap: dict[Literal["source", "target"], UnitQuerySet] = {
18171891
"source": self.propagated_units,
18181892
"target": Unit.objects.same_target(self),
@@ -1970,10 +2044,7 @@ def translate(
19702044
if new_state != STATE_READONLY:
19712045
self.original_state = self.state
19722046

1973-
if change_action == ActionEvents.AUTO:
1974-
self.automatically_translated = True
1975-
else:
1976-
self.automatically_translated = False
2047+
self.automatically_translated = change_action == ActionEvents.AUTO
19772048

19782049
# Save to the database
19792050
saved = self.save_backend(

weblate/trans/tests/test_edit.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import time
1010
from typing import TYPE_CHECKING, cast
11+
from unittest.mock import patch
1112

1213
from django.urls import reverse
1314

@@ -523,6 +524,73 @@ def test_edit(self) -> None:
523524
self.assertEqual(unit.state, STATE_TRANSLATED)
524525
self.assert_backend(4, "en")
525526

527+
def test_edit_does_not_rebuild_component_language_stats(self) -> None:
    """Editing a unit in "en" must not call ``Component.invalidate_cache``."""
    unit_total = self.get_translation().stats.all
    self.assertGreater(unit_total, 0)

    patch_target = "weblate.trans.models.component.Component.invalidate_cache"
    with patch(patch_target, autospec=True) as mocked_invalidate:
        self.edit_unit("Hello, world!\n", "Nazdar svete!\n", "en")

    mocked_invalidate.assert_not_called()
535+
536+
def test_source_edit_updates_translation_and_component_stats(self) -> None:
    """Check translation and component stats after editing the "en" text.

    The "cs" unit is translated first; the later "en" edit is expected to
    shift char/word totals by the source difference and to drop the
    translated count by one.
    """
    translation_pk = self.get_translation().pk
    self.edit_unit("Hello, world!\n", "Nazdar svete!\n", "cs")

    # Baseline taken from freshly loaded objects
    translation = Translation.objects.get(pk=translation_pk)
    component = Component.objects.get(pk=self.component.pk)
    unit_before = translation.unit_set.get(context="hello")
    baseline = {
        "all_chars": translation.stats.all_chars,
        "all_words": translation.stats.all_words,
        "translated": translation.stats.translated,
    }
    component_all_chars_before = component.stats.all_chars

    self.edit_unit("Hello, world!\n", "Hello, universe!\n", "en")

    # Reload everything so the assertions see the stored stats
    translation = Translation.objects.get(pk=translation_pk)
    component = Component.objects.get(pk=self.component.pk)
    unit_after = translation.unit_set.get(context="hello")
    expected_all_chars = (
        baseline["all_chars"] + len(unit_after.source) - len(unit_before.source)
    )
    expected_all_words = (
        baseline["all_words"] + unit_after.num_words - unit_before.num_words
    )

    self.assertEqual(translation.stats.all_chars, expected_all_chars)
    self.assertEqual(translation.stats.all_words, expected_all_words)
    self.assertEqual(translation.stats.translated, baseline["translated"] - 1)
    self.assertNotEqual(component.stats.all_chars, component_all_chars_before)

    # Component total must equal the sum over its translations
    children = Component.objects.get(pk=self.component.pk).translation_set.all()
    children_all_chars = sum(child.stats.all_chars for child in children)
    self.assertEqual(component.stats.all_chars, children_all_chars)
574+
575+
def test_source_edit_falls_back_to_full_recompute_on_nonlocal_checks(self) -> None:
    """A requested full rebuild must lead to ``Component.invalidate_cache``."""

    def request_full_rebuild(unit, *args, **kwargs) -> None:
        # Stand-in for run_checks that only requests the full rebuild
        unit.translation.require_full_stats_rebuild()

    run_checks_patch = patch(
        "weblate.trans.models.unit.Unit.run_checks",
        autospec=True,
        side_effect=request_full_rebuild,
    )
    invalidate_patch = patch(
        "weblate.trans.models.component.Component.invalidate_cache",
        autospec=True,
    )
    with run_checks_patch, invalidate_patch as mocked_invalidate:
        self.edit_unit("Hello, world!\n", "Nazdar svete!\n", "en")

    mocked_invalidate.assert_called()
593+
526594
def test_edit_revert(self) -> None:
527595
translation = self.get_translation()
528596
# Edit translation

0 commit comments

Comments
 (0)