Skip to content

Commit 9570843

Browse files
Merge pull request #2527 from IFRCGo/feature/translation-field-stats
Add table_field to translation cache for stats
2 parents 18d1144 + 17c67d8 commit 9570843

File tree

5 files changed

+123
-11
lines changed

5 files changed

+123
-11
lines changed
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Generated by Django 4.2.19 on 2025-08-05 12:57
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """Add three statistics columns to ``lang.TranslationCache``."""

    dependencies = [
        ("lang", "0007_translationcache"),
    ]

    operations = [
        # Free-text label, empty by default, so existing rows need no backfill.
        migrations.AddField(
            model_name="translationcache",
            name="table_field",
            field=models.CharField(max_length=128, default="", blank=True),
        ),
        # Boolean flag, False for every existing row.
        migrations.AddField(
            model_name="translationcache",
            name="other_fields",
            field=models.BooleanField(default=False),
        ),
        # Integer counter, starting at zero for every existing row.
        migrations.AddField(
            model_name="translationcache",
            name="num_calls",
            field=models.IntegerField(default=0),
        ),
    ]
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Generated by Django 4.2.19 on 2025-08-11 08:18
2+
3+
import hashlib
4+
5+
from django.db import migrations, models
6+
7+
8+
def fill_text_hash(apps, schema_editor, batch_size=1000):
    """
    Backfill ``text_hash`` with the SHA-256 hex digest of ``text``.

    Only rows whose ``text_hash`` is still the empty string are touched. Rows
    are streamed from the database and written back in batches, so the whole
    table is never held in memory and the number of UPDATE statements is
    roughly ``rows / batch_size`` instead of one per row.

    Args:
        apps: Historical app registry supplied by ``RunPython``.
        schema_editor: Supplied by ``RunPython``; not used here.
        batch_size: Number of rows fetched and updated per round trip.
    """
    TranslationCache = apps.get_model("lang", "TranslationCache")

    # Select unhashed rows in SQL instead of loading every row and testing in Python.
    pending = (
        TranslationCache.objects.filter(text_hash="")
        .only("text")
        .iterator(chunk_size=batch_size)
    )

    batch = []
    for obj in pending:
        obj.text_hash = hashlib.sha256(obj.text.encode("utf-8")).hexdigest()
        batch.append(obj)
        if len(batch) >= batch_size:
            TranslationCache.objects.bulk_update(batch, ["text_hash"])
            batch = []
    if batch:
        # Flush the final, partially filled batch.
        TranslationCache.objects.bulk_update(batch, ["text_hash"])

    print(" Hash field is populated in lang_translationcache.")
15+
16+
17+
def fill_last_used(apps, schema_editor):
    """Set ``last_used`` to each row's own ``created_at`` in one UPDATE statement."""
    cache_model = apps.get_model("lang", "TranslationCache")
    # F() makes the database copy the column value row by row; nothing is loaded into Python.
    creation_time = models.F("created_at")
    cache_model.objects.update(last_used=creation_time)
20+
21+
22+
class Migration(migrations.Migration):
    """Replace the ``text`` lookup index with a ``text_hash`` one, add ``last_used``, and backfill both columns."""

    dependencies = [
        ("lang", "0008_translationcache_table_field"),
    ]

    operations = [
        # Drop the old index that covered the ``text`` column.
        migrations.RemoveIndex(
            model_name="translationcache",
            name="lang_transl_text_4a497b_idx",
        ),
        # Existing rows get "" here; fill_text_hash below replaces it with the real digest.
        migrations.AddField(
            model_name="translationcache",
            name="text_hash",
            field=models.CharField(max_length=64, default=""),
            preserve_default=False,
        ),
        migrations.AddField(
            model_name="translationcache",
            name="last_used",
            field=models.DateTimeField(null=True, blank=True),
        ),
        migrations.AddIndex(
            model_name="translationcache",
            index=models.Index(
                name="lang_transl_text_ha_7b6786_idx",
                fields=["text_hash", "source_language", "dest_language"],
            ),
        ),
        # Data backfills; reversing the migration leaves the data untouched (noop).
        migrations.RunPython(fill_text_hash, migrations.RunPython.noop),
        migrations.RunPython(fill_last_used, migrations.RunPython.noop),
    ]

lang/models.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,16 +37,21 @@ def get_user_permissions_per_language(cls, user):
3737

3838
class TranslationCache(models.Model):
    """A stored translation of ``text`` for one (source language, destination language) pair."""

    text = models.TextField()
    # 64 characters wide; indexed below together with the two language columns.
    text_hash = models.CharField(max_length=64)
    source_language = models.CharField(max_length=16)
    dest_language = models.CharField(max_length=16)
    translated_text = models.TextField()
    table_field = models.CharField(max_length=128, blank=True, default="")  # for stats only
    other_fields = models.BooleanField(default=False)  # for stats only
    num_calls = models.IntegerField(default=0)  # for stats only
    created_at = models.DateTimeField(auto_now_add=True)
    last_used = models.DateTimeField(null=True, blank=True)

    class Meta:
        unique_together = ("text", "source_language", "dest_language")
        indexes = [
            models.Index(fields=["text_hash", "source_language", "dest_language"]),
        ]

    def __str__(self):
        # Separate the optional stats label from the language pair; without a
        # separator the output ran together (e.g. "en>frapp:model:field: ...").
        # When table_field is empty the output is the plain "src>dest: text..." form.
        origin = f" [{self.table_field}]" if self.table_field else ""
        return f"{self.source_language}>{self.dest_language}{origin}: {self.text[:30]}..."

lang/tasks.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,19 @@ def translate_fields_object(self, obj, field):
4444
if value:
4545
continue
4646

47+
model = type(obj)
48+
app_label = model._meta.app_label
49+
model_name = model._meta.model_name
50+
table_field = f"{app_label}:{model_name}:{field}"
51+
4752
new_value = self.translator.translate_text(
4853
initial_value,
4954
lang,
5055
source_language=initial_lang,
56+
table_field=table_field,
5157
)
5258

53-
field_max_length = type(obj)._meta.get_field(field).max_length
59+
field_max_length = model._meta.get_field(field).max_length
5460
if field_max_length and len(new_value) > field_max_length:
5561
logger.warning(f"Greater then max_length found for Model ({type(obj)}<{lang_field}>) pk: ({obj.pk})")
5662
new_value = new_value[:field_max_length]

lang/translation.py

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
1+
import hashlib
12
import logging
23
import threading
34

45
import boto3
56
import requests
67
from bs4 import BeautifulSoup
78
from django.conf import settings
9+
from django.db.models import BooleanField, Case, F, Value, When
10+
from django.utils import timezone
811
from django.utils.module_loading import import_string
912

1013
from .models import TranslationCache
@@ -19,16 +22,20 @@
1922
IFRC_TRANSLATION_CALL_LOCK = threading.Lock()
2023

2124

25+
def sha256_hash(text):
    """Return the SHA-256 digest of ``text`` (encoded as UTF-8) as a 64-character hex string."""
    digest = hashlib.sha256()
    digest.update(text.encode("utf-8"))
    return digest.hexdigest()
27+
28+
2229
class BaseTranslator:
    def _fake_translation(self, text, dest_language, source_language, table_field=""):
        """
        Return ``text`` with a fixed suffix naming both languages.

        This is only used for test. ``table_field`` is accepted but not used.
        """
        suffix = f' translated to "{dest_language}" using source language "{source_language}"'
        return text + suffix
2835

2936

3037
class DummyTranslator(BaseTranslator):
    """Translator that returns the base class's fake translation."""

    def translate_text(self, text, dest_language, source_language="auto", table_field=""):
        # table_field is accepted but not forwarded to the fake translation.
        fake = self._fake_translation(text, dest_language, source_language)
        return fake
3340

3441

@@ -100,14 +107,14 @@ def find_last_slashp(cls, text, limit):
100107
truncate_here += len(tag)
101108
return truncate_here
102109

103-
def translate_text(self, text, dest_language, source_language=None):
110+
def translate_text(self, text, dest_language, source_language=None, table_field=""):
104111
if settings.TESTING:
105112
# NOTE: Mocking for test purpose
106113
return self._fake_translation(text, dest_language, source_language)
107114

108115
global IFRC_TRANSLATION_CALL_COUNT
109116

110-
# A dirty workaround to handle oversized HTML+CSS texts, usually tables:
117+
# A workaround to handle oversized HTML+CSS texts, usually tables:
111118
textTail = ""
112119
if len(text) > settings.AZURE_TRANSL_LIMIT:
113120
truncate_here = self.find_last_slashtable(text, settings.AZURE_TRANSL_LIMIT)
@@ -134,22 +141,35 @@ def translate_text(self, text, dest_language, source_language=None):
134141
payload["textType"] = "html"
135142

136143
# Try cache at first (for shorter texts)
137-
use_cache = len(text) < 200
144+
use_cache = len(text) < 300
138145

139146
if use_cache:
147+
text_hash = sha256_hash(text)
140148
cache = TranslationCache.objects.filter(
141-
text=text,
149+
text_hash=text_hash,
142150
source_language=source_language or "", # source_language can be "detected"
143151
dest_language=dest_language,
144152
).first()
145153
if cache:
146-
logger.info(f"IFRC translation cache hit: {text[:30]}... {source_language}>{dest_language}")
154+
cache_other_fields = cache.table_field != table_field
155+
TranslationCache.objects.filter(id=cache.pk).update(
156+
last_used=timezone.now(),
157+
num_calls=F("num_calls") + 1,
158+
other_fields=Case(
159+
When(other_fields=True, then=Value(True)),
160+
default=Value(cache_other_fields),
161+
output_field=BooleanField(),
162+
),
163+
)
164+
logger.info(
165+
f"Translation cache hit, {source_language}>{dest_language} {table_field}{cache.num_calls}: {text[:30]}... "
166+
)
147167
return cache.translated_text
148168

149169
with IFRC_TRANSLATION_CALL_LOCK:
150170
IFRC_TRANSLATION_CALL_COUNT += 1
151171
logger.info(f"IFRC translation API call count: {IFRC_TRANSLATION_CALL_COUNT}")
152-
logger.info(f"IFRC translation API call: {text[:30]}... {source_language}>{dest_language}")
172+
logger.info(f"IFRC translation API call{source_language}>{dest_language}{table_field}: {text[:30]}... ")
153173
response = requests.post(
154174
self.url,
155175
headers=self.headers,
@@ -164,9 +184,12 @@ def translate_text(self, text, dest_language, source_language=None):
164184
if use_cache:
165185
TranslationCache.objects.create(
166186
text=text,
187+
text_hash=text_hash,
167188
source_language=source_language or "", # source_language can be "detected"
168189
dest_language=dest_language,
169190
translated_text=translated,
191+
table_field=table_field or "",
192+
last_used=timezone.now(),
170193
)
171194
return translated + textTail
172195

0 commit comments

Comments
 (0)