1+ import hashlib
12import logging
23import threading
34
45import boto3
56import requests
67from bs4 import BeautifulSoup
78from django .conf import settings
9+ from django .db .models import BooleanField , Case , F , Value , When
10+ from django .utils import timezone
811from django .utils .module_loading import import_string
912
1013from .models import TranslationCache
# Module-wide lock; translate_text holds it while incrementing the
# IFRC_TRANSLATION_CALL_COUNT counter so concurrent callers do not lose updates.
IFRC_TRANSLATION_CALL_LOCK = threading.Lock()
2023
2124
def sha256_hash(text: str) -> str:
    """Return the hexadecimal SHA-256 digest of ``text``.

    The string is encoded as UTF-8 before hashing, so equal strings always
    yield the same 64-character lowercase hex digest.

    Args:
        text: The string to hash.

    Returns:
        The SHA-256 digest of the UTF-8 encoded text, as a hex string.
    """
    return hashlib.sha256(text.encode("utf-8")).hexdigest()
27+
28+
class BaseTranslator:
    """Base class providing the shared fake-translation helper for translators."""

    def _fake_translation(self, text, dest_language, source_language, table_field=""):
        """
        This is only used for test.

        Builds a deterministic stand-in for a real translation by appending
        a marker naming the destination and source languages to ``text``.

        Args:
            text: The text that would have been translated.
            dest_language: Target language code, echoed in the marker.
            source_language: Source language code, echoed in the marker.
            table_field: Accepted so the signature matches ``translate_text``;
                it does not affect the returned string.

        Returns:
            ``text`` followed by the marker suffix.
        """
        return text + f' translated to "{dest_language}" using source language "{source_language}"'
2835
2936
class DummyTranslator(BaseTranslator):
    """Translator that never calls an external service; it returns fake output."""

    def translate_text(self, text, dest_language, source_language="auto", table_field=""):
        """
        Return a fake translation of ``text`` without any network call.

        Args:
            text: The text to "translate".
            dest_language: Target language code.
            source_language: Source language code; defaults to ``"auto"``.
            table_field: Accepted for signature compatibility with the other
                translators in this module; not used here.

        Returns:
            The string produced by ``_fake_translation``.
        """
        return self._fake_translation(text, dest_language, source_language)
3340
3441
@@ -100,14 +107,14 @@ def find_last_slashp(cls, text, limit):
100107 truncate_here += len (tag )
101108 return truncate_here
102109
103- def translate_text (self , text , dest_language , source_language = None ):
110+ def translate_text (self , text , dest_language , source_language = None , table_field = "" ):
104111 if settings .TESTING :
105112 # NOTE: Mocking for test purpose
106113 return self ._fake_translation (text , dest_language , source_language )
107114
108115 global IFRC_TRANSLATION_CALL_COUNT
109116
110- # A dirty workaround to handle oversized HTML+CSS texts, usually tables:
117+ # A workaround to handle oversized HTML+CSS texts, usually tables:
111118 textTail = ""
112119 if len (text ) > settings .AZURE_TRANSL_LIMIT :
113120 truncate_here = self .find_last_slashtable (text , settings .AZURE_TRANSL_LIMIT )
@@ -134,22 +141,35 @@ def translate_text(self, text, dest_language, source_language=None):
134141 payload ["textType" ] = "html"
135142
136143 # Try cache at first (for shorter texts)
137- use_cache = len (text ) < 200
144+ use_cache = len (text ) < 300
138145
139146 if use_cache :
147+ text_hash = sha256_hash (text )
140148 cache = TranslationCache .objects .filter (
141- text = text ,
149+ text_hash = text_hash ,
142150 source_language = source_language or "" , # source_language can be "detected"
143151 dest_language = dest_language ,
144152 ).first ()
145153 if cache :
146- logger .info (f"IFRC translation cache hit: { text [:30 ]} ... { source_language } >{ dest_language } " )
154+ cache_other_fields = cache .table_field != table_field
155+ TranslationCache .objects .filter (id = cache .pk ).update (
156+ last_used = timezone .now (),
157+ num_calls = F ("num_calls" ) + 1 ,
158+ other_fields = Case (
159+ When (other_fields = True , then = Value (True )),
160+ default = Value (cache_other_fields ),
161+ output_field = BooleanField (),
162+ ),
163+ )
164+ logger .info (
165+ f"Translation cache hit, { source_language } >{ dest_language } { table_field } – { cache .num_calls } : { text [:30 ]} ... "
166+ )
147167 return cache .translated_text
148168
149169 with IFRC_TRANSLATION_CALL_LOCK :
150170 IFRC_TRANSLATION_CALL_COUNT += 1
151171 logger .info (f"IFRC translation API call count: { IFRC_TRANSLATION_CALL_COUNT } " )
152- logger .info (f"IFRC translation API call: { text [:30 ]} ... { source_language } > { dest_language } " )
172+ logger .info (f"IFRC translation API call – { source_language } > { dest_language } – { table_field } : { text [:30 ]} ... " )
153173 response = requests .post (
154174 self .url ,
155175 headers = self .headers ,
@@ -164,9 +184,12 @@ def translate_text(self, text, dest_language, source_language=None):
164184 if use_cache :
165185 TranslationCache .objects .create (
166186 text = text ,
187+ text_hash = text_hash ,
167188 source_language = source_language or "" , # source_language can be "detected"
168189 dest_language = dest_language ,
169190 translated_text = translated ,
191+ table_field = table_field or "" ,
192+ last_used = timezone .now (),
170193 )
171194 return translated + textTail
172195
0 commit comments