Skip to content

Commit d8ffd55

Browse files
committed
chg [pdf] improve translation layout
1 parent a611d6f commit d8ffd55

File tree

4 files changed

+60
-30
lines changed

4 files changed

+60
-30
lines changed

bin/lib/Language.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,9 @@ def get_obj_translated(obj_gid, language_name=False):
587587
def exists_object_translation_language(obj_gid, target):
    """Report whether a translation into ``target`` is recorded for ``obj_gid``.

    Returns the result of ``r_lang.hexists`` for field ``target`` under the
    key ``tr:<obj_gid>:``.
    """
    translation_key = f'tr:{obj_gid}:'
    return r_lang.hexists(translation_key, target)
589589

590+
def get_object_translation_language(obj_gid, target):
    """Fetch the value recorded for ``obj_gid`` translated into ``target``.

    Returns the result of ``r_lang.hget`` for field ``target`` under the key
    ``tr:<obj_gid>:``.
    """
    translation_key = f'tr:{obj_gid}:'
    return r_lang.hget(translation_key, target)
592+
590593
def set_obj_translation(obj_global_id, language, translation, field=''):
    """Record ``translation`` for an object under ``language``.

    The ``translation:<language>:<obj_global_id>:`` entry is deleted from
    ``r_cache`` before the new value is written, then the value is stored
    with ``r_lang.hset`` under ``tr:<obj_global_id>:<field>``.

    Returns whatever ``r_lang.hset`` returns.
    """
    cache_key = f'translation:{language}:{obj_global_id}:'
    r_cache.delete(cache_key)

    storage_key = f'tr:{obj_global_id}:{field}'
    return r_lang.hset(storage_key, language, translation)

bin/lib/objects/PDFs.py

Lines changed: 55 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -165,32 +165,46 @@ def translate(self, task, source, target): # TODO harmonize
165165
# p = 0
166166
for page in doc:
167167
# p += 1
168-
# if p != 3:
168+
# if p != 31:
169169
# continue
170170
tabs = page.find_tables() # detect the tables
171171
tabs_extracted = {}
172172
html_box_tables = []
173173
for tab in tabs:
174174
# print(tab)
175175
rows_text = tab.extract()
176-
tabs_extracted[str(tab.bbox)] = rows_text
177-
current_row = 0
178-
# table coord -> tab.bbox
179-
for row in tab.rows:
180-
i = 0
181-
for cell in row.cells:
182-
if cell:
183-
original = rows_text[current_row][i]
184-
if original:
185-
_, translated = Language.translate(original.strip(), source=source, target=target, filter_same_content=False)
186-
if translated:
187-
translated = translated.strip()
188-
if translated:
189-
translated = translated.replace('\n', '\\n')
190-
translated = h.handle(translated.strip()).replace('\\n', '<br>').replace('\n', ' ').replace('\\.', '.')
191-
html_box_tables.append((cell, translated))
192-
i += 1
193-
current_row += 1
176+
# check if is a real table
177+
nb_cell = 0
178+
none_column = False
179+
for column in rows_text:
180+
nb_column = 0
181+
for v in column:
182+
if v:
183+
nb_column += 1
184+
if nb_column < 1:
185+
none_column = True
186+
break
187+
else:
188+
nb_cell += nb_column
189+
if nb_cell > 1 and not none_column:
190+
tabs_extracted[str(tab.bbox)] = rows_text
191+
current_row = 0
192+
# table coord -> tab.bbox
193+
for row in tab.rows:
194+
i = 0
195+
for cell in row.cells:
196+
if cell:
197+
original = rows_text[current_row][i]
198+
if original:
199+
_, translated = Language.translate(original.strip(), source=source, target=target, filter_same_content=False)
200+
if translated:
201+
translated = translated.strip()
202+
if translated:
203+
translated = translated.replace('\n', '\\n')
204+
translated = h.handle(translated.strip()).replace('\\n', '<br>').replace('\n', ' ').replace('\\.', '.')
205+
html_box_tables.append((cell, translated))
206+
i += 1
207+
current_row += 1
194208
# TODO TAB HEADERS
195209
# print(tab.header.external)
196210
# if tab.header.external:
@@ -212,17 +226,18 @@ def translate(self, task, source, target): # TODO harmonize
212226
if tabs and original:
213227
l_overlapp = []
214228
for tab in tabs:
215-
# tab y <=
216-
# text in table
217-
if tab.bbox[1] <= bbox[1] and bbox[3] <= tab.bbox[3] + 2:
218-
is_overlapping = True
219-
break
220-
if is_bboxs_overlapping(tab.bbox, bbox):
221-
l_overlapp.append(tab)
229+
if str(tab.bbox) in tabs_extracted:
230+
# tab y <=
231+
# text in table
232+
if tab.bbox[1] <= bbox[1] and bbox[3] <= tab.bbox[3] + 2:
233+
is_overlapping = True
234+
break
235+
if is_bboxs_overlapping(tab.bbox, bbox):
236+
l_overlapp.append(tab)
222237
if len(l_overlapp) == 1:
223238
tab = l_overlapp[0]
224239
# filter start + end
225-
if tab.bbox[1] > bbox[1] and tab.bbox[3] < bbox[3] - 2:
240+
if tab.bbox[1] > bbox[1] + 2 and tab.bbox[3] < bbox[3] - 2:
226241
pass
227242

228243
# Text start
@@ -295,6 +310,8 @@ def translate(self, task, source, target): # TODO harmonize
295310
print(done)
296311
task.update_progress(done, total)
297312

313+
print(task)
314+
298315
# Save translated PDF
299316
# translated = doc.tobytes(garbage=0, deflate=True)
300317
filename = f'{target}_{int(time.time())}_{self.id}.pdf'
@@ -305,6 +322,13 @@ def translate(self, task, source, target): # TODO harmonize
305322
task.complete(filename)
306323
return filename
307324

325+
def delete_translated(self, target):
    """Remove the stored translation of this object for language ``target``.

    Looks up the translated filename recorded for this object's global id.
    When one is recorded, the translation record is deleted and the file is
    removed from ``PDF_TRANSLATED_DIR``. Nothing happens when no translation
    is recorded.

    A translated file that is already missing on disk is tolerated, so a
    forced re-translation is not blocked by a stale record.
    """
    obj_gid = self.get_global_id()
    filename = Language.get_object_translation_language(obj_gid, target)
    if not filename:
        return

    Language.delete_obj_translation(obj_gid, target)
    try:
        os.remove(os.path.join(PDF_TRANSLATED_DIR, filename))
    except FileNotFoundError:
        # The record has just been dropped; if the file is already gone the
        # on-disk state is what we wanted, so do not fail the caller.
        pass
331+
308332
def create(self, content):
309333
filepath = self.get_filepath()
310334
dirname = os.path.dirname(filepath)
@@ -380,7 +404,10 @@ def api_create_translation_task(obj_id, source, target, force=False):
380404
return {'error': 'Invalid Language code'}, 400
381405
obj_gid = obj.get_global_id()
382406
if Language.exists_object_translation_language(obj_gid, target):
383-
return {'error': 'Already Translated'}, 400
407+
if force:
408+
obj.delete_translated(target)
409+
else:
410+
return {'error': 'Already Translated'}, 400
384411
task_uuid = Language.create_translation_task(obj_gid, source, target, force=force)
385412
return task_uuid, 200
386413

bin/modules/Translation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def run(self):
6666
# Wait before next process
6767
time.sleep(self.pending_seconds)
6868
else:
69-
time.sleep(30)
69+
time.sleep(10)
7070

7171

7272
if __name__ == '__main__':

var/www/blueprints/objects_pdf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def pdf_translate():
5757
obj_id = request.form.get('id')
5858
source = request.form.get('source')
5959
target = request.form.get('target')
60-
r = PDFs.api_create_translation_task(obj_id, source, target)
60+
r = PDFs.api_create_translation_task(obj_id, source, target, force=True)
6161
if r[1] != 200:
6262
return create_json_response(r[0], r[1])
6363
else:

0 commit comments

Comments
 (0)