@@ -165,32 +165,46 @@ def translate(self, task, source, target): # TODO harmonize
165165 # p = 0
166166 for page in doc :
167167 # p += 1
168- # if p != 3 :
168+ # if p != 31 :
169169 # continue
170170 tabs = page .find_tables () # detect the tables
171171 tabs_extracted = {}
172172 html_box_tables = []
173173 for tab in tabs :
174174 # print(tab)
175175 rows_text = tab .extract ()
176- tabs_extracted [str (tab .bbox )] = rows_text
177- current_row = 0
178- # table coord -> tab.bbox
179- for row in tab .rows :
180- i = 0
181- for cell in row .cells :
182- if cell :
183- original = rows_text [current_row ][i ]
184- if original :
185- _ , translated = Language .translate (original .strip (), source = source , target = target , filter_same_content = False )
186- if translated :
187- translated = translated .strip ()
188- if translated :
189- translated = translated .replace ('\n ' , '\\ n' )
190- translated = h .handle (translated .strip ()).replace ('\\ n' , '<br>' ).replace ('\n ' , ' ' ).replace ('\\ .' , '.' )
191- html_box_tables .append ((cell , translated ))
192- i += 1
193- current_row += 1
176+ # check if is a real table
177+ nb_cell = 0
178+ none_column = False
179+ for column in rows_text :
180+ nb_column = 0
181+ for v in column :
182+ if v :
183+ nb_column += 1
184+ if nb_column < 1 :
185+ none_column = True
186+ break
187+ else :
188+ nb_cell += nb_column
189+ if nb_cell > 1 and not none_column :
190+ tabs_extracted [str (tab .bbox )] = rows_text
191+ current_row = 0
192+ # table coord -> tab.bbox
193+ for row in tab .rows :
194+ i = 0
195+ for cell in row .cells :
196+ if cell :
197+ original = rows_text [current_row ][i ]
198+ if original :
199+ _ , translated = Language .translate (original .strip (), source = source , target = target , filter_same_content = False )
200+ if translated :
201+ translated = translated .strip ()
202+ if translated :
203+ translated = translated .replace ('\n ' , '\\ n' )
204+ translated = h .handle (translated .strip ()).replace ('\\ n' , '<br>' ).replace ('\n ' , ' ' ).replace ('\\ .' , '.' )
205+ html_box_tables .append ((cell , translated ))
206+ i += 1
207+ current_row += 1
194208 # TODO TAB HEADERS
195209 # print(tab.header.external)
196210 # if tab.header.external:
@@ -212,17 +226,18 @@ def translate(self, task, source, target): # TODO harmonize
212226 if tabs and original :
213227 l_overlapp = []
214228 for tab in tabs :
215- # tab y <=
216- # text in table
217- if tab .bbox [1 ] <= bbox [1 ] and bbox [3 ] <= tab .bbox [3 ] + 2 :
218- is_overlapping = True
219- break
220- if is_bboxs_overlapping (tab .bbox , bbox ):
221- l_overlapp .append (tab )
229+ if str (tab .bbox ) in tabs_extracted :
230+ # tab y <=
231+ # text in table
232+ if tab .bbox [1 ] <= bbox [1 ] and bbox [3 ] <= tab .bbox [3 ] + 2 :
233+ is_overlapping = True
234+ break
235+ if is_bboxs_overlapping (tab .bbox , bbox ):
236+ l_overlapp .append (tab )
222237 if len (l_overlapp ) == 1 :
223238 tab = l_overlapp [0 ]
224239 # filter start + end
225- if tab .bbox [1 ] > bbox [1 ] and tab .bbox [3 ] < bbox [3 ] - 2 :
240+ if tab .bbox [1 ] > bbox [1 ] + 2 and tab .bbox [3 ] < bbox [3 ] - 2 :
226241 pass
227242
228243 # Text start
@@ -295,6 +310,8 @@ def translate(self, task, source, target): # TODO harmonize
295310 print (done )
296311 task .update_progress (done , total )
297312
313+ print (task )
314+
298315 # Save translated PDF
299316 # translated = doc.tobytes(garbage=0, deflate=True)
300317 filename = f'{ target } _{ int (time .time ())} _{ self .id } .pdf'
@@ -305,6 +322,13 @@ def translate(self, task, source, target): # TODO harmonize
305322 task .complete (filename )
306323 return filename
307324
def delete_translated(self, target):
    """
    Delete the stored translation of this object for the ``target`` language.

    Looks up the filename registered for this object's global id and the
    given language. When one is registered, the translation record is
    removed and the matching file under PDF_TRANSLATED_DIR is deleted.
    Nothing happens when no filename is registered.

    :param target: language code of the translation to delete
    """
    obj_gid = self.get_global_id()
    filename = Language.get_object_translation_language(obj_gid, target)
    if not filename:
        return
    Language.delete_obj_translation(obj_gid, target)
    try:
        os.remove(os.path.join(PDF_TRANSLATED_DIR, filename))
    except FileNotFoundError:
        # The record has already been dropped above; a file that is already
        # missing on disk must not abort the caller (e.g. a forced
        # re-translation), since there is nothing left to clean up.
        pass
331+
308332 def create (self , content ):
309333 filepath = self .get_filepath ()
310334 dirname = os .path .dirname (filepath )
@@ -380,7 +404,10 @@ def api_create_translation_task(obj_id, source, target, force=False):
380404 return {'error' : 'Invalid Language code' }, 400
381405 obj_gid = obj .get_global_id ()
382406 if Language .exists_object_translation_language (obj_gid , target ):
383- return {'error' : 'Already Translated' }, 400
407+ if force :
408+ obj .delete_translated (target )
409+ else :
410+ return {'error' : 'Already Translated' }, 400
384411 task_uuid = Language .create_translation_task (obj_gid , source , target , force = force )
385412 return task_uuid , 200
386413
0 commit comments