@@ -198,23 +198,49 @@ def list_downloads(lid):
198198
199199
200200class TailSync :
201- """Match sync 5 records tails and add new records."""
201+ """Match sync record rows and add new records or delete old ones."""
202202
203203 def __init__ (self ):
204204 """Set default member variables."""
205205 self .local = []
206206 self .trle = []
207207
208- self .local_offset = 0
209- self .trle_offset = 0
208+ self .local_page_offset = 0
209+ self .trle_page_offset = 0
210210
211211 def match_record (self , a , b ):
212212 """trle.net record data matching."""
213- keys = ['trle_id' , 'author' , 'title' , 'difficulty' , 'duration' , 'class' , 'type' , 'release' ]
213+ keys = [
214+ 'trle_id' ,
215+ # 'author',
216+ 'title' ,
217+ 'difficulty' ,
218+ 'duration' ,
219+ 'class' ,
220+ 'type' ,
221+ 'release'
222+ ]
214223 return all (a .get (k ) == b .get (k ) for k in keys )
215224
216- def match_tails (self , match_size = 5 , max_pages = 100 ):
217- """Find a tail match between local and trle databases based ids."""
225+ def match_tails (self , match_size = 4 , max_pages = 20 ):
226+ """
227+ Find a tail match between local and trle databases based on ids.
228+
229+ This checks every level id in ordered steps against a batch of local ids.
230+ When it finds ONE of at least match_size*5 known local ids then it
231+ jumps forward that number of ids, but now there is a problem: trle.net
232+ can have deleted previously known level ids. So we have to go back
233+ a number of steps to match the tails. This is true because trle will
234+ not add records in between old records, only remove or update.
235+ The end of the tail is the point where some records need to be checked
236+ again, updated, deleted or added to the local database. We match against
237+ a batch of known ids not in a specific sequence but based on a jump,
238+ not order.
239+
240+ match_size is n, giving tail=n*5 and overshot=n*2+1
241+ tail is the number of most recent ids from the local database
242+ overshot is the number of steps the algorithm looks backwards after the jump
243+ """
218244
219245 def get_local_page_and_extend (offset ):
220246 con = database_make_connection ()
@@ -228,66 +254,67 @@ def get_trle_page_and_extend(offset):
228254 self .trle .extend (page )
229255 return len (page )
230256
231- def ensure_data (index_local , index_trle ):
232- """Ensure enough items are available from each database."""
233- # Load local pages if needed
234- while index_local + match_size > len (self .local ):
235- self .local_offset += 1
236- print (f"Loading local page at offset { self .local_offset } " )
237- if self .local_offset >= max_pages or \
238- get_local_page_and_extend (self .local_offset ) == 0 :
239- return False
240-
241- # Load trle pages if needed
242- while index_trle + match_size > len (self .trle ):
243- self .trle_offset += 1
244- print (f"Loading trle page at offset { self .trle_offset } " )
245- if self .trle_offset >= max_pages or get_trle_page_and_extend (self .trle_offset ) == 0 :
246- return False
247-
248- return True
249-
250- def ids_match (offset_local , offset_trle ):
251- """Compare sequences of trle_ids from both lists."""
252- if not ensure_data (offset_local , offset_trle ):
253- return False
254-
255- for i in range (match_size ):
257+ def batch_ids_match (match_size , offset_trle ):
258+ """Compare a batch of local trle ids to one online trle id."""
259+ for i in range (match_size * 5 ):
256260 try :
257- local_id = int (self .local [offset_local + i ]['trle_id' ])
258- trle_id = int (self .trle [offset_trle + i ]['trle_id' ])
259- if local_id ! = trle_id :
260- return False
261+ local_id = int (self .local [i ]['trle_id' ])
262+ trle_id = int (self .trle [offset_trle ]['trle_id' ])
263+ if local_id = = trle_id :
264+ return True
261265 except IndexError :
266+ print (f"IndexError at local[{ i } ] and trle[{ offset_trle } ]" )
262267 return False
263- return True
264-
268+ return False
269+
270+ def overshot_ids_match (match_size , offset_trle ):
271+ """Compare one to one backwards from local trle ids to one online trle id."""
272+ for i in range (match_size * 2 + 1 ):
273+ for j in range (match_size * 5 ):
274+ try :
275+ overshot_index = offset_trle + match_size * 5 - i - 1
276+ trle_id = int (self .trle [overshot_index ]['trle_id' ])
277+ local_id = int (self .local [match_size * 5 - j - 1 ]['trle_id' ])
278+ if local_id == trle_id :
279+ print (f"Match found at local[{ match_size * 5 - j - 1 } ] and trle[{ overshot_index } ]" )
280+ return (match_size * 5 - j - 1 , overshot_index )
281+ except IndexError :
282+ print (f"IndexError at local[{ match_size * 5 - j } ]"
283+ " and trle[{offset_trle + match_size*5 - i}]" )
284+ return None
285+ return None
265286 # Load initial data
266- get_local_page_and_extend (self .local_offset )
267- get_trle_page_and_extend (self .trle_offset )
287+ get_local_page_and_extend (self .local_page_offset * 20 )
288+
289+ # Load local pages if needed
290+ while match_size * 5 > len (self .local ):
291+ self .local_page_offset += 1
292+ print (f"Loading local page at offset { self .local_page_offset } " )
293+ if self .local_page_offset >= max_pages or \
294+ get_local_page_and_extend (self .local_page_offset * 20 ) == 0 :
295+ break
296+
297+ get_trle_page_and_extend (self .trle_page_offset * 20 )
268298
269299 i = 0
270300 while True :
271- if i + match_size > len (self .local ):
272- # Try to load more local data
273- if not ensure_data (i , 0 ):
301+ # If we run out of trle data, load more
302+ if match_size * 5 + i > len (self .trle ):
303+ self .trle_page_offset += 1
304+ print (f"Loading trle page at offset { self .trle_page_offset } " )
305+ if self .trle_page_offset >= max_pages or \
306+ get_trle_page_and_extend (self .trle_page_offset * 20 ) == 0 :
274307 break
275308
276- for j in range (len (self .trle ) - match_size + 1 ):
277- if ids_match (i , j ):
278- print (f"Match found at local[{ i } ] and trle[{ j } ]" )
279- return (i , j )
280- i += 1
309+ if batch_ids_match (match_size , i ):
310+ id_match = overshot_ids_match (match_size , i )
311+ if id_match is not None :
312+ return id_match
281313
282- # If we run out of trle data, load more
283- if len (self .trle ) - match_size < 1 :
284- added = get_trle_page_and_extend (self .trle_offset + 1 )
285- self .trle_offset += 1
286- if added == 0 or self .trle_offset >= max_pages :
287- break
314+ i += 1
288315
289316 print ("❌ No match found after paging." )
290- return None
317+ sys . exit ( 1 )
291318
292319 def run (self ):
293320 """
@@ -297,35 +324,74 @@ def run(self):
297324 2. Add new records from remote page after the match.
298325 3. Update or delete unmatched local records after the match.
299326 """
327+ con = database_make_connection ()
328+
329+ def check_local_trle_id (trle_id ):
330+ result = tombll_read .database_level_id (trle_id , con )
331+ if isinstance (result , int ):
332+ return True
333+ return False
334+
335+ def has_multivalued_field (trle_id ):
336+ result = tombll_read .has_multivalued_field (trle_id , con )
337+ if isinstance (result , int ):
338+ return result
339+ return 0
340+
341+ def go_to_next_id (index , array ):
342+ current_id = array [index ]['trle_id' ]
343+ i = index
344+
345+ while i < len (array ) and array [i ]['trle_id' ] == current_id :
346+ i -= 1
347+
348+ return i
349+
350+ # get a tail of local records matching a point on trle records
351+ # from old known records to new, when local_start == 0 we have
352+ # only new records left.
300353 local_start , remote_start = self .match_tails ()
301- tail_count = 5
302354
303- # Stage 1: Check and update mismatched records
304- for i in range (tail_count ):
355+ # update mismatched and remove missing records from trle to local
356+ i = local_start
357+ j = remote_start
358+ while 0 < i :
305359 try :
306- local = self .local [local_start + i ]
307- remote = self .trle [remote_start + i ]
360+ local = self .local [i ]
361+ remote = self .trle [j ]
308362 except IndexError :
309- print (f"IndexError self.local[{ local_start + i } ] self.trle[{ remote_start + i } ]" )
363+ print (f"IndexError self.local[{ i } ] self.trle[{ j } ]" )
310364 sys .exit (1 )
311365
312366 if not self .match_record (local , remote ):
313- update_level (local ['trle_id' ])
314-
315- # Stage 2: Add new remote records
316- existing_ids = {ids ['trle_id' ] for ids in self .local }
317- for remote in self .trle [:remote_start ]:
318- if remote ['trle_id' ] not in existing_ids :
319- add_level_card (remote ['trle_id' ])
367+ if local ['trle_id' ] == remote ['trle_id' ]:
368+ # some attributes were changed, so we update
369+ # this usually happens when a level is new
370+ update_level (local ['trle_id' ])
371+ else :
372+ # we assume the level was deleted but it might
373+ # appear as a new level later (moved record)
374+ remove_level (local ['trle_id' ])
375+
376+ # we can hit a multivalued row; usually it's just different authors
377+ # but it could also be class, though that's rare. Otherwise we go just one record step
378+ i = go_to_next_id (i , self .local )
379+ j = go_to_next_id (j , self .trle )
380+
381+ while 0 < j :
382+ j -= 1
383+ try :
384+ remote = self .trle [j ]
385+ except IndexError :
386+ print (f"IndexError self.trle[{ j } ]" )
387+ sys .exit (1 )
320388
321- # Stage 3: Check unmatched local records
322- for local in self .local [:local_start ]:
323- exists = any (r ['trle_id' ] == local ['trle_id' ] for r in self .trle )
324- if exists :
325- update_level (local ['trle_id' ])
389+ if check_local_trle_id (remote ['trle_id' ]):
390+ update_level (remote ['trle_id' ])
326391 else :
327- remove_level ( local ['trle_id' ])
392+ add_level_card ( remote ['trle_id' ])
328393
394+ con .close ()
329395
330396
331397def sync_cards ():
0 commit comments