Skip to content

Commit 88008a2

Browse files
committed
update sync cards script
1 parent f7f47a1 commit 88008a2

File tree

2 files changed

+177
-89
lines changed

2 files changed

+177
-89
lines changed

database/tombll_manage_data.py

Lines changed: 139 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -198,23 +198,49 @@ def list_downloads(lid):
198198

199199

200200
class TailSync:
201-
"""Match sync 5 records tails and add new records."""
201+
"""Match sync record rows and add new records or delete old."""
202202

203203
def __init__(self):
204204
"""Set default member variables."""
205205
self.local = []
206206
self.trle = []
207207

208-
self.local_offset = 0
209-
self.trle_offset = 0
208+
self.local_page_offset = 0
209+
self.trle_page_offset = 0
210210

211211
def match_record(self, a, b):
212212
"""trle.net record data matching."""
213-
keys = ['trle_id', 'author', 'title', 'difficulty', 'duration', 'class', 'type', 'release']
213+
keys = [
214+
'trle_id',
215+
# 'author',
216+
'title',
217+
'difficulty',
218+
'duration',
219+
'class',
220+
'type',
221+
'release'
222+
]
214223
return all(a.get(k) == b.get(k) for k in keys)
215224

216-
def match_tails(self, match_size=5, max_pages=100):
217-
"""Find a tail match between local and trle databases based ids."""
225+
def match_tails(self, match_size=4, max_pages=20):
226+
"""
227+
Find a tail match between local and trle databases based ids.
228+
229+
This checks every level id in ordered steps against a batch of local ids.
230+
When it finds ONE of at least match_size*5 known local ids then it
231+
jumps forward that number of ids, but now there is a problem, trle.net
232+
can have deleted previous known level ids. So we have to go back
233+
a number of steps to match the tails. This is true because trle will
234+
not add records in between old records, only remove or update.
235+
The end of the tail is the point where some records need to be checked
236+
again, updated, deleted or added to the local database. We match against
237+
a batch of known ids not in a specific sequence but based on a jump,
238+
not order.
239+
240+
match_size is n, where tail=n*5 and overshot=n*2+1
241+
tail is the number of most recent ids from the local databases
242+
overshot is the number of steps the algorithm looks backwards after the jump
243+
"""
218244

219245
def get_local_page_and_extend(offset):
220246
con = database_make_connection()
@@ -228,66 +254,67 @@ def get_trle_page_and_extend(offset):
228254
self.trle.extend(page)
229255
return len(page)
230256

231-
def ensure_data(index_local, index_trle):
232-
"""Ensure enough items are available from each database."""
233-
# Load local pages if needed
234-
while index_local + match_size > len(self.local):
235-
self.local_offset += 1
236-
print(f"Loading local page at offset {self.local_offset}")
237-
if self.local_offset >= max_pages or \
238-
get_local_page_and_extend(self.local_offset) == 0:
239-
return False
240-
241-
# Load trle pages if needed
242-
while index_trle + match_size > len(self.trle):
243-
self.trle_offset += 1
244-
print(f"Loading trle page at offset {self.trle_offset}")
245-
if self.trle_offset >= max_pages or get_trle_page_and_extend(self.trle_offset) == 0:
246-
return False
247-
248-
return True
249-
250-
def ids_match(offset_local, offset_trle):
251-
"""Compare sequences of trle_ids from both lists."""
252-
if not ensure_data(offset_local, offset_trle):
253-
return False
254-
255-
for i in range(match_size):
257+
def batch_ids_match(match_size, offset_trle):
258+
"""Compare a batch of local trle ids to one online trle id."""
259+
for i in range(match_size*5):
256260
try:
257-
local_id = int(self.local[offset_local + i]['trle_id'])
258-
trle_id = int(self.trle[offset_trle + i]['trle_id'])
259-
if local_id != trle_id:
260-
return False
261+
local_id = int(self.local[i]['trle_id'])
262+
trle_id = int(self.trle[offset_trle]['trle_id'])
263+
if local_id == trle_id:
264+
return True
261265
except IndexError:
266+
print(f"IndexError at local[{i}] and trle[{offset_trle}]")
262267
return False
263-
return True
264-
268+
return False
269+
270+
def overshot_ids_match(match_size, offset_trle):
271+
"""Compare one to one backwards from local trle ids to one online trle id."""
272+
for i in range(match_size*2+1):
273+
for j in range(match_size*5):
274+
try:
275+
overshot_index = offset_trle + match_size*5 - i - 1
276+
trle_id = int(self.trle[overshot_index]['trle_id'])
277+
local_id = int(self.local[match_size*5 - j - 1]['trle_id'])
278+
if local_id == trle_id:
279+
print(f"Match found at local[{match_size*5 - j - 1}] and trle[{overshot_index}]")
280+
return (match_size*5 - j - 1, overshot_index)
281+
except IndexError:
282+
print(f"IndexError at local[{match_size*5 - j}]"
283+
" and trle[{offset_trle + match_size*5 - i}]")
284+
return None
285+
return None
265286
# Load initial data
266-
get_local_page_and_extend(self.local_offset)
267-
get_trle_page_and_extend(self.trle_offset)
287+
get_local_page_and_extend(self.local_page_offset*20)
288+
289+
# Load local pages if needed
290+
while match_size*5 > len(self.local):
291+
self.local_page_offset += 1
292+
print(f"Loading local page at offset {self.local_page_offset}")
293+
if self.local_page_offset >= max_pages or \
294+
get_local_page_and_extend(self.local_page_offset*20) == 0:
295+
break
296+
297+
get_trle_page_and_extend(self.trle_page_offset*20)
268298

269299
i = 0
270300
while True:
271-
if i + match_size > len(self.local):
272-
# Try to load more local data
273-
if not ensure_data(i, 0):
301+
# If we run out of trle data, load more
302+
if match_size*5 + i > len(self.trle):
303+
self.trle_page_offset += 1
304+
print(f"Loading trle page at offset {self.trle_page_offset}")
305+
if self.trle_page_offset >= max_pages or \
306+
get_trle_page_and_extend(self.trle_page_offset*20) == 0:
274307
break
275308

276-
for j in range(len(self.trle) - match_size + 1):
277-
if ids_match(i, j):
278-
print(f"Match found at local[{i}] and trle[{j}]")
279-
return (i, j)
280-
i += 1
309+
if batch_ids_match(match_size, i):
310+
id_match = overshot_ids_match(match_size, i)
311+
if id_match is not None:
312+
return id_match
281313

282-
# If we run out of trle data, load more
283-
if len(self.trle) - match_size < 1:
284-
added = get_trle_page_and_extend(self.trle_offset + 1)
285-
self.trle_offset += 1
286-
if added == 0 or self.trle_offset >= max_pages:
287-
break
314+
i += 1
288315

289316
print("❌ No match found after paging.")
290-
return None
317+
sys.exit(1)
291318

292319
def run(self):
293320
"""
@@ -297,35 +324,74 @@ def run(self):
297324
2. Add new records from remote page after the match.
298325
3. Update or delete unmatched local records after the match.
299326
"""
327+
con = database_make_connection()
328+
329+
def check_local_trle_id(trle_id):
330+
result = tombll_read.database_level_id(trle_id, con)
331+
if isinstance(result, int):
332+
return True
333+
return False
334+
335+
def has_multivalued_field(trle_id):
336+
result = tombll_read.has_multivalued_field(trle_id, con)
337+
if isinstance(result, int):
338+
return result
339+
return 0
340+
341+
def go_to_next_id(index, array):
342+
current_id = array[index]['trle_id']
343+
i = index
344+
345+
while i < len(array) and array[i]['trle_id'] == current_id:
346+
i -= 1
347+
348+
return i
349+
350+
# get a tail of local records matching a point on trle records
351+
# from old known records to new, when local_start == 0 we have
352+
# only new records left.
300353
local_start, remote_start = self.match_tails()
301-
tail_count = 5
302354

303-
# Stage 1: Check and update mismatched records
304-
for i in range(tail_count):
355+
# update mismatched and remove missing records from trle to local
356+
i = local_start
357+
j = remote_start
358+
while 0 < i:
305359
try:
306-
local = self.local[local_start + i]
307-
remote = self.trle[remote_start + i]
360+
local = self.local[i]
361+
remote = self.trle[j]
308362
except IndexError:
309-
print(f"IndexError self.local[{local_start + i}] self.trle[{remote_start + i}]")
363+
print(f"IndexError self.local[{i}] self.trle[{j}]")
310364
sys.exit(1)
311365

312366
if not self.match_record(local, remote):
313-
update_level(local['trle_id'])
314-
315-
# Stage 2: Add new remote records
316-
existing_ids = {ids['trle_id'] for ids in self.local}
317-
for remote in self.trle[:remote_start]:
318-
if remote['trle_id'] not in existing_ids:
319-
add_level_card(remote['trle_id'])
367+
if local['trle_id'] == remote['trle_id']:
368+
# some attributes were changed, so we update
369+
# this usually happens when a level is new
370+
update_level(local['trle_id'])
371+
else:
372+
# we assume the level was deleted but it might
373+
# appear as a new level later (moved record)
374+
remove_level(local['trle_id'])
375+
376+
# we can hit a multivalued row usually its just different authors
377+
# but could be class also, but its rare. or we go just one record step
378+
i = go_to_next_id(i, self.local)
379+
j = go_to_next_id(j, self.trle)
380+
381+
while 0 < j:
382+
j -= 1
383+
try:
384+
remote = self.trle[j]
385+
except IndexError:
386+
print(f"IndexError self.trle[{j}]")
387+
sys.exit(1)
320388

321-
# Stage 3: Check unmatched local records
322-
for local in self.local[:local_start]:
323-
exists = any(r['trle_id'] == local['trle_id'] for r in self.trle)
324-
if exists:
325-
update_level(local['trle_id'])
389+
if check_local_trle_id(remote['trle_id']):
390+
update_level(remote['trle_id'])
326391
else:
327-
remove_level(local['trle_id'])
392+
add_level_card(remote['trle_id'])
328393

394+
con.close()
329395

330396

331397
def sync_cards():

database/tombll_read.py

Lines changed: 38 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@ def database_author_list(level_id, con):
2929
JOIN Author ON AuthorList.authorID = Author.AuthorID
3030
WHERE AuthorList.levelID = ?
3131
'''
32-
33-
# Fetch all rows from the executed query
3432
return tombll_common.query_return_everything(query, (level_id, ), con)
3533

3634

@@ -56,8 +54,6 @@ def database_level_list(con):
5654
JOIN Author ON AuthorList.authorID = Author.AuthorID
5755
GROUP BY Level.LevelID
5856
'''
59-
60-
# Fetch all rows from the executed query
6157
return tombll_common.query_return_everything(query, None, con)
6258

6359

@@ -84,8 +80,6 @@ def database_zip_list(level_id, con):
8480
WHERE Level.LevelID = ?
8581
GROUP BY Zip.ZipID
8682
'''
87-
88-
# Fetch all rows from the executed query
8983
return tombll_common.query_return_everything(query, (level_id, ), con)
9084

9185

@@ -111,6 +105,35 @@ def database_level_id(trle_id, con):
111105
return tombll_common.query_return_id(query, (trle_id, ), con)
112106

113107

108+
def has_multivalued_field(trle_id, con):
109+
"""
110+
Get the number of records from a TRLE lid that would show on a page.
111+
112+
This function queries the 'Info' table using the given `trle_id` to determine
113+
the number of rows.
114+
115+
Args:
116+
trle_id (int or str): The TRLE level ID to target.
117+
con (sqlite3.Connection): An active SQLite database connection.
118+
119+
Returns:
120+
int: rows for a level ID if it exists in the database, None otherwise.
121+
"""
122+
query = '''
123+
SELECT COUNT(*)
124+
FROM Info
125+
INNER JOIN Level ON (Info.InfoID = Level.infoID)
126+
INNER JOIN AuthorList ON (Level.LevelID = AuthorList.levelID)
127+
INNER JOIN Author ON (Author.AuthorID = AuthorList.authorID)
128+
LEFT JOIN InfoDifficulty ON (InfoDifficulty.InfoDifficultyID = Info.difficulty)
129+
LEFT JOIN InfoDuration ON (InfoDuration.InfoDurationID = Info.duration)
130+
INNER JOIN InfoType ON (InfoType.InfoTypeID = Info.type)
131+
LEFT JOIN InfoClass ON (InfoClass.InfoClassID = Info.class)
132+
WHERE Info.trleID = ?
133+
'''
134+
return tombll_common.query_return_everything(query, (trle_id, ), con)
135+
136+
114137
def database_author_ids(level_id, con):
115138
"""
116139
Get author ID form level ID.
@@ -178,8 +201,6 @@ def database_picture_ids(level_id, con):
178201
JOIN Screens ON Level.LevelID = Screens.levelID
179202
WHERE Level.LevelID = ?
180203
'''
181-
182-
# Fetch all rows from the executed query
183204
return tombll_common.query_return_everything(query, (level_id, ), con)
184205

185206

@@ -212,16 +233,16 @@ def trle_page(offset, con, limit=20, sort_latest_first=False):
212233
page['offset'] = offset
213234
result = []
214235

215-
page['records_total'] = tombll_common.query_return_everything("""
236+
page['records_total'] = tombll_common.query_return_everything('''
216237
SELECT COUNT(*)
217238
FROM Info
218239
INNER JOIN Level ON Info.InfoID = Level.infoID
219240
INNER JOIN AuthorList ON Level.LevelID = AuthorList.levelID
220241
INNER JOIN Author ON Author.AuthorID = AuthorList.authorID
221-
""", None, con)[0][0]
242+
''', None, con)[0][0]
222243

223244
if sort_latest_first:
224-
result = tombll_common.query_return_everything("""
245+
result = tombll_common.query_return_everything('''
225246
SELECT
226247
Info.trleID,
227248
Author.value,
@@ -241,9 +262,9 @@ def trle_page(offset, con, limit=20, sort_latest_first=False):
241262
LEFT JOIN InfoClass ON (InfoClass.InfoClassID = Info.class)
242263
ORDER BY Info.release DESC, Info.trleID DESC
243264
LIMIT ? OFFSET ?
244-
""", (limit, offset), con)
265+
''', (limit, offset), con)
245266
else:
246-
result = tombll_common.query_return_everything("""
267+
result = tombll_common.query_return_everything('''
247268
SELECT
248269
Info.trleID,
249270
Author.value,
@@ -263,7 +284,7 @@ def trle_page(offset, con, limit=20, sort_latest_first=False):
263284
LEFT JOIN InfoClass ON (InfoClass.InfoClassID = Info.class)
264285
ORDER BY Info.release ASC, Info.trleID ASC
265286
LIMIT ? OFFSET ?
266-
""", (limit, offset), con)
287+
''', (limit, offset), con)
267288

268289
for row in result:
269290
level = data_factory.make_trle_level_data()
@@ -282,12 +303,13 @@ def trle_page(offset, con, limit=20, sort_latest_first=False):
282303

283304
def trle_cover_picture(trle_id, con):
284305
"""Get TRLE cover picture."""
285-
return tombll_common.query_return_everything("""
306+
query = '''
286307
SELECT
287308
Picture.data
288309
FROM Info
289310
INNER JOIN Level ON (Info.InfoID = Level.infoID)
290311
INNER JOIN Screens ON (Level.LevelID = Screens.levelID)
291312
INNER JOIN Picture ON (Screens.pictureID = Picture.PictureID)
292313
WHERE Info.trleID = ? AND Screens.position = 0
293-
""", (trle_id, ), con)[0][0]
314+
'''
315+
return tombll_common.query_return_everything(query, (trle_id, ), con)[0][0]

0 commit comments

Comments
 (0)