@@ -279,65 +279,25 @@ def find_by_substring(
279279
@classmethod
def get_random_images(cls, conn: Connection, size: int) -> List["Image"]:
    """Return up to ``size`` randomly chosen images whose files still exist.

    Rows are sampled in a single query with ``ORDER BY RANDOM() LIMIT ?``.
    Every sampled row whose ``path`` is no longer present on disk is left
    out of the result and its id is handed to ``cls.safe_batch_remove``.

    Args:
        conn: Open database connection used for the query and the cleanup.
        size: Maximum number of images to return.

    Returns:
        A list of images built with ``cls.from_row``. It is empty when
        ``size <= 0`` and may hold fewer than ``size`` items, because rows
        with a missing file are dropped without re-sampling.
    """
    if size <= 0:
        return []

    images = []
    deleted_ids = []
    with closing(conn.cursor()) as cur:
        cur.execute("SELECT * FROM image ORDER BY RANDOM() LIMIT ?", (size,))
        rows = cur.fetchall()

    # Split the sample into rows that are still backed by a file and rows
    # that are stale (file missing on disk).
    for row in rows:
        img = cls.from_row(row)
        if os.path.exists(img.path):
            images.append(img)
        else:
            deleted_ids.append(img.id)

    # Only call the cleanup helper when there is something to remove.
    if deleted_ids:
        cls.safe_batch_remove(conn, deleted_ids)

    return images
342302
343303
0 commit comments