 import itertools
 import stashapi.log as log
 from stashapi.stashapp import StashInterface
-from typing import List
+from typing import List, Optional, Tuple
 
 MD5_RE = re.compile(r"^[a-f0-9]{32}$")
 
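
For reference, MD5_RE accepts exactly the lowercase 32-hex-digit form e621 uses for content hashes:

    assert MD5_RE.match("d41d8cd98f00b204e9800998ecf8427e")  # md5 of the empty string
    assert not MD5_RE.match("My Vacation Photo 01.jpg")      # ordinary filenames don't match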
@@ -94,7 +94,11 @@ def stream_scenes(
 
 def process_e621_post_for_item(
     stash: StashInterface, item_type: str, item_id: str, item_md5: str
-) -> None:
+) -> bool:
+    """
+    CHANGED: return a boolean indicating whether the item was updated/marked (True) or left untouched (False).
+    This lets the caller (the main loop) increment progress only when an item actually changed state.
+    """
     # Fetch latest object to check tags
     if item_type == "image":
         obj = stash.find_image(item_id)
@@ -110,7 +114,7 @@ def process_e621_post_for_item(
     )
 
     if already_tagged or already_failed:
-        return
+        return False  # nothing to do
 
     try:
         time.sleep(0.5)
@@ -125,14 +129,19 @@ def process_e621_post_for_item(
         log.error(f"Marking as failed. e621 API error: {str(e)}")
         e621_tag_failed = get_or_create_tag(stash, "e621_tag_failed")
         fail_ids = [e621_tag_failed["id"]] + [t["id"] for t in obj.get("tags", [])]
-        if item_type == "image":
-            stash.update_image({"id": item_id, "tag_ids": list(set(fail_ids))})
-        else:
-            stash.update_scene({"id": item_id, "tag_ids": list(set(fail_ids))})
-        return
+        try:
+            if item_type == "image":
+                stash.update_image({"id": item_id, "tag_ids": list(set(fail_ids))})
+            else:
+                stash.update_scene({"id": item_id, "tag_ids": list(set(fail_ids))})
+            return True
+        except Exception as e2:
+            log.error(f"Failed to mark as failed: {str(e2)}")
+            return False
 
     if not post_data:
-        return
+        # Not found on e621: leave untouched so it can be retried later (the user may decide to mark it failed).
+        return False
 
     e621_tag = get_or_create_tag(stash, "e621_tagged")
     post_url = f"https://e621.net/posts/{post_data['id']}"
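
The actual e621 request sits outside these hunks. For context, a minimal md5 lookup against e621's public API might look like the sketch below; e621_post_by_md5 is a hypothetical helper for illustration, not the plugin's code, and e621 asks for a descriptive User-Agent:

    import requests
    from typing import Optional

    def e621_post_by_md5(md5: str) -> Optional[dict]:
        # Query e621's post search with the md5: metatag.
        resp = requests.get(
            "https://e621.net/posts.json",
            params={"tags": f"md5:{md5}"},
            headers={"User-Agent": "stash-e621-tagger/1.0 (illustrative)"},
            timeout=30,
        )
        resp.raise_for_status()
        posts = resp.json().get("posts", [])
        return posts[0] if posts else None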
@@ -173,8 +182,10 @@ def process_e621_post_for_item(
         else:
             stash.update_scene(update_payload)
             log.info(f"Scene updated: {item_id}")
+        return True
     except Exception as e:
         log.error(f"Update failed: {str(e)}")
+        return False
 
 
 def get_or_create_tag(stash: StashInterface, tag_name: str) -> dict:
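
The new return contract, as a call site sees it (placeholder id and md5; only the boolean matters):

    # True: tagged from e621 data, or marked e621_tag_failed.
    # False: already handled, not found on e621, or the update itself failed.
    if process_e621_post_for_item(stash, "image", "42", "d41d8cd98f00b204e9800998ecf8427e"):
        pass  # count the item toward progress; it changed state
    else:
        pass  # leave it for a later pass to retry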
@@ -227,10 +238,13 @@ def get_or_create_performer(stash: StashInterface, name: str) -> dict:
     return performers[0] if performers else stash.create_performer({"name": name})
 
 
-def scrape_image(client: StashInterface, image_id: str) -> None:
+def scrape_image(client: StashInterface, image_id: str) -> bool:
+    """
+    PAGINATION: return True if the item was updated/marked, so the main loop can count progress.
+    """
     image = client.find_image(image_id)
     if not image or not image.get("visual_files"):
-        return
+        return False
 
     file_data = image["visual_files"][0]
     filename = file_data.get("basename", "")
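
The md5 derivation itself is elided in these hunks. A self-contained sketch of the assumed behavior (md5_for_file is a hypothetical helper, not part of this plugin): reuse an md5-shaped basename, otherwise hash the file contents.

    import hashlib
    import os
    import re

    MD5_RE = re.compile(r"^[a-f0-9]{32}$")

    def md5_for_file(path: str) -> str:
        # e621 downloads are typically named by their content md5, so an
        # md5-shaped basename can be trusted without re-reading the file.
        stem = os.path.splitext(os.path.basename(path))[0].lower()
        if MD5_RE.match(stem):
            return stem
        digest = hashlib.md5()
        with open(path, "rb") as fh:
            for chunk in iter(lambda: fh.read(1 << 20), b""):
                digest.update(chunk)
        return digest.hexdigest()

The same fallback applies to scenes below: a matching basename short-circuits the hash, and anything else is read in chunks.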
@@ -256,15 +270,18 @@ def scrape_image(client: StashInterface, image_id: str) -> None:
             log.info(f"Generated content MD5 for image: {final_md5}")
         except Exception as e:
             log.error(f"Failed to generate MD5 for image: {str(e)}")
-            return
+            return False
 
-    process_e621_post_for_item(client, "image", image_id, final_md5)
+    return process_e621_post_for_item(client, "image", image_id, final_md5)
 
 
-def scrape_scene(client: StashInterface, scene_id: str) -> None:
+def scrape_scene(client: StashInterface, scene_id: str) -> bool:
+    """
+    PAGINATION: return True if the item was updated/marked, so the main loop can count progress.
+    """
     scene = client.find_scene(scene_id)
     if not scene:
-        return
+        return False
 
     final_md5 = None
 
@@ -297,17 +314,16 @@ def scrape_scene(client: StashInterface, scene_id: str) -> None:
             log.info(f"Generated content MD5 for scene: {final_md5}")
         except Exception as e:
             log.error(f"Failed to generate MD5 for scene: {str(e)}")
-            return
+            return False
     else:
         log.error(f"No files found for scene {scene_id}; cannot compute md5")
-        return
+        return False
 
-    if final_md5:
-        process_e621_post_for_item(client, "scene", scene_id, final_md5)
+    return process_e621_post_for_item(client, "scene", scene_id, final_md5)
 
 
 if __name__ == "__main__":
-    log.info("Starting tagger with stable pagination snapshot (streamed) ...")
+    log.info("Starting tagger with scanning passes until no work is left ...")
     json_input = json.loads(sys.stdin.read())
     stash = StashInterface(json_input["server_connection"])
 
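
The rescan design below addresses a pagination pitfall: every successful update adds a skip tag, which removes the item from the filtered result set while it is still being paginated, so advancing the page number over a shrinking set skips survivors. Repeating whole passes until one finishes with zero processed items guarantees nothing is missed. A generic sketch of that drain-until-stable pattern (illustrative, not the plugin's code):

    from typing import Callable, List

    def drain(fetch_page: Callable[[int], List[dict]], handle: Callable[[dict], bool]) -> None:
        # Repeat full passes until a pass handles nothing; items that handle()
        # mutates drop out of the filter, so single-pass pagination would skip
        # the items shifted into their places.
        while True:
            handled = 0
            page = 1
            while True:
                batch = fetch_page(page)
                if not batch:
                    break
                handled += sum(1 for item in batch if handle(item))
                page += 1
            if handled == 0:
                return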
@@ -337,30 +353,117 @@ def scrape_scene(client: StashInterface, scene_id: str) -> None:
 
     log.info(f"Total items (images + scenes): {total}")
 
-    stream = itertools.chain(
-        stream_images(
-            stash, skip_tag_ids, settings["ExcludeOrganized"], per_page=per_page
-        ),
-        stream_scenes(
-            stash, skip_tag_ids, settings["ExcludeOrganized"], per_page=per_page
-        ),
-    )
-
-    for idx, (item_type, item) in enumerate(stream, start=1):
-        log.progress(float(idx - 1) / float(total))
-
-        item_id = item["id"]
-        current_tag_ids = [t["id"] for t in item.get("tags", [])]
-        if any(tid in current_tag_ids for tid in skip_tag_ids):
-            log.info(f"Skipping {item_type} {item_id} - contains skip tag")
-            log.progress(float(idx) / float(total))
-            continue
-
-        if item_type == "image":
-            scrape_image(stash, item_id)
-        else:
-            scrape_scene(stash, item_id)
+    processed_count = 0
+    pass_num = 0
+    # Loop passes until a full pass processes zero items.
+    while True:
+        pass_num += 1
+        log.info(f"Starting scanning pass #{pass_num}")
+        pass_processed = 0
+
+        # Scan images by pages
+        page = 1
+        while True:
+            pagination = {
+                "page": page,
+                "per_page": per_page,
+                "sort": "created_at",
+                "direction": "ASC",
+            }
+            images = stash.find_images(f=_build_filter(skip_tag_ids, settings["ExcludeOrganized"]), filter=pagination)
+            log.info(f"[pass {pass_num}] fetched image page {page}, count={len(images)}")
+            if not images:
+                break
+            for img in images:
+                item_id = img.get("id")
+                if not item_id:
+                    log.error(f"[pass {pass_num}] image without id on page {page}")
+                    continue
+
+                # Defensive re-fetch of current tags to avoid race conditions
+                current = stash.find_image(item_id)
+                current_tag_ids = [t["id"] for t in (current or {}).get("tags", [])]
+                if any(tid in current_tag_ids for tid in skip_tag_ids):
+                    # Shouldn't usually happen because the filter excludes them, but handle it gracefully.
+                    log.info(f"[pass {pass_num}] skipping image {item_id} - now has a skip tag")
+                    processed_count += 1
+                    pass_processed += 1
+                    log.progress(float(processed_count) / float(total))
+                    continue
+
+                # Attempt to process; scrape_image now returns True if it updated/marked the item.
+                try:
+                    updated = scrape_image(stash, item_id)
+                except Exception as e:
+                    log.error(f"[pass {pass_num}] scrape_image exception for {item_id}: {str(e)}")
+                    updated = False
+
+                if updated:
+                    processed_count += 1
+                    pass_processed += 1
+                    log.info(f"[pass {pass_num}] processed image {item_id} (processed_count={processed_count})")
+                    log.progress(float(processed_count) / float(total))
+                # If not updated, the item stays eligible for future passes. Continue scanning.
+
+            # If the page came back short, we're at the end of the current snapshot.
+            if len(images) < per_page:
+                break
+            page += 1
+
+        # Scan scenes by pages
+        page = 1
+        while True:
+            pagination = {
+                "page": page,
+                "per_page": per_page,
+                "sort": "created_at",
+                "direction": "ASC",
+            }
+            scenes = stash.find_scenes(f=_build_filter(skip_tag_ids, settings["ExcludeOrganized"]), filter=pagination)
+            log.info(f"[pass {pass_num}] fetched scene page {page}, count={len(scenes)}")
+            if not scenes:
+                break
+            for sc in scenes:
+                item_id = sc.get("id")
+                if not item_id:
+                    log.error(f"[pass {pass_num}] scene without id on page {page}")
+                    continue
+
+                # Defensive re-fetch
+                current = stash.find_scene(item_id)
+                current_tag_ids = [t["id"] for t in (current or {}).get("tags", [])]
+                if any(tid in current_tag_ids for tid in skip_tag_ids):
+                    log.info(f"[pass {pass_num}] skipping scene {item_id} - now has a skip tag")
+                    processed_count += 1
+                    pass_processed += 1
+                    log.progress(float(processed_count) / float(total))
+                    continue
+
+                try:
+                    updated = scrape_scene(stash, item_id)
+                except Exception as e:
+                    log.error(f"[pass {pass_num}] scrape_scene exception for {item_id}: {str(e)}")
+                    updated = False
+
+                if updated:
+                    processed_count += 1
+                    pass_processed += 1
+                    log.info(f"[pass {pass_num}] processed scene {item_id} (processed_count={processed_count})")
+                    log.progress(float(processed_count) / float(total))
+
+            if len(scenes) < per_page:
+                break
+            page += 1
+
+        log.info(f"Pass #{pass_num} finished. Items processed this pass: {pass_processed}")
+
+        # If no items were processed in a full pass, we're done.
+        if pass_processed == 0:
+            log.info("No items processed in the last pass; finishing scan.")
+            break
 
-        log.progress(float(idx) / float(total))
+        # Small sleep to avoid hammering the API and to let the DB settle between passes.
+        time.sleep(0.2)
 
+    # Ensure the progress bar reads 100% at the end.
     log.progress(1.0)
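
_build_filter is not part of this diff. A plausible sketch of what it returns, assuming Stash's usual filter shape (the field names here are assumptions to verify against the plugin's actual helper):

    from typing import List

    def _build_filter(skip_tag_ids: List[str], exclude_organized: bool) -> dict:
        # Exclude anything already carrying a skip tag (e621_tagged, e621_tag_failed, ...).
        f: dict = {"tags": {"value": skip_tag_ids, "modifier": "EXCLUDES"}}
        if exclude_organized:
            f["organized"] = False  # only scan items not yet marked organized
        return f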