Commit 0b61ddd

e621_tagger - fix pagination (#616)
1 parent 233d90c commit 0b61ddd


2 files changed (+148, -45 lines)


plugins/e621_tagger/e621_tagger.py

Lines changed: 147 additions & 44 deletions
@@ -7,7 +7,7 @@
 import itertools
 import stashapi.log as log
 from stashapi.stashapp import StashInterface
-from typing import List
+from typing import List, Optional, Tuple
 
 MD5_RE = re.compile(r"^[a-f0-9]{32}$")
 
@@ -94,7 +94,11 @@ def stream_scenes(
 
 def process_e621_post_for_item(
     stash: StashInterface, item_type: str, item_id: str, item_md5: str
-) -> None:
+) -> bool:
+    """
+    CHANGED: return boolean indicating whether the item was updated/marked (True) or left untouched (False).
+    This lets the caller (main loop) increment progress only when an item actually changed state.
+    """
     # Fetch latest object to check tags
     if item_type == "image":
         obj = stash.find_image(item_id)
@@ -110,7 +114,7 @@ def process_e621_post_for_item(
     )
 
     if already_tagged or already_failed:
-        return
+        return False  # nothing to do
 
     try:
         time.sleep(0.5)
@@ -125,14 +129,19 @@ def process_e621_post_for_item(
         log.error(f"Marking as failed. e621 API error: {str(e)}")
         e621_tag_failed = get_or_create_tag(stash, "e621_tag_failed")
         fail_ids = [e621_tag_failed["id"]] + [t["id"] for t in obj.get("tags", [])]
-        if item_type == "image":
-            stash.update_image({"id": item_id, "tag_ids": list(set(fail_ids))})
-        else:
-            stash.update_scene({"id": item_id, "tag_ids": list(set(fail_ids))})
-        return
+        try:
+            if item_type == "image":
+                stash.update_image({"id": item_id, "tag_ids": list(set(fail_ids))})
+            else:
+                stash.update_scene({"id": item_id, "tag_ids": list(set(fail_ids))})
+            return True
+        except Exception as e2:
+            log.error(f"Failed to mark as failed: {str(e2)}")
+            return False
 
     if not post_data:
-        return
+        # not found on e621: leave untouched so it can be retried later (or user may decide to mark failed)
+        return False
 
     e621_tag = get_or_create_tag(stash, "e621_tagged")
     post_url = f"https://e621.net/posts/{post_data['id']}"
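
The hunk above wraps the "mark as failed" update in its own try/except, so a second failure (the tag update itself erroring) is logged and reported as False instead of escaping and aborting the run. The same guard in isolation, with a hypothetical mark_failed callable standing in for the stash update call:

import logging
from typing import Callable

log = logging.getLogger("e621_tagger")


def safe_mark_failed(mark_failed: Callable[[], None]) -> bool:
    """Return True if the failure tag was applied, False if even that step failed."""
    try:
        mark_failed()  # stand-in for stash.update_image/update_scene with the fail tag
        return True
    except Exception as exc:
        log.error(f"Failed to mark as failed: {exc}")
        return False
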
@@ -173,8 +182,10 @@ def process_e621_post_for_item(
         else:
             stash.update_scene(update_payload)
             log.info(f"Scene updated: {item_id}")
+        return True
     except Exception as e:
         log.error(f"Update failed: {str(e)}")
+        return False
 
 
 def get_or_create_tag(stash: StashInterface, tag_name: str) -> dict:
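
With this change, process_e621_post_for_item and the scrape_* wrappers below report True only when an item actually changed state (tagged or marked failed), so the caller can count progress per change rather than per item visited. A minimal standalone sketch of that contract, with a hypothetical process_item callable in place of the plugin's functions:

from typing import Callable, Iterable


def run_pass(item_ids: Iterable[str], process_item: Callable[[str], bool]) -> int:
    """Count how many items actually changed state during one pass."""
    changed = 0
    for item_id in item_ids:
        # process_item stands in for process_e621_post_for_item: it returns
        # True only when the item was tagged or marked as failed.
        if process_item(item_id):
            changed += 1
    return changed


# Toy usage: only two of the three items report a change.
outcomes = {"1": True, "2": False, "3": True}
assert run_pass(["1", "2", "3"], lambda i: outcomes[i]) == 2
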
@@ -227,10 +238,13 @@ def get_or_create_performer(stash: StashInterface, name: str) -> dict:
     return performers[0] if performers else stash.create_performer({"name": name})
 
 
-def scrape_image(client: StashInterface, image_id: str) -> None:
+def scrape_image(client: StashInterface, image_id: str) -> bool:
+    """
+    PAGINATION: return True if item was updated/marked (so main loop can count progress).
+    """
     image = client.find_image(image_id)
     if not image or not image.get("visual_files"):
-        return
+        return False
 
     file_data = image["visual_files"][0]
     filename = file_data.get("basename", "")
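
From the basename fetched here, scrape_image either reuses an MD5-style filename or generates a content MD5 (the "Generated content MD5" paths in the next hunks); the hashing itself is outside this diff. A rough sketch of that kind of content hashing, assuming direct access to the file on disk (the path handling and md5_for helper are hypothetical):

import hashlib
import re

MD5_RE = re.compile(r"^[a-f0-9]{32}$")


def content_md5(path: str, chunk_size: int = 1 << 20) -> str:
    """Hash the file in chunks so large videos are not read into memory at once."""
    digest = hashlib.md5()
    with open(path, "rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def md5_for(basename: str, path: str) -> str:
    # Reuse the basename when it already looks like an e621 MD5, otherwise hash the content.
    stem = basename.rsplit(".", 1)[0].lower()
    return stem if MD5_RE.match(stem) else content_md5(path)
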
@@ -256,15 +270,18 @@ def scrape_image(client: StashInterface, image_id: str) -> None:
             log.info(f"Generated content MD5 for image: {final_md5}")
         except Exception as e:
             log.error(f"Failed to generate MD5 for image: {str(e)}")
-            return
+            return False
 
-    process_e621_post_for_item(client, "image", image_id, final_md5)
+    return process_e621_post_for_item(client, "image", image_id, final_md5)
 
 
-def scrape_scene(client: StashInterface, scene_id: str) -> None:
+def scrape_scene(client: StashInterface, scene_id: str) -> bool:
+    """
+    PAGINATION: return True if item was updated/marked (so main loop can count progress).
+    """
     scene = client.find_scene(scene_id)
     if not scene:
-        return
+        return False
 
     final_md5 = None
 
@@ -297,17 +314,16 @@ def scrape_scene(client: StashInterface, scene_id: str) -> None:
             log.info(f"Generated content MD5 for scene: {final_md5}")
         except Exception as e:
             log.error(f"Failed to generate MD5 for scene: {str(e)}")
-            return
+            return False
     else:
         log.error(f"No files found for scene {scene_id}; cannot compute md5")
-        return
+        return False
 
-    if final_md5:
-        process_e621_post_for_item(client, "scene", scene_id, final_md5)
+    return process_e621_post_for_item(client, "scene", scene_id, final_md5)
 
 
 if __name__ == "__main__":
-    log.info("Starting tagger with stable pagination snapshot (streamed)...")
+    log.info("Starting tagger with scanning passes until no work left...")
     json_input = json.loads(sys.stdin.read())
     stash = StashInterface(json_input["server_connection"])
 
@@ -337,30 +353,117 @@ def scrape_scene(client: StashInterface, scene_id: str) -> None:
 
     log.info(f"Total items (images + scenes): {total}")
 
-    stream = itertools.chain(
-        stream_images(
-            stash, skip_tag_ids, settings["ExcludeOrganized"], per_page=per_page
-        ),
-        stream_scenes(
-            stash, skip_tag_ids, settings["ExcludeOrganized"], per_page=per_page
-        ),
-    )
-
-    for idx, (item_type, item) in enumerate(stream, start=1):
-        log.progress(float(idx - 1) / float(total))
-
-        item_id = item["id"]
-        current_tag_ids = [t["id"] for t in item.get("tags", [])]
-        if any(tid in current_tag_ids for tid in skip_tag_ids):
-            log.info(f"Skipping {item_type} {item_id} - contains skip tag")
-            log.progress(float(idx) / float(total))
-            continue
-
-        if item_type == "image":
-            scrape_image(stash, item_id)
-        else:
-            scrape_scene(stash, item_id)
+    processed_count = 0
+    pass_num = 0
+    # Loop passes until a full pass processes zero items.
+    while True:
+        pass_num += 1
+        log.info(f"Starting scanning pass #{pass_num}")
+        pass_processed = 0
+
+        # Scan images by pages
+        page = 1
+        while True:
+            pagination = {
+                "page": page,
+                "per_page": per_page,
+                "sort": "created_at",
+                "direction": "ASC",
+            }
+            images = stash.find_images(f=_build_filter(skip_tag_ids, settings["ExcludeOrganized"]), filter=pagination)
+            log.info(f"[pass {pass_num}] fetched image page {page}, count={len(images)}")
+            if not images:
+                break
+            for img in images:
+                item_id = img.get("id")
+                if not item_id:
+                    log.error(f"[pass {pass_num}] image without id on page {page}")
+                    continue
+
+                # Defensive fetch of current tags to avoid race conditions
+                current = stash.find_image(item_id)
+                current_tag_ids = [t["id"] for t in current.get("tags", [])]
+                if any(tid in current_tag_ids for tid in skip_tag_ids):
+                    # Shouldn't usually happen because filter excluded them, but handle gracefully.
+                    log.info(f"[pass {pass_num}] skipping image {item_id} - now has skip tag")
+                    processed_count += 1
+                    pass_processed += 1
+                    log.progress(float(processed_count) / float(total))
+                    continue
+
+                # Attempt to process; scrape_image now returns True if it updated/marked the item.
+                try:
+                    updated = scrape_image(stash, item_id)
+                except Exception as e:
+                    log.error(f"[pass {pass_num}] scrape_image exception for {item_id}: {str(e)}")
+                    updated = False
+
+                if updated:
+                    processed_count += 1
+                    pass_processed += 1
+                    log.info(f"[pass {pass_num}] processed image {item_id} (processed_count={processed_count})")
+                    log.progress(float(processed_count) / float(total))
+                # If not updated, it will remain in future passes. Continue scanning.
+
+            # If fewer than per_page results, we're at the end of current snapshot
+            if len(images) < per_page:
+                break
+            page += 1
+
+        # Scan scenes by pages
+        page = 1
+        while True:
+            pagination = {
+                "page": page,
+                "per_page": per_page,
+                "sort": "created_at",
+                "direction": "ASC",
+            }
+            scenes = stash.find_scenes(f=_build_filter(skip_tag_ids, settings["ExcludeOrganized"]), filter=pagination)
+            log.info(f"[pass {pass_num}] fetched scene page {page}, count={len(scenes)}")
+            if not scenes:
+                break
+            for sc in scenes:
+                item_id = sc.get("id")
+                if not item_id:
+                    log.error(f"[pass {pass_num}] scene without id on page {page}")
+                    continue
+
+                # Defensive fetch
+                current = stash.find_scene(item_id)
+                current_tag_ids = [t["id"] for t in current.get("tags", [])]
+                if any(tid in current_tag_ids for tid in skip_tag_ids):
+                    log.info(f"[pass {pass_num}] skipping scene {item_id} - now has skip tag")
+                    processed_count += 1
+                    pass_processed += 1
+                    log.progress(float(processed_count) / float(total))
+                    continue
+
+                try:
+                    updated = scrape_scene(stash, item_id)
+                except Exception as e:
+                    log.error(f"[pass {pass_num}] scrape_scene exception for {item_id}: {str(e)}")
+                    updated = False
+
+                if updated:
+                    processed_count += 1
+                    pass_processed += 1
+                    log.info(f"[pass {pass_num}] processed scene {item_id} (processed_count={processed_count})")
+                    log.progress(float(processed_count) / float(total))
+
+            if len(scenes) < per_page:
+                break
+            page += 1
+
+        log.info(f"Pass #{pass_num} finished. items processed this pass: {pass_processed}")
+
+        # If no items processed in a full pass, we're done
+        if pass_processed == 0:
+            log.info("No items processed in last pass; finishing scan.")
+            break
 
-        log.progress(float(idx) / float(total))
+        # Small sleep to avoid hammering API and to let the DB settle between passes
+        time.sleep(0.2)
 
+    # ensure progress finished
     log.progress(1.0)
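
The heart of the fix is this last hunk: instead of streaming one snapshot and advancing an index, the main loop now re-queries pages in repeated passes and stops only when a full pass processes nothing, which tolerates the filtered result set shrinking as items get tagged during the scan. A minimal standalone sketch of that pattern, with hypothetical fetch_page and process callables in place of the stashapi calls:

from typing import Callable, List


def scan_until_done(
    fetch_page: Callable[[int, int], List[str]],  # (page, per_page) -> ids still matching the filter
    process: Callable[[str], bool],               # True when the item changed state
    per_page: int = 25,
) -> int:
    """Repeat full scans until a pass processes zero items; return the total processed."""
    total_processed = 0
    while True:
        pass_processed = 0
        page = 1
        while True:
            batch = fetch_page(page, per_page)
            if not batch:
                break
            for item_id in batch:
                if process(item_id):
                    pass_processed += 1
            if len(batch) < per_page:  # a short page marks the end of the current snapshot
                break
            page += 1
        total_processed += pass_processed
        if pass_processed == 0:  # nothing left that the filter still matches
            return total_processed


# Toy usage: items drop out of the filter as soon as they are processed,
# so a single forward pass would skip some of them; extra passes catch the rest.
pending = {"a", "b", "c", "d", "e"}


def fetch(page: int, per_page: int) -> List[str]:
    return sorted(pending)[(page - 1) * per_page : page * per_page]


def process(item_id: str) -> bool:
    pending.discard(item_id)
    return True


assert scan_until_done(fetch, process, per_page=2) == 5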

plugins/e621_tagger/e621_tagger.yml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 name: e621_tagger
 description: Finding images and videos on e621 and tagging them.
-version: 0.3
+version: 0.4
 url: https://github.com/stashapp/CommunityScripts/
 exec:
   - python
