Skip to content

Commit 498ae9c

Browse files
spaceyuck and anonymous authored
bulkimageScrape plugin now uses generator and paging to iterate images (#562)
Co-authored-by: anonymous <[email protected]>
1 parent b3586eb commit 498ae9c

File tree

2 files changed

+65
-17
lines changed

2 files changed

+65
-17
lines changed

plugins/bulkImageScrape/BulkImageScrape.yml

100644100755
Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name: Bulk Image Scrape
22
description: Apply an image scraper to all images
3-
version: 0.3
3+
version: 0.3.1
44
url: https://github.com/stashapp/CommunityScripts/
55
exec:
66
- python
@@ -29,6 +29,9 @@ settings:
2929
ExcludeOrganized:
3030
displayName: Exclude images that are set as organized (default is to include)
3131
type: BOOLEAN
32+
SkipEntriesNum:
33+
displayName: number of entries to skip over (mostly for rerunning after an error on large collections)
34+
type: NUMBER
3235

3336
tasks:
3437
- name: "Bulk Image Scrape"

plugins/bulkImageScrape/bulkImageScrape.py

100644100755
Lines changed: 61 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1+
from collections.abc import Generator
12
import sys
23
import json
4+
import math
35
import time
46
import stashapi.log as log
57
from stashapi.stash_types import StashItem
@@ -109,19 +111,8 @@ def parse_skip_tags(client: StashInterface, skip_tags: str) -> list[str]:
109111
return tag_ids
110112

111113

112-
def get_all_images(
113-
client: StashInterface, skip_tags: list[str], exclude_organized: bool
114-
) -> list[dict]:
115-
"""
116-
Get all images from the stash
117-
"""
114+
def build_image_filter(skip_tags: list[str], exclude_organized: bool) -> dict:
118115
image_filter: dict = {}
119-
all_results: dict = {
120-
"page": 1,
121-
"per_page": -1,
122-
"sort": "created_at",
123-
"direction": "ASC",
124-
}
125116

126117
if exclude_organized:
127118
image_filter["organized"] = False
@@ -135,7 +126,58 @@ def get_all_images(
135126
"depth": -1,
136127
}
137128

138-
return client.find_images(f=image_filter, filter=all_results)
129+
return image_filter
130+
131+
132+
def count_all_images(
    client: StashInterface, skip_tags: list[str], exclude_organized: bool
) -> int:
    """
    Return the number of images in the stash that match the plugin filters.

    The image filter is built by ``build_image_filter`` from ``skip_tags`` and
    ``exclude_organized``. The query is issued with ``get_count=True`` and only
    the count part of the ``(count, images)`` result is returned.
    """
    criteria: dict = build_image_filter(
        skip_tags=skip_tags, exclude_organized=exclude_organized
    )

    # A page size of 0 is requested because only the count is of interest here
    # (presumably this makes the server return no image entries -- TODO confirm).
    paging: dict = {
        "page": 1,
        "per_page": 0,
        "sort": "created_at",
        "direction": "ASC",
    }

    count, _ = client.find_images(f=criteria, filter=paging, get_count=True)
    return count
150+
151+
152+
def get_all_images(
    client: StashInterface, skip_tags: list[str], exclude_organized: bool, skip_entries: int = 0
) -> Generator[dict, None, None]:
    """
    Lazily yield all matching images from the stash, one result page at a time.

    Images are queried sorted by ``created_at`` ascending in pages of 100
    entries, using the filter built by ``build_image_filter``.

    :param client: stash interface used to run ``find_images``
    :param skip_tags: tag ids passed on to ``build_image_filter``
    :param exclude_organized: passed on to ``build_image_filter``
    :param skip_entries: number of leading entries (in sort order) that are not
        yielded; whole pages are skipped without being fetched and the
        remainder is dropped from the first fetched page, so exactly
        ``skip_entries`` entries are skipped
    :return: generator over the image dicts returned by ``find_images``
    """
    image_filter: dict = build_image_filter(
        skip_tags=skip_tags, exclude_organized=exclude_organized
    )

    page_size = 100

    # The value comes from plugin settings and may not be a clean positive int
    # (assumption: NUMBER settings can arrive as float or be unset), so it is
    # normalised to a non-negative int before doing page arithmetic.
    to_skip = max(int(skip_entries or 0), 0)
    pages_to_skip, remainder = divmod(to_skip, page_size)

    page = 1 + pages_to_skip
    if to_skip > 0:
        log.info(f"skipping to result page {page} with {page_size} entries each to skip around {skip_entries}")

    while True:
        all_results: dict = {
            "page": page,
            "per_page": page_size,
            "sort": "created_at",
            "direction": "ASC",
        }

        images = client.find_images(f=image_filter, filter=all_results)

        # An empty (or missing) page means the result set is exhausted.
        if not images:
            return

        # Only the first fetched page can still contain entries to skip; this
        # keeps the yielded count in line with "total minus skipped".
        yield from images[remainder:]
        remainder = 0

        page += 1
139181

140182

141183
def scrape_image(
@@ -240,6 +282,7 @@ def update_image(client: StashInterface, update: dict) -> dict | None:
240282
"CreateMissingTags": False,
241283
"MergeExistingTags": False,
242284
"ExcludeOrganized": False,
285+
"SkipEntriesNum": 0,
243286
}
244287

245288
if "BulkImageScrape" in config:
@@ -267,17 +310,19 @@ def update_image(client: StashInterface, update: dict) -> dict | None:
267310
#
268311

269312
log.info("Querying images from stash")
270-
images: list[dict] = get_all_images(
313+
total_images: int = count_all_images(
271314
stash, parsed_skip_tags, settings["ExcludeOrganized"]
272-
)
315+
) - settings["SkipEntriesNum"]
273316

274-
total_images: int = len(images)
275317
if total_images == 0:
276318
log.info("No images found with the given filters")
277319
exit(0)
278320
else:
279321
log.info(f"Found {total_images} images")
280322

323+
images: Generator[dict, None, None] = get_all_images(
324+
stash, parsed_skip_tags, settings["ExcludeOrganized"], settings["SkipEntriesNum"]
325+
)
281326

282327
for i, image in enumerate(images, start=1):
283328
time.sleep(0.5)

0 commit comments

Comments
 (0)