Commit 0e35831

add bulkImageScraper Plugin (#547)
1 parent eeb2d3f commit 0e35831

File tree

3 files changed, +312 -0 lines changed

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
name: Bulk Image Scrape
description: Apply an image scraper to all images
version: 0.1
url: https://github.com/stashapp/CommunityScripts/
exec:
  - python
  - "{pluginDir}/bulkImageScrape.py"
interface: raw

settings:
  ScraperID:
    displayName: The Scraper ID of the image scraper to use (for example SHALookup)
    type: STRING
  SkipTags:
    displayName: List of tags to skip (comma separated)
    type: STRING
  CreateMissingPerformers:
    displayName: Create missing performers from scrape result
    type: BOOLEAN
  CreateMissingStudios:
    displayName: Create missing studios from scrape result
    type: BOOLEAN
  CreateMissingMovies:
    displayName: Create missing movies/groups from scrape result
    type: BOOLEAN
  MergeExistingTags:
    displayName: Merge existing tags with the scraped tags (default is to overwrite)
    type: BOOLEAN
  ExcludeOrganized:
    displayName: Exclude images that are set as organized (default is to include)
    type: BOOLEAN

tasks:
  - name: "Bulk Image Scrape"
    description: Apply an image scraper to all images
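
Because the plugin declares interface: raw, Stash runs the exec command above and pipes a JSON fragment to the script's stdin, and bulkImageScrape.py reads the server connection out of that fragment. As a rough sketch (the exact fragment shape and every field value below are assumptions about Stash's plugin runner, not taken from this commit), the script could be exercised by hand like this:

# Hypothetical local harness: feed the plugin script the kind of
# server_connection fragment Stash's raw plugin interface would pipe in.
# All field names and values here are illustrative assumptions.
import json
import subprocess

fragment = {
    "server_connection": {
        "Scheme": "http",
        "Host": "localhost",
        "Port": 9999,
    }
}
subprocess.run(
    ["python", "bulkImageScrape.py"],
    input=json.dumps(fragment),
    text=True,
    check=True,
)
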
Lines changed: 274 additions & 0 deletions
@@ -0,0 +1,274 @@
import sys
import json
import time
from typing import Any

import stashapi.log as log
from stashapi.stash_types import StashItem
from stashapi.stashapp import StashInterface
from stashapi.scrape_parser import ScrapeParser

# Quick check to make sure we have the correct version of stashapi
# (hasattr avoids an AttributeError on versions that predate StashItem.IMAGE)
if not hasattr(StashItem, "IMAGE") or not hasattr(StashInterface, "scrape_image"):
    log.error(
        "It seems you are using an older version of stashapi\n"
        "without support for image scraping.\n"
        "Please use the requirements.txt file to install the most recent version"
    )
    sys.exit(1)


#
# Helper functions
#


def get_tag_id(client: StashInterface, tag_name: str) -> str | None:
    """
    Get the id of a tag by name or return None if the tag is not found
    """
    if not tag_name:
        # skip instead of raising, so an empty entry (e.g. a trailing
        # comma in SkipTags) does not abort the whole run
        log.error("Invalid tag name provided - skipping ...")
        return None

    tags: list[dict] = client.find_tags(
        f={"name": {"value": tag_name, "modifier": "EQUALS"}}
    )
    if len(tags) == 0:
        log.error(f"No id found for tag {tag_name} - skipping ...")
        return None
    return tags[0]["id"]


def get_scraper_id(client: StashInterface, scraper_name: str) -> str | None:
    """
    Get the id of a scraper by name or return None if the scraper is not found
    """
    scrapers: list[dict] = client.list_scrapers([StashItem.IMAGE])
    for scraper in scrapers:
        if scraper["name"] == scraper_name:
            return scraper["id"]
    return None


def parse_skip_tags(client: StashInterface, skip_tags: str) -> list[str]:
    """
    Parse the comma-separated skip tags into a list of tag ids
    """
    tag_names: list[str] = skip_tags.split(",")
    tag_ids: list[str] = []
    for tag in tag_names:
        tag_id: str | None = get_tag_id(client, tag.strip())
        if tag_id is not None:
            tag_ids.append(tag_id)
    return tag_ids


def get_all_images(
    client: StashInterface, skip_tags: list[str], exclude_organized: bool
) -> list[dict]:
    """
    Get all images from the stash that match the configured filters
    """
    image_filter: dict = {}
    all_results: dict = {
        "page": 1,
        "per_page": -1,
        "sort": "created_at",
        "direction": "ASC",
    }

    if exclude_organized:
        image_filter["organized"] = False

    if skip_tags is not None and len(skip_tags) > 0:
        image_filter["tags"] = {
            "value": [],
            "excludes": skip_tags,
            "modifier": "INCLUDES_ALL",
            "depth": -1,
        }

    result: list[dict] = client.find_images(f=image_filter, filter=all_results)

    return result


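# For illustration (ids made up): with ExcludeOrganized enabled and a single
# skip tag resolved to id "123", get_all_images builds this image filter:
#
#   {
#       "organized": False,
#       "tags": {"value": [], "excludes": ["123"],
#                "modifier": "INCLUDES_ALL", "depth": -1},
#   }

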
def scrape_image(
    client: StashInterface, image_id: str, scraper_id: str
) -> dict | list[dict] | None:
    """
    Scrape an image with the given scraper
    """
    try:
        scrape_result: dict = client.scrape_image(scraper_id, image_id)
    except Exception as e:
        # Catch any exceptions not under our control
        # so we can continue with the next image in case of errors
        log.error(f"Error scraping image {image_id} with scraper {scraper_id}: {e}")
        return None

    return scrape_result


def scrape_is_valid(scrape_input: dict | list[dict] | None) -> bool:
    """
    Check if the scrape is formatted as expected and has any data we can process
    """
    if scrape_input is None:
        # most likely the scraper failed or raised an exception
        # that we caught and turned into None
        return False
    elif isinstance(scrape_input, list):
        # single image scrape results are lists with a single dict inside
        return len(scrape_input) == 1 and scrape_is_valid(scrape_input[0])
    elif isinstance(scrape_input, dict):
        # check if there are any values that are
        # not None, empty lists, empty dicts, or empty strings
        return any(
            value
            for value in scrape_input.values()
            if value is not None and value != [] and value != {} and value != ""
        )
    else:
        # something went strangely wrong
        return False


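# A few illustrative cases (inputs are made up):
#   scrape_is_valid(None)                    -> False  (scraper errored)
#   scrape_is_valid([])                      -> False  (no result)
#   scrape_is_valid([{"title": ""}])         -> False  (only empty fields)
#   scrape_is_valid([{"title": "Example"}])  -> True

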
def process_image_scrape(
    parser: ScrapeParser,
    image_input: dict,
    scrape_input: dict | list[dict],
    merge_existing_tags: bool,
) -> dict | None:
    """
    Process the scrape input and return an update dictionary
    """
    if isinstance(scrape_input, list) and len(scrape_input) == 1:
        # single image scrape results are lists with a single dict inside
        scrape_input = scrape_input[0]
    elif isinstance(scrape_input, dict):
        # nothing to unwrap if it is already a dict
        pass
    else:
        log.error(f"Unknown scrape input type for image {image_input['id']}")
        return None

    update_dict: dict = parser.image_from_scrape(scrape_input)
    update_dict["id"] = image_input["id"]
    if merge_existing_tags:
        existing_tags: list = [tag["id"] for tag in image_input["tags"]]
        # .get() guards against scrapes that produced no tags at all
        merged_tags: list = list(set(existing_tags + update_dict.get("tag_ids", [])))
        update_dict["tag_ids"] = merged_tags

    return update_dict


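# MergeExistingTags illustration (ids made up): existing ["12", "34"] merged
# with scraped ["34", "56"] yields the union {"12", "34", "56"}; without the
# setting, the scraped tag ids simply replace the existing ones.

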
def update_image(client: StashInterface, update: dict) -> dict | None:
    """
    Update the image with the given update dictionary
    """
    try:
        return client.update_image(update)
    except Exception as e:
        # Catch any exceptions not under our control
        # so we can continue with the next image in case of errors
        log.error(f"Error updating image {update['id']}: {e}")
        return None


#
# SETUP
#


json_input: dict = json.loads(sys.stdin.read())
FRAGMENT_SERVER: dict = json_input["server_connection"]
stash: StashInterface = StashInterface(FRAGMENT_SERVER)

config: dict = stash.get_configuration()["plugins"]
settings: dict[str, Any] = {
    "ScraperID": "",
    "SkipTags": "",
    "CreateMissingPerformers": False,
    "CreateMissingStudios": False,
    "CreateMissingTags": False,
    "MergeExistingTags": False,
    "ExcludeOrganized": False,
}

if "BulkImageScrape" in config:
    settings.update(config["BulkImageScrape"])

scrape_parser = ScrapeParser(
    stash,
    log,
    settings["CreateMissingTags"],
    settings["CreateMissingStudios"],
    settings["CreateMissingPerformers"],
)

#
# Validate input settings
#


# Exit if no ScraperID is set or we cannot resolve it
if settings["ScraperID"] == "":
    log.error("No ScraperID set")
    sys.exit(1)

scraper_id: str | None = get_scraper_id(stash, settings["ScraperID"])
if scraper_id is None:
    log.error(f"ScraperID {settings['ScraperID']} not found - cannot continue")
    log.error("Please check the ScraperID is correct and try again")
    sys.exit(1)

# parse the skip tags to a list of tag ids if we have any
parsed_skip_tags: list[str] = []
if settings["SkipTags"] != "":
    parsed_skip_tags = parse_skip_tags(stash, settings["SkipTags"])

#
# MAIN
#

log.info("Starting Bulk Image Scrape Plugin")
log.info(f"settings: {settings}")
log.info("Querying images from stash")

images: list[dict] = get_all_images(
    stash, parsed_skip_tags, settings["ExcludeOrganized"]
)

total_images: int = len(images)
if total_images == 0:
    log.info("No images found with the given filters")
    sys.exit(0)
else:
    log.info(f"Found {total_images} images")


for i, image in enumerate(images, start=1):
    # small delay between scrapes to avoid hammering the scraper source
    time.sleep(0.5)
    log.progress(i / total_images)
    log.debug(f"Scraping image {image['id']} with scraper {scraper_id}")

    scrape: dict | list[dict] | None = scrape_image(stash, image["id"], scraper_id)
    if not scrape_is_valid(scrape):
        log.error(
            f"Scraper returned invalid result for image {image['id']} with scraper {scraper_id}"
        )
        continue

    update_input: dict | None = process_image_scrape(
        scrape_parser, image, scrape, settings["MergeExistingTags"]
    )
    if update_input is not None:
        if update_image(stash, update_input) is not None:
            log.info(f"Updated image {image['id']} with scraper {scraper_id}")
    else:
        log.error(
            f"Failed to build update for image {image['id']} from scraper {scraper_id}"
        )
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
requests
# stashapi has to be installed from source until stashapp-tools is updated to include the latest version
stashapi @ git+https://github.com/stg-annon/stashapi.git
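
These dependencies need to be installed into the same Python environment that Stash uses to run plugins. A minimal sketch, assuming requirements.txt sits in the plugin directory and the current interpreter is the one Stash invokes:

# Hypothetical install helper: run pip against the plugin's requirements.txt
# with the same interpreter that will execute bulkImageScrape.py.
import subprocess
import sys

subprocess.check_call(
    [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"]
)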
