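"""Bulk Image Scrape plugin for Stash.

Reads the plugin task payload from stdin, connects to stash via the supplied
server_connection, queries all images (optionally excluding organized images
and images carrying any of the configured skip tags), scrapes each image with
the configured image scraper and writes the scraped metadata back to stash.
"""
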
import sys
import json
import time
from typing import Any

import stashapi.log as log
from stashapi.stash_types import StashItem
from stashapi.stashapp import StashInterface
from stashapi.scrape_parser import ScrapeParser

# Quick check to make sure we have a recent enough version of stashapi
# (older releases do not define StashItem.IMAGE or StashInterface.scrape_image)
if not hasattr(StashItem, "IMAGE") or not hasattr(StashInterface, "scrape_image"):
    log.error(
        "It seems you are using an older version of stashapi\n"
        "without support for image scraping.\n"
        "Please use the requirements.txt file to install the most recent version"
    )
    sys.exit(1)

#
# Helper functions
#

def get_tag_id(client: StashInterface, tag_name: str) -> str | None:
    """
    Get the id of a tag by name or return None if the tag is not found
    """
    if not tag_name:
        log.error("Invalid tag name provided - skipping ...")
        return None

    tags: list[dict] = client.find_tags(
        f={"name": {"value": tag_name, "modifier": "EQUALS"}}
    )
    if len(tags) == 0:
        log.error(f"No id found for tag {tag_name} - skipping ...")
        return None
    return tags[0]["id"]

def get_scraper_id(client: StashInterface, scraper_name: str) -> str | None:
    """
    Get the id of a scraper by name or return None if the scraper is not found
    """
    scrapers: list[dict] = client.list_scrapers([StashItem.IMAGE])
    for scraper in scrapers:
        if scraper["name"] == scraper_name:
            return scraper["id"]
    return None

def parse_skip_tags(client: StashInterface, skip_tags: str) -> list[str]:
    """
    Parse a comma-separated string of tag names into a list of tag ids
    """
    tag_names: list[str] = skip_tags.split(",")
    tag_ids: list[str] = []
    for tag in tag_names:
        tag_id: None | str = get_tag_id(client, tag.strip())
        if tag_id is not None:
            tag_ids.append(tag_id)
    return tag_ids

def get_all_images(
    client: StashInterface, skip_tags: list[str], exclude_organized: bool
) -> list[dict]:
    """
    Get all images from the stash
    """
    image_filter: dict = {}
    all_results: dict = {
        "page": 1,
        "per_page": -1,
        "sort": "created_at",
        "direction": "ASC",
    }

    if exclude_organized:
        image_filter["organized"] = False

    if skip_tags is not None and len(skip_tags) > 0:
        image_filter["tags"] = {
            "value": [],
            "excludes": skip_tags,
            "modifier": "INCLUDES_ALL",
            "depth": -1,
        }

    result: list[dict] = client.find_images(f=image_filter, filter=all_results)

    return result

def scrape_image(
    client: StashInterface, image_id: str, scraper_id: str
) -> dict | list[dict] | None:
    """
    Scrape an image with the given scraper
    """
    try:
        scrape_result: dict | list[dict] = client.scrape_image(scraper_id, image_id)
    except Exception as e:
        # Catch any exceptions not under our control
        # so we can continue with the next image in case of errors
        log.error(f"Error scraping image {image_id} with scraper {scraper_id}: {e}")
        return None

    return scrape_result

def scrape_is_valid(scrape_input: dict | list[dict] | None) -> bool:
    """
    Check if the scrape is formatted as expected and has any data we can process
    """
    if scrape_input is None:
        # most likely the scraper failed or raised an exception we caught and returned None
        return False
    elif isinstance(scrape_input, list):
        # single image scrape results are lists with a single dict inside
        return len(scrape_input) == 1 and scrape_is_valid(scrape_input[0])
    elif isinstance(scrape_input, dict):
        # check if there are any values that are
        # not None, empty lists, empty dicts, or empty strings
        return any(
            value
            for value in scrape_input.values()
            if value is not None and value != [] and value != {} and value != ""
        )
    else:
        # something went strangely wrong, treat it as an invalid scrape
        return False

def process_image_scrape(
    parser: ScrapeParser,
    image_input: dict,
    scrape_input: dict | list[dict],
    merge_existing_tags: bool,
) -> dict | None:
    """
    Process the scrape input and return an update dictionary
    """
    if isinstance(scrape_input, list) and len(scrape_input) == 1:
        # single image scrape results are lists with a single dict inside
        scrape_input = scrape_input[0]
    elif isinstance(scrape_input, dict):
        # do nothing if it's already a dict
        pass
    else:
        log.error(f"Unknown scrape input type for image {image_input['id']}")
        return None

    update_dict: dict = parser.image_from_scrape(scrape_input)
    update_dict["id"] = image_input["id"]
    if merge_existing_tags:
        # keep the tags the image already has and add the scraped ones on top
        existing_tags: list = [tag["id"] for tag in image_input["tags"]]
        merged_tags: list = list(set(existing_tags + update_dict.get("tag_ids", [])))
        update_dict["tag_ids"] = merged_tags

    return update_dict

def update_image(client: StashInterface, update: dict) -> dict | None:
    """
    Update the image with the given update
    """
    try:
        return client.update_image(update)
    except Exception as e:
        # Catch any exceptions not under our control
        # so we can continue with the next image in case of errors
        log.error(f"Error updating image {update['id']}: {e}")
        return None

#
# SETUP
#

json_input: dict = json.loads(sys.stdin.read())
FRAGMENT_SERVER: dict = json_input["server_connection"]
stash: StashInterface = StashInterface(FRAGMENT_SERVER)

config: dict = stash.get_configuration()["plugins"]
settings: dict[str, Any] = {
    "ScraperID": "",
    "SkipTags": "",
    "CreateMissingPerformers": False,
    "CreateMissingStudios": False,
    "CreateMissingTags": False,
    "MergeExistingTags": False,
    "ExcludeOrganized": False,
}

if "BulkImageScrape" in config:
    settings.update(config["BulkImageScrape"])

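# ScrapeParser (from stashapi) turns the raw scrape result into an image update
# fragment; the CreateMissing* settings control whether tags, studios and
# performers that do not exist in stash yet are created during that conversion.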
scrape_parser = ScrapeParser(
    stash,
    log,
    settings["CreateMissingTags"],
    settings["CreateMissingStudios"],
    settings["CreateMissingPerformers"],
)

#
# Validate input settings
#

# Exit if no ScraperID is set or we cannot resolve it
if settings["ScraperID"] == "":
    log.error("No ScraperID set")
    sys.exit(1)

scraper_id: None | str = get_scraper_id(stash, settings["ScraperID"])
if scraper_id is None:
    log.error(f"ScraperID {settings['ScraperID']} not found - cannot continue")
    log.error("Please check the ScraperID is correct and try again")
    sys.exit(1)

# parse the skip tags to a list of tag ids if we have any
parsed_skip_tags: list[str] = []
if settings["SkipTags"] != "":
    parsed_skip_tags = parse_skip_tags(stash, settings["SkipTags"])

#
# MAIN
#

log.info("Starting Bulk Image Scrape Plugin")
log.info(f"settings: {settings}")
log.info("Querying images from stash")

images: list[dict] = get_all_images(
    stash, parsed_skip_tags, settings["ExcludeOrganized"]
)

total_images: int = len(images)
if total_images == 0:
    log.info("No images found with the given filters")
    sys.exit(0)
else:
    log.info(f"Found {total_images} images")

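# Scrape every image in turn, write the processed result back to stash
# and report progress as we go.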
for i, image in enumerate(images, start=1):
    # small delay between scrape calls
    time.sleep(0.5)
    log.progress(i / total_images)
    log.debug(f"Scraping image {image['id']} with scraper {scraper_id}")

    scrape: dict | list[dict] | None = scrape_image(stash, image["id"], scraper_id)
    if not scrape_is_valid(scrape):
        log.error(
            f"Scraper returned invalid result for image {image['id']} with scraper {scraper_id}"
        )
        continue

    update_input: dict | None = process_image_scrape(
        scrape_parser, image, scrape, settings["MergeExistingTags"]
    )
    if update_input is None:
        log.error(
            f"Failed to process result for image {image['id']} from scraper {scraper_id}"
        )
        continue

    if update_image(stash, update_input) is not None:
        log.info(f"Updated image {image['id']} with scraper {scraper_id}")
    else:
        log.error(f"Failed to update image {image['id']}")