E621 tagger (#555)

yokarion · web-flow · commit 0e7673abe461 · 2025-05-12T15:38:54.000+03:00
diff --git a/plugins/e621_tagger/README.md b/plugins/e621_tagger/README.md
@@ -0,0 +1,19 @@
+# e621 tagger
+
+Just a quick script to tag your uploads
+
+Some code is borrowed from bulkImageScrape as an example, because I'm not a Python dev
+
+https://github.com/stashapp/CommunityScripts/blob/main/plugins/bulkImageScrape/bulkImageScrape.py
+
+## How to use
+
+Go to Tasks -> e621_tagger and press "Tag everything"
+
+## Configuration
+
+You can configure which tags cause an image to be skipped. By default, images that have the `e621_tagged` tag are skipped
+
+## Rate limit
+
+Be aware that e621 has a rate limit. The script waits a hardcoded 2 seconds before each request
diff --git a/plugins/e621_tagger/e621_tagger.py b/plugins/e621_tagger/e621_tagger.py
@@ -0,0 +1,237 @@
+import hashlib
+import re
+import sys
+import json
+import time
+import requests
+import stashapi.log as log
+from stashapi.stashapp import StashInterface
+
+
+
+def get_all_images(
+    client: StashInterface, 
+    skip_tags: list[str], 
+    exclude_organized: bool
+) -> list[dict]:
+    """
+    Get all images with proper tag exclusion and organization filter
+    """
+    image_filter = {}
+    pagination = {
+        "page": 1,
+        "per_page": -1,  # -1 gets all results at once
+        "sort": "created_at",
+        "direction": "ASC",
+    }
+
+    # Convert tag names to IDs
+    tag_ids = []
+    for tag_name in skip_tags:
+        tag = get_or_create_tag(client, tag_name)
+        if tag:
+            tag_ids.append(tag["id"])
+
+    if tag_ids:
+        image_filter["tags"] = {
+            "value": [],
+            "excludes": tag_ids,
+            "modifier": "INCLUDES_ALL",
+            "depth": -1,
+        }
+
+    if exclude_organized:
+        image_filter["organized"] = False  # Correct field name
+
+    # Maintain original parameter structure
+    return client.find_images(f=image_filter, filter=pagination)
+
+
+def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> None:
+    """Process e621 metadata and update Stash records"""
+    # Skip already processed images
+    image = stash.find_image(image_id)
+    if any(tag["name"] == "e621_tagged" for tag in image.get("tags", [])):
+        return
+
+    try:
+        time.sleep(2)  # Rate limiting
+        response = requests.get(
+            f"https://e621.net/posts.json?md5={image_md5}",
+            headers={"User-Agent": "Stash-e621-Tagger/1.0"},
+            timeout=10
+        )
+        response.raise_for_status()
+        post_data = response.json().get("post", {})
+    except Exception as e:
+        log.error(f"e621 API error: {str(e)}")
+        return
+
+    if not post_data:
+        return
+
+    # Create essential entities
+    e621_tag = get_or_create_tag(stash, "e621_tagged")
+    post_url = f"https://e621.net/posts/{post_data['id']}"
+
+    # Process tags
+    tag_ids = [e621_tag["id"]]
+    for category in ["general", "species", "character", "artist", "copyright"]:
+        for tag in post_data.get("tags", {}).get(category, []):
+            # Clean and validate tag
+            clean_tag = tag.strip()
+            if not clean_tag:
+                continue
+            
+            stash_tag = get_or_create_tag(stash, clean_tag)
+            if stash_tag:
+                tag_ids.append(stash_tag["id"])
+
+    # Process studio
+    studio_id = None
+    if artists := post_data.get("tags", {}).get("artist"):
+        studio = get_or_create_studio(stash, artists[0])
+        studio_id = studio["id"]
+
+    # Process performers
+    performer_ids = []
+    for char_tag in post_data.get("tags", {}).get("character", []):
+        performer_name = char_tag.split('_(')[0]
+        performer = get_or_create_performer(stash, performer_name)
+        performer_ids.append(performer["id"])
+
+    # Update image
+    try:
+        stash.update_image({
+            "id": image_id,
+            "urls": [post_url],
+            "tag_ids": list(set(tag_ids)),
+            "studio_id": studio_id,
+            "performer_ids": performer_ids
+        })
+
+        log.info("Image updated: ${image_id}")
+    except Exception as e:
+        log.error(f"Update failed: {str(e)}")
+
+
+def get_or_create_tag(stash: StashInterface, tag_name: str) -> dict:
+    """Find or create tag with hierarchy handling"""
+    # Validate tag name
+    tag_name = tag_name.strip()
+    if not tag_name:
+        log.error("Attempted to create tag with empty name")
+        return None
+
+    existing = stash.find_tags(f={"name": {"value": tag_name, "modifier": "EQUALS"}})
+    if existing:
+        return existing[0]
+    
+    parts = tag_name.split(":")
+    parent_id = None
+    for i in range(len(parts)):
+        current_name = ":".join(parts[:i+1]).strip()
+        if not current_name:
+            continue
+            
+        existing = stash.find_tags(f={"name": {"value": current_name, "modifier": "EQUALS"}})
+        if not existing:
+            create_data = {"name": current_name}
+            if parent_id:
+                create_data["parent_ids"] = [parent_id]
+            try:
+                new_tag = stash.create_tag(create_data)
+                if not new_tag:
+                    log.error(f"Failed to create tag: {current_name}")
+                    return None
+                parent_id = new_tag["id"]
+            except Exception as e:
+                log.error(f"Error creating tag {current_name}: {str(e)}")
+                return None
+        else:
+            parent_id = existing[0]["id"]
+    return {"id": parent_id}
+
+def get_or_create_studio(stash: StashInterface, name: str) -> dict:
+    """Find or create studio"""
+    studios = stash.find_studios(f={"name": {"value": name, "modifier": "EQUALS"}})
+    return studios[0] if studios else stash.create_studio({"name": name})
+
+
+def get_or_create_performer(stash: StashInterface, name: str) -> dict:
+    """Find or create performer"""
+    performers = stash.find_performers(f={"name": {"value": name, "modifier": "EQUALS"}})
+    return performers[0] if performers else stash.create_performer({"name": name})
+
+
+def scrape_image(client: StashInterface, image_id: str) -> None:
+    """Main scraping handler"""
+    image = client.find_image(image_id)
+    if not image or not image.get("visual_files"):
+        return
+
+    file_data = image["visual_files"][0]
+    filename = file_data["basename"]
+    filename_md5 = filename.split('.')[0]
+    final_md5 = None
+
+    # First try filename-based MD5
+    if re.match(r"^[a-f0-9]{32}$", filename_md5):
+        final_md5 = filename_md5
+        log.info(f"Using filename MD5: {final_md5}")
+    else:
+        # Fallback to content-based MD5
+        try:
+            file_path = file_data["path"]
+            log.info(f"Generating MD5 from file content: {file_path}")
+            
+            md5_hash = hashlib.md5()
+            with open(file_path, "rb") as f:
+                # Read file in 64kb chunks for memory efficiency
+                for chunk in iter(lambda: f.read(65536), b""):
+                    md5_hash.update(chunk)
+            
+            final_md5 = md5_hash.hexdigest()
+            log.info(f"Generated content MD5: {final_md5}")
+        except Exception as e:
+            log.error(f"Failed to generate MD5: {str(e)}")
+            return
+
+    if final_md5:
+        process_e621_post(client, image_id, final_md5)
+    else:
+        log.warning("No valid MD5 available for processing")
+
+# Plugin setup and execution
+# In the main execution block:
+if __name__ == "__main__":
+    json_input = json.loads(sys.stdin.read())
+    stash = StashInterface(json_input["server_connection"])
+
+    config = stash.get_configuration().get("plugins", {})
+    settings = {
+        "SkipTags": "e621_tagged",  # Add automatic filtering
+        "ExcludeOrganized": False
+    }
+    settings.update(config.get("e621_tagger", {}))
+
+    log.info(settings)
+
+    # Get e621_tagged ID for filtering
+    e621_tag = get_or_create_tag(stash, "e621_tagged")
+
+    # Existing tags + automatic e621_tagged exclusion
+    skip_tags = [t.strip() for t in settings["SkipTags"].split(",") if t.strip()]
+    skip_tags.append(e621_tag["id"])  # Filter by ID instead of name
+
+    images = get_all_images(stash, skip_tags, settings["ExcludeOrganized"])
+
+    # Rest of the loop remains the same
+    for i, image in enumerate(images, 1):
+        image_tag_names = [tag["name"] for tag in image.get("tags", [])]
+        if any(tag in image_tag_names for tag in skip_tags):
+            log.info(f"Skipping image {image['id']} - contains skip tag")
+            continue
+
+        log.progress(i/len(images))
+        scrape_image(stash, image["id"])
diff --git a/plugins/e621_tagger/e621_tagger.yml b/plugins/e621_tagger/e621_tagger.yml
@@ -0,0 +1,21 @@
+name: e621_tagger
+description: Finding images and videos on e621 and tagging them.
+version: 0.1
+url: https://github.com/stashapp/CommunityScripts/
+exec:
+  - python
+  - "{pluginDir}/e621_tagger.py"
+
+interface: raw
+
+settings:
+  SkipTags:
+    displayName: List of tags to skip (comma separated). Default - e621_tagged
+    type: STRING
+  ExcludeOrganized:
+    displayName: Exclude images that are set as organized (default is to include)
+    type: BOOLEAN
+
+tasks:
+  - name: "Tag everything"
+    description: "Tag everything (Warning: can take a while)"
diff --git a/plugins/e621_tagger/requirements.txt b/plugins/e621_tagger/requirements.txt
@@ -0,0 +1,3 @@
+requests
+# stashapi has to be installed from source until stashapp-tools is updated to include the latest version
+stashapi @ git+https://github.com/stg-annon/stashapi.git

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+requests`
	`2`	`+# stashapi has to be installed from source until stashapp-tools is updated to include the latest version`
	`3`	`+stashapi @ git+https://github.com/stg-annon/stashapi.git`