Skip to content

Commit 4e20153

Browse files
authored
e621_tagger new features (#558)
1 parent 05a5e49 commit 4e20153

File tree

2 files changed

+68
-79
lines changed

2 files changed

+68
-79
lines changed

plugins/e621_tagger/e621_tagger.py

Lines changed: 67 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -8,54 +8,61 @@
88
from stashapi.stashapp import StashInterface
99

1010

11-
1211
def get_all_images(
    client: StashInterface,
    skip_tags: list[int],
    exclude_organized: bool,
    per_page: int = 100,
) -> list[dict]:
    """
    Fetch every matching image from the Stash API, one page at a time.

    All pages are collected *before* anything is returned. Callers tag the
    images they process, which removes them from this very filter; handing
    out images lazily while still paginating would therefore shift the
    remaining results forward and silently skip images.

    Args:
        client: Stash API client; only ``find_images`` is used.
        skip_tags: Tag IDs to exclude. Images carrying any of them (the
            filter uses ``depth: -1``) are left out. Empty means no tag
            filtering.
        exclude_organized: When true, only images with ``organized == False``
            are requested.
        per_page: Page size for each request. A value below 1 results in a
            single request (the previous implementation used ``-1`` to get
            all results at once).

    Returns:
        The matching images, ordered by ``created_at`` ascending.
    """
    # The filter does not depend on the page, so build it once.
    image_filter = {}
    if skip_tags:
        image_filter["tags"] = {
            "value": [],
            "excludes": skip_tags,
            "modifier": "INCLUDES_ALL",
            "depth": -1,
        }
    if exclude_organized:
        image_filter["organized"] = False

    collected = []
    page = 1
    while True:
        pagination = {
            "page": page,
            "per_page": per_page,
            "sort": "created_at",
            "direction": "ASC",
        }
        batch = client.find_images(f=image_filter, filter=pagination)
        if not batch:
            # no more pages
            break

        log.info(f"Fetched page {page} with {len(batch)} images")
        collected.extend(batch)

        if per_page < 1:
            # Unpaged request: asking for "page 2" would never come back
            # empty, so stop after the first response.
            break
        page += 1

    return collected
4852

4953

5054
def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> None:
5155
"""Process e621 metadata and update Stash records"""
52-
# Skip already processed images
56+
# same as before...
5357
image = stash.find_image(image_id)
54-
if any(tag["name"] == "e621_tagged" for tag in image.get("tags", [])):
58+
if any(t["name"] == "e621_tagged" for t in image.get("tags", [])):
59+
return
60+
61+
if any(t["name"] == "e621_tag_failed" for t in image.get("tags", [])):
5562
return
5663

5764
try:
58-
time.sleep(2) # Rate limiting
65+
time.sleep(0.5)
5966
response = requests.get(
6067
f"https://e621.net/posts.json?md5={image_md5}",
6168
headers={"User-Agent": "Stash-e621-Tagger/1.0"},
@@ -64,53 +71,49 @@ def process_e621_post(stash: StashInterface, image_id: str, image_md5: str) -> N
6471
response.raise_for_status()
6572
post_data = response.json().get("post", {})
6673
except Exception as e:
67-
log.error(f"e621 API error: {str(e)}")
74+
log.error(f"Marking as failed. e621 API error: {str(e)}")
75+
e621_tag_failed = get_or_create_tag(stash, "e621_tag_failed")
76+
fail_ids = [e621_tag_failed["id"]] + [t["id"] for t in image.get("tags", [])]
77+
stash.update_image({"id": image_id, "tag_ids": list(set(fail_ids))})
6878
return
6979

7080
if not post_data:
7181
return
7282

73-
# Create essential entities
7483
e621_tag = get_or_create_tag(stash, "e621_tagged")
7584
post_url = f"https://e621.net/posts/{post_data['id']}"
7685

77-
# Process tags
7886
tag_ids = [e621_tag["id"]]
79-
for category in ["general", "species", "character", "artist", "copyright"]:
80-
for tag in post_data.get("tags", {}).get(category, []):
81-
# Clean and validate tag
87+
for cat in ["general", "species", "character", "artist", "copyright"]:
88+
for tag in post_data.get("tags", {}).get(cat, []):
8289
clean_tag = tag.strip()
8390
if not clean_tag:
8491
continue
85-
8692
stash_tag = get_or_create_tag(stash, clean_tag)
8793
if stash_tag:
8894
tag_ids.append(stash_tag["id"])
8995

90-
# Process studio
9196
studio_id = None
9297
if artists := post_data.get("tags", {}).get("artist"):
9398
studio = get_or_create_studio(stash, artists[0])
9499
studio_id = studio["id"]
95100

96-
# Process performers
97101
performer_ids = []
98-
for char_tag in post_data.get("tags", {}).get("character", []):
99-
performer_name = char_tag.split('_(')[0]
100-
performer = get_or_create_performer(stash, performer_name)
101-
performer_ids.append(performer["id"])
102+
for char in post_data.get("tags", {}).get("character", []):
103+
name = char.split('_(')[0]
104+
perf = get_or_create_performer(stash, name)
105+
performer_ids.append(perf["id"])
102106

103-
# Update image
104107
try:
105108
stash.update_image({
106109
"id": image_id,
110+
"organized": True,
107111
"urls": [post_url],
108112
"tag_ids": list(set(tag_ids)),
109113
"studio_id": studio_id,
110114
"performer_ids": performer_ids
111115
})
112-
113-
log.info("Image updated: ${image_id}")
116+
log.info(f"Image updated: {image_id}")
114117
except Exception as e:
115118
log.error(f"Update failed: {str(e)}")
116119

@@ -166,72 +169,58 @@ def get_or_create_performer(stash: StashInterface, name: str) -> dict:
166169

167170
def scrape_image(client: StashInterface, image_id: str) -> None:
    """Resolve an MD5 for a Stash image and pass it to ``process_e621_post``.

    The MD5 is taken from the file's basename when the part before the first
    ``.`` is a 32-digit hexadecimal string (any letter case, normalised to
    lower case); otherwise it is computed from the file's content on disk.

    Args:
        client: Stash API client used to load the image.
        image_id: ID of the image to process.

    Returns:
        None. Returns early, without calling ``process_e621_post``, when the
        image cannot be found, has no visual files, or its content cannot be
        hashed.
    """
    image = client.find_image(image_id)
    if not image or not image.get("visual_files"):
        return

    # Only the first visual file is considered.
    file_data = image["visual_files"][0]

    final_md5 = _md5_from_basename(file_data["basename"])
    if final_md5 is not None:
        log.info(f"Using filename MD5: {final_md5}")
    else:
        try:
            final_md5 = _md5_from_file(file_data["path"])
        except Exception as e:
            # Best effort: an unreadable file only skips this one image.
            log.error(f"Failed to generate MD5: {str(e)}")
            return
        log.info(f"Generated content MD5: {final_md5}")

    process_e621_post(client, image_id, final_md5)


def _md5_from_basename(basename: str):
    """Return the lower-case MD5 spelled out by *basename*, or None."""
    stem = basename.split(".")[0]
    # fullmatch: unlike match() with "$", a trailing newline is not accepted.
    if re.fullmatch(r"[0-9a-fA-F]{32}", stem):
        return stem.lower()
    return None


def _md5_from_file(path: str) -> str:
    """Return the hex MD5 digest of the file at *path*, read in 64 KiB chunks."""
    md5_hash = hashlib.md5()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(65536), b""):
            md5_hash.update(chunk)
    return md5_hash.hexdigest()
197+
204198

205-
# Plugin setup and execution
206-
# In the main execution block:
207199
if __name__ == "__main__":
    # Plugin entry point: reads the JSON payload from stdin, connects to
    # Stash, and runs scrape_image() over every image that is not excluded.
    log.info("Starting tagger with pagination...")
    json_input = json.loads(sys.stdin.read())
    stash = StashInterface(json_input["server_connection"])

    config = stash.get_configuration().get("plugins", {})
    settings = {
        "SkipTags": "e621_tagged, e621_tag_failed",
        "ExcludeOrganized": False,
    }
    settings.update(config.get("e621_tagger", {}))

    # Tag NAMES to skip: the two bookkeeping tags are always included, plus
    # whatever the user configured. `or ""` tolerates a null setting.
    skip_tag_names = {"e621_tagged", "e621_tag_failed"}
    skip_tag_names.update(
        name.strip()
        for name in (settings.get("SkipTags") or "").split(",")
        if name.strip()
    )

    # The image filter's "excludes" field is fed tag IDs, so every name is
    # resolved to an ID here; names and IDs are never mixed in one list.
    skip_tag_ids = []
    for name in sorted(skip_tag_names):
        tag = get_or_create_tag(stash, name)
        if tag:
            skip_tag_ids.append(tag["id"])

    log.info("Fetching images in pages...")
    # Take a full snapshot before processing: scrape_image() tags images,
    # which would otherwise change the result set while it is being paged.
    # list() also gives a total for the progress fraction.
    images = list(
        get_all_images(
            stash, skip_tag_ids, settings["ExcludeOrganized"], per_page=100
        )
    )
    total = len(images)

    for idx, image in enumerate(images, start=1):
        current_tags = {t["name"] for t in image.get("tags", [])}
        if current_tags & skip_tag_names:
            log.info(f"Skipping image {image['id']} - contains skip tag")
            continue

        # Report progress as a fraction of the total, not a running count.
        log.progress(idx / total)
        scrape_image(stash, image["id"])

plugins/e621_tagger/e621_tagger.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name: e621_tagger
22
description: Finding images and videos on e621 and tagging them.
3-
version: 0.1
3+
version: 0.2
44
url: https://github.com/stashapp/CommunityScripts/
55
exec:
66
- python

0 commit comments

Comments
 (0)