@@ -2,6 +2,34 @@ name: gelbooru-xpath
22# for gelbooru 0.2+
33# https://github.com/stashapp/CommunityScrapers/issues/2273
44# loosely based on danbooru
5+
# Fragment scraping: build the post URL from a filename as produced by gallery-dl
# (<site>_<id>_<hash>.<ext>, e.g. rule34_<id>_<hash>.<ext>).
# The filename replacements are meant to be applied in the order listed:
#   1. drop every character outside [a-zA-Z0-9-._~] so the rest can be placed in a URL
#   2. map the site prefix to that site's post URL ("<id>_<hash>.<ext>" is left as the tail)
#   3. cut the tail back to the leading numeric ID
sceneByFragment: &fragementscraper
  action: scrapeXPath
  queryURL: "{filename}"
  queryURLReplace:
    filename:
      # clean filename so that it can construct a valid url
      - regex: "[^a-zA-Z\\d\\-._~]"
        with: ""
      # map to domain by prefix; every rule is anchored with ^ so only a leading
      # site prefix is rewritten (the cleaning step above has already removed any "/")
      - regex: "^gelbooru_(.*)"
        with: "https://gelbooru.com/index.php?page=post&s=view&id=$1"
      - regex: "^tbib_(.*)"
        with: "https://tbib.org/index.php?page=post&s=view&id=$1"
      - regex: "^rule34_(.*)"
        with: "https://rule34.xxx/index.php?page=post&s=view&id=$1"
      - regex: "^xbooru_(.*)"
        with: "https://xbooru.com/post/show/$1"
      - regex: "^safebooru_(.*)"
        with: "https://safebooru.org/post/show/$1"
      - regex: "^hypnohub_(.*)"
        with: "https://hypnohub.net/post/show/$1"
      - regex: "^yandere_(.*)"
        with: "https://yande.re/post/show/$1"
      # capture the numeric sequence at the beginning of the tail as the ID and drop
      # "_<hash>.<ext>"; handles both the "&id=" and the "/post/show/" URL forms above
      - regex: '^(.*(?:&id=|/post/show/))([0-9]+)_.*$'
        with: "$1$2"
  scraper: postScraper
imageByFragment: *fragementscraper
32+
533sceneByURL :
634 - action : scrapeXPath
735 url : &urls
@@ -21,8 +49,11 @@ imageByURL:
2149xPathScrapers :
2250 postScraper :
2351 image :
52+ # title intentionally excluded
53+ # Title: &title
54+ # selector: //title
2455 Date : &date
25- selector : //li[contains(text(),"Posted")]/text()[1]
56+ selector : //div[@id="post-view" or @id="container"]// li[contains(text(),"Posted")]/text()[1]
2657 postProcess :
2758 - replace :
2859 - regex : ' Posted:'
@@ -32,15 +63,24 @@ xPathScrapers:
3263 - parseDate : 2006-01-02
# Fields of xPathScrapers -> postScraper -> image.
# Every selector is scoped to the post container (div with id "post-view" or "container").

# Performers are read from the character tag entries; a[last()] takes the final link of each entry.
Performers: &performers
  Name:
    selector: //div[@id="post-view" or @id="container"]//li[contains(@class,"tag-type-character")]/a[last()]/text()
# The artist tag entries are used as the studio name.
Studio: &artist
  Name:
    selector: //div[@id="post-view" or @id="container"]//li[contains(@class,"tag-type-artist")]/a[last()]/text()
Tags: &tag_string
  Name:
    # Variant A (disabled): general tags only
    # selector: //div[@id="post-view" or @id="container"]//li[contains(@class,"tag-type-general")]/a[last()]/text()
    # Variant B (active): general tags plus metadata tags (like 2D, 3D, AI generated)
    selector: //div[@id="post-view" or @id="container"]//li[contains(@class,"tag-type-general") or contains(@class,"tag-type-metadata")]/a[last()]/text()
# Link target(s) of the "Source:" list entry.
URLs: &source
  selector: //div[@id="post-view" or @id="container"]//li[contains(text(),"Source:")]/a/@href
# Note overlay texts (translations) go into the Details box, joined with the concat separator.
Details:
  selector: //div[@id="post-view" or @id="container"]//div[contains(@class,"note-body")]/text()
  concat: "\n"
4381 scene :
82+ # title intentionally excluded
83+ # Title: *title
4484 Date : *date
4585 Performers : *performers
4686 Studio : *artist
@@ -52,4 +92,4 @@ driver:
5292 - Key : User-Agent
5393 Value : stashapp/stash scraper
5494
55- # Last Updated April 19 , 2025
95+ # Last Updated October 17 , 2025
0 commit comments