Skip to content

Commit 91ba5b7

Browse files
committed
Update script scrapers that return performer URL
Changes in Stash v0.27 mean that we now want to return arrays of URLs for both scenes and performers, and arrays of images for performers. This commit updates (hopefully) all affected script scrapers.
1 parent 1f91724 commit 91ba5b7

File tree

20 files changed

+149
-38
lines changed

20 files changed

+149
-38
lines changed

scrapers/AyloAPI/scrape.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ def to_scraped_performer(
313313
performer["tags"] = tags
314314

315315
if site:
316-
performer["url"] = _construct_performer_url(performer_from_api, site)
316+
performer["urls"] = [_construct_performer_url(performer_from_api, site)]
317317

318318
return performer
319319

scrapers/BangBros/BangBros.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ def bangbros(obj: Any, _) -> Any:
4545
"url",
4646
lambda x: x.replace("/scene/", "/video/").replace("www.bangbros.com", domain),
4747
)
48+
fixed = replace_all(
49+
obj,
50+
"urls",
51+
lambda x: x.replace("/scene/", "/video/").replace("www.bangbros.com", domain),
52+
)
4853

4954
# Rename certain studios according to the map
5055
fixed = replace_at(

scrapers/Brazzers/Brazzers.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ def brazzers(obj: Any, _) -> Any:
3030
"url",
3131
lambda x: x.replace("/scene/", "/video/").replace("/model/", "/pornstar/"),
3232
)
33+
fixed = replace_all(
34+
obj,
35+
"urls",
36+
lambda x: x.replace("/scene/", "/video/").replace("/model/", "/pornstar/"),
37+
)
3338

3439
# Rename certain studios according to the map
3540
fixed = replace_at(

scrapers/CzechHunter/CzechHunter.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def czechhunter(obj: Any, _) -> Any:
3131

3232
# Replace the studio name in all URLs
3333
fixed = replace_all(obj, "url", lambda x: x.replace("bigstr.com", replacement))
34+
fixed = replace_all(fixed, "urls", lambda x: x.replace("bigstr.com", replacement))
3435

3536
return fixed
3637

scrapers/Deviante/Deviante.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,11 @@ def deviante(obj: Any, _) -> Any:
4949
"url",
5050
lambda x: x.replace("/scene/", "/video/").replace("deviante.com", replacement),
5151
)
52+
fixed = replace_all(
53+
fixed,
54+
"urls",
55+
lambda x: x.replace("/scene/", "/video/").replace("deviante.com", replacement),
56+
)
5257

5358
return fixed
5459

scrapers/DigitalPlayground/DigitalPlayground.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import sys
33
from typing import Any
44
from py_common import log
5-
from py_common.util import replace_at, replace_all
5+
from py_common.util import replace_all
66
from AyloAPI.scrape import (
77
gallery_from_url,
88
scraper_args,
@@ -18,14 +18,14 @@
1818
studio_map = {
1919
"DP Parody": "DP Parodies",
2020
"dpw": "DP World",
21-
"Dpstar Sex Challenges": "Sex Challenges",
21+
"Dpstar Sex Challenges": "Sex Challenges",
2222
"Episodes": "Digital Playground Episodes",
2323
}
2424

2525

2626
def digitalplayground(obj: Any, _) -> Any:
2727
fixed = replace_all(obj, "name", replacement=lambda x: studio_map.get(x, x))
28-
fixed = replace_all(fixed, "url", lambda x: x.replace("/model/", "/modelprofile/"))
28+
fixed = replace_all(fixed, "urls", lambda x: x.replace("/model/", "/modelprofile/"))
2929

3030
return fixed
3131

scrapers/FakeHub/FakeHub.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ def fakehub(obj: Any, _) -> Any:
3333
fixed = replace_all(
3434
obj,
3535
"url",
36+
lambda x: x.replace("fakehub.com", replacement),
37+
)
38+
fixed = replace_all(
39+
fixed,
40+
"urls",
3641
lambda x: x.replace("/model/", "/modelprofile/").replace(
3742
"fakehub.com", replacement
3843
),

scrapers/GayWire/GayWire.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ def gaywire(obj: Any, _) -> Any:
4040
"url",
4141
lambda x: x.replace("www.bangbros.com", "gaywire.com"),
4242
)
43+
fixed = replace_all(
44+
obj,
45+
"urls",
46+
lambda x: x.replace("www.bangbros.com", "gaywire.com"),
47+
)
4348

4449
# Rename certain studios according to the map
4550
fixed = replace_at(

scrapers/KBProductions/KBProductions.py

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,14 @@
7777
}
7878

7979

80+
def feetinches_to_cm(feet, inches):
81+
return str(round((float(feet) * 12 + float(inches)) * 2.54))
82+
83+
84+
def lbs_to_kg(lbs):
85+
return str(round(float(lbs) / 2.2046))
86+
87+
8088
def clean_url(url: str) -> str:
8189
# remove any query parameters
8290
return re.sub(r"\?.*", "", url)
@@ -168,8 +176,8 @@ def get_studio(site: str) -> ScrapedStudio:
168176

169177
def to_scraped_performer(raw_performer: dict) -> ScrapedPerformer:
170178
# Convert dict keys to lower case because, of course, they can come in differently depending on studio.
171-
raw_performer = {key.lower():value for key,value in raw_performer.items()}
172-
179+
raw_performer = {key.lower(): value for key, value in raw_performer.items()}
180+
173181
# Studios that do not use units for measurements, but are obviously not metric.
174182
STUDIO_USES_IMPERIAL = [
175183
"joeschmoevideos.com",
@@ -179,15 +187,17 @@ def to_scraped_performer(raw_performer: dict) -> ScrapedPerformer:
179187
performer: ScrapedPerformer = {
180188
"name": raw_performer["name"],
181189
"gender": raw_performer["gender"],
182-
"url": make_performer_url(raw_performer["slug"], raw_performer["site_domain"]),
190+
"urls": [
191+
make_performer_url(raw_performer["slug"], raw_performer["site_domain"])
192+
],
183193
"tags": [],
184194
}
185195

186196
if image := raw_performer.get("thumb"):
187-
performer["image"] = image
197+
performer["images"] = [image]
188198
elif image := raw_performer.get("thumbnail"):
189-
image = re.sub(r'^//','https://',image)
190-
performer["image"] = image
199+
image = re.sub(r"^//", "https://", image)
200+
performer["images"] = [image]
191201

192202
if bio := raw_performer.get("bio"):
193203
performer["details"] = strip_tags(bio)
@@ -209,7 +219,7 @@ def to_scraped_performer(raw_performer: dict) -> ScrapedPerformer:
209219
if (height_ft := raw_performer.get("height")) and (
210220
h := re.match(r"(\d+)\D+(\d+).+", height_ft)
211221
):
212-
height_cm = feetinches_to_cm(h.group(1),h.group(2))
222+
height_cm = feetinches_to_cm(h.group(1), h.group(2))
213223
performer["height"] = str(height_cm)
214224
elif (height_m := raw_performer.get("height")) and (
215225
h := re.match(r"^(\d\.\d\d)$", height_m)
@@ -234,12 +244,20 @@ def to_scraped_performer(raw_performer: dict) -> ScrapedPerformer:
234244
elif (weight_nounits := raw_performer.get("weight")) and (
235245
w := re.match(r"^([\d\.]+)$", weight_nounits)
236246
):
237-
performer["weight"] = lbs_to_kg(w.group(1)) if raw_performer["site_domain"] in STUDIO_USES_IMPERIAL else str(w.group(1))
247+
performer["weight"] = (
248+
lbs_to_kg(w.group(1))
249+
if raw_performer["site_domain"] in STUDIO_USES_IMPERIAL
250+
else str(w.group(1))
251+
)
238252

239-
if (penis_nounits:= raw_performer.get("dick size")) and (
253+
if (penis_nounits := raw_performer.get("dick size")) and (
240254
s := re.match(r"^([\d\.]+)$", penis_nounits)
241255
):
242-
performer["penis_length"] = feetinches_to_cm(0,s.group(1)) if raw_performer["site_domain"] in STUDIO_USES_IMPERIAL else str(s.group(1))
256+
performer["penis_length"] = (
257+
feetinches_to_cm(0, s.group(1))
258+
if raw_performer["site_domain"] in STUDIO_USES_IMPERIAL
259+
else str(s.group(1))
260+
)
243261

244262
if circumcised := raw_performer.get("cut / uncut"):
245263
performer["circumcised"] = circumcised.capitalize()
@@ -309,7 +327,7 @@ def to_scraped_scene_from_content(raw_scene: dict) -> ScrapedScene:
309327
{
310328
"name": x["name"],
311329
"image": x["thumb"],
312-
"url": make_performer_url(x["slug"], site),
330+
"urls": [make_performer_url(x["slug"], site)],
313331
}
314332
for x in models
315333
]
@@ -404,14 +422,6 @@ def scrape_performer(url: str) -> ScrapedPerformer | None:
404422
return to_scraped_performer(props["model"])
405423

406424

407-
def feetinches_to_cm(feet,inches):
408-
return(str(round((float(feet) * 12 + float(inches)) * 2.54)))
409-
410-
411-
def lbs_to_kg(lbs):
412-
return(str(round(float(lbs) / 2.2046)))
413-
414-
415425
if __name__ == "__main__":
416426
op, args = scraper_args()
417427

@@ -426,4 +436,5 @@ def lbs_to_kg(lbs):
426436
sys.exit(1)
427437

428438
result = replace_all(result, "url", fix_url) # type: ignore
439+
result = replace_all(result, "urls", fix_url) # type: ignore
429440
print(json.dumps(result))

scrapers/Men/Men.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,13 @@ def men(obj: Any, _) -> Any:
4242
.replace("/model/", model)
4343
.replace("men.com", domain),
4444
)
45+
fixed = replace_all(
46+
fixed,
47+
"urls",
48+
lambda x: x.replace("/scene/", scene)
49+
.replace("/model/", model)
50+
.replace("men.com", domain),
51+
)
4552

4653
return fixed
4754

0 commit comments

Comments
 (0)