Update script scrapers that return performer URL

Maista6969 · Maista6969 · commit 91ba5b78f0dd · 2024-12-11T02:35:07.000+01:00
Changes in Stash v0.27 mean that we now want to return arrays of URLs
for both scenes and performers, and arrays of images for performers.
This updates (hopefully) all affected script scrapers.
diff --git a/scrapers/AyloAPI/scrape.py b/scrapers/AyloAPI/scrape.py
@@ -313,7 +313,7 @@ def to_scraped_performer(
         performer["tags"] = tags
 
     if site:
-        performer["url"] = _construct_performer_url(performer_from_api, site)
+        performer["urls"] = [_construct_performer_url(performer_from_api, site)]
 
     return performer
 
diff --git a/scrapers/BangBros/BangBros.py b/scrapers/BangBros/BangBros.py
@@ -45,6 +45,11 @@ def bangbros(obj: Any, _) -> Any:
         "url",
         lambda x: x.replace("/scene/", "/video/").replace("www.bangbros.com", domain),
     )
+    fixed = replace_all(
+        fixed,  # chain from the preceding "url" pass so its rewrites are kept
+        "urls",
+        lambda x: x.replace("/scene/", "/video/").replace("www.bangbros.com", domain),
+    )
 
     # Rename certain studios according to the map
     fixed = replace_at(
diff --git a/scrapers/Brazzers/Brazzers.py b/scrapers/Brazzers/Brazzers.py
@@ -30,6 +30,11 @@ def brazzers(obj: Any, _) -> Any:
         "url",
         lambda x: x.replace("/scene/", "/video/").replace("/model/", "/pornstar/"),
     )
+    fixed = replace_all(
+        fixed,  # chain from the preceding "url" pass so its rewrites are kept
+        "urls",
+        lambda x: x.replace("/scene/", "/video/").replace("/model/", "/pornstar/"),
+    )
 
     # Rename certain studios according to the map
     fixed = replace_at(
diff --git a/scrapers/CzechHunter/CzechHunter.py b/scrapers/CzechHunter/CzechHunter.py
@@ -31,6 +31,7 @@ def czechhunter(obj: Any, _) -> Any:
 
     # Replace the studio name in all URLs
     fixed = replace_all(obj, "url", lambda x: x.replace("bigstr.com", replacement))
+    fixed = replace_all(fixed, "urls", lambda x: x.replace("bigstr.com", replacement))
 
     return fixed
 
diff --git a/scrapers/Deviante/Deviante.py b/scrapers/Deviante/Deviante.py
@@ -49,6 +49,11 @@ def deviante(obj: Any, _) -> Any:
         "url",
         lambda x: x.replace("/scene/", "/video/").replace("deviante.com", replacement),
     )
+    fixed = replace_all(
+        fixed,
+        "urls",
+        lambda x: x.replace("/scene/", "/video/").replace("deviante.com", replacement),
+    )
 
     return fixed
 
diff --git a/scrapers/DigitalPlayground/DigitalPlayground.py b/scrapers/DigitalPlayground/DigitalPlayground.py
@@ -2,7 +2,7 @@
 import sys
 from typing import Any
 from py_common import log
-from py_common.util import replace_at, replace_all
+from py_common.util import replace_all
 from AyloAPI.scrape import (
     gallery_from_url,
     scraper_args,
@@ -18,14 +18,14 @@
 studio_map = {
     "DP Parody": "DP Parodies",
     "dpw": "DP World",
-    "Dpstar Sex Challenges": "Sex Challenges",	
+    "Dpstar Sex Challenges": "Sex Challenges",
     "Episodes": "Digital Playground Episodes",
 }
 
 
 def digitalplayground(obj: Any, _) -> Any:
     fixed = replace_all(obj, "name", replacement=lambda x: studio_map.get(x, x))
-    fixed = replace_all(fixed, "url", lambda x: x.replace("/model/", "/modelprofile/"))
+    fixed = replace_all(fixed, "urls", lambda x: x.replace("/model/", "/modelprofile/"))
 
     return fixed
 
diff --git a/scrapers/FakeHub/FakeHub.py b/scrapers/FakeHub/FakeHub.py
@@ -33,6 +33,11 @@ def fakehub(obj: Any, _) -> Any:
     fixed = replace_all(
         obj,
         "url",
+        lambda x: x.replace("fakehub.com", replacement),
+    )
+    fixed = replace_all(
+        fixed,
+        "urls",
         lambda x: x.replace("/model/", "/modelprofile/").replace(
             "fakehub.com", replacement
         ),
diff --git a/scrapers/GayWire/GayWire.py b/scrapers/GayWire/GayWire.py
@@ -40,6 +40,11 @@ def gaywire(obj: Any, _) -> Any:
         "url",
         lambda x: x.replace("www.bangbros.com", "gaywire.com"),
     )
+    fixed = replace_all(
+        fixed,  # chain from the preceding "url" pass so its rewrites are kept
+        "urls",
+        lambda x: x.replace("www.bangbros.com", "gaywire.com"),
+    )
 
     # Rename certain studios according to the map
     fixed = replace_at(
diff --git a/scrapers/KBProductions/KBProductions.py b/scrapers/KBProductions/KBProductions.py
@@ -77,6 +77,14 @@
 }
 
 
+def feetinches_to_cm(feet, inches):
+    return str(round((float(feet) * 12 + float(inches)) * 2.54))
+
+
+def lbs_to_kg(lbs):
+    return str(round(float(lbs) / 2.2046))
+
+
 def clean_url(url: str) -> str:
     # remove any query parameters
     return re.sub(r"\?.*", "", url)
@@ -168,8 +176,8 @@ def get_studio(site: str) -> ScrapedStudio:
 
 def to_scraped_performer(raw_performer: dict) -> ScrapedPerformer:
     # Convert dict keys to lower case because, of couse, they can come in differently depending on studio.
-    raw_performer = {key.lower():value for key,value in raw_performer.items()}
-    
+    raw_performer = {key.lower(): value for key, value in raw_performer.items()}
+
     # Studios that do not use units for measurements, but are obviously not metric.
     STUDIO_USES_IMPERIAL = [
         "joeschmoevideos.com",
@@ -179,15 +187,17 @@ def to_scraped_performer(raw_performer: dict) -> ScrapedPerformer:
     performer: ScrapedPerformer = {
         "name": raw_performer["name"],
         "gender": raw_performer["gender"],
-        "url": make_performer_url(raw_performer["slug"], raw_performer["site_domain"]),
+        "urls": [
+            make_performer_url(raw_performer["slug"], raw_performer["site_domain"])
+        ],
         "tags": [],
     }
 
     if image := raw_performer.get("thumb"):
-        performer["image"] = image
+        performer["images"] = [image]
     elif image := raw_performer.get("thumbnail"):
-        image = re.sub(r'^//','https://',image)
-        performer["image"] = image
+        image = re.sub(r"^//", "https://", image)
+        performer["images"] = [image]
 
     if bio := raw_performer.get("bio"):
         performer["details"] = strip_tags(bio)
@@ -209,7 +219,7 @@ def to_scraped_performer(raw_performer: dict) -> ScrapedPerformer:
     if (height_ft := raw_performer.get("height")) and (
         h := re.match(r"(\d+)\D+(\d+).+", height_ft)
     ):
-        height_cm = feetinches_to_cm(h.group(1),h.group(2))
+        height_cm = feetinches_to_cm(h.group(1), h.group(2))
         performer["height"] = str(height_cm)
     elif (height_m := raw_performer.get("height")) and (
         h := re.match(r"^(\d\.\d\d)$", height_m)
@@ -234,12 +244,20 @@ def to_scraped_performer(raw_performer: dict) -> ScrapedPerformer:
     elif (weight_nounits := raw_performer.get("weight")) and (
         w := re.match(r"^([\d\.]+)$", weight_nounits)
     ):
-        performer["weight"] = lbs_to_kg(w.group(1)) if raw_performer["site_domain"] in STUDIO_USES_IMPERIAL else str(w.group(1))
+        performer["weight"] = (
+            lbs_to_kg(w.group(1))
+            if raw_performer["site_domain"] in STUDIO_USES_IMPERIAL
+            else str(w.group(1))
+        )
 
-    if (penis_nounits:= raw_performer.get("dick size")) and (
+    if (penis_nounits := raw_performer.get("dick size")) and (
         s := re.match(r"^([\d\.]+)$", penis_nounits)
     ):
-        performer["penis_length"] = feetinches_to_cm(0,s.group(1)) if raw_performer["site_domain"] in STUDIO_USES_IMPERIAL else str(s.group(1))
+        performer["penis_length"] = (
+            feetinches_to_cm(0, s.group(1))
+            if raw_performer["site_domain"] in STUDIO_USES_IMPERIAL
+            else str(s.group(1))
+        )
 
     if circumcised := raw_performer.get("cut / uncut"):
         performer["circumcised"] = circumcised.capitalize()
@@ -309,7 +327,7 @@ def to_scraped_scene_from_content(raw_scene: dict) -> ScrapedScene:
             {
                 "name": x["name"],
                 "image": x["thumb"],
-                "url": make_performer_url(x["slug"], site),
+                "urls": [make_performer_url(x["slug"], site)],
             }
             for x in models
         ]
@@ -404,14 +422,6 @@ def scrape_performer(url: str) -> ScrapedPerformer | None:
     return to_scraped_performer(props["model"])
 
 
-def feetinches_to_cm(feet,inches):
-    return(str(round((float(feet) * 12 + float(inches)) * 2.54)))
-
-
-def lbs_to_kg(lbs):
-    return(str(round(float(lbs) / 2.2046)))
-
-
 if __name__ == "__main__":
     op, args = scraper_args()
 
@@ -426,4 +436,5 @@ def lbs_to_kg(lbs):
             sys.exit(1)
 
     result = replace_all(result, "url", fix_url)  # type: ignore
+    result = replace_all(result, "urls", fix_url)  # type: ignore
     print(json.dumps(result))
diff --git a/scrapers/Men/Men.py b/scrapers/Men/Men.py
@@ -42,6 +42,13 @@ def men(obj: Any, _) -> Any:
         .replace("/model/", model)
         .replace("men.com", domain),
     )
+    fixed = replace_all(
+        fixed,
+        "urls",
+        lambda x: x.replace("/scene/", scene)
+        .replace("/model/", model)
+        .replace("men.com", domain),
+    )
 
     return fixed
 
diff --git a/scrapers/MileHighMedia_BiandTrans/MileHighMedia_BiandTrans.py b/scrapers/MileHighMedia_BiandTrans/MileHighMedia_BiandTrans.py
@@ -35,6 +35,7 @@ def milehigh(obj: Any, _) -> Any:
     # Replace the studio name in all URLs: even if there's no specific studio,
     # milehigh.com is wrong and needs to be replaced with milehighmedia.com
     fixed = replace_all(fixed, "url", lambda x: x.replace("milehigh.com", replacement))
+    fixed = replace_all(fixed, "urls", lambda x: x.replace("milehigh.com", replacement))
 
     return fixed
 
diff --git a/scrapers/MileHighMedia_Gay/MileHighMedia_Gay.py b/scrapers/MileHighMedia_Gay/MileHighMedia_Gay.py
@@ -29,6 +29,7 @@ def milehigh(obj: Any, _) -> Any:
     # Replace the studio name in all URLs: even if there's no specific studio,
     # milehigh.com is wrong and needs to be replaced with milehighmedia.com
     fixed = replace_all(obj, "url", lambda x: x.replace("milehigh.com", replacement))
+    fixed = replace_all(fixed, "urls", lambda x: x.replace("milehigh.com", replacement))
 
     return fixed
 
diff --git a/scrapers/MileHighMedia_Straight/MileHighMedia_Straight.py b/scrapers/MileHighMedia_Straight/MileHighMedia_Straight.py
@@ -50,6 +50,7 @@ def milehigh(obj: Any, _) -> Any:
     # Replace the studio name in all URLs: even if there's no specific studio,
     # milehigh.com is wrong and needs to be replaced with milehighmedia.com
     fixed = replace_all(fixed, "url", lambda x: x.replace("milehigh.com", replacement))
+    fixed = replace_all(fixed, "urls", lambda x: x.replace("milehigh.com", replacement))
 
     return fixed
 
diff --git a/scrapers/Minnano-AV/Minnano-AV.py b/scrapers/Minnano-AV/Minnano-AV.py
@@ -229,7 +229,7 @@ def performer_by_url(url):
 
     if favorite_form_url := get_xpath_result(tree, XPATHS["id"]):
         if match := re.search(REGEXES["id"], favorite_form_url):
-            scrape["url"] = FORMATS["url"].format(PERFORMER_ID=match[0])
+            scrape["urls"] = [FORMATS["url"].format(PERFORMER_ID=match[0])]
         else:
             log.debug("URL XPath matched, but no value found.")
 
diff --git a/scrapers/Mofos/Mofos.py b/scrapers/Mofos/Mofos.py
@@ -39,6 +39,11 @@ def mofos(obj: Any, _) -> Any:
         "url",
         lambda x: x.replace("mofos.com", domain),
     )
+    fixed = replace_all(
+        fixed,
+        "urls",
+        lambda x: x.replace("mofos.com", domain),
+    )
 
     return fixed
 
diff --git a/scrapers/PropertySex/PropertySex.py b/scrapers/PropertySex/PropertySex.py
@@ -29,6 +29,11 @@ def propertysex(obj: Any, _) -> Any:
         "url",
         lambda x: x.replace("propertysex.com", domain),
     )
+    fixed = replace_all(
+        fixed,
+        "urls",
+        lambda x: x.replace("propertysex.com", domain),
+    )
 
     return fixed
 
diff --git a/scrapers/RealityDudes/RealityDudes.py b/scrapers/RealityDudes/RealityDudes.py
@@ -17,18 +17,25 @@
 
 
 def realitydudes(obj: Any, _) -> Any:
+    if dig(obj, "studio", "name") != "Papi":
+        return obj
+
     # Papi still uses the old domain and model URL
-    if dig(obj, "studio", "name") == "Papi":
-        domain = "papi.com"
-        model = "/pornstar/"
+    domain = "papi.com"
+    model = "/pornstar/"
 
-        return replace_all(
-            obj,
-            "url",
-            lambda x: x.replace("realitydudes.com", domain).replace("/model/", model),
-        )
+    fixed = replace_all(
+        obj,
+        "url",
+        lambda x: x.replace("realitydudes.com", domain).replace("/model/", model),
+    )
+    fixed = replace_all(
+        fixed,
+        "urls",
+        lambda x: x.replace("realitydudes.com", domain).replace("/model/", model),
+    )
 
-    return obj
+    return fixed
 
 
 if __name__ == "__main__":
diff --git a/scrapers/RealityKings/RealityKings.py b/scrapers/RealityKings/RealityKings.py
@@ -39,6 +39,11 @@ def rk(obj: Any, _) -> Any:
         "url",
         lambda x: x.replace("realitykings.com", domain),
     )
+    fixed = replace_all(
+        fixed,
+        "urls",
+        lambda x: x.replace("realitykings.com", domain),
+    )
 
     return fixed
 
diff --git a/scrapers/SexyHub/SexyHub.py b/scrapers/SexyHub/SexyHub.py
@@ -35,6 +35,13 @@ def sexyhub(obj: Any, _) -> Any:
             "sexyhub.com", replacement
         ),
     )
+    fixed = replace_all(
+        fixed,  # chain from the preceding "url" pass so its rewrites are kept
+        "urls",
+        lambda x: x.replace("/model/", "/modelprofile/").replace(
+            "sexyhub.com", replacement
+        ),
+    )
 
     return fixed
 
diff --git a/scrapers/py_common/types.py b/scrapers/py_common/types.py