22import requests
33import re
44import json
5- from py_common .util import guess_nationality
5+ from py_common .util import guess_nationality , scraper_args
6+ from py_common .types import ScrapedScene , ScrapedPerformer , ScrapedGroup
7+ from py_common import log
8+ from py_common .config import get_config
69from datetime import datetime
710from bs4 import BeautifulSoup as bs
811
# Authentication tokens and cookies are required by the Ersties API.
# They are read from the scraper's py_common config (presumably created
# from the template below on first run — TODO confirm get_config semantics).
config = get_config(default="""
# Ersties auth configuration
# Use the network console in your browsers developer tools to find this information in an api call header.
AUTHORIZATION =
COOKIE =
X_VISIT_UID =
""")

#Headers for Requests
# Sent with every API call; all three values come from an authenticated
# browser session (see the instructions in the config template above).
scrape_headers = {
    'authorization': config.AUTHORIZATION,
    'cookie': config.COOKIE,
    'x-visit-uid': config.X_VISIT_UID,
}
3026def clean_text (details : str ) -> str :
3127 """
3228 remove escaped backslashes and html parse the details text
@@ -47,20 +43,22 @@ def clean_text(details: str) -> str:
4743 details = details .strip ()
4844 return details
4945
def get_scene(inputurl) -> ScrapedScene:
    """Scrape a single Ersties scene from its URL.

    The scene ID is taken from the URL fragment (``#play-<id>`` or
    ``#play-<id>-comments``) and looked up against the Ersties API.
    Exits the process if no ID is present or the API call fails
    (e.g. missing/expired auth headers).
    """

    # Try to extract the scene ID from URLs like:
    # ...#play-6138
    # ...#play-6138-comments
    match = re.search(r'#play-(\d+)(?:-comments)?', inputurl)

    # Check if the pattern was found and save it as a variable
    if match:
        sceneid = match.group(1)
    else:
        log.error('No scene ID found in URL. Please make sure the URL contains "#play-<id>".')
        sys.exit()

    # Build URL to scrape
    scrape_url = 'https://api.ersties.com/videos/' + sceneid

    #Scrape URL
    scrape = requests.get(scrape_url, headers=scrape_headers)

    #Parse response
    #Check for valid response
    if scrape.status_code == 200:
        scrape_data = scrape.json()

        ret: ScrapedScene = {}
        ret['code'] = str(scrape_data.get('id', ''))
        ret['tags'] = [{'name': x.get('name_en', '')} for x in scrape_data.get('tags', [])]

        gallery = scrape_data.get('gallery') or {}
        gallery_title = gallery.get('title_en') or gallery.get('title')
        scene_title = scrape_data.get('title_en') or scrape_data.get('title')
        if gallery_title and scene_title:
            ret['title'] = f"{gallery_title}: {scene_title}"
        elif gallery_title or scene_title:
            # Fix: previously no title at all was set when only one of the
            # two titles was available — fall back to whichever exists.
            ret['title'] = scene_title or gallery_title
        ret['details'] = clean_text(str(gallery.get('description_en', '')))
        ret['studio'] = {'name': 'Ersties'}
        ret['performers'] = [
            {
                'name': model.get('name_en', ''),
                'details': model.get('description_en', ''),
                'urls': [f'https://ersties.com/profile/{model.get("id")}'],
                'images': [
                    f'https://thumb.ersties.com/format=jpeg/content/images_mysql/Model_Cover_Image/backup/{model.get("thumbnail", "")}'
                ],
            }
            for model in scrape_data.get('participated_models', [])
        ]

        # Main image
        for thumbnail in scrape_data.get('thumbnails', []):
            if thumbnail.get('is_main'):
                ret['image'] = f"https://thumb.ersties.com/format=jpeg/content/images_mysql/images_videothumbnails/backup/{thumbnail.get('file_name', '')}"
                break
        # Date (scene + group) — the API returns an epoch timestamp;
        # anything else (e.g. null) leaves both dates unset.
        epoch_time = gallery.get('available_since')
        group_date = None
        if isinstance(epoch_time, int):
            group_date = datetime.fromtimestamp(epoch_time).strftime("%Y-%m-%d")
            ret['date'] = group_date

        # The containing gallery ("shoot") is exposed as a Stash group.
        ret['groups'] = [{
            'name': gallery.get('title_en', ''),
            'synopsis': clean_text(str(gallery.get('description_en', ''))),
            'studio': {'name': 'Ersties'},
            'urls': [f'https://ersties.com/shoot/{gallery.get("id", "")}'],
            'front_image': f"https://thumb.ersties.com/format=jpeg/content/images_mysql/Shoot_Cover/{gallery.get('image', '')}",
            'date': group_date,
        }]

    else:
        # Fix: restore the space after "Response:" for consistency with
        # the identical message in get_group.
        log.error(f"Response: {scrape.status_code}. Please check your auth header.")
        sys.exit()
    return ret
103119
def get_group(inputurl) -> ScrapedGroup:
    """Scrape an Ersties shoot (gallery) as a Stash group.

    The shoot ID is taken from a URL ending in ``/shoot/<id>`` and looked
    up against the Ersties galleries API. Exits the process if the URL
    does not match or the API call fails.
    """
    # Fix: the original matched the URL twice (once to set a 'urltype'
    # flag, once to capture the ID), leaving 'groupid'/'urltype' potentially
    # unbound on divergent paths. A single anchored match does both jobs.
    match = re.search(r'/shoot/(\d+)$', inputurl)
    if match:
        groupid = match.group(1)
    else:
        log.error('No shoot ID found in URL. Please make sure you are using the correct URL.')
        sys.exit()

    # Build URL to scrape group
    scrape_url = 'https://api.ersties.com/galleries/' + groupid

    # Scrape URL
    scrape = requests.get(scrape_url, headers=scrape_headers)

    # Parse response
    # Check for valid response
    if scrape.status_code == 200:
        scrape_data = scrape.json()

        ret: ScrapedGroup = {}

        ret['name'] = scrape_data.get('title_en', '')
        ret['synopsis'] = clean_text(str(scrape_data.get('description_en', '')))
        ret['studio'] = {'name': 'Ersties'}
        ret['front_image'] = f"https://thumb.ersties.com/format=jpeg/content/images_mysql/Shoot_Cover/{scrape_data.get('image', '')}"
        # Get Date — only set when the API returns an epoch timestamp.
        epoch_time = scrape_data.get('available_since')
        if isinstance(epoch_time, int):
            ret['date'] = datetime.fromtimestamp(epoch_time).strftime("%Y-%m-%d")
    else:
        log.error(f"Response: {scrape.status_code}. Please check your auth header.")
        sys.exit()
    return ret
144157
145- def get_performer (inputurl ):
158+ def get_performer (inputurl ) -> ScrapedPerformer :
146159 # Use a regular expression to extract the number after '#play-' and before '-comments'
147160 match = re .search (r'profile/(\d+)' , inputurl )
148161
149162 # Check if the pattern was found and save it as a variable
150163 if match :
151164 groupid = match .group (1 )
152165 else :
153- debugPrint ('No performer ID found in URL. Please make sure you are using the ULR ending with "profile/nnnn".' )
166+ log . error ('No performer ID found in URL. Please make sure you are using the ULR ending with "profile/nnnn".' )
154167 sys .exit ()
155168
156169 #Build URL to scrape group
@@ -164,29 +177,30 @@ def get_performer(inputurl):
164177 if scrape .status_code == 200 :
165178 scrape_data = scrape .json ()
166179
167- ret = {}
180+ ret : ScrapedPerformer = {
181+ "name" : scrape_data ['name_en' ],
182+ "details" : scrape_data ['description_en' ],
183+ "image" : f'https://thumb.ersties.com/width=510,height=660,fit=cover,quality=85,sharpen=1,format=jpeg/content/images_mysql/Model_Cover_Image/backup/' + scrape_data ['thumbnail' ]
184+ }
168185
169- ret ['name' ] = scrape_data ['name_en' ]
170186 if scrape_data ['location_en' ] is not None :
171187 ret ['country' ] = guess_nationality (scrape_data ['location_en' ])
172- ret ['details' ] = scrape_data ['description_en' ]
173- ret ['image' ] = f'https://thumb.ersties.com/width=510,height=660,fit=cover,quality=85,sharpen=1,format=jpeg/content/images_mysql/Model_Cover_Image/backup/' + scrape_data ['thumbnail' ]
174188 else :
175- debugPrint ('No performer ID found in URL. Please make sure you are using the ULR ending with "profile/nnnn".' )
189+ log . error ('No performer ID found in URL. Please make sure you are using the ULR ending with "profile/nnnn".' )
176190 sys .exit ()
177191 return ret
178192
if __name__ == '__main__':
    op, args = scraper_args()
    result = None
    # Each supported operation maps to a scraper function taking one URL.
    handlers = {
        'scene-by-url': get_scene,
        'group-by-url': get_group,
        'performer-by-url': get_performer,
    }
    handler = handlers.get(op)
    url = args.get('url') if isinstance(args, dict) else None
    if handler is not None and url:
        result = handler(url)
    else:
        log.debug(f'Unknown operation {op} with arguments {args}')
        sys.exit(1)
    print(json.dumps(result))
0 commit comments