Skip to content

Commit 2ca1d99

Browse files
Add Coomer scraper (#2129)
* Scraper for coomer.su * remove LICENSE file * Add sceneByURL and clean_text function * Add studio scraping * Adding Performer...somehow managed to forget that. * Added headers for requests (thanks to @feederbox826 for suggestion)
1 parent 322f664 commit 2ca1d99

File tree

2 files changed

+138
-0
lines changed

2 files changed

+138
-0
lines changed

scrapers/Coomer/Coomer.py

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
import sys
2+
import json
3+
import hashlib
4+
import stashapi.log as log
5+
import requests
6+
import re
7+
from bs4 import BeautifulSoup as bs
8+
9+
# TODO: Enable searching from other fields?

# Request headers sent with every coomer.su API call.  A browser-like
# User-Agent plus a Referer, added per @feederbox826's suggestion —
# presumably needed for the API to accept the requests (TODO: confirm).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0',
    'Referer': 'https://coomer.su/search_hash'
}
15+
16+
def debugPrint(t):
    """Write *t* (plus a newline) to stderr so it lands in the scraper log."""
    print(t, file=sys.stderr)
18+
19+
# Get JSON from Stash
def readJSONInput():
    """Read the scraper-input JSON that Stash pipes in on stdin.

    Returns:
        The decoded JSON value (typically a dict).

    Raises:
        json.JSONDecodeError: if stdin does not hold valid JSON.
    """
    # named 'raw' so we don't shadow the 'input' builtin
    raw = sys.stdin.read()
    return json.loads(raw)
23+
24+
def clean_text(details: str) -> str:
    """
    Remove escaped backslashes and HTML-parse the details text.

    Backslashes are stripped, <br> and <p> tags are converted to
    newlines (bs.get_text() drops them silently otherwise), remaining
    markup is removed, and runs of spaces are collapsed per line.

    Falsy input (None or "") is returned unchanged.
    """
    if details:
        details = re.sub(r"\\", "", details)
        # bs.get_text doesnt replace br's with \n
        details = re.sub(r"<\s*/?br\s*/?\s*>", "\n", details)
        details = re.sub(r'</?p>', '\n', details)
        details = bs(details, features='html.parser').get_text()
        # Remove leading/trailing/double spaces on each line.
        # (Original wrapped `details` in a pointless ''.join() — it is
        # already a str, so that was a no-op and is dropped here.)
        details = '\n'.join(
            ' '.join(s for s in line.strip(' ').split(' ') if s != '')
            for line in details.split('\n')
        )
        details = details.strip()
    return details
43+
44+
def post_query(service, user, id):
    """Fetch one post from the coomer.su API and map it to a Stash scene.

    Args:
        service: Source service slug ("onlyfans", "fansly", "candfans", ...).
        user: Creator's user name/id on that service.
        id: The post id on coomer.su.

    Returns:
        A scene dict (Title/Date/URL/Details/Studio/Performers) on success,
        or None when the API does not answer with HTTP 200.
    """
    coomer_getpost_url = f"https://coomer.su/api/v1/{service}/user/{user}/post/{id}"
    # timeout so a stalled API call cannot hang the scraper indefinitely
    post_lookup_response = requests.get(coomer_getpost_url, headers=headers, timeout=30)

    if post_lookup_response.status_code != 200:
        # Explicit return instead of falling off the end with implicit None.
        debugPrint(f'Response: {str(post_lookup_response.status_code)} \n Text: {str(post_lookup_response.text)}')
        return None

    data = post_lookup_response.json()
    log.debug(data)
    post = data['post']

    # Map each supported service to the creator's homepage; unknown
    # services get a studio with only a name.
    service_urls = {
        "onlyfans": f"https://onlyfans.com/{user}",
        "fansly": f"https://fansly.com/{user}",
        "candfans": f"https://candfans.com/{user}",
    }
    studio = {"Name": user}
    if service in service_urls:
        studio["URL"] = service_urls[service]
    else:
        debugPrint("No service listed")

    out = {
        "Title": post['title'],
        # 'published' is an ISO-ish timestamp; keep only YYYY-MM-DD
        "Date": post['published'][:10],
        "URL": f"https://coomer.su/{post['service']}/user/{post['user']}/post/{post['id']}",
        "Details": clean_text(post['content']),
        "Studio": studio,
        "Performers": [{"Name": user}],
    }
    log.debug(out)
    return out
75+
76+
def get_scene(inputurl):
    """Parse service/user/post-id out of a coomer.su post URL and scrape it."""
    debugPrint(inputurl)
    match = re.search(r'/(\w+?)/user/(.+?)/post/(\d+)', inputurl)
    if not match:
        # Guard clause: bail out early when the URL has no recognizable post id.
        debugPrint('No post ID found in URL. Please make sure you are using the correct URL.')
        sys.exit()
    service, user, post_id = match.groups()
    return post_query(service, user, post_id)
88+
89+
def sceneByFragment(fragment):
    """Look up a scene by the sha256 hash of its first file.

    Args:
        fragment: The "files" list from the Stash scene fragment; only the
            first entry's "path" is used.

    Returns:
        A scene dict from post_query() on success, otherwise None.
    """
    file = fragment[0]
    digest = hashlib.sha256()
    # Hash in 1 MiB chunks instead of reading the whole (potentially huge)
    # video file into memory at once; also avoids shadowing builtin 'bytes'.
    with open(file["path"], "rb") as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            digest.update(chunk)
    readable_hash = digest.hexdigest()
    log.debug(f"sha256 hash: {readable_hash}")

    coomer_searchhash_url = "https://coomer.su/api/v1/search_hash/"

    # timeout so a stalled API call cannot hang the scraper indefinitely
    hash_lookup_response = requests.get(coomer_searchhash_url + str(readable_hash), headers=headers, timeout=30)

    if hash_lookup_response.status_code != 200:
        debugPrint("The hash of the file was not found. Please make sure you are using an original file.")
        return None

    data = hash_lookup_response.json()
    posts = data['posts']
    if not posts:
        # A 200 with an empty result list would otherwise crash on [0].
        debugPrint("The hash of the file was not found. Please make sure you are using an original file.")
        return None
    # Not sure why there would be more than one result, we'll just use the first one
    post = posts[0]
    return post_query(post['service'], post['user'], post['id'])
108+
109+
110+
111+
# Script entry point: Stash invokes this file as
#   python Coomer.py <sceneByURL|sceneByFragment>
# with the fragment JSON on stdin and expects the scene JSON on stdout.
if len(sys.argv) < 2:
    # Original crashed with IndexError here when the mode arg was missing.
    debugPrint("Missing operation argument (sceneByURL or sceneByFragment)")
    sys.exit(1)

if sys.argv[1] == 'sceneByURL':
    i = readJSONInput()
    log.debug(i)
    ret = get_scene(i.get('url'))
    log.debug(f"Returned from search: {json.dumps(ret)}")
    print(json.dumps(ret))
elif sys.argv[1] == 'sceneByFragment':
    i = readJSONInput()
    log.debug(f"Existing scene data: {json.dumps(i)}")
    ret = sceneByFragment(i["files"])
    log.debug(f"Returned from search: {json.dumps(ret)}")
    print(json.dumps(ret))

scrapers/Coomer/Coomer.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Stash scraper definition for coomer.su (runs Coomer.py in this directory).
name: Coomer
# Scrape a scene directly from a coomer.su post URL.
sceneByURL:
  - action: script
    url:
      - coomer.su/
    script:
      - python
      - Coomer.py
      - sceneByURL
# Fallback: match the scene's file hash against coomer's search_hash index.
sceneByFragment:
  action: script
  script:
    - python
    - Coomer.py
    - sceneByFragment

0 commit comments

Comments
 (0)