Skip to content

Commit 59aa802

Browse files
Merge pull request #2043 from diamondpete/scraper-changes
Various Site Changes
2 parents 631e79e + 2d51e51 commit 59aa802

13 files changed

+215
-110
lines changed

Contents/Code/PAdatabaseActors.py

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1165,6 +1165,7 @@
11651165
'Timea Bella': ['Luciana'],
11661166
'Traci Lynn': ['Hallee'],
11671167
'Tracy Gold': ['Noleta'],
1168+
'Tristan Summers': ['Tristan Taylor'],
11681169
'Tyra Moon': ['Athina'],
11691170
'Valerie Herrera': ['Valerie'],
11701171
'Vanessa Monroe': ['Vanessa'],
@@ -1596,6 +1597,7 @@
15961597
22: { # Reality Kings
15971598
'Agness Miller': ['Agness'],
15981599
'Morgan Layne': ['Morgan'],
1600+
'Jessica Robbin': ['Jessica Rabbit'],
15991601
},
16001602
23: { # WowGirls
16011603
'Goldie Baby': ['Nikia'],
@@ -6109,13 +6111,23 @@
61096111
69: { # Adult Empire
61106112
'Addison Vodka': ['Addison'],
61116113
'Ariana Grand': ['Ariana'],
6114+
'Carol Foxxx': ['Carol Foxx'],
6115+
'Cherry Torn': ['Cherry Thorn'],
6116+
'Christina Stevens': ['Kristina'],
61126117
'Christine DeShaffer': ['Christine De Shaffer'],
6118+
'Desiree Deluca': ['Desiree'],
61136119
'Graycee Baybee': ['Grace Baybee'],
61146120
'Ice La Fox': ['Ice LaFox'],
6121+
'Jana Malto': ['Jana'],
61156122
'Jesus Reyes Vazquez': ['Jesus Reyes Vazqeuz'],
61166123
'Red Julia': ['Judith'],
6124+
'Kalina Ryu': ['Lily Star'],
6125+
'Kay Carter': ['Kay Karter'],
6126+
'Kitti Skyway': ['Kitty Skyway'],
61176127
'Kya Tropic': ['Kya Clover'],
6128+
'Eva Gomez': ['Kylina'],
61186129
'Lutro Steel': ['Lutro Steele'],
6130+
'Mai Ly': ['Mai Lynn'],
61196131
'Mandarine': ['Alexandrine'],
61206132
'Mazzy Paige': ['Mazzie Paige', 'Mazzi'],
61216133
'Maude Carolle': ['Aude Lecocq'],
@@ -6128,6 +6140,7 @@
61286140
'Suzan Nielsen': ['Susan'],
61296141
'Talon': ['Talon Valenti'],
61306142
'Vanessa D\'Angely': ['Vanessa'],
6143+
'Will Ryder': ['Jeff Mullen'],
61316144
'Zeus': ['Jesus "Zeus"Munoz'],
61326145
},
61336146
70: { # Heavy on Hotties
@@ -6148,6 +6161,9 @@
61486161
},
61496162
71: { # Brazzers
61506163
'Alice Fabre': ['Alice']
6164+
},
6165+
72: { # Kelly Madison Productions
6166+
'Ryan Madison': ['Madison']
61516167
}
61526168
}
61536169

@@ -6224,6 +6240,7 @@
62246240
69: ['Adult Empire'],
62256241
70: ['Heavy on Hotties'],
62266242
71: ['Brazzers'],
6243+
72: ['Kelly Madison', 'TeenFidelity', 'PornFidelity'],
62276244
}
62286245

62296246

Contents/Code/PAsiteList.py

Lines changed: 37 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -373,9 +373,9 @@
373373
181: ('We Live Together', 'https://www.realitykings.com', 'https://site-api.project1service.com'),
374374
182: ('Wives in Pantyhose', 'https://www.realitykings.com', 'https://site-api.project1service.com'),
375375
183: ('21Naturals', 'https://www.21naturals.com', 'https://tsmkfa364q-dsn.algolia.net/1/indexes/*/queries'),
376-
184: ('PornFidelity', 'https://www.pornfidelity.com', '/episodes/search/??site=2&page=1&search='),
377-
185: ('TeenFidelity', 'https://www.pornfidelity.com', '/episodes/search/??site=3&page=1&search='),
378-
186: ('Kelly Madison', 'https://www.pornfidelity.com', '/episodes/search/??site=1&page=1&search='),
376+
184: ('PornFidelity', 'https://www.pornfidelity.com', '/search?q='),
377+
185: ('TeenFidelity', 'https://www.pornfidelity.com', '/search?q='),
378+
186: ('Kelly Madison', 'https://www.pornfidelity.com', '/search?q='),
379379
187: ('TeamSkeet', 'https://www.teamskeet.com', '/movies/'),
380380
188: ('Exxxtra Small', 'https://www.teamskeet.com', '/movies/'),
381381
189: ('Teen Pies', 'https://www.teamskeet.com', '/movies/'),
@@ -1385,39 +1385,39 @@
13851385
1193: ('Over 40 Handjobs', 'https://www.over40handjobs.com', '/models/'),
13861386
1194: ('Ebony Tugs', 'https://ebonytugs.com', '/models/'),
13871387
1195: ('Teen Tugs', 'https://teentugs.com', '/models/'),
1388-
1196: ('Czech Sex Casting', 'https://www.czechsexcasting.com', '/en/search-results?value='),
1389-
1197: ('Sex With Muslims', 'https://www.sexwithmuslims.com', '/en/search-results?value='),
1390-
1198: ('Sex In Taxi', 'https://www.sexintaxi.com', '/en/search-results?value='),
1391-
1199: ('VR Porn CZ', 'https://www.vrporncz.com', '/en/search-results?value='),
1392-
1200: ('Fucking Street', 'https://www.fuckingstreet.com', '/en/search-results?value='),
1393-
1201: ('Hunter POV', 'https://www.hunterpov.com', '/en/search-results?value='),
1394-
1202: ('Czech Gypsies', 'https://www.czechgypsies.com', '/en/search-results?value='),
1395-
1203: ('Dick On Trip', 'https://www.dickontrip.com', '/en/search-results?value='),
1396-
1204: ('Czech Boobs', 'https://www.czechboobs.com', '/en/search-results?value='),
1397-
1205: ('Czech Deviant', 'https://www.czechdeviant.com', '/en/search-results?value='),
1398-
1206: ('Amateri Premium', 'https://www.amateripremium.com', '/en/search-results?value='),
1399-
1207: ('Fucking Office', 'https://www.fuckingoffice.com', '/en/search-results?value='),
1400-
1208: ('Czech Executor', 'https://www.czechexecutor.com', '/en/search-results?value='),
1401-
1209: ('Czech Hitchhikers', 'https://www.czechhitchhikers.com', '/en/search-results?value='),
1402-
1210: ('Girls Take Away', 'https://www.girlstakeaway.com', '/en/search-results?value='),
1403-
1211: ('Czech Escort Girls', 'https://www.czechescortgirls.com', '/en/search-results?value='),
1404-
1212: ('Horny Doctor', 'https://www.hornydoctor.com', '/en/search-results?value='),
1405-
1213: ('Lady Dee', 'https://www.ladydee.com', '/en/search-results?value='),
1406-
1214: ('Teen From Bohemia', 'https://www.teenfrombohemia.com', '/en/search-results?value='),
1407-
1215: ('Czech Real Dolls', 'https://www.czechrealdolls.com', '/en/search-results?value='),
1408-
1216: ('Amateur From Bohemia', 'https://www.amateursfrombohemia.com', '/en/search-results?value='),
1409-
1217: ('Czech Anal Sex', 'https://www.czechanalsex.com', '/en/search-results?value='),
1410-
1218: ('Dellia Twins', 'https://www.dellaitwins.com', '/en/search-results?value='),
1411-
1219: ('Chloe Lamour', 'https://www.chloelamour.com', '/en/search-results?value='),
1412-
1220: ('Public From Bohemia', 'https://www.publicfrombohemia.com', '/en/search-results?value='),
1413-
1221: ('Susan Ayn', 'https://www.susanayn.com', '/en/search-results?value='),
1414-
1222: ('Horny Girls CZ', 'https://www.hornygirlscz.com', '/en/search-results?value='),
1415-
1223: ('Czech Sex Party', 'https://www.czechsexparty.com', '/en/search-results?value='),
1416-
1224: ('Retro Porn CZ', 'https://www.retroporncz.com', '/en/search-results?value='),
1417-
1225: ('Boys Fuck MILFs', 'https://www.boysfuckmilfs.com', '/en/search-results?value='),
1418-
1226: ('Czech Bi Porn', 'https://www.czechbiporn.com', '/en/search-results?value='),
1419-
1227: ('Czech Shemale', 'https://www.czechshemale.com', '/en/search-results?value='),
1420-
1228: ('Czech Gay City', 'https://www.czechgaycity.com', '/en/search-results?value='),
1388+
1196: ('Czech Sex Casting', 'https://www.czechsexcasting.com', '/en/search?q='),
1389+
1197: ('Sex With Muslims', 'https://www.sexwithmuslims.com', '/en/search?q='),
1390+
1198: ('Sex In Taxi', 'https://www.sexintaxi.com', '/en/search?q='),
1391+
1199: ('VR Porn CZ', 'https://www.vrporncz.com', '/en/search?q='),
1392+
1200: ('Fucking Street', 'https://www.fuckingstreet.com', '/en/search?q='),
1393+
1201: ('Hunter POV', 'https://www.hunterpov.com', '/en/search?q='),
1394+
1202: ('Czech Gypsies', 'https://www.czechgypsies.com', '/en/search?q='),
1395+
1203: ('Dick On Trip', 'https://www.dickontrip.com', '/en/search?q='),
1396+
1204: ('Czech Boobs', 'https://www.czechboobs.com', '/en/search?q='),
1397+
1205: ('Czech Deviant', 'https://www.czechdeviant.com', '/en/search?q='),
1398+
1206: ('Amateri Premium', 'https://www.amateripremium.com', '/en/search?q='),
1399+
1207: ('Fucking Office', 'https://www.fuckingoffice.com', '/en/search?q='),
1400+
1208: ('Czech Executor', 'https://www.czechexecutor.com', '/en/search?q='),
1401+
1209: ('Czech Hitchhikers', 'https://www.czechhitchhikers.com', '/en/search?q='),
1402+
1210: ('Girls Take Away', 'https://www.girlstakeaway.com', '/en/search?q='),
1403+
1211: ('Czech Escort Girls', 'https://www.czechescortgirls.com', '/en/search?q='),
1404+
1212: ('Horny Doctor', 'https://www.hornydoctor.com', '/en/search?q='),
1405+
1213: ('Lady Dee', 'https://www.ladydee.com', '/en/search?q='),
1406+
1214: ('Teen From Bohemia', 'https://www.teenfrombohemia.com', '/en/search?q='),
1407+
1215: ('Czech Real Dolls', 'https://www.czechrealdolls.com', '/en/search?q='),
1408+
1216: ('Amateur From Bohemia', 'https://www.amateursfrombohemia.com', '/en/search?q='),
1409+
1217: ('Czech Anal Sex', 'https://www.czechanalsex.com', '/en/search?q='),
1410+
1218: ('Dellia Twins', 'https://www.dellaitwins.com', '/en/search?q='),
1411+
1219: ('Chloe Lamour', 'https://www.chloelamour.com', '/en/search?q='),
1412+
1220: ('Public From Bohemia', 'https://www.publicfrombohemia.com', '/en/search?q='),
1413+
1221: ('Susan Ayn', 'https://www.susanayn.com', '/en/search?q='),
1414+
1222: ('Horny Girls CZ', 'https://www.hornygirlscz.com', '/en/search?q='),
1415+
1223: ('Czech Sex Party', 'https://www.czechsexparty.com', '/en/search?q='),
1416+
1224: ('Retro Porn CZ', 'https://www.retroporncz.com', '/en/search?q='),
1417+
1225: ('Boys Fuck MILFs', 'https://www.boysfuckmilfs.com', '/en/search?q='),
1418+
1226: ('Czech Bi Porn', 'https://www.czechbiporn.com', '/en/search?q='),
1419+
1227: ('Czech Shemale', 'https://www.czechshemale.com', '/en/search?q='),
1420+
1228: ('Czech Gay City', 'https://www.czechgaycity.com', '/en/search?q='),
14211421
1229: ('Top Web Models', 'https://tour.topwebmodels.com', '/scenes'),
14221422
1230: ('Big Gulp Girls', 'https://tour.biggulpgirls.com', '/scenes'),
14231423
1231: ('2 Girls 1 Camera', 'https://tour.2girls1camera.com', '/scenes'),
@@ -1819,7 +1819,7 @@
18191819
1727: ('Teen Sneaks', 'https://adulttime.com', 'https://tsmkfa364q-dsn.algolia.net/1/indexes/*/queries'),
18201820
1728: ('Modern Day Sins', 'https://adulttime.com', 'https://tsmkfa364q-dsn.algolia.net/1/indexes/*/queries'),
18211821
1729: ('Accidental Gangbang', 'https://adulttime.com', 'https://tsmkfa364q-dsn.algolia.net/1/indexes/*/queries'),
1822-
1730: ('Aunt Judys XXX', 'https://auntjudysxxx.com', '/tour/search.php?query='),
1822+
1730: ('Aunt Judys XXX', 'https://auntjudys.com', '/tour/search.php?query='),
18231823
1731: ('Aunt Judys', 'https://auntjudys.com', '/tour/search.php?query='),
18241824
1732: ('Gilfed', 'https://gilfed.com', 'https://site-api.project1service.com'),
18251825
1733: ('Dilfed', 'https://dilfed.com', 'https://site-api.project1service.com'),

Contents/Code/networkAdultEmpireCash.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -114,8 +114,9 @@ def update(metadata, lang, siteNum, movieGenres, movieActors, movieCollections,
114114

115115
# Posters
116116
for poster in detailsPageElements.xpath('//div[@id="dv_frames"]//img/@src'):
117-
img = poster.replace('/320/', '/1280/')
118-
art.append(img)
117+
img = poster.replace('/320/', '/3840/').replace('/10/', '/3840/').replace('_320c.jpg', '_10.jpg')
118+
if img not in art:
119+
art.append(img)
119120

120121
Log('Artwork found: %d' % len(art))
121122
for idx, posterUrl in enumerate(art, 1):

Contents/Code/networkAdultPrime.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -80,10 +80,10 @@ def update(metadata, lang, siteNum, movieGenres, movieActors, movieCollections,
8080
metadata.summary = summary
8181

8282
# Studio
83-
metadata.studio = 'Adult Prime'
83+
metadata.studio = detailsPageElements.xpath('//p[@class="update-info-line regular"][./b[contains(., "Studio")]]//a')[0].text_content().strip()
8484

8585
# Tagline and Collection(s)
86-
tagline = detailsPageElements.xpath('//p[@class="update-info-line regular"][./b[contains(., "Studio")]]//a')[0].text_content().strip()
86+
tagline = detailsPageElements.xpath('//p[@class="update-info-line regular"][./b[contains(., "Series")]]//a')[1].text_content().strip()
8787
metadata.tagline = tagline
8888
movieCollections.addCollection(tagline)
8989

@@ -135,7 +135,11 @@ def update(metadata, lang, siteNum, movieGenres, movieActors, movieCollections,
135135
art.append(img)
136136

137137
Log('Artwork found: %d' % len(art))
138+
postersClean = list()
138139
for idx, posterUrl in enumerate(art, 1):
140+
# Remove Timestamp and Token from URL
141+
cleanUrl = posterUrl.split('?')[0]
142+
postersClean.append(cleanUrl)
139143
if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
140144
# Download image file for analysis
141145
try:
@@ -153,6 +157,8 @@ def update(metadata, lang, siteNum, movieGenres, movieActors, movieCollections,
153157
except:
154158
pass
155159

160+
art.extend(postersClean)
161+
156162
return metadata
157163

158164

Contents/Code/networkBang.py

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -48,13 +48,9 @@ def search(results, lang, siteNum, searchData):
4848
sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
4949
curID = PAutils.Encode(sceneURL)
5050

51-
try:
52-
date = searchResult.xpath('.//span[@class="hidden xs:inline-block truncate"]/text()')[0].strip()
53-
except:
54-
date = ''
55-
51+
date = searchResult.xpath('.//span[@class="hidden xs:inline-block truncate"]')
5652
if date:
57-
releaseDate = datetime.strptime(date, '%b %d, %Y').strftime('%Y-%m-%d')
53+
releaseDate = datetime.strptime(date[0].text_content().split('\xe2\x80\xa2')[-1].strip(), '%b %d, %Y').strftime('%Y-%m-%d')
5854
else:
5955
releaseDate = searchData.dateFormat() if searchData.date else ''
6056
displayDate = releaseDate if date else ''
@@ -175,6 +171,7 @@ def update(metadata, lang, siteNum, movieGenres, movieActors, movieCollections,
175171
for idx, posterUrl in enumerate(art, 1):
176172
if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
177173
# Download image file for analysis
174+
posterUrl = posterUrl.split('?')[0]
178175
try:
179176
image = PAutils.HTTPRequest(posterUrl)
180177
im = StringIO(image.content)

Contents/Code/networkCherryPimps.py

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ def search(results, lang, siteNum, searchData):
88
url = PAsearchSites.getSearchSearchURL(siteNum) + '%s&page=%d' % (searchData.encoded, searchPageNum)
99
req = PAutils.HTTPRequest(url)
1010
searchResults = HTML.ElementFromString(req.text)
11-
for searchResult in searchResults.xpath('//div[contains(@class, "video-thumb") or contains(@class, "item-video")]'):
11+
for searchResult in searchResults.xpath('//div[@class="item-updates"]//div[contains(@class, "item-update")]'):
1212
titleNoFormatting = PAutils.parseTitle(searchResult.xpath('(.//p[@class="text-thumb"] | .//div[@class="item-title"])/a')[0].text_content().strip(), siteNum)
1313
curID = PAutils.Encode(searchResult.xpath('(.//p[@class="text-thumb"] | .//div[@class="item-title"])/a/@href')[0])
1414
subSite = searchResult.xpath('.//p[@class="text-thumb"]/a[@class="badge"] | .//div[@class="item-sitename"]/a')[0].text_content().strip()
@@ -117,8 +117,12 @@ def update(metadata, lang, siteNum, movieGenres, movieActors, movieCollections,
117117
art.append(poster)
118118

119119
Log('Artwork found: %d' % len(art))
120+
postersClean = list()
120121
for idx, posterUrl in enumerate(art, 1):
121-
if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
122+
# Remove Timestamp and Token from URL
123+
cleanUrl = posterUrl.split('?')[0]
124+
postersClean.append(cleanUrl)
125+
if not PAsearchSites.posterAlreadyExists(cleanUrl, metadata):
122126
# Download image file for analysis
123127
try:
124128
image = PAutils.HTTPRequest(posterUrl)
@@ -128,11 +132,13 @@ def update(metadata, lang, siteNum, movieGenres, movieActors, movieCollections,
128132
# Add the image proxy items to the collection
129133
if width > 1:
130134
# Item is a poster
131-
metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
135+
metadata.posters[cleanUrl] = Proxy.Media(image.content, sort_order=idx)
132136
if width > 100:
133137
# Item is an art item
134-
metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
138+
metadata.art[cleanUrl] = Proxy.Media(image.content, sort_order=idx)
135139
except:
136140
pass
137141

142+
art.extend(postersClean)
143+
138144
return metadata

Contents/Code/networkDirtyFlix.py

Lines changed: 34 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -75,9 +75,40 @@ def update(metadata, lang, siteNum, movieGenres, movieActors, movieCollections,
7575
sceneID = PAutils.Decode(metadata_id[0])
7676
sceneDate = metadata_id[2]
7777
searchPageURL = PAutils.Decode(metadata_id[3])
78+
(siteKey, sitePages) = PAutils.getDictValuesFromKey(siteDB, PAsearchSites.getSearchSiteName(siteNum))
7879

7980
req = PAutils.HTTPRequest(searchPageURL)
80-
detailsPageElements = HTML.ElementFromString(req.text).xpath('//div[@class="movie-block"][.//*[contains(@src, "%s")]]' % sceneID)[0]
81+
originalPageElements = HTML.ElementFromString(req.text).xpath('//div[@class="movie-block"][.//*[contains(@src, "%s")]]' % sceneID)
82+
83+
if originalPageElements:
84+
detailsPageElements = originalPageElements[0]
85+
else:
86+
lastSearchPage = searchPageURL.split('/')[-1]
87+
if lastSearchPage.isdigit():
88+
startPage = int(lastSearchPage) + 1
89+
else:
90+
startPage = 2
91+
searchPage = '%s%d' % (PAsearchSites.getSearchSearchURL(siteNum), startPage)
92+
req = PAutils.HTTPRequest(searchPage)
93+
searchResults = HTML.ElementFromString(req.text)
94+
95+
re_sceneid = re.compile(r'(?<=tour_thumbs/).*(?=\/)')
96+
for idx in range(startPage, sitePages):
97+
for searchResult in searchResults.xpath('//div[@class="movie-block"]'):
98+
searchID = 0
99+
m = re_sceneid.search(searchResult.xpath('.//li/img/@src')[0])
100+
if m:
101+
searchID = m.group(0)
102+
103+
if searchID == sceneID:
104+
detailsPageElements = HTML.ElementFromString(req.text).xpath('//div[@class="movie-block"][.//*[contains(@src, "%s")]]' % sceneID)[0]
105+
break
106+
else:
107+
searchPage = '%s%d' % (PAsearchSites.getSearchSearchURL(siteNum), idx)
108+
req = PAutils.HTTPRequest(searchPage)
109+
searchResults = HTML.ElementFromString(req.text)
110+
continue
111+
break
81112

82113
xPath = PAutils.getDictValuesFromKey(xPathDB, PAsearchSites.getSearchSiteName(siteNum))
83114

@@ -158,7 +189,7 @@ def update(metadata, lang, siteNum, movieGenres, movieActors, movieCollections,
158189
siteDB = {
159190
'Trick Your GF': [7, 4],
160191
'Make Him Cuckold': [9, 5],
161-
'She Is Nerdy': [10, 12],
192+
'She Is Nerdy': [10, 15],
162193
'Tricky Agent': [11, 4],
163194
}
164195

@@ -255,7 +286,7 @@ def update(metadata, lang, siteNum, movieGenres, movieActors, movieCollections,
255286
'Iris Kiss': ['snc165', 'wnc1637'],
256287
'Isabel Stern': ['wfc1075'],
257288
'Iva Zan': ['wrygf536', 'wtag558', 'wnc745'],
258-
'Izi Ashley': ['wfc978', 'wtag980', 'wnc97'],
289+
'Izi Ashley': ['wfc978', 'wtag980', 'wnc97', 'wnc976'],
259290
'Jane Fox': ['wtag1235'],
260291
'Jenny Fer': ['wnc1330'],
261292
'Jenny Love': ['wrygf634', 'wfc607', 'wtag601'],

0 commit comments

Comments
 (0)