|
| 1 | +import PAsearchSites |
| 2 | +import PAutils |
| 3 | + |
| 4 | + |
def search(results, lang, siteNum, searchData):
    """Collect candidate scene pages and append scored matches to ``results``.

    A leading all-digit token in ``searchData.title`` is treated as a scene ID:
    it is removed from the title and used to build a direct URL that is tried
    first.  Further candidates come from ``PAutils.getFromSearchEngine``.

    Each candidate page that loads is scored out of 80, by date distance when
    both the query and the result have a date, otherwise by title distance.

    Returns the same ``results`` container that was passed in.
    """
    sceneID = searchData.title.split(' ', 1)[0]
    if unicode(sceneID, 'UTF-8').isdigit():
        searchData.title = searchData.title.replace(sceneID, '', 1).strip()
    else:
        sceneID = None

    searchResults = []
    if sceneID:
        directURL = PAsearchSites.getSearchSearchURL(siteNum) + sceneID
        searchResults.append(directURL)

    googleResults = PAutils.getFromSearchEngine(searchData.title, siteNum)
    for sceneURL in googleResults:
        if ('video/' in sceneURL or 'videos/' in sceneURL) and '/page/' not in sceneURL:
            # Strip the query string BEFORE the duplicate check; otherwise two
            # hits differing only in their query parameters are both kept.
            sceneURL = sceneURL.split('?')[0]
            if sceneURL not in searchResults:
                searchResults.append(sceneURL)

    for sceneURL in searchResults:
        req = PAutils.HTTPRequest(sceneURL)
        if not req.ok:
            continue

        detailsPageElements = HTML.ElementFromString(req.text)

        # A page without a heading is not a scene page; skip it instead of
        # letting an IndexError abort the remaining candidates.
        titleNodes = detailsPageElements.xpath('//h1')
        if not titleNodes:
            continue
        titleNoFormatting = PAutils.parseTitle(titleNodes[0].text_content(), siteNum)

        # Rebuild an absolute URL only when a scene ID exists to build it from.
        if 'http' not in sceneURL and sceneID:
            sceneURL = PAsearchSites.getSearchSearchURL(siteNum) + sceneID
        curID = PAutils.Encode(sceneURL)

        # Parse the on-page date with the same per-site formats update() uses.
        pageDate = ''
        date = detailsPageElements.xpath('//div[contains(@class, "content-date")]')
        if date:
            try:
                if 1041 <= siteNum <= 1042:
                    # Keep the text after the last ':' and drop ordinal
                    # suffixes so '%d %b %Y' can parse it.
                    cleanDate = re.sub(r'(\d)(st|nd|rd|th)', r'\1', date[0].text_content().split(':')[-1].strip())
                    date_object = datetime.strptime(cleanDate, '%d %b %Y')
                else:
                    date_object = datetime.strptime(date[0].text_content().strip(), '%d.%m.%Y')
                pageDate = date_object.strftime('%Y-%m-%d')
            except ValueError:
                Log('Unable to parse release date for %s' % sceneURL)

        if pageDate:
            releaseDate = pageDate
        else:
            releaseDate = searchData.dateFormat() if searchData.date else ''

        # Only a date actually read from the page is shown in the result name.
        displayDate = pageDate

        if searchData.date and releaseDate:
            score = 80 - Util.LevenshteinDistance(searchData.date, releaseDate)
        else:
            score = 80 - Util.LevenshteinDistance(searchData.title.lower(), titleNoFormatting.lower())

        results.Append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='[%s] %s %s' % (PAsearchSites.getSearchSiteName(siteNum), titleNoFormatting, displayDate), score=score, lang=lang))

    return results
| 47 | + |
| 48 | + |
def update(metadata, lang, siteNum, movieGenres, movieActors, movieCollections, art):
    """Populate ``metadata`` from the scene page encoded in ``metadata.id``.

    The first '|'-separated field of ``metadata.id`` is the encoded scene URL
    written by ``search``.  The page is fetched and its title, summary,
    release date, genres, actors and gallery images are copied into
    ``metadata`` and the supplied collector objects.

    Returns the same ``metadata`` object that was passed in.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = PAutils.parseTitle(detailsPageElements.xpath('//div[contains(@class, "content-title")]')[0].text_content().strip(), siteNum)

    # Summary
    # The XPath function is contains(); the previous 'contain(' is not a
    # defined XPath function and made this lookup fail at evaluation time.
    summary = detailsPageElements.xpath('//div[contains(@class, "content-desc")]')
    if summary:
        metadata.summary = summary[0].text_content().strip()

    # Studio
    metadata.studio = 'Caramel Cash'

    # Tagline and Collection(s)
    tagline = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = tagline
    movieCollections.addCollection(tagline)

    # Release Date
    date = detailsPageElements.xpath('//div[contains(@class, "content-date")]')
    if date:
        try:
            if 1041 <= siteNum <= 1042:
                # Keep the text after the last ':' and drop ordinal suffixes
                # so '%d %b %Y' can parse it.
                cleanDate = re.sub(r'(\d)(st|nd|rd|th)', r'\1', date[0].text_content().split(':')[-1].strip())
                date_object = datetime.strptime(cleanDate, '%d %b %Y')
            else:
                date_object = datetime.strptime(date[0].text_content().strip(), '%d.%m.%Y')
        except ValueError:
            # An unexpected date layout should not discard the other metadata.
            Log('Unable to parse release date for %s' % sceneURL)
        else:
            metadata.originally_available_at = date_object
            metadata.year = metadata.originally_available_at.year

    # Genres
    genres = detailsPageElements.xpath('//div[@class="content-tags"]/a')
    for genreLink in genres:
        genreName = genreLink.text_content().strip()

        movieGenres.addGenre(genreName)

    # Actor(s)
    actors = detailsPageElements.xpath('//section[@class="content-sec backdrop"]//div[@class="main__models"]/a')
    for actorLink in actors:
        actorName = actorLink.text_content().strip()
        actorPhotoURL = ''

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters/Background
    xpaths = [
        '//section[@class="content-gallery-sec"]//a[@data-lightbox="gallery"]/@href'
    ]

    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            art.append(img)

    Log('Artwork found: %d' % len(art))
    images = []
    posterExists = False
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if height > width:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                    posterExists = True
                if width > 100 and width > height:
                    # Item is an art item; remember it as a fallback poster
                    images.append((image, posterUrl))
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except Exception as e:
                # Best effort: one bad image must not stop the rest, but the
                # failure is logged instead of silently discarded.
                Log('Unable to process artwork %s: %s' % (posterUrl, e))
        elif PAsearchSites.posterOnlyAlreadyExists(posterUrl, metadata):
            posterExists = True

    # No portrait image was found: reuse the landscape images as posters.
    if not posterExists:
        for idx, (image, posterUrl) in enumerate(images, 1):
            try:
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except Exception as e:
                Log('Unable to process fallback poster %s: %s' % (posterUrl, e))

    return metadata
0 commit comments