@@ -112,7 +112,7 @@ class Episode:
112112 """ Defines an Episode. """
113113
114114 def __init__ (self , uuid = None , nodeid = None , path = None , channel = None , program_title = None , title = None , description = None , thumb = None , duration = None ,
115- season = None , season_uuid = None , number = None , rating = None , aired = None , expiry = None , stream = None , islongform = False ):
115+ season = None , season_uuid = None , number = None , rating = None , aired = None , expiry = None , stream = None , content_type = None ):
116116 """
117117 :type uuid: str
118118 :type nodeid: str
@@ -130,7 +130,7 @@ def __init__(self, uuid=None, nodeid=None, path=None, channel=None, program_titl
130130 :type aired: datetime
131131 :type expiry: datetime
132132 :type stream: string
133- :type islongform: bool
133+ :type content_type: string
134134 """
135135 self .uuid = uuid
136136 self .nodeid = nodeid
@@ -148,7 +148,7 @@ def __init__(self, uuid=None, nodeid=None, path=None, channel=None, program_titl
148148 self .aired = aired
149149 self .expiry = expiry
150150 self .stream = stream
151- self .islongform = islongform
151+ self .content_type = content_type
152152
153153 def __repr__ (self ):
154154 return "%r" % self .__dict__
@@ -338,6 +338,14 @@ def update():
338338 if not data :
339339 return None
340340
341+ if 'episode' in data and data ['episode' ]['pageInfo' ]['type' ] == 'live_channel' :
342+ episode = Episode (
343+ uuid = data ['episode' ]['pageInfo' ]['nodeUuid' ],
344+ program_title = data ['episode' ]['pageInfo' ]['title' ],
345+ content_type = data ['episode' ]['pageInfo' ]['type' ],
346+ )
347+ return episode
348+
341349 if 'video' in data and data ['video' ]:
342350 # We have found detailed episode information
343351 episode = self ._parse_clip_data (data ['video' ])
@@ -353,14 +361,19 @@ def update():
353361
354362 return None
355363
356- def get_stream_by_uuid (self , uuid , islongform ):
364+ def get_stream_by_uuid (self , uuid , content_type ):
357365 """ Return a ResolvedStream for this video.
358- :type uuid: str
359- :type islongform: bool
366+ :type uuid: string
367+ :type content_type: string
360368 :rtype: ResolvedStream
361369 """
362- mode = 'long-form' if islongform else 'short-form'
363- response = self ._get_url (self .API_GOPLAY + '/web/v1/videos/%s/%s' % (mode , uuid ), authentication = 'Bearer %s' % self ._auth .get_token ())
370+ if content_type in ('video-long_form' , 'long_form' ):
371+ mode = 'videos/long-form'
372+ elif content_type == 'video-short_form' :
373+ mode = 'videos/short-form'
374+ elif content_type == 'live_channel' :
375+ mode = 'liveStreams'
376+ response = self ._get_url (self .API_GOPLAY + '/web/v1/%s/%s' % (mode , uuid ), authentication = 'Bearer %s' % self ._auth .get_token ())
364377 data = json .loads (response )
365378
366379 if not data :
@@ -482,8 +495,8 @@ def get_recommendation_categories(self):
482495 raw_html = self ._get_url (self .SITE_URL )
483496
484497 # Categories regexes
485- regex_articles = re .compile (r'<article[^>]+>(. *?)</article>' , re .DOTALL )
486- regex_category = re .compile (r'<h2.*?>(.*?)</h2>(?:.*?<div class="visually-hidden">(.*?)</div>)?' , re .DOTALL )
498+ regex_articles = re .compile (r'<article[^>]+>([\s\S] *?)</article>' , re .DOTALL )
499+ regex_category = re .compile (r'<h2.*?>(.*?)</h2>(?:.*?<div class=\ "visually-hidden\ ">(.*?)</div>)?' , re .DOTALL )
487500
488501 categories = []
489502 for result in regex_articles .finditer (raw_html ):
@@ -492,9 +505,9 @@ def get_recommendation_categories(self):
492505 match_category = regex_category .search (article_html )
493506 category_title = None
494507 if match_category :
495- category_title = match_category .group (1 ).strip ()
508+ category_title = unescape ( match_category .group (1 ).strip () )
496509 if match_category .group (2 ):
497- category_title += ' [B]%s[/B]' % match_category .group (2 ).strip ()
510+ category_title += ' [B]%s[/B]' % unescape ( match_category .group (2 ).strip () )
498511
499512 if category_title :
500513 # Extract programs and lookup in all_programs so we have more metadata
@@ -547,8 +560,8 @@ def _extract_programs(html):
547560 :rtype list[Program]
548561 """
549562 # Item regexes
550- regex_item = re .compile (r'<a[^>]+?href="(?P<path>[^"]+)"[^>]+?>'
551- r'. *?<h3 class="poster-teaser__title">(?P<title>[^<]*)</h3>.*?data-background-image= "(?P<image>. *?)". *?'
563+ regex_item = re .compile (r'<a[^>]+?href=\ "(?P<path>[^\ "]+)\ "[^>]+?>'
564+ r'[\s\S] *?<h3 class=\ "poster-teaser__title\ ">(?P<title>[^<]*)</h3>[\s\S]*?poster-teaser__image\" src=\ "(?P<image>[\s\S] *?)\"[\s\S] *?'
552565 r'</a>' , re .DOTALL )
553566
554567 # Extract items
@@ -574,20 +587,21 @@ def _extract_videos(html):
574587 :rtype list[Episode]
575588 """
576589 # Item regexes
577- regex_item = re .compile (r'<a[^>]+?href="(?P<path>[^"]+)"[^>]+?>. *?</a>' , re .DOTALL )
590+ regex_item = re .compile (r'<a[^>]+?class=\"(?P<item_type>[^\"]+)\"[^>]+? href=\ "(?P<path>[^\ "]+)\ "[^>]+?>[\s\S] *?</a>' , re .DOTALL )
578591
579- regex_episode_program = re .compile (r'<h3 class="episode-teaser__subtitle">([^<]*)</h3 >' )
580- regex_episode_title = re .compile (r'<(?:div|h3) class="(?:poster|card|image|episode)-teaser__title">(?:<span>)?([^<]*)(?:</span>)?</(?:div|h3)>' )
581- regex_episode_duration = re .compile (r'data-duration="([^"]*)"' )
582- regex_episode_video_id = re .compile (r'data-video-id="([^"]*)"' )
583- regex_episode_image = re .compile (r'data-background-image= "([^"]*)"' )
584- regex_episode_badge = re .compile (r'<div class=" (?:poster|card|image|episode)-teaser__badge badge ">([^<]*)</div>' )
592+ regex_episode_program = re .compile (r'<(?:div|h3) class=\ "episode-teaser__subtitle\ ">([^<]*)</(?:div|h3) >' )
593+ regex_episode_title = re .compile (r'<(?:div|h3) class=\ "(?:poster|card|image|episode)-teaser__title\ ">(?:<span>)?([^<]*)(?:</span>)?</(?:div|h3)>' )
594+ regex_episode_duration = re .compile (r'data-duration=\ "([^\ "]*)\ "' )
595+ regex_episode_video_id = re .compile (r'data-video-id=\ "([^\ "]*)\ "' )
596+ regex_episode_image = re .compile (r'<img class=\"episode-teaser__header\" src=\ "([^<\ "]*)\ "' )
597+ regex_episode_badge = re .compile (r'<div class=\"badge (?:poster|card|image|episode)-teaser__badge (?:poster|card|image|episode)-teaser__badge--default\ ">([^<]*)</div>' )
585598
586599 # Extract items
587600 episodes = []
588601 for item in regex_item .finditer (html ):
589602 item_html = item .group (0 )
590603 path = item .group ('path' )
604+ item_type = item .group ('item_type' )
591605
592606 # Extract title
593607 try :
@@ -632,6 +646,8 @@ def _extract_videos(html):
632646 if episode_badge :
633647 description += "\n \n [B]%s[/B]" % episode_badge
634648
649+ content_type = 'video-short_form' if 'card-' in item_type else 'video-long_form'
650+
635651 # Episode
636652 episodes .append (Episode (
637653 path = path .lstrip ('/' ),
@@ -642,6 +658,7 @@ def _extract_videos(html):
642658 uuid = episode_video_id ,
643659 thumb = episode_image ,
644660 program_title = episode_program ,
661+ content_type = content_type
645662 ))
646663
647664 return episodes
@@ -721,7 +738,7 @@ def _parse_episode_data(data, season_uuid=None):
721738 expiry = datetime .fromtimestamp (int (data .get ('unpublishDate' ))) if data .get ('unpublishDate' ) else None ,
722739 rating = data .get ('parentalRating' ),
723740 stream = data .get ('path' ),
724- islongform = data .get ('isLongForm ' ),
741+ content_type = data .get ('type ' ),
725742 )
726743 return episode
727744
0 commit comments