[ie/tv8.it] Add live and playlist extractors (yt-dlp#12569)

DTrombett · web-flow · commit 2ee3a0aff9be · 2025-03-16T23:10:16.000+01:00
Closes yt-dlp#12542
Authored by: DTrombett
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
@@ -1883,6 +1883,8 @@
     SkyItVideoIE,
     SkyItVideoLiveIE,
     TV8ItIE,
+    TV8ItLiveIE,
+    TV8ItPlaylistIE,
 )
 from .skylinewebcams import SkylineWebcamsIE
 from .skynewsarabia import (
diff --git a/yt_dlp/extractor/skyit.py b/yt_dlp/extractor/skyit.py
@@ -2,16 +2,18 @@
 
 from .common import InfoExtractor
 from ..utils import (
+    clean_html,
     dict_get,
     int_or_none,
     parse_duration,
     unified_timestamp,
+    url_or_none,
+    urljoin,
 )
+from ..utils.traversal import traverse_obj
 
 
-class SkyItPlayerIE(InfoExtractor):
-    IE_NAME = 'player.sky.it'
-    _VALID_URL = r'https?://player\.sky\.it/player/(?:external|social)\.html\?.*?\bid=(?P<id>\d+)'
+class SkyItBaseIE(InfoExtractor):
     _GEO_BYPASS = False
     _DOMAIN = 'sky'
     _PLAYER_TMPL = 'https://player.sky.it/player/external.html?id=%s&domain=%s'
@@ -33,7 +35,6 @@ def _player_url_result(self, video_id):
             SkyItPlayerIE.ie_key(), video_id)
 
     def _parse_video(self, video, video_id):
-        title = video['title']
         is_live = video.get('type') == 'live'
         hls_url = video.get(('streaming' if is_live else 'hls') + '_url')
         if not hls_url and video.get('geoblock' if is_live else 'geob'):
@@ -43,7 +44,7 @@ def _parse_video(self, video, video_id):
 
         return {
             'id': video_id,
-            'title': title,
+            'title': video.get('title'),
             'formats': formats,
             'thumbnail': dict_get(video, ('video_still', 'video_still_medium', 'thumb')),
             'description': video.get('short_desc') or None,
@@ -52,6 +53,11 @@ def _parse_video(self, video, video_id):
             'is_live': is_live,
         }
 
+
+class SkyItPlayerIE(SkyItBaseIE):
+    IE_NAME = 'player.sky.it'
+    _VALID_URL = r'https?://player\.sky\.it/player/(?:external|social)\.html\?.*?\bid=(?P<id>\d+)'
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         domain = urllib.parse.parse_qs(urllib.parse.urlparse(
@@ -67,7 +73,7 @@ def _real_extract(self, url):
         return self._parse_video(video, video_id)
 
 
-class SkyItVideoIE(SkyItPlayerIE):  # XXX: Do not subclass from concrete IE
+class SkyItVideoIE(SkyItBaseIE):
     IE_NAME = 'video.sky.it'
     _VALID_URL = r'https?://(?:masterchef|video|xfactor)\.sky\.it(?:/[^/]+)*/video/[0-9a-z-]+-(?P<id>\d+)'
     _TESTS = [{
@@ -96,7 +102,7 @@ def _real_extract(self, url):
         return self._player_url_result(video_id)
 
 
-class SkyItVideoLiveIE(SkyItPlayerIE):  # XXX: Do not subclass from concrete IE
+class SkyItVideoLiveIE(SkyItBaseIE):
     IE_NAME = 'video.sky.it:live'
     _VALID_URL = r'https?://video\.sky\.it/diretta/(?P<id>[^/?&#]+)'
     _TEST = {
@@ -124,7 +130,7 @@ def _real_extract(self, url):
         return self._parse_video(livestream, asset_id)
 
 
-class SkyItIE(SkyItPlayerIE):  # XXX: Do not subclass from concrete IE
+class SkyItIE(SkyItBaseIE):
     IE_NAME = 'sky.it'
     _VALID_URL = r'https?://(?:sport|tg24)\.sky\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
     _TESTS = [{
@@ -223,3 +229,80 @@ class TV8ItIE(SkyItVideoIE):  # XXX: Do not subclass from concrete IE
         'params': {'skip_download': 'm3u8'},
     }]
     _DOMAIN = 'mtv8'
+
+
+class TV8ItLiveIE(SkyItBaseIE):
+    IE_NAME = 'tv8.it:live'
+    IE_DESC = 'TV8 Live'
+    _VALID_URL = r'https?://(?:www\.)?tv8\.it/streaming'
+    _TESTS = [{
+        'url': 'https://tv8.it/streaming',
+        'info_dict': {
+            'id': 'tv8',
+            'ext': 'mp4',
+            'title': str,
+            'description': str,
+            'is_live': True,
+            'live_status': 'is_live',
+        },
+    }]
+
+    def _real_extract(self, url):
+        video_id = 'tv8'
+        livestream = self._download_json(
+            'https://apid.sky.it/vdp/v1/getLivestream', video_id,
+            'Downloading manifest JSON', query={'id': '7'})
+        metadata = self._download_json('https://tv8.it/api/getStreaming', video_id, fatal=False)
+
+        return {
+            **self._parse_video(livestream, video_id),
+            **traverse_obj(metadata, ('info', {
+                'title': ('title', 'text', {str}),
+                'description': ('description', 'html', {clean_html}),
+            })),
+        }
+
+
+class TV8ItPlaylistIE(InfoExtractor):
+    IE_NAME = 'tv8.it:playlist'
+    IE_DESC = 'TV8 Playlist'
+    _VALID_URL = r'https?://(?:www\.)?tv8\.it/(?!video)[^/#?]+/(?P<id>[^/#?]+)'
+    _TESTS = [{
+        'url': 'https://tv8.it/intrattenimento/tv8-gialappas-night',
+        'playlist_mincount': 32,
+        'info_dict': {
+            'id': 'tv8-gialappas-night',
+            'title': 'Tv8 Gialappa\'s Night',
+            'description': 'md5:c876039d487d9cf40229b768872718ed',
+            'thumbnail': r're:https://static\.sky\.it/.+\.(png|jpe?g|webp)',
+        },
+    }, {
+        'url': 'https://tv8.it/sport/uefa-europa-league',
+        'playlist_mincount': 11,
+        'info_dict': {
+            'id': 'uefa-europa-league',
+            'title': 'UEFA Europa League',
+            'description': 'md5:9ab1832b7a8b1705b1f590e13a36bc6a',
+            'thumbnail': r're:https://static\.sky\.it/.+\.(png|jpe?g|webp)',
+        },
+    }]
+
+    def _real_extract(self, url):
+        playlist_id = self._match_id(url)
+        webpage = self._download_webpage(url, playlist_id)
+        data = self._search_nextjs_data(webpage, playlist_id)['props']['pageProps']['data']
+        entries = [self.url_result(
+            urljoin('https://tv8.it', card['href']), ie=TV8ItIE,
+            **traverse_obj(card, {
+                'description': ('extraData', 'videoDesc', {str}),
+                'id': ('extraData', 'asset_id', {str}),
+                'thumbnail': ('image', 'src', {url_or_none}),
+                'title': ('title', 'typography', 'text', {str}),
+            }))
+            for card in traverse_obj(data, ('lastContent', 'cards', lambda _, v: v['href']))]
+
+        return self.playlist_result(entries, playlist_id, **traverse_obj(data, ('card', 'desktop', {
+            'description': ('description', 'html', {clean_html}),
+            'thumbnail': ('image', 'src', {url_or_none}),
+            'title': ('title', 'text', {str}),
+        })))

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1883,6 +1883,8 @@` |
| `1883` | `1883` | `SkyItVideoIE,` |
| `1884` | `1884` | `SkyItVideoLiveIE,` |
| `1885` | `1885` | `TV8ItIE,` |
| | `1886` | `+ TV8ItLiveIE,` |
| | `1887` | `+ TV8ItPlaylistIE,` |
| `1886` | `1888` | `)` |
| `1887` | `1889` | `from .skylinewebcams import SkylineWebcamsIE` |
| `1888` | `1890` | `from .skynewsarabia import (` |