Skip to content

Commit 2ee3a0a

Browse files
authored
[ie/tv8.it] Add live and playlist extractors (yt-dlp#12569)
Closes yt-dlp#12542 Authored by: DTrombett
1 parent 01a8be4 commit 2ee3a0a

File tree

2 files changed

+93
-8
lines changed

2 files changed

+93
-8
lines changed

yt_dlp/extractor/_extractors.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1883,6 +1883,8 @@
18831883
SkyItVideoIE,
18841884
SkyItVideoLiveIE,
18851885
TV8ItIE,
1886+
TV8ItLiveIE,
1887+
TV8ItPlaylistIE,
18861888
)
18871889
from .skylinewebcams import SkylineWebcamsIE
18881890
from .skynewsarabia import (

yt_dlp/extractor/skyit.py

Lines changed: 91 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,18 @@
22

33
from .common import InfoExtractor
44
from ..utils import (
5+
clean_html,
56
dict_get,
67
int_or_none,
78
parse_duration,
89
unified_timestamp,
10+
url_or_none,
11+
urljoin,
912
)
13+
from ..utils.traversal import traverse_obj
1014

1115

12-
class SkyItPlayerIE(InfoExtractor):
13-
IE_NAME = 'player.sky.it'
14-
_VALID_URL = r'https?://player\.sky\.it/player/(?:external|social)\.html\?.*?\bid=(?P<id>\d+)'
16+
class SkyItBaseIE(InfoExtractor):
1517
_GEO_BYPASS = False
1618
_DOMAIN = 'sky'
1719
_PLAYER_TMPL = 'https://player.sky.it/player/external.html?id=%s&domain=%s'
@@ -33,7 +35,6 @@ def _player_url_result(self, video_id):
3335
SkyItPlayerIE.ie_key(), video_id)
3436

3537
def _parse_video(self, video, video_id):
36-
title = video['title']
3738
is_live = video.get('type') == 'live'
3839
hls_url = video.get(('streaming' if is_live else 'hls') + '_url')
3940
if not hls_url and video.get('geoblock' if is_live else 'geob'):
@@ -43,7 +44,7 @@ def _parse_video(self, video, video_id):
4344

4445
return {
4546
'id': video_id,
46-
'title': title,
47+
'title': video.get('title'),
4748
'formats': formats,
4849
'thumbnail': dict_get(video, ('video_still', 'video_still_medium', 'thumb')),
4950
'description': video.get('short_desc') or None,
@@ -52,6 +53,11 @@ def _parse_video(self, video, video_id):
5253
'is_live': is_live,
5354
}
5455

56+
57+
class SkyItPlayerIE(SkyItBaseIE):
58+
IE_NAME = 'player.sky.it'
59+
_VALID_URL = r'https?://player\.sky\.it/player/(?:external|social)\.html\?.*?\bid=(?P<id>\d+)'
60+
5561
def _real_extract(self, url):
5662
video_id = self._match_id(url)
5763
domain = urllib.parse.parse_qs(urllib.parse.urlparse(
@@ -67,7 +73,7 @@ def _real_extract(self, url):
6773
return self._parse_video(video, video_id)
6874

6975

70-
class SkyItVideoIE(SkyItPlayerIE): # XXX: Do not subclass from concrete IE
76+
class SkyItVideoIE(SkyItBaseIE):
7177
IE_NAME = 'video.sky.it'
7278
_VALID_URL = r'https?://(?:masterchef|video|xfactor)\.sky\.it(?:/[^/]+)*/video/[0-9a-z-]+-(?P<id>\d+)'
7379
_TESTS = [{
@@ -96,7 +102,7 @@ def _real_extract(self, url):
96102
return self._player_url_result(video_id)
97103

98104

99-
class SkyItVideoLiveIE(SkyItPlayerIE): # XXX: Do not subclass from concrete IE
105+
class SkyItVideoLiveIE(SkyItBaseIE):
100106
IE_NAME = 'video.sky.it:live'
101107
_VALID_URL = r'https?://video\.sky\.it/diretta/(?P<id>[^/?&#]+)'
102108
_TEST = {
@@ -124,7 +130,7 @@ def _real_extract(self, url):
124130
return self._parse_video(livestream, asset_id)
125131

126132

127-
class SkyItIE(SkyItPlayerIE): # XXX: Do not subclass from concrete IE
133+
class SkyItIE(SkyItBaseIE):
128134
IE_NAME = 'sky.it'
129135
_VALID_URL = r'https?://(?:sport|tg24)\.sky\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
130136
_TESTS = [{
@@ -223,3 +229,80 @@ class TV8ItIE(SkyItVideoIE): # XXX: Do not subclass from concrete IE
223229
'params': {'skip_download': 'm3u8'},
224230
}]
225231
_DOMAIN = 'mtv8'
232+
233+
234+
class TV8ItLiveIE(SkyItBaseIE):
    # Extractor for the TV8 live stream page. The stream itself is parsed by
    # the shared _parse_video helper; title and description are then taken
    # from the site's own streaming API when that request succeeds.
    IE_NAME = 'tv8.it:live'
    IE_DESC = 'TV8 Live'
    _VALID_URL = r'https?://(?:www\.)?tv8\.it/streaming'
    _TESTS = [{
        'url': 'https://tv8.it/streaming',
        'info_dict': {
            'id': 'tv8',
            'ext': 'mp4',
            'title': str,
            'description': str,
            'is_live': True,
            'live_status': 'is_live',
        },
    }]

    def _real_extract(self, url):
        # The page hosts a single channel, so the ID is fixed and not taken from the URL
        channel_id = 'tv8'

        stream_data = self._download_json(
            'https://apid.sky.it/vdp/v1/getLivestream', channel_id,
            'Downloading manifest JSON', query={'id': '7'})
        # Programme metadata is optional: a failed request must not abort extraction
        page_data = self._download_json(
            'https://tv8.it/api/getStreaming', channel_id, fatal=False)

        stream_info = self._parse_video(stream_data, channel_id)
        programme_info = traverse_obj(page_data, ('info', {
            'title': ('title', 'text', {str}),
            'description': ('description', 'html', {clean_html}),
        }))

        # Later keys win, so programme metadata overrides the stream's own fields
        return {**stream_info, **programme_info}
264+
265+
266+
class TV8ItPlaylistIE(InfoExtractor):
    # Extractor for TV8 show/section pages. Entries are read from the cards
    # embedded in the page's Next.js data and delegated to TV8ItIE.
    IE_NAME = 'tv8.it:playlist'
    IE_DESC = 'TV8 Playlist'
    _VALID_URL = r'https?://(?:www\.)?tv8\.it/(?!video)[^/#?]+/(?P<id>[^/#?]+)'
    _TESTS = [{
        'url': 'https://tv8.it/intrattenimento/tv8-gialappas-night',
        'playlist_mincount': 32,
        'info_dict': {
            'id': 'tv8-gialappas-night',
            'title': 'Tv8 Gialappa\'s Night',
            'description': 'md5:c876039d487d9cf40229b768872718ed',
            'thumbnail': r're:https://static\.sky\.it/.+\.(png|jpe?g|webp)',
        },
    }, {
        'url': 'https://tv8.it/sport/uefa-europa-league',
        'playlist_mincount': 11,
        'info_dict': {
            'id': 'uefa-europa-league',
            'title': 'UEFA Europa League',
            'description': 'md5:9ab1832b7a8b1705b1f590e13a36bc6a',
            'thumbnail': r're:https://static\.sky\.it/.+\.(png|jpe?g|webp)',
        },
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)
        nextjs_data = self._search_nextjs_data(webpage, playlist_id)
        data = nextjs_data['props']['pageProps']['data']

        entries = []
        # Only cards carrying a truthy 'href' can be turned into an entry URL
        for card in traverse_obj(data, ('lastContent', 'cards', lambda _, v: v['href'])):
            entry_url = urljoin('https://tv8.it', card['href'])
            entry_meta = traverse_obj(card, {
                'description': ('extraData', 'videoDesc', {str}),
                'id': ('extraData', 'asset_id', {str}),
                'thumbnail': ('image', 'src', {url_or_none}),
                'title': ('title', 'typography', 'text', {str}),
            })
            entries.append(self.url_result(entry_url, ie=TV8ItIE, **entry_meta))

        # Playlist-level metadata comes from the page's desktop card
        playlist_meta = traverse_obj(data, ('card', 'desktop', {
            'description': ('description', 'html', {clean_html}),
            'thumbnail': ('image', 'src', {url_or_none}),
            'title': ('title', 'text', {str}),
        }))
        return self.playlist_result(entries, playlist_id, **playlist_meta)

0 commit comments

Comments
 (0)