1+ from __future__ import annotations
2+ from typing import TYPE_CHECKING , Iterable
3+
4+ from mov_cli .config import Config
5+ from mov_cli .http_client import HTTPClient
6+
7+ if TYPE_CHECKING :
8+ from typing import Optional , Generator , Any
9+
10+ from mov_cli import Config
11+ from mov_cli .http_client import HTTPClient
12+ from mov_cli .scraper import ScraperOptionsT
13+
14+ from dataclasses import dataclass , field
15+
16+ from mov_cli import utils
17+ from mov_cli .scraper import Scraper
18+ from mov_cli import Single , Metadata , MetadataType
19+ from mov_cli import ExtraMetadata
20+
21+ import yt_dlp
22+
23+ __all__ = ("SoundCloudScraper" , "SoundCloudMetadata" ,)
24+
@dataclass
class SoundCloudMetadata(Metadata):
    """Search-result metadata for a SoundCloud track, with the raw track info attached."""

    # Identifier of the result; the scraper in this module passes the
    # result's position within the search listing.
    id: int
    # Info dictionary returned by yt_dlp for the track. Defaults to None, so
    # consumers must not assume it is always populated.
    info: Optional[dict] = field(default=None)
29+
class SoundCloudScraper(Scraper):
    """Scraper that searches SoundCloud and resolves tracks to a playable URL.

    Search results are read from the last ``<noscript>`` element of the
    SoundCloud search page; details for each track are then extracted with
    ``yt_dlp``.
    """

    def __init__(
        self,
        config: Config,
        http_client: HTTPClient,
        options: Optional[ScraperOptionsT] = None,
    ) -> None:
        """Store the SoundCloud base URL and initialise the base scraper."""
        # Assigned before the base initialiser runs, as in the original ordering.
        self.base_url = "https://soundcloud.com"

        super().__init__(config, http_client, options)

    def search(self, query: str, limit: Optional[int] = None) -> Iterable[Metadata]:
        """Yield one ``SoundCloudMetadata`` per track found for *query*.

        Args:
            query: Free-text search term; it is URL-encoded before being sent.
            limit: Maximum number of tracks to yield, or ``None`` for no limit.

        Yields:
            Metadata for each matching track, in search-page order. The ``id``
            of each result is the position of its link on the search page.
        """
        # Local import keeps this block self-contained without touching the
        # module's import section.
        from urllib.parse import quote_plus

        # Encode the query so characters such as "&", "#" or spaces cannot
        # break or truncate the search URL.
        search_page = self.http_client.get(
            f"{self.base_url}/search?q={quote_plus(query)}"
        )
        soup = self.soup(search_page)

        noscripts = soup.find_all("noscript")
        if not noscripts:
            # Nothing to parse (unexpected page layout): yield no results
            # instead of failing with an IndexError.
            return

        anchors = noscripts[-1].select("h2 > a")

        # NOTE: only get music. Hrefs with exactly two slashes are kept;
        # presumably these are "/<user>/<track>" links, while other result
        # kinds use a different path depth.
        tracks = [
            (index, anchor)
            for index, anchor in enumerate(anchors)
            if anchor["href"].count("/") == 2
        ]

        # Apply the limit after filtering so that non-track links do not use
        # up the requested number of results.
        if limit is not None:
            tracks = tracks[:limit]

        yt_options = {"skip_download": True, "quiet": not self.config.debug}

        for index, anchor in tracks:
            with yt_dlp.YoutubeDL(yt_options) as ydl:
                info = ydl.extract_info(self.base_url + anchor["href"])

            title = info.get("title") or ""
            uploader = info.get("uploader")
            if uploader:
                title = f"{title} ~ {uploader}"

            yield SoundCloudMetadata(
                id=index,
                title=title,
                type=MetadataType.SINGLE,
                # "upload_date" may be absent or None; fall back to "".
                year=(info.get("upload_date") or "")[:4],
                info=info,
                # Bind the current ``info`` as a default argument. A plain
                # closure would read the generator's variable at call time
                # and report a later track's data once the loop has advanced.
                extra_func=lambda info=info: self._extra_metadata(info),
            )

    @staticmethod
    def _extra_metadata(info: dict) -> ExtraMetadata:
        """Build the ``ExtraMetadata`` for one track from its yt_dlp info dict."""
        thumbnails = info.get("thumbnails") or []

        return ExtraMetadata(
            description=info.get("description"),
            # The last thumbnail is used, as in the original code; None when
            # the track has no thumbnails at all.
            image_url=thumbnails[-1]["url"] if thumbnails else None,
            genres=info.get("genres"),
        )

    def scrape(self, metadata: SoundCloudMetadata, episode: utils.EpisodeSelector) -> Single:
        """Return the playable ``Single`` for a result produced by ``search``.

        Args:
            metadata: Search result whose ``info`` holds the yt_dlp info dict.
            episode: Not used by this scraper.

        Returns:
            A ``Single`` whose URL is taken from the last entry of the
            ``formats`` list in ``metadata.info``.
        """
        # NOTE(review): yt_dlp is understood to order formats from worst to
        # best, which would make the last entry the preferred one - confirm.
        return Single(
            url=metadata.info.get("formats")[-1]["url"],
            title=metadata.title,
            year=metadata.year,
        )
0 commit comments