feat: soundcloud scraper

ananasmoe · ananasmoe · commit 88f750b638c0 · 2024-06-06T14:56:28.000+02:00
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,162 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 mov-cli
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/Makefile b/Makefile
@@ -0,0 +1,16 @@
+# Every target below is a command rather than a file, so all of them are
+# declared phony; otherwise a file named e.g. "test" would make the target a no-op.
+.PHONY: build install install-editable test
+
+PIP = pip
+PYTHON = python
+
+# Build the distribution (requires the "build" package from the dev extras).
+build:
+	${PYTHON} -m build
+
+# Install or upgrade the plugin from the current checkout.
+install:
+	${PIP} install . -U
+
+# Editable install for development.
+install-editable:
+	${PIP} install -e . --config-settings editable_mode=compat
+
+# Lint the project. The explicit "check" subcommand is used because newer
+# Ruff releases no longer accept a bare path as an implicit "check".
+test:
+	ruff check .
diff --git a/README.md b/README.md
@@ -0,0 +1,30 @@
+<div align="center">
+
+  # mov-cli-soundcloud
+  <sub>A mov-cli v4 plugin for playing music from SoundCloud.</sub>
+
+  <img src="https://github.com/mov-cli/mov-cli-soundcloud/assets/132799819/7c47b8e1-54d0-44be-abef-e3d82f5848f0">
+
+
+</div>
+
+## Installation 🛠️
+Here's how to install and add the plugin to mov-cli.
+
+1. Install the pip package.
+```sh
+pip install mov-cli-soundcloud
+```
+2. Then add the plugin to your mov-cli config.
+```sh
+mov-cli -e
+```
+```toml
+[mov-cli.plugins]
+soundcloud = "mov-cli-soundcloud"
+```
+
+## Usage 🖱️
+```sh
+mov-cli -s soundcloud sakuro
+```
diff --git a/mov_cli_soundcloud/__init__.py b/mov_cli_soundcloud/__init__.py
@@ -0,0 +1,18 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from mov_cli.plugins import PluginHookData
+
+from .scraper import *
+
+# Hook data for this plugin, typed as mov-cli's PluginHookData.
+# SoundCloudScraper is registered twice: under "DEFAULT" and under the
+# explicit name "soundcloud".
+# "package_name" matches the project name declared in pyproject.toml.
+plugin: PluginHookData = {
+    "version": 1, 
+    "package_name": "mov-cli-soundcloud",
+    "scrapers": {
+        "DEFAULT": SoundCloudScraper, 
+        "soundcloud": SoundCloudScraper
+    }
+}

+# Package version; pyproject.toml reads it through
+# [tool.setuptools.dynamic] (attr = "mov_cli_soundcloud.__version__").
+__version__ = "1.0.0"
diff --git a/mov_cli_soundcloud/scraper.py b/mov_cli_soundcloud/scraper.py
@@ -0,0 +1,74 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING, Iterable
+
+from mov_cli.config import Config
+from mov_cli.http_client import HTTPClient
+
+if TYPE_CHECKING:
+    from typing import Optional, Generator, Any
+
+    from mov_cli import Config
+    from mov_cli.http_client import HTTPClient
+    from mov_cli.scraper import ScraperOptionsT
+
+from dataclasses import dataclass, field
+
+from mov_cli import utils
+from mov_cli.scraper import Scraper
+from mov_cli import Single, Metadata, MetadataType
+from mov_cli import ExtraMetadata
+
+import yt_dlp
+
+__all__ = ("SoundCloudScraper", "SoundCloudMetadata",)
+
+@dataclass
+class SoundCloudMetadata(Metadata):
+    id: int
+    info: dict = field(default = None)
+
+class SoundCloudScraper(Scraper):
+    def __init__(self, config: Config, http_client: HTTPClient, options: Optional[ScraperOptionsT] | None = None) -> None:
+        self.base_url = "https://soundcloud.com"
+
+        super().__init__(config, http_client, options)
+
+    def search(self, query: str, limit: Optional[int]) -> Iterable[Metadata]:
+        search_page = self.http_client.get(f"{self.base_url}/search?q={query}")
+
+        soup = self.soup(search_page)
+
+        noscript = soup.find_all("noscript")[-1]
+
+        items = noscript.select("h2 > a")
+
+        if limit is not None:
+            items = items[:limit]
+
+        yt_options = {"skip_download": True, "quiet": not self.config.debug}
+
+        for _, item in enumerate(items):
+            if item["href"].count("/") == 2: # NOTE: only get music
+                with yt_dlp.YoutubeDL(yt_options) as f:
+                    info = f.extract_info(self.base_url + item["href"])
+                    
+                yield SoundCloudMetadata(
+                    id = _,
+                    title = info.get("title") + " ~ " + info.get("uploader"),
+                    type = MetadataType.SINGLE,
+                    year = info.get("upload_date", "")[:4],
+                    info = info,
+
+                    extra_func = lambda: ExtraMetadata(
+                        description = info.get("description"),
+                        image_url = info.get("thumbnails")[-1]["url"],
+                        genres = info.get("genres")
+                    )
+                )
+
+    def scrape(self, metadata: SoundCloudScraper, episode: utils.EpisodeSelector) -> Single:
+        return Single(
+            url = metadata.info.get("formats")[-1]["url"],
+            title = metadata.title,
+            year = metadata.year
+        )
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,49 @@
+[project]
+name = "mov-cli-soundcloud"
+description = "A mov-cli plugin for playing soundcloud."
+authors = [
+    {name = "r3tr0ananas", email = "ananas@ananas.moe"}
+]
+readme = {file = "README.md", content-type = "text/markdown"}
+requires-python = ">=3.8"
+license = { file = "LICENSE" }
+keywords = [
+    "amazing mov-cli plugin"
+]
+classifiers = [
+	'Operating System :: Microsoft :: Windows :: Windows 11',
+    'Operating System :: Microsoft :: Windows :: Windows 10',
+    'Operating System :: POSIX :: Linux',
+    'License :: OSI Approved :: MIT License',
+    'Programming Language :: Python :: 3.8',
+    'Programming Language :: Python :: 3.9',
+    'Programming Language :: Python :: 3.10',
+	'Programming Language :: Python :: 3.11',
+    "Programming Language :: Python :: 3.12"
+]
+# yt-dlp is imported at module level by mov_cli_soundcloud/scraper.py, so it
+# must be installed together with the plugin.
+dependencies = [
+    "requests",
+    "yt-dlp",
+    "importlib-metadata; python_version<'3.8'"
+]
+
+dynamic = ["version"]
+
+[project.optional-dependencies]
+dev = [
+    "ruff",
+    "build"
+]
+
+[project.urls]
+GitHub = "https://github.com/mov-cli/mov-cli-soundcloud"
+BugTracker = "https://github.com/mov-cli/mov-cli-soundcloud/issues"
+
+[tool.setuptools.dynamic]
+version = { attr = "mov_cli_soundcloud.__version__" }
+
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools.packages.find]
+include = ["mov_cli_soundcloud*"]