diff --git a/src/seedcase_flower/cli.py b/src/seedcase_flower/cli.py index fbdc74ae..31ccf57a 100644 --- a/src/seedcase_flower/cli.py +++ b/src/seedcase_flower/cli.py @@ -6,7 +6,7 @@ from cyclopts import App, Parameter, config # from seedcase_flower.config import Config as FlowerConfig -from seedcase_flower.internals import BuildStyle, _read_properties, _resolve_uri +from seedcase_flower.internals import BuildStyle, Uri, _parse_uri, _read_properties app = App( name="seedcase-flower", @@ -39,7 +39,12 @@ def build( """Build human-readable documentation from a `datapackage.json` file. Args: - uri: The URI to a datapackage.json file. + uri: The path to a local `datapackage.json` file or its parent folder. + Can also be an `https:` URL to a remote `datapackage.json` or a + `github:` / `gh:` URI pointing to a repo with a `datapackage.json` + in the repo root (in the format `gh:org/repo`, which can also include + reference to a tag or branch, such as `gh:org/repo@main` or + `gh:org/repo@1.0.1). style: The style used to structure the output. If a template directory is given, this parameter will be ignored. template_dir: The directory that contains the Jinja template @@ -48,8 +53,8 @@ def build( output_dir: The directory to save the generated files in. verbose: If True, prints additional information to the console. """ - path: Path = _resolve_uri(uri) - properties: dict[str, Any] = _read_properties(path) + uri: Uri = _parse_uri(uri) # type: ignore # TODO fix in read_prop PR + properties: dict[str, Any] = _read_properties(uri) # type: ignore # TODO fix in read_prop PR # One item per section, rendered from template. # Internally uses Jinja2 to render templates with metadata, which diff --git a/src/seedcase_flower/internals.py b/src/seedcase_flower/internals.py index a4a6c239..e190fe19 100644 --- a/src/seedcase_flower/internals.py +++ b/src/seedcase_flower/internals.py @@ -1,9 +1,11 @@ """Helper functions for private use.""" import json +from dataclasses import dataclass from enum import Enum from pathlib import Path from typing import Any +from urllib import parse class BuildStyle(Enum): @@ -14,15 +16,62 @@ class BuildStyle(Enum): quarto_resource_tables = "quarto_resource_tables" -# Output maybe str? Path? -# Use `match` inside for strictness on URI types? Or use a library for URI parsing? -# TODO Extend to parse strings and return either URL or Path -def _resolve_uri(uri: str) -> Path: - return Path(uri) +@dataclass(frozen=True) +class Uri: + """A parsed URI with its normalised value and locality flag.""" + + value: str + local: bool + + +def _parse_uri(uri: str) -> Uri: + split_uri = parse.urlsplit(uri) + if split_uri.scheme == "": + split_uri = split_uri._replace(scheme="file") + match split_uri.scheme: + case "file": + return _convert_to_file_uri(split_uri) + case "https": + return _convert_to_https_uri(split_uri) + case "gh" | "github": + return _convert_to_github_uri(split_uri) + case _: + raise ValueError( + "The uri must be either a path to an existing file/folder " + "or a URI with one of the following URI prefixes: " + "`file:`, `https:`, `gh:`, `github:`" + ) + + +def _convert_to_file_uri(split_file_uri: parse.SplitResult) -> Uri: + path = Path(split_file_uri.path).resolve() + if path.is_dir(): + path /= "datapackage.json" + split_file_uri = split_file_uri._replace(path=path.as_posix()) + return Uri(value=split_file_uri.geturl(), local=True) + + +def _convert_to_https_uri(split_https_uri: parse.SplitResult) -> Uri: + return Uri(value=split_https_uri.geturl(), local=False) + + +def _convert_to_github_uri(split_gh_uri: parse.SplitResult) -> Uri: + return Uri( + value=split_gh_uri._replace( + scheme="https", + netloc="raw.githubusercontent.com", + path=f"/{split_gh_uri.path}/refs/heads/main/datapackage.json", + ).geturl(), + local=False, + ) # TODO Extend to also read properties from URLs -def _read_properties(path: Path) -> dict[str, Any]: - with open(path) as properties_file: - datapackage: dict[str, Any] = json.load(properties_file) - return datapackage +def _read_properties(uri: Uri) -> dict[str, Any]: + if uri.local: + path = Path(parse.urlsplit(uri.value).path) + with open(path) as properties_file: + return json.load(properties_file) # type: ignore # TODO fix in read_prop PR + else: + # TODO read from remote file + return {"placeholder": uri.value} diff --git a/tests/test_cli.py b/tests/test_cli.py index ae04e1f2..939ce556 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -7,7 +7,7 @@ import pytest from seedcase_flower.cli import app, view -from seedcase_flower.internals import BuildStyle +from seedcase_flower.internals import BuildStyle, Uri _DATAPACKAGE_DATA = { "name": "placeholder", @@ -29,9 +29,9 @@ def datapackage_path(tmp_path): @pytest.fixture -def mock_resolve_uri(mocker): - """Mock _resolve_uri to isolate CLI tests from filesystem resolution.""" - return mocker.patch("seedcase_flower.cli._resolve_uri") +def mock_parse_uri(mocker): + """Mock _parse_uri to isolate CLI tests from filesystem resolution.""" + return mocker.patch("seedcase_flower.cli._parse_uri") @pytest.fixture @@ -43,16 +43,16 @@ def mock_read_properties(mocker): # Testing CLI invocation ==== -def test_build_with_mocked_internals(mock_resolve_uri, mock_read_properties): +def test_build_with_mocked_internals(mock_parse_uri, mock_read_properties): """Isolate CLI behaviour by mocking internal helpers.""" - fake_path = Path("datapackage.json") - mock_resolve_uri.return_value = fake_path + fake_uri = Uri(value="file:///datapackage.json", local=True) + mock_parse_uri.return_value = fake_uri # Simulate running the app from the command line (but without calling sys.exit()) app(["build", "datapackage.json"], result_action="return_value") # Checking that the correct values were passed to the internal functions - mock_resolve_uri.assert_called_once_with("datapackage.json") - mock_read_properties.assert_called_once_with(fake_path) + mock_parse_uri.assert_called_once_with("datapackage.json") + mock_read_properties.assert_called_once_with(fake_uri) # Checking stdout ==== @@ -122,8 +122,13 @@ def test_build_reads_uri_from_flower_toml(tmp_path, monkeypatch): Build human-readable documentation from a datapackage.json file. ╭─ Parameters ───────────────────────────────────────────────────────────────────────────╮ - │ URI --uri The URI to a datapackage.json file. [default: │ - │ datapackage.json] │ + │ URI --uri The path to a local datapackage.json file or its parent │ + │ folder. Can also be an https: URL to a remote │ + │ datapackage.json or a github: / gh: URI pointing to a │ + │ repo with a datapackage.json in the repo root (in the │ + │ format gh:org/repo, which can also include reference to a │ + │ tag or branch, such as gh:org/repo@main or │ + │ `gh:org/repo@1.0.1). [default: datapackage.json] │ │ STYLE --style The style used to structure the output. If a template │ │ directory is given, this parameter will be ignored. │ │ [choices: quarto-one-page, quarto-resource-listing, │ diff --git a/tests/test_internals.py b/tests/test_internals.py new file mode 100644 index 00000000..d1cd508c --- /dev/null +++ b/tests/test_internals.py @@ -0,0 +1,102 @@ +"""Tests for internal helper functions.""" + +import pytest + +from seedcase_flower.internals import Uri, _parse_uri + +# _parse_uri: plain path (no scheme) ==== + + +def test_parse_uri_plain_file_path_is_local(tmp_path): + """A plain file path with no scheme should return a local Uri.""" + result = _parse_uri(str(tmp_path / "datapackage.json")) + assert result.local is True + + +def test_parse_uri_plain_file_path_has_file_scheme(tmp_path): + """A plain file path should be normalised to a file:// URI.""" + result = _parse_uri(str(tmp_path / "datapackage.json")) + assert result.value.startswith("file://") + + +def test_parse_uri_directory_path_appends_datapackage_json(tmp_path): + """Passing a directory path should append datapackage.json to the URI.""" + result = _parse_uri(str(tmp_path)) + assert result.value.endswith("datapackage.json") + + +def test_parse_uri_directory_path_is_local(tmp_path): + """Passing a directory path should return a local Uri.""" + result = _parse_uri(str(tmp_path)) + assert result.local is True + + +# _parse_uri: file:// scheme ==== + + +def test_parse_uri_file_scheme_is_local(tmp_path): + """A file:// URI should return a local Uri.""" + result = _parse_uri(f"file://{tmp_path / 'datapackage.json'}") + assert result.local is True + + +def test_parse_uri_file_scheme_preserves_path(tmp_path): + """A file:// URI pointing to a file should preserve the path.""" + file = tmp_path / "datapackage.json" + result = _parse_uri(f"file://{file}") + assert str(file) in result.value + + +# _parse_uri: https:// scheme ==== + + +def test_parse_uri_https_is_not_local(): + """An https:// URI should return a non-local Uri.""" + result = _parse_uri("https://example.com/datapackage.json") + assert result.local is False + + +def test_parse_uri_https_preserves_url(): + """An https:// URI should be returned unchanged.""" + url = "https://example.com/datapackage.json" + result = _parse_uri(url) + assert result.value == url + + +# _parse_uri: gh:// / github:// scheme ==== + + +@pytest.mark.parametrize("scheme", ["gh", "github"]) +def test_parse_uri_github_scheme_converts_to_raw_githubusercontent(scheme): + """GitHub URIs should be converted to a raw.githubusercontent.com URL.""" + result = _parse_uri(f"{scheme}://owner/repo") + assert result.value.startswith("https://raw.githubusercontent.com/") + + +@pytest.mark.parametrize("scheme", ["gh", "github"]) +def test_parse_uri_github_scheme_is_not_local(scheme): + """GitHub URIs should return a non-local Uri.""" + result = _parse_uri(f"{scheme}://owner/repo") + assert result.local is False + + +@pytest.mark.parametrize("scheme", ["gh", "github"]) +def test_parse_uri_github_scheme_appends_datapackage_json(scheme): + """GitHub URIs should point to the datapackage.json on the main branch.""" + result = _parse_uri(f"{scheme}://owner/repo") + assert result.value.endswith("datapackage.json") + + +# _parse_uri: unsupported scheme ==== + + +def test_parse_uri_unsupported_scheme_raises_value_error(): + """An unsupported URI scheme should raise a ValueError.""" + with pytest.raises(ValueError, match="uri must be either"): + _parse_uri("ftp://example.com/datapackage.json") + + +def test_parse_uri_returns_uri_instance(tmp_path): + """_parse_uri should always return a Uri instance.""" + result = _parse_uri(str(tmp_path / "datapackage.json")) + assert isinstance(result, Uri) diff --git a/tools/vulture-allowlist.py b/tools/vulture-allowlist.py index 8c079259..bd5ab353 100644 --- a/tools/vulture-allowlist.py +++ b/tools/vulture-allowlist.py @@ -7,3 +7,6 @@ _check_jsonpath # unused method (src/seedcase_flower/section.py:83) quarto_resource_tables # unused variable (src/seedcase_flower/internals.py:14) cls # unused variable (src/seedcase_flower/section.py:85) +target # unused variable (src/seedcase_flower/.venv/lib/python3.12/site-packages/_virtualenv.py:50) +handler # unused variable (src/seedcase_flower/internals.py:20) +source # unused variable (src/seedcase_flower/internals.py:20) diff --git a/uv.lock b/uv.lock index f6314c42..95f8cf16 100644 --- a/uv.lock +++ b/uv.lock @@ -615,11 +615,11 @@ wheels = [ [[package]] name = "filelock" -version = "3.24.3" +version = "3.25.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/73/92/a8e2479937ff39185d20dd6a851c1a63e55849e447a55e798cc2e1f49c65/filelock-3.24.3.tar.gz", hash = "sha256:011a5644dc937c22699943ebbfc46e969cdde3e171470a6e40b9533e5a72affa", size = 37935, upload-time = "2026-02-19T00:48:20.543Z" } +sdist = { url = "https://files.pythonhosted.org/packages/77/18/a1fd2231c679dcb9726204645721b12498aeac28e1ad0601038f94b42556/filelock-3.25.0.tar.gz", hash = "sha256:8f00faf3abf9dc730a1ffe9c354ae5c04e079ab7d3a683b7c32da5dd05f26af3", size = 40158, upload-time = "2026-03-01T15:08:45.916Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9c/0f/5d0c71a1aefeb08efff26272149e07ab922b64f46c63363756224bd6872e/filelock-3.24.3-py3-none-any.whl", hash = "sha256:426e9a4660391f7f8a810d71b0555bce9008b0a1cc342ab1f6947d37639e002d", size = 24331, upload-time = "2026-02-19T00:48:18.465Z" }, + { url = "https://files.pythonhosted.org/packages/f9/0b/de6f54d4a8bedfe8645c41497f3c18d749f0bd3218170c667bf4b81d0cdd/filelock-3.25.0-py3-none-any.whl", hash = "sha256:5ccf8069f7948f494968fc0713c10e5c182a9c9d9eef3a636307a20c2490f047", size = 26427, upload-time = "2026-03-01T15:08:44.593Z" }, ] [[package]] @@ -717,11 +717,11 @@ wheels = [ [[package]] name = "identify" -version = "2.6.16" +version = "2.6.17" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5b/8d/e8b97e6bd3fb6fb271346f7981362f1e04d6a7463abd0de79e1fda17c067/identify-2.6.16.tar.gz", hash = "sha256:846857203b5511bbe94d5a352a48ef2359532bc8f6727b5544077a0dcfb24980", size = 99360, upload-time = "2026-01-12T18:58:58.201Z" } +sdist = { url = "https://files.pythonhosted.org/packages/57/84/376a3b96e5a8d33a7aa2c5b3b31a4b3c364117184bf0b17418055f6ace66/identify-2.6.17.tar.gz", hash = "sha256:f816b0b596b204c9fdf076ded172322f2723cf958d02f9c3587504834c8ff04d", size = 99579, upload-time = "2026-03-01T20:04:12.702Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b8/58/40fbbcefeda82364720eba5cf2270f98496bdfa19ea75b4cccae79c698e6/identify-2.6.16-py2.py3-none-any.whl", hash = "sha256:391ee4d77741d994189522896270b787aed8670389bfd60f326d677d64a6dfb0", size = 99202, upload-time = "2026-01-12T18:58:56.627Z" }, + { url = "https://files.pythonhosted.org/packages/40/66/71c1227dff78aaeb942fed29dd5651f2aec166cc7c9aeea3e8b26a539b7d/identify-2.6.17-py2.py3-none-any.whl", hash = "sha256:be5f8412d5ed4b20f2bd41a65f920990bdccaa6a4a18a08f1eefdcd0bdd885f0", size = 99382, upload-time = "2026-03-01T20:04:11.439Z" }, ] [[package]] @@ -2521,7 +2521,7 @@ wheels = [ [[package]] name = "virtualenv" -version = "21.0.0" +version = "21.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "distlib" }, @@ -2529,9 +2529,9 @@ dependencies = [ { name = "platformdirs" }, { name = "python-discovery" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ce/4f/d6a5ff3b020c801c808b14e2d2330cdc8ebefe1cdfbc457ecc368e971fec/virtualenv-21.0.0.tar.gz", hash = "sha256:e8efe4271b4a5efe7a4dce9d60a05fd11859406c0d6aa8464f4cf451bc132889", size = 5836591, upload-time = "2026-02-25T20:21:07.691Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2f/c9/18d4b36606d6091844daa3bd93cf7dc78e6f5da21d9f21d06c221104b684/virtualenv-21.1.0.tar.gz", hash = "sha256:1990a0188c8f16b6b9cf65c9183049007375b26aad415514d377ccacf1e4fb44", size = 5840471, upload-time = "2026-02-27T08:49:29.702Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/29/d1/3f62e4f9577b28c352c11623a03fb916096d5c131303d4861b4914481b6b/virtualenv-21.0.0-py3-none-any.whl", hash = "sha256:d44e70637402c7f4b10f48491c02a6397a3a187152a70cba0b6bc7642d69fb05", size = 5817167, upload-time = "2026-02-25T20:21:05.476Z" }, + { url = "https://files.pythonhosted.org/packages/78/55/896b06bf93a49bec0f4ae2a6f1ed12bd05c8860744ac3a70eda041064e4d/virtualenv-21.1.0-py3-none-any.whl", hash = "sha256:164f5e14c5587d170cf98e60378eb91ea35bf037be313811905d3a24ea33cc07", size = 5825072, upload-time = "2026-02-27T08:49:27.516Z" }, ] [[package]]