Skip to content

Commit f00d00e

Browse files
committed
New data-retrieval logic for tasks page
1 parent 44fa9a6 commit f00d00e

File tree

6 files changed

+540
-0
lines changed

6 files changed

+540
-0
lines changed

.github/workflows/task_list.yaml

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Runs the tasks-data retrieval script and prints the resulting JSON file.
name: Task-list page

on:
  push:
    branches: ["main"]
  pull_request:
    branches: ["main"]
  workflow_dispatch:

jobs:
  retrieve-tasks-data:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 1

      - uses: actions/setup-python@v5
        with:
          # Quoted so that YAML reads the version as a string, not a float
          # (an unquoted 3.10 would be parsed as 3.1).
          python-version: "3.11"
          cache: pip

      - name: Install requirements
        run: python3 -m pip install -r tasks/data_retrieval/requirements.txt

      - name: Fetch tasks data
        run: python3 -u tasks/data_retrieval/create_tasks_data.py

      # Print the generated file in the job log
      - run: cat tasks/data_retrieval/tasks_data.json

tasks/data_retrieval/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
downloads
2+
venv
3+
tasks_data.json
Lines changed: 244 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
import time
2+
from pathlib import Path
3+
import requests
4+
import json
5+
from os import environ
6+
from zipfile import ZipFile
7+
from typing import Any
8+
from pydantic import BaseModel
9+
from typing import Optional, Literal
10+
11+
12+
# Scratch directory, next to this file, where downloaded files are stored.
DOWNLOAD_FOLDER = Path(__file__).parent.joinpath("downloads")
DOWNLOAD_FOLDER.mkdir(exist_ok=True)
14+
15+
16+
class TaskReadV2(BaseModel):
    """
    Schema describing a single task entry.

    Modelled on
    https://github.com/fractal-analytics-platform/fractal-server/blob/main/fractal_server/app/schemas/v2/task.py

    Fields that are not declared here are rejected (`extra = "forbid"`).
    """

    # Identification
    name: str
    type: Literal["parallel", "non_parallel", "compound"]
    source: Optional[str] = None
    version: Optional[str] = None

    # Documentation
    docs_info: Optional[str] = None
    docs_link: Optional[str] = None

    # Type filters, as mappings from a string key to a boolean flag
    input_types: dict[str, bool]
    output_types: dict[str, bool]

    # Descriptive metadata
    category: Optional[str] = None
    modality: Optional[str] = None
    authors: Optional[str] = None
    tags: list[str]

    class Config:
        extra = "forbid"
37+
38+
39+
def parse_wheel_filename(wheel_path: str) -> dict[str, str]:
    """
    Given a wheel-file name or path, extract distribution and version.

    Wheel file names are made of dash-separated fields, the first two being
    the distribution name and the version; only those two are used here.

    Args:
        wheel_path: Name of, or path to, a wheel file.

    Returns:
        A dict with keys `name` and `version`.

    Raises:
        ValueError: If the file name does not have a non-empty distribution
            field followed by a non-empty version field.
    """
    wheel_filename = Path(wheel_path).name
    name, _, remainder = wheel_filename.partition("-")
    version = remainder.split("-")[0]
    if not (name and version):
        # Fail with an explicit message rather than an opaque IndexError
        raise ValueError(
            f"Cannot extract name and version from {wheel_filename=}."
        )
    return dict(name=name, version=version)
46+
47+
48+
def load_manifest_from_zip(wheel_path: str) -> dict[str, Any]:
    """
    Read the Fractal manifest stored inside an on-disk wheel file.

    The first archive member whose name contains
    `__FRACTAL_MANIFEST__.json` is parsed as JSON and returned.

    Raises:
        ValueError: If no archive member matches.
    """
    with ZipFile(wheel_path) as archive:
        for member in archive.namelist():
            if "__FRACTAL_MANIFEST__.json" in member:
                break
        else:
            # Loop ended without a match
            raise ValueError(
                f"{wheel_path} does not include __FRACTAL_MANIFEST__.json"
            )
        with archive.open(member) as manifest_fd:
            return json.load(manifest_fd)
64+
65+
66+
def download_file(url: str) -> str:
    """
    Download a file into `DOWNLOAD_FOLDER` and return its local path.

    Args:
        url: Address of the file. Its last `/`-separated component is used
            as the local file name.

    Returns:
        POSIX-style path of the downloaded file.

    Raises:
        requests.HTTPError: If the server replies with an error status.
    """
    file_name = url.split("/")[-1]
    file_path = (DOWNLOAD_FOLDER / file_name).as_posix()
    # The context manager releases the connection once the body is consumed.
    with requests.get(url, stream=True, timeout=60) as response:
        # Check the status before writing, so that the body of an error
        # response is never stored as if it were the requested file.
        response.raise_for_status()
        with open(file_path, "wb") as f:
            # Explicit chunk size: the default of `iter_content` is one byte.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                f.write(chunk)
    return file_path
74+
75+
76+
def handle_pypi_project(pypi_project_url: str) -> dict[str, Any]:
    """
    Collect name, version and Fractal manifest of the latest PyPI release.

    Example: https://pypi.org/project/fractal-tasks-core

    Args:
        pypi_project_url: URL of a project page on pypi.org.

    Returns:
        A dict with the `manifest` key plus the keys returned by
        `parse_wheel_filename` (`name` and `version`).

    Raises:
        ValueError: If the URL is not a valid PyPI project URL, or if the
            latest release does not include exactly one wheel file.
        RuntimeError: If the PyPI API response status is not 200.
    """

    # Extract project_name
    parts = pypi_project_url.split("/")
    if (
        parts[:4] != ["https:", "", "pypi.org", "project"]
        or len(parts) < 5
        or not parts[4]
    ):
        raise ValueError(
            f"Invalid {pypi_project_url=}.\n"
            "Valid example: https://pypi.org/project/fractal-tasks-core"
        )
    project_name = parts[4]

    # Fetch and parse PyPI information
    pypi_api_url = f"https://pypi.org/pypi/{project_name}/json"
    res = requests.get(pypi_api_url, timeout=30)
    # Check the status before decoding: an error response may have no JSON
    # body, which would hide the actual problem behind a decoding error.
    if res.status_code != 200:
        raise RuntimeError(f"Invalid response from {pypi_api_url}: {res}")
    response_data = res.json()
    latest_version = response_data["info"]["version"]
    latest_release = response_data["releases"][latest_version]
    wheel_assets = [
        item for item in latest_release if item["filename"].endswith(".whl")
    ]
    if len(wheel_assets) > 1:
        raise ValueError(
            f"Found more than one wheel asset in release {latest_version}."
        )
    if not wheel_assets:
        raise ValueError(f"Found no wheel asset in release {latest_version}.")
    wheel_url = wheel_assets[0]["url"]

    # Download wheel and parse manifest
    wheel_path = download_file(wheel_url)
    try:
        info = parse_wheel_filename(wheel_path)
        manifest = load_manifest_from_zip(wheel_path)
    finally:
        # Remove the wheel even when parsing fails, so that no file is left
        # behind in the download folder.
        Path(wheel_path).unlink()

    return dict(manifest=manifest, **info)
116+
117+
118+
def handle_github_repository(github_url: str) -> dict[str, Any]:
    """
    Collect name, version and Fractal manifest of the latest GitHub release.

    Example:
    https://github.com/fractal-analytics-platform/fractal-lif-converters/

    Args:
        github_url: URL of a repository on github.com.

    Returns:
        A dict with the `manifest` key plus the keys returned by
        `parse_wheel_filename` (`name` and `version`).

    Raises:
        ValueError: If the URL is not a valid GitHub repository URL, or if
            the latest release does not include exactly one wheel file.
        RuntimeError: If the GitHub API response status is not 200.
    """

    # Extract owner and repository
    parts = github_url.split("/")
    if (
        parts[:3] != ["https:", "", "github.com"]
        or len(parts) < 5
        or not (parts[3] and parts[4])
    ):
        raise ValueError(
            f"Invalid {github_url=}.\n"
            "Valid example: https://github.com/fractal-analytics-platform/fractal-lif-converters"
        )
    owner, repository = parts[3:5]

    # Fetch and parse GitHub information
    github_api_url = (
        f"https://api.github.com/repos/{owner}/{repository}/releases/latest"
    )
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    res = requests.get(github_api_url, headers=headers, timeout=30)
    if res.status_code != 200:
        raise RuntimeError(f"Invalid response from {github_api_url}: {res}")
    assets = res.json()["assets"]
    wheel_assets = [asset for asset in assets if asset["name"].endswith(".whl")]
    if len(wheel_assets) > 1:
        raise ValueError("Found more than one wheel asset in latest GitHub release.")
    if not wheel_assets:
        raise ValueError("Found no wheel asset in latest GitHub release.")
    wheel_url = wheel_assets[0]["browser_download_url"]

    # Download wheel and parse manifest
    wheel_path = download_file(wheel_url)
    try:
        info = parse_wheel_filename(wheel_path)
        manifest = load_manifest_from_zip(wheel_path)
    finally:
        # Remove the wheel even when parsing fails, so that no file is left
        # behind in the download folder.
        Path(wheel_path).unlink()

    return dict(manifest=manifest, **info)
159+
160+
161+
def get_package_info(source: str) -> dict[str, Any]:
    """
    Dispatch a source URL to the GitHub or PyPI handler, based on its prefix.

    Raises:
        ValueError: If `source` starts with neither `https://github.com`
            nor `https://pypi.org`.
    """
    is_github = source.startswith("https://github.com")
    is_pypi = source.startswith("https://pypi.org")
    if not (is_github or is_pypi):
        raise ValueError(f"Invalid {source=}.")
    if is_github:
        return handle_github_repository(source)
    return handle_pypi_project(source)
168+
169+
170+
def _get_task_type(
171+
task: dict[str, Any],
172+
) -> Literal["parallel", "non_parallel", "compound"]:
173+
np = task.get("executable_non_parallel", None)
174+
p = task.get("executable_parallel", None)
175+
if p and np:
176+
return "compound"
177+
elif p and not np:
178+
return "parallel"
179+
elif np and not p:
180+
return "non_parallel"
181+
else:
182+
raise ValueError(f"Invalid task with {p=} and {np=}.")
183+
184+
185+
# Task attributes copied into each output entry, in this order.
COLUMN_NAMES = [
    "version",
    "name",
    "category",
    "modality",
    "tags",
    "input_types",
    "output_types",
    "docs_link",
]
# Fallback values for attributes missing from a task; columns that are not
# listed here fall back to None.
COLUMN_DEFAULTS = dict(input_types={}, output_types={}, tags=[])
# Title-cased variants of the column names.
COLUMN_TITLES = [column_name.title() for column_name in COLUMN_NAMES]
201+
202+
203+
# Read and filter list of sources (blank lines and `#` comments are skipped)
sources_file = Path(__file__).parent / "sources.txt"
with sources_file.open("r", encoding="utf-8") as f:
    # Strip surrounding whitespace, so that whitespace-only lines and
    # indented comments are filtered out as well.
    sources = [line.strip() for line in f.read().splitlines()]
sources = [
    source for source in sources if source and not source.startswith("#")
]

TASKS = []
for source in sources:
    t_start = time.perf_counter()
    print(f"START processing {source=}")
    try:
        new_tasks = []
        data = get_package_info(source)
        pkg_version = data.get("version")
        pkg_task_list = data["manifest"]["task_list"]
        for task in pkg_task_list:
            new_task = dict()
            for column_name in COLUMN_NAMES:
                new_task[column_name] = task.get(
                    column_name, COLUMN_DEFAULTS.get(column_name, None)
                )
            # The version always comes from the package, not from the task
            new_task["version"] = pkg_version
            new_task["type"] = _get_task_type(task)
            # Validation only: raises if the entry does not match the schema
            TaskReadV2(**new_task)
            new_tasks.append(new_task)
    except Exception as e:
        print(f"ERROR, skip.\nOriginal error:\n{str(e)}")
    else:
        # Only keep the tasks of a source that was processed completely; a
        # failure half-way through skips the whole source.
        TASKS.extend(new_tasks)
    t_end = time.perf_counter()
    print(f"END processing {source=} - elapsed {t_end-t_start:.3f} s.")
    print()

output_file = Path(__file__).parent / "tasks_data.json"
with output_file.open("w", encoding="utf-8") as f:
    json.dump(TASKS, f, indent=2)

# Remove any leftover download first, since `rmdir` only works on an empty
# directory.
for leftover in DOWNLOAD_FOLDER.iterdir():
    leftover.unlink()
DOWNLOAD_FOLDER.rmdir()
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
requests
2+
pydantic

tasks/data_retrieval/sources.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# PyPI
2+
https://pypi.org/project/fractal-tasks-core/
3+
https://pypi.org/project/fractal-faim-ipa
4+
https://pypi.org/project/fractal-lif-converters
5+
https://pypi.org/project/operetta-compose
6+
7+
# GitHub releases with wheels
8+
https://github.com/fractal-analytics-platform/fractal-lif-converters/
9+
10+
11+
# https://github.com/fractal-analytics-platform/fractal-helper-tasks
12+
# https://github.com/fmi-basel/gliberal-scMultipleX
13+
# https://github.com/Apricot-Therapeutics/APx_fractal_task_collection
14+
# https://github.com/fractal-analytics-platform/fractal-plantseg-tasks
15+
# https://github.com/m-albert/fractal-ome-zarr-hcs-stitching/archive
16+
# https://github.com/fractal-analytics-platform/fractal-ilastik-tasksC/archive/refs/tags/0.1.1.zip

0 commit comments

Comments
 (0)