Skip to content

Commit 9f1b09d

Browse files
committed
Initial commit
0 parents  commit 9f1b09d

File tree

9 files changed

+710
-0
lines changed

9 files changed

+710
-0
lines changed

.github/workflows/docs.yml

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Builds the EESSI software metadata files and publishes the documentation
# site (including those files) to GitHub Pages on every push to main.
name: Generate and serve API data for EESSI
on:
  push:
    branches:
      - main
permissions:
  contents: read
  pages: write
  id-token: write
jobs:
  deploy:
    environment:
      name: github-pages
      # Resolved from the step with `id: deployment` below
      url: ${{ steps.deployment.outputs.page_url }}
    runs-on: ubuntu-latest
    steps:
      - uses: actions/configure-pages@v5
      - uses: actions/checkout@v5
      - uses: eessi/github-action-eessi@v3
        with:
          use_eessi_module: true
          eessi_stack_version: "2025.06"
      - name: Create a virtualenv to install zensical
        run: |
          python -m venv /tmp/venv_docs
          source /tmp/venv_docs/bin/activate
          pip install zensical
      - name: Generate API data
        run: |
          echo "Generating data files..."
          module purge
          module unuse $MODULEPATH
          module use /cvmfs/software.eessi.io/init/modules/
          module load EESSI/2025.06
          module load EasyBuild/5
          module load EESSI-extend
          # The scripts live in scripts/, the working directory is the repository root
          python scripts/generate_data_files.py --eessi-version=2023.06
          python scripts/generate_data_files.py --eessi-version=2025.06
          python scripts/merge_data_files.py out.yaml eessi*.yaml
          mv out.yaml docs/data/eessi_software_metadata.yaml
          # Generate Markdown index for data files
          echo "# Data Files" > docs/data/index.md
          for f in docs/data/eessi*.yaml; do
            # index.md sits next to the data files, so link by file name only
            name=$(basename "$f")
            echo "- [$name]($name)" >> docs/data/index.md
          done
      # Each step runs in a fresh shell, so the virtualenv activated above is no
      # longer active here: call the executable inside the virtualenv directly.
      - run: /tmp/venv_docs/bin/zensical build --clean
      - uses: actions/upload-pages-artifact@v4
        with:
          path: site
      - uses: actions/deploy-pages@v4
        # The id is required for steps.deployment.outputs.page_url above
        id: deployment

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
venv_*
2+
site
3+

docs/data/.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
eessi*.json
2+
eessi*.yaml

docs/data/index.md

Whitespace-only changes.

docs/index.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
---
2+
icon: lucide/rocket
3+
---
4+
5+
# EESSI data files for API endpoints
6+
7+
This repository serves a set of data files that can be used by custom API endpoints. These files are automatically
8+
updated as EESSI adds new software.
9+
10+
11+
## Access to the files
12+
13+
See [the data file page](data/index.md).

scripts/generate_data_files.py

Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
import argparse
2+
import glob
3+
import os
4+
import re
5+
import sys
6+
import shutil
7+
import tempfile
8+
import subprocess
9+
import yaml
10+
from collections import defaultdict, OrderedDict
11+
from easybuild.tools.version import VERSION as EASYBUILD_VERSION
12+
from easybuild.framework.easyconfig.easyconfig import process_easyconfig, get_toolchain_hierarchy
13+
from easybuild.tools.options import set_up_configuration
14+
from easybuild.tools.include import include_easyblocks
15+
from contextlib import contextmanager
16+
17+
# EESSI versions accepted by the --eessi-version command line option
VALID_EESSI_VERSIONS = ["2025.06", "2023.06"]

# Architecture sub-path (below .../software/linux/) that is scanned for installations
EESSI_REFERENCE_ARCHITECTURE = "x86_64/intel/icelake"

# Top-level toolchains per EESSI version, listed newest first so that the
# position in each list tells us what "latest" means
EESSI_SUPPORTED_TOP_LEVEL_TOOLCHAINS = OrderedDict([
    (
        '2025.06',
        [
            {'name': 'foss', 'version': '2025a'},
            {'name': 'foss', 'version': '2024a'},
        ],
    ),
    (
        '2023.06',
        [
            {'name': 'foss', 'version': '2023b'},
            {'name': 'foss', 'version': '2023a'},
            {'name': 'foss', 'version': '2022b'},
        ],
    ),
])
33+
34+
@contextmanager
def suppress_stdout():
    """
    Context manager that discards everything written to sys.stdout.

    While the context is active sys.stdout points at os.devnull. On exit
    (also when the wrapped code raises) the previous sys.stdout is restored
    and the devnull handle opened here is closed.
    """
    saved_stdout = sys.stdout
    # Close the handle we opened ourselves rather than "whatever sys.stdout is
    # on exit": the wrapped code may have rebound sys.stdout in the meantime
    with open(os.devnull, "w") as devnull:
        sys.stdout = devnull
        try:
            yield
        finally:
            sys.stdout = saved_stdout
43+
44+
45+
def load_and_list_modules(module_name):
    """
    Run `module load <name>` and `module --terse list` inside a subshell.

    Returns the list of loaded modules visible inside that subshell.
    Does not modify Python's environment.

    Parameters:
        module_name (str): Full name of the module to load.

    Returns:
        list: Lines of the `module --terse list` output that contain a '/',
              with the EESSI-extend and EasyBuild modules filtered out unless
              the requested module is one of them (see below).

    Raises:
        RuntimeError: If the subshell exits with a non-zero exit code.
    """

    # Run as one shell script so the same session is used.
    # The module name is handed over as positional parameter $1 instead of being
    # pasted into the script text, so it is never interpreted as shell syntax.
    script = 'module load "$1" || exit 1\nmodule --terse list 2>&1\n'

    # stderr is captured separately: only the listing (redirected with 2>&1 in
    # the script) ends up in stdout, so messages printed to stderr while
    # loading the module cannot be mistaken for module names.
    result = subprocess.run(
        ["bash", "-c", script, "bash", module_name],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True
    )

    if result.returncode != 0:
        raise RuntimeError(f"Failed to load module '{module_name}':\n{result.stderr}{result.stdout}")

    # Parse module list output
    modules = [line for line in result.stdout.splitlines() if "/" in line]

    # Filter out the modules we expect to be loaded
    eessi_extend_module_stub = 'EESSI-extend/'
    eb_module_stub = 'EasyBuild/'
    if module_name.startswith(eessi_extend_module_stub):
        # Don't filter anything
        filtered_stubs = ()
    elif module_name.startswith(eb_module_stub):
        # Filter EESSI-extend
        filtered_stubs = (eessi_extend_module_stub,)
    else:
        # Filter EESSI-extend and EasyBuild
        filtered_stubs = (eessi_extend_module_stub, eb_module_stub)

    # str.startswith() with an empty tuple is always False, so nothing is dropped then
    return [module for module in modules if not module.startswith(filtered_stubs)]
89+
90+
91+
def use_timestamped_reprod_if_exists(original_path):
    """
    Replace the last 'software' with 'reprod' and insert the latest timestamp directory
    after the version directory if it exists.

    Parameters:
        original_path (str): Path containing .../software/<name>/<version>/<rest>.

    Returns:
        str: .../reprod/<name>/<version>/<latest timestamp>/<rest> if that path
             exists, otherwise the unmodified original path (also when the
             original path has no 'software' component at all).
    """
    # Split path
    parts = original_path.strip(os.sep).split(os.sep)

    # Nothing to rewrite without a 'software' component: keep the original path
    if 'software' not in parts:
        return original_path

    # Find the last occurrence of 'software' and replace it by 'reprod'
    idx = len(parts) - 1 - parts[::-1].index('software')
    parts[idx] = 'reprod'

    # Path up to version directory (reprod/<name>/<version>)
    pre_timestamp = os.sep.join([''] + parts[:idx+3])
    # Path after version directory (easybuild/reprod/easyblocks)
    post_version = parts[idx+3:]

    # Look for timestamp directories under pre_timestamp
    timestamp_dirs = [d for d in glob.glob(os.path.join(pre_timestamp, '*')) if os.path.isdir(d)]
    if not timestamp_dirs:
        return original_path

    # glob returns full paths that all share pre_timestamp as prefix, so the
    # lexicographic maximum is the directory with the largest name
    latest_timestamp = max(timestamp_dirs)
    # Reconstruct path: reprod/.../version/<latest_timestamp>/easybuild/reprod/easyblocks
    final_path = os.path.join(latest_timestamp, *post_version)
    if os.path.exists(final_path):
        return final_path

    return original_path
123+
124+
def collect_eb_files(base_path):
    """
    Group the easyconfig (.eb) files below base_path by the major version of the
    EasyBuild release found in the *-easybuild-devel file next to them.

    The version is taken from the first 'software/EasyBuild/X.Y.Z/bin' path
    found in the most recently modified devel file of the folder. For folders
    containing 'EasyBuild' or 'EESSI-extend', the loaded EasyBuild version is
    assumed if no such path is found.

    Parameters:
        base_path (str): Root folder; files matching */*/easybuild/*.eb below it are used.

    Returns:
        dict: {major_version: [list of .eb file paths]}

    Raises:
        FileNotFoundError: If a folder with an .eb file has no *-easybuild-devel file.
        ValueError: If no EasyBuild version can be extracted for a regular folder.
    """
    # Major version of the loaded EasyBuild installation, used for the exceptions
    loaded_eb_major = str(EASYBUILD_VERSION.version[0])
    eb_path_regex = re.compile(r'software/EasyBuild/(\d+)\.(\d+)\.(\d+)/bin')
    grouped = defaultdict(list)

    # Fixed-depth match (<name>/<version>/easybuild/*.eb), not a recursive search
    for easyconfig in glob.glob(os.path.join(base_path, '*/*/easybuild/*.eb')):
        install_dir = os.path.dirname(easyconfig)

        # The devel file is expected in the same folder as the easyconfig
        candidates = glob.glob(os.path.join(install_dir, '*-easybuild-devel'))
        if not candidates:
            raise FileNotFoundError(f"No *-easybuild-devel file found in folder: {install_dir}")

        # With several devel files, the most recently modified one wins
        newest_devel = max(candidates, key=os.path.getmtime)
        with open(newest_devel, 'r') as handle:
            found = eb_path_regex.search(handle.read())

        in_extend_folder = 'EESSI-extend' in install_dir
        if found:
            major = found.group(1)
        elif 'EasyBuild' in install_dir or in_extend_folder:
            # Exception folders fall back to the loaded EasyBuild version
            major = loaded_eb_major
        else:
            raise ValueError(f"Cannot extract EasyBuild version from file: {newest_devel}")

        # Don't add EESSI-extend to EB4 or the same file will appear twice
        if in_extend_folder and major == '4':
            continue

        grouped[major].append(easyconfig)

    return dict(grouped)
175+
176+
177+
if __name__ == "__main__":
    # Script entry point: scan one EESSI version, parse every easyconfig that
    # was installed with the currently loaded EasyBuild major version and write
    # the collected metadata to a YAML file in the current directory.

    # The EESSI version is provided as an argument
    parser = argparse.ArgumentParser(description="EESSI version to scan.")
    parser.add_argument(
        "--eessi-version",
        "-e",
        required=True,
        choices=VALID_EESSI_VERSIONS,
        help=f"Allowed versions: {', '.join(VALID_EESSI_VERSIONS)}"
    )

    args = parser.parse_args()
    eessi_version = args.eessi_version

    print(f"Using EESSI version: {eessi_version}")

    # We use a single architecture path to gather information about the software versions
    base_path = f'/cvmfs/software.eessi.io/versions/{eessi_version}/software/linux/{EESSI_REFERENCE_ARCHITECTURE}/software/'
    result = collect_eb_files(base_path)

    # NOTE(review): an empty string is passed as args, presumably so EasyBuild
    # does not parse this script's own command line - confirm
    set_up_configuration(args="")
    tmpdir = tempfile.mkdtemp()

    # Major version of the loaded EasyBuild, only installations made with it are parsed
    loaded_eb_major = str(EASYBUILD_VERSION.version[0])

    # Store all our data in a dict
    eessi_software = {eessi_version: {}}

    # Store the toolchain hierarchies supported by the EESSI version
    eessi_software[eessi_version]["toolchain_hierarchy"] = {}
    for top_level_toolchain in EESSI_SUPPORTED_TOP_LEVEL_TOOLCHAINS[eessi_version]:
        toolchain_family = f"{top_level_toolchain['name']}_{top_level_toolchain['version']}"
        # Get the hierarchy and always add the system toolchain
        eessi_software[eessi_version]["toolchain_hierarchy"][toolchain_family] = [{'name': 'system', 'version': 'system'}] + get_toolchain_hierarchy(top_level_toolchain)

    try:
        for eb_version_of_install, files in sorted(result.items()):
            print(f"Major version {eb_version_of_install}:")
            if eb_version_of_install != loaded_eb_major:
                continue

            total_files = len(files)
            for i, file in enumerate(files, start=1):
                percent = (i / total_files) * 100
                print(f"{percent:.1f}% - {file}")

                # Don't try to parse an EasyBuild easyconfig that is not the same major release
                if '/software/EasyBuild/' in file and f'/EasyBuild/{eb_version_of_install}' not in file:
                    continue

                eb_hooks_path = use_timestamped_reprod_if_exists(f"{os.path.dirname(file)}/reprod/easyblocks")
                easyblocks_dir = include_easyblocks(tmpdir, [eb_hooks_path+"/*.py"])
                with suppress_stdout():
                    parsed_ec = process_easyconfig(file)[0]
                # included easyblocks are the first entry in sys.path, so just pop them but keep a list of what was used
                sys.path.pop(0)
                easyblocks_used = [os.path.basename(f) for f in glob.glob(f"{easyblocks_dir}/**/*.py", recursive=True) if os.path.basename(f) != '__init__.py']
                shutil.rmtree(easyblocks_dir)

                # Store everything we now know about the installation as a dict
                entry = parsed_ec['ec'].asdict()
                entry['mtime'] = os.path.getmtime(file)

                # Make sure we can load the module before adding its information to the main dict.
                # The module is loaded only once per easyconfig, each load spawns a subshell.
                try:
                    entry['required_modules'] = load_and_list_modules(parsed_ec['full_mod_name'])
                except RuntimeError as e:
                    print(f"Ignoring {file} due to error processing module: {e}")
                    continue

                # Add important data that is related to the module environment
                entry['full_mod_name'] = parsed_ec['full_mod_name']
                entry['short_mod_name'] = parsed_ec['short_mod_name']
                # Retain the easyblocks used so we can use a heuristic to figure out the type of extensions (R, Python, Perl)
                entry['easyblocks'] = easyblocks_used

                # Use the path as the key since we know it is unique
                eessi_software[eessi_version][file] = entry
    finally:
        # Remove the scratch directory that hosted the included easyblocks
        shutil.rmtree(tmpdir, ignore_errors=True)

    # Store the result
    with open(f"eessi_software_{eessi_version}-eb{loaded_eb_major}.yaml", "w") as f:
        yaml.dump(eessi_software, f)

scripts/merge_data_files.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#!/usr/bin/env python3
2+
import sys
3+
import yaml
4+
5+
def strict_merge(a, b, path=""):
    """
    Recursively merge dictionary b into a, erroring on mismatched values.

    Parameters:
        a: Target value; when it is a dict it is updated in place.
        b: Value to merge into a.
        path (str): Dotted key path of the current position, used in error messages.

    Returns:
        The merged value (a itself).

    Raises:
        ValueError: If both sides hold a value for the same position, at least
                    one of them is not a dict, and the values differ.
    """
    both_are_dicts = isinstance(a, dict) and isinstance(b, dict)
    if not both_are_dicts:
        # Anything that is not a pair of dicts has to match exactly
        if a != b:
            raise ValueError(f"Conflict at {path}: {a!r} != {b!r}")
        return a  # identical values, nothing to merge

    for key, incoming in b.items():
        if key in a:
            # Present on both sides: descend and extend the path for error reporting
            child_path = f"{path}.{key}" if path else key
            a[key] = strict_merge(a[key], incoming, child_path)
        else:
            # Only present in b: take it over as is
            a[key] = incoming
    return a
20+
21+
22+
def main():
    """
    Merge several YAML files into one.

    Command line: <output file> <input file> [<input file> ...]
    The input files are merged in the given order with strict_merge(), so a
    conflicting value in any two files aborts the run with a ValueError.
    Exits with status 1 if fewer than two arguments are given.
    """
    if len(sys.argv) < 3:
        # Usage errors belong on stderr; the name shown matches this script
        print("Usage: merge_data_files.py out.yaml file1.yaml file2.yaml ...", file=sys.stderr)
        sys.exit(1)

    output_file = sys.argv[1]
    input_files = sys.argv[2:]

    merged = {}
    for filename in input_files:
        with open(filename) as f:
            # NOTE(review): FullLoader is only appropriate for trusted input;
            # if the inputs never contain Python-specific tags, yaml.safe_load
            # would be the safer choice - confirm against the generated files.
            # An empty document loads as None and is treated as an empty mapping.
            data = yaml.load(f, Loader=yaml.FullLoader) or {}
        merged = strict_merge(merged, data)

    with open(output_file, "w") as out:
        yaml.dump(merged, out)

    print(f"Successfully merged into {output_file}")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)