Skip to content

Commit 225ba39

Browse files
authored
Merge pull request #33 from CU-ESIIL/codex/show-all-code-in-self-contained-functions
docs: inline tag builder script
2 parents cb9db49 + 56cf530 commit 225ba39

File tree

1 file changed

+161
-3
lines changed

1 file changed

+161
-3
lines changed

TAG_SYSTEM_HOWTO.md

Lines changed: 161 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,168 @@ This guide explains how to recreate the Data Library's tag system in another rep
1616

1717
3. Ensure Python 3 and the [PyYAML](https://pyyaml.org/) package are available in your environment.
1818

19-
## 2. Copy the tag builder script
19+
## 2. Create the tag builder script
20+
21+
Create `scripts/build_tags.py` in your project and paste the following code. The `build_tag_pages` function wraps the entire tag system so it can be imported or run as a script without referring back to this repository.
22+
23+
```python
#!/usr/bin/env python3
"""Generate tag front matter for markdown files and build tag index page."""
from pathlib import Path
import re
import yaml
def build_tag_pages(docs_dir=Path("docs"), mkdocs_path=Path("mkdocs.yml")):
    """Build the tag index, per-topic pages, and the Innovation Summit page.

    Scans every markdown file under ``docs_dir`` (skipping generated pages),
    collects each file's tags — from YAML front matter when present,
    otherwise derived from its directory path — and writes:

    * ``docs/tags.md`` — one section per tag listing its pages,
    * ``docs/topic/<tag>.md`` — standalone pages for the most common tags,
    * ``docs/innovation-summit-2025.md`` — a curated summit landing page,
    * the ``nav`` section of ``mkdocs_path`` (only if that file exists).

    Parameters
    ----------
    docs_dir : Path
        Root of the markdown documentation tree.
    mkdocs_path : Path
        MkDocs configuration file whose ``nav`` is rewritten in place.
    """
    tag_page = docs_dir / "tags.md"
    topic_dir = docs_dir / "topic"

    # Descriptions for Innovation Summit datasets, shown next to their links.
    summit_descriptions = {
        "Air data":
            "EPA county-level air quality metrics track pollution-driven tipping points.",
        "FAO":
            "Global food balance sheets reveal agricultural trends linked to ecological transitions.",
        "FIRED":
            "Wildfire event polygons highlight landscapes nearing fire-driven tipping points.",
        "NLCD":
            "National land cover maps expose land-use changes that can trigger ecosystem shifts.",
        "Phenology network":
            "Seasonal plant and animal observations signal climate-driven ecological transitions.",
        "epa water quality":
            "Water-quality monitoring helps detect aquatic systems approaching degradation thresholds.",
        "epica dome c ch4":
            "Antarctic methane records provide context for modern atmospheric tipping points.",
        "global forest change":
            "Landsat-based forest loss and gain reveal deforestation tipping points worldwide.",
        "iNaturalist":
            "Citizen-science species occurrences capture biodiversity shifts near critical thresholds.",
        "lidar canopy height":
            "NEON lidar canopy models track forest structure changes preceding regime shifts.",
        "nclimgrid":
            "NOAA gridded climate normals show trends that may push regions past climate tipping points.",
        "neon and lter":
            "Integrated macroinvertebrate data uncover aquatic community transitions.",
        "neon aquatic":
            "Sensor-based water data monitor freshwater systems for early warning signs.",
        "neon hyperspectral":
            "High-resolution spectral imagery detects vegetation stress before ecosystem tipping.",
        "neon lidar and organismal":
            "Fusing structural and biological data links habitat change to ecological thresholds.",
        "nrcs soil exploration":
            "Soil survey attributes illuminate land degradation tipping points.",
        "osm":
            "OpenStreetMap vectors map human pressures that drive ecological tipping dynamics.",
        "prism":
            "Gridded temperature and precipitation normals track climate trends toward tipping points.",
        "rap-tiles":
            "Rangeland Analysis Platform tiles reveal vegetation transitions and desertification risk.",
        "sentinel streaming":
            "Sentinel-2 quicklooks enable rapid detection of landscape changes near thresholds.",
        "usgs water services":
            "Streamflow and groundwater APIs flag hydrologic systems near critical limits.",
        "watershed boundaries":
            "Hydrologic unit maps frame catchments vulnerable to ecological shifts.",
        "weatherbench":
            "Benchmark datasets support models predicting extreme events and tipping points.",
    }

    def derive_tags(md_path):
        # A page with no explicit tags inherits up to its first two directory
        # names (lowercased, spaces replaced with hyphens) as tags.
        parts = md_path.relative_to(docs_dir).parts[:-1]
        return [p.replace(" ", "-").lower() for p in parts[:2]]

    def read_title(content, md_path):
        # Prefer an ATX heading ("# Title"), then a Setext heading
        # ("Title\n====="), and fall back to a filename-derived title.
        lines = content.splitlines()
        if lines and lines[0].startswith("# "):
            return lines[0][2:].strip()
        if len(lines) >= 2 and set(lines[1]) == {"="}:
            return lines[0].strip()
        return md_path.stem.replace("_", " ").strip()

    # Map each tag to the (title, docs-relative posix path) pairs carrying it.
    tags_map = {}
    for md_path in docs_dir.rglob("*.md"):
        if md_path == tag_page:
            continue
        parts = md_path.relative_to(docs_dir).parts
        # Skip deeply nested files and previously generated topic pages.
        if len(parts) > 3 or parts[0] == "topic":
            continue
        content = md_path.read_text(encoding="utf-8")
        frontmatter_match = re.match(r"^---\n(.*?)\n---\n", content, re.DOTALL)
        if frontmatter_match:
            fm = yaml.safe_load(frontmatter_match.group(1)) or {}
            body = content[frontmatter_match.end():]
        else:
            fm = {}
            body = content
        tags = fm.get("tags") or derive_tags(md_path)
        # BUGFIX: derive the title from the body (front matter stripped).
        # Passing the raw content made every front-matter page fall through
        # to the filename fallback, because its first line was "---".
        title = read_title(body, md_path)
        rel_path = md_path.relative_to(docs_dir).as_posix()
        for tag in tags:
            tags_map.setdefault(tag, []).append((title, rel_path))

    # Write the master tag index in a single open: one section per tag,
    # entries sorted by title.
    with tag_page.open("w", encoding="utf-8") as f:
        f.write("# Tags\n\n")
        for tag in sorted(tags_map):
            f.write(f"## {tag}\n\n")
            if tag == "innovation-summit-2025":
                f.write("[Visit the Innovation Summit website](https://www.colorado.edu/esiil/)\n\n")
                f.write("![Innovation Summit 2025](assets/pre-summit-training-header.png)\n\n")
            for title, path in sorted(tags_map[tag]):
                desc = summit_descriptions.get(title)
                if tag == "innovation-summit-2025" and desc:
                    f.write(f"- [{title}]({path}) - {desc}\n")
                else:
                    f.write(f"- [{title}]({path})\n")
            f.write("\n")

    # The ten most common multi-page tags — excluding year-like tags that
    # contain digits — each get a standalone topic page.
    tag_counts = {tag: len(paths) for tag, paths in tags_map.items()}
    top_tags = [
        tag
        for tag, count in sorted(tag_counts.items(), key=lambda x: (-x[1], x[0]))
        if count > 1 and not any(ch.isdigit() for ch in tag)
    ][:10]

    topic_dir.mkdir(exist_ok=True)
    for tag in top_tags:
        tag_file = topic_dir / f"{tag}.md"
        with tag_file.open("w", encoding="utf-8") as f:
            f.write(f"# {tag}\n\n")
            # Links are relative to docs/topic/, hence the "../" prefix.
            for title, path in sorted(tags_map.get(tag, [])):
                f.write(f"- [{title}](../{path})\n")

    # Custom standalone page for the Innovation Summit 2025 tag.
    summit_page = docs_dir / "innovation-summit-2025.md"
    with summit_page.open("w", encoding="utf-8") as f:
        f.write("# Innovation Summit 2025\n\n")
        f.write("[Visit the Innovation Summit website](https://www.colorado.edu/esiil/)\n\n")
        f.write("![Innovation Summit 2025](assets/pre-summit-training-header.png)\n\n")
        for title, path in sorted(tags_map.get("innovation-summit-2025", [])):
            desc = summit_descriptions.get(title)
            if desc:
                f.write(f"- [{title}]({path}) - {desc}\n")
            else:
                f.write(f"- [{title}]({path})\n")
        f.write("\n")

    # Rewrite the MkDocs navigation to surface the generated pages.
    if mkdocs_path.exists():
        cfg = yaml.safe_load(mkdocs_path.read_text(encoding="utf-8"))
        cfg["nav"] = [
            {"Innovation Summit 2025": "innovation-summit-2025.md"},
            {"Home": "index.md"},
            {"Topics": [{tag: f"topic/{tag}.md"} for tag in top_tags]},
            {"Tags": "tags.md"},
        ]
        mkdocs_path.write_text(yaml.dump(cfg, sort_keys=False), encoding="utf-8")
# Run the generator when the module is executed directly as a script.
if __name__ == "__main__":
    build_tag_pages()
```
20179

21-
1. Copy [`scripts/build_tags.py`](scripts/build_tags.py) from this repository into the `scripts/` directory of your new project.
22-
2. Adjust the `summit_descriptions` dictionary inside the script if you want special descriptions for particular tags. You can also edit the logic that derives tags from paths in the `derive_tags` function.
180+
Adjust the `summit_descriptions` dictionary or the `derive_tags` function if you need custom behavior.
23181

24182
## 3. Generate tag pages
25183

0 commit comments

Comments
 (0)