@@ -16,10 +16,168 @@ This guide explains how to recreate the Data Library's tag system in another rep
1616
3. Ensure Python 3 and the [PyYAML](https://pyyaml.org/) package are available in your environment.
1818
19- ## 2. Copy the tag builder script
19+ ## 2. Create the tag builder script
20+
Create `scripts/build_tags.py` in your project and paste the following code. The `build_tag_pages` function wraps the entire tag system so it can be imported or run as a script without referring back to this repository.
22+
23+ ``` python
24+ # !/usr/bin/env python3
25+ """ Generate tag front matter for markdown files and build tag index page."""
26+ from pathlib import Path
27+ import re
28+ import yaml
29+
30+
def build_tag_pages(docs_dir=Path("docs"), mkdocs_path=Path("mkdocs.yml")):
    """Build the tag index, topic pages, summit page and MkDocs navigation.

    Every ``*.md`` file under ``docs_dir`` (at most three path components
    deep, and not inside ``topic/``) contributes its title to each of its
    tags. Tags come from the ``tags`` key of the YAML front matter when
    present, otherwise from the first two directory names of the file's path.

    Files written:
      * ``docs_dir/tags.md`` - every tag with links to its pages.
      * ``docs_dir/topic/<tag>.md`` - one page per "top" tag (up to ten tags
        used by more than one page and containing no digits).
      * ``docs_dir/innovation-summit-2025.md`` - standalone summit page.
      * ``mkdocs_path`` - its ``nav`` entry is replaced, if the file exists.

    Args:
        docs_dir: Root directory of the Markdown documentation.
        mkdocs_path: Path to the MkDocs configuration file.
    """
    tag_page = docs_dir / "tags.md"
    topic_dir = docs_dir / "topic"
    summit_tag = "innovation-summit-2025"
    summit_link = (
        "[Visit the Innovation Summit website](https://www.colorado.edu/esiil/)\n\n"
    )
    # Compiled once; matches a leading '---' ... '---' YAML front matter block.
    frontmatter_re = re.compile(r"^---\n(.*?)\n---\n", re.DOTALL)

    # Descriptions for Innovation Summit datasets, keyed by page title.
    summit_descriptions = {
        "Air data":
            "EPA county-level air quality metrics track pollution-driven tipping points.",
        "FAO":
            "Global food balance sheets reveal agricultural trends linked to ecological transitions.",
        "FIRED":
            "Wildfire event polygons highlight landscapes nearing fire-driven tipping points.",
        "NLCD":
            "National land cover maps expose land-use changes that can trigger ecosystem shifts.",
        "Phenology network":
            "Seasonal plant and animal observations signal climate-driven ecological transitions.",
        "epa water quality":
            "Water-quality monitoring helps detect aquatic systems approaching degradation thresholds.",
        "epica dome c ch4":
            "Antarctic methane records provide context for modern atmospheric tipping points.",
        "global forest change":
            "Landsat-based forest loss and gain reveal deforestation tipping points worldwide.",
        "iNaturalist":
            "Citizen-science species occurrences capture biodiversity shifts near critical thresholds.",
        "lidar canopy height":
            "NEON lidar canopy models track forest structure changes preceding regime shifts.",
        "nclimgrid":
            "NOAA gridded climate normals show trends that may push regions past climate tipping points.",
        "neon and lter":
            "Integrated macroinvertebrate data uncover aquatic community transitions.",
        "neon aquatic":
            "Sensor-based water data monitor freshwater systems for early warning signs.",
        "neon hyperspectral":
            "High-resolution spectral imagery detects vegetation stress before ecosystem tipping.",
        "neon lidar and organismal":
            "Fusing structural and biological data links habitat change to ecological thresholds.",
        "nrcs soil exploration":
            "Soil survey attributes illuminate land degradation tipping points.",
        "osm":
            "OpenStreetMap vectors map human pressures that drive ecological tipping dynamics.",
        "prism":
            "Gridded temperature and precipitation normals track climate trends toward tipping points.",
        "rap-tiles":
            "Rangeland Analysis Platform tiles reveal vegetation transitions and desertification risk.",
        "sentinel streaming":
            "Sentinel-2 quicklooks enable rapid detection of landscape changes near thresholds.",
        "usgs water services":
            "Streamflow and groundwater APIs flag hydrologic systems near critical limits.",
        "watershed boundaries":
            "Hydrologic unit maps frame catchments vulnerable to ecological shifts.",
        "weatherbench":
            "Benchmark datasets support models predicting extreme events and tipping points.",
    }

    def derive_tags(md_path):
        """Return tags built from the first two directory names of the path."""
        parents = md_path.relative_to(docs_dir).parts[:-1]
        return [name.replace(" ", "-").lower() for name in parents[:2]]

    def read_title(content, md_path):
        """Return the page title: ATX '# ' heading, setext '===' heading, or file stem."""
        lines = content.splitlines()
        # Skip leading blank lines so a heading that follows front matter or
        # padding is still recognised.
        start = next((i for i, line in enumerate(lines) if line.strip()), len(lines))
        lines = lines[start:]
        if lines and lines[0].startswith("# "):
            return lines[0][2:].strip()
        if len(lines) >= 2 and set(lines[1]) == {"="}:
            return lines[0].strip()
        return md_path.stem.replace("_", " ").strip()

    def normalise_tags(raw):
        """Coerce a front-matter ``tags`` value into a list of strings."""
        if not raw:
            return []
        if isinstance(raw, (list, tuple, set)):
            return [str(item) for item in raw if item is not None]
        # A scalar such as ``tags: climate`` is one tag, not a sequence of
        # characters.
        return [str(raw)]

    def write_entries(handle, entries, prefix="", with_descriptions=False):
        """Write a sorted bullet list of ``(title, path)`` links."""
        for title, path in sorted(entries):
            desc = summit_descriptions.get(title) if with_descriptions else None
            if desc:
                handle.write(f"- [{title}]({prefix}{path}) - {desc}\n")
            else:
                handle.write(f"- [{title}]({prefix}{path})\n")

    # Map each tag to the (title, docs-relative path) pairs that carry it.
    tags_map = {}
    for md_path in docs_dir.rglob("*.md"):
        if md_path == tag_page:
            continue
        rel = md_path.relative_to(docs_dir)
        # Ignore deeply nested files and the generated topic pages.
        if len(rel.parts) > 3 or rel.parts[0] == "topic":
            continue
        content = md_path.read_text(encoding="utf-8")
        match = frontmatter_re.match(content)
        if match:
            fm = yaml.safe_load(match.group(1)) or {}
            body = content[match.end():]
        else:
            fm = {}
            body = content
        # Front matter that is not a mapping (a bare list or scalar) has no
        # "tags" key to read.
        declared = fm.get("tags") if isinstance(fm, dict) else None
        tags = normalise_tags(declared) or derive_tags(md_path)
        # Read the title from the body: with front matter present the first
        # line of `content` is '---', which would hide the real heading.
        title = read_title(body, md_path)
        for tag in tags:
            tags_map.setdefault(tag, []).append((title, rel.as_posix()))

    with tag_page.open("w", encoding="utf-8") as f:
        f.write("# Tags\n\n")
        for tag in sorted(tags_map):
            is_summit = tag == summit_tag
            f.write(f"## {tag}\n\n")
            if is_summit:
                f.write(summit_link)
                f.write("\n\n")
            write_entries(f, tags_map[tag], with_descriptions=is_summit)
            f.write("\n")

    # Top tags: most used first, ties broken alphabetically; single-use tags
    # and tags containing digits are excluded.
    ranked = sorted(tags_map, key=lambda tag: (-len(tags_map[tag]), tag))
    top_tags = [
        tag
        for tag in ranked
        if len(tags_map[tag]) > 1 and not any(ch.isdigit() for ch in tag)
    ][:10]

    topic_dir.mkdir(exist_ok=True)
    for tag in top_tags:
        with (topic_dir / f"{tag}.md").open("w", encoding="utf-8") as f:
            f.write(f"# {tag}\n\n")
            # Topic pages live one level below docs_dir, hence the "../".
            write_entries(f, tags_map.get(tag, []), prefix="../")

    # Custom standalone page for Innovation Summit 2025 tag
    summit_page = docs_dir / "innovation-summit-2025.md"
    with summit_page.open("w", encoding="utf-8") as f:
        f.write("# Innovation Summit 2025\n\n")
        f.write(summit_link)
        f.write("\n\n")
        write_entries(f, tags_map.get(summit_tag, []), with_descriptions=True)
        f.write("\n")

    if mkdocs_path.exists():
        # NOTE(review): yaml.safe_load rejects custom tags (e.g.
        # ``!!python/name:``) and the dump below drops comments - confirm the
        # target mkdocs.yml is plain YAML before relying on this rewrite.
        # An empty config file loads as None, so fall back to an empty mapping.
        cfg = yaml.safe_load(mkdocs_path.read_text(encoding="utf-8")) or {}
        cfg["nav"] = [
            {"Innovation Summit 2025": "innovation-summit-2025.md"},
            {"Home": "index.md"},
            {"Topics": [{tag: f"topic/{tag}.md"} for tag in top_tags]},
            {"Tags": "tags.md"},
        ]
        mkdocs_path.write_text(yaml.dump(cfg, sort_keys=False), encoding="utf-8")


if __name__ == "__main__":
    build_tag_pages()
178+ ```
20179
21- 1 . Copy [ ` scripts/build_tags.py ` ] ( scripts/build_tags.py ) from this repository into the ` scripts/ ` directory of your new project.
22- 2 . Adjust the ` summit_descriptions ` dictionary inside the script if you want special descriptions for particular tags. You can also edit the logic that derives tags from paths in the ` derive_tags ` function.
Adjust the `summit_descriptions` dictionary or the `derive_tags` function if you need custom behavior.
23181
24182## 3. Generate tag pages
25183
0 commit comments