Skip to content

Commit a4e5393

Browse files
authored
Generate a table showing more details about proposals (microsoft#541)
This works by preprocessing the proposals to add front matter, extracted from the text of the proposals. This is used in index.md to generate the table. This runs just before jekyll in the github action.
1 parent 44eebf1 commit a4e5393

File tree

4 files changed

+430
-5
lines changed

4 files changed

+430
-5
lines changed
Lines changed: 338 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,338 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Script to update Jekyll front matter for HLSL proposal markdown files.
4+
5+
This script processes .md files in the proposals directory and:
6+
1. Extracts metadata from bullet points (Proposal, Author, Sponsor, Status, Planned Version)
7+
2. Adds Jekyll front matter if it doesn't exist
8+
3. Merges missing fields into existing front matter (existing values are kept; conflicts are reported as warnings)
9+
4. Skips the templates subdirectory
10+
"""
11+
12+
import os
13+
import re
14+
import sys
15+
from pathlib import Path
16+
from typing import Dict, Optional, Tuple
17+
18+
19+
def extract_title_from_heading(content: str) -> Optional[str]:
    """Return the text of the first level-1 Markdown heading in ``content``.

    A heading is any line that, once surrounding whitespace is removed,
    starts with ``"# "``. Lines inside fenced code blocks (delimited by
    lines starting with three backticks or three tildes) are ignored so
    that a shell or Python comment in an example is not mistaken for the
    document title.

    Args:
        content: Markdown text to scan.

    Returns:
        The heading text without the leading ``#`` and surrounding
        whitespace, or ``None`` when no level-1 heading is found.
    """
    in_fence = False
    for raw_line in content.split('\n'):
        line = raw_line.strip()
        # A fence line toggles "inside code block" state; it is never a title.
        if line.startswith(('```', '~~~')):
            in_fence = not in_fence
            continue
        if in_fence:
            continue
        if line.startswith('# '):
            return line[2:].strip()
    return None
27+
28+
29+
def extract_metadata_from_content(content: str) -> Dict[str, str]:
    """Extract proposal metadata from the bullet list at the top of a document.

    Recognised bullets (matched case-insensitively, ``*`` bullets only) are
    ``Proposal``, ``Author(s)``, ``Sponsor``, ``Status`` and
    ``Planned Version``. A bullet's value may continue on following lines
    that are indented with a space or tab; continuation lines are joined
    with single spaces.

    Scanning stops at the first line starting with ``##``. Metadata lives in
    the header of the document, so bullets that appear in later sections must
    not override it.

    Args:
        content: Markdown text of the proposal.

    Returns:
        A dict with any of the keys ``proposal``, ``author``, ``sponsor``,
        ``status`` and ``planned_version`` that were found with a non-empty
        value. Markdown links are reduced to their link text for author and
        sponsor, and ``**bold**`` markers are removed from the status.
    """
    # Compiled once per call instead of once per line and pattern.
    field_patterns = {
        'proposal': re.compile(r'^\*\s*Proposal:\s*\[([^\]]+)\]', re.IGNORECASE),
        'author': re.compile(r'^\*\s*Author\(s\):\s*(.+)$', re.IGNORECASE),
        'sponsor': re.compile(r'^\*\s*Sponsor:\s*(.+)$', re.IGNORECASE),
        'status': re.compile(r'^\*\s*Status:\s*(.+)$', re.IGNORECASE),
        'planned_version': re.compile(r'^\*\s*Planned\s+Version:\s*(.+)$', re.IGNORECASE),
    }

    def match_field(stripped_line: str) -> Optional[Tuple[str, str]]:
        """Return (field, initial value) if the line starts a known bullet."""
        for field, pattern in field_patterns.items():
            found = pattern.match(stripped_line)
            if found:
                return field, found.group(1).strip()
        return None

    def clean_value(field: str, raw_value: str) -> str:
        """Strip markdown decoration that is unwanted in front matter."""
        value = raw_value.strip()
        if field in ('author', 'sponsor'):
            # [Name](url) -> Name
            value = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', value)
            value = re.sub(r'\s+', ' ', value)
        elif field == 'status':
            # **text** -> text
            value = re.sub(r'\*\*([^*]+)\*\*', r'\1', value)
        return value

    metadata: Dict[str, str] = {}
    lines = content.split('\n')
    index = 0
    while index < len(lines):
        stripped = lines[index].strip()
        index += 1

        # The first sub-section heading ends the metadata header.
        if stripped.startswith('##'):
            break

        matched = match_field(stripped)
        if matched is None:
            continue
        field, value = matched

        # Absorb indented continuation lines belonging to this bullet.
        while index < len(lines):
            candidate = lines[index]
            candidate_stripped = candidate.strip()
            if (not candidate_stripped
                    or candidate_stripped.startswith('##')
                    or not candidate.startswith((' ', '\t'))
                    or match_field(candidate_stripped) is not None):
                break
            value += ' ' + candidate_stripped
            index += 1

        if value:
            metadata[field] = clean_value(field, value)

    return metadata
121+
122+
123+
def has_front_matter(content: str) -> bool:
    """Report whether ``content`` begins with a ``---`` delimiter.

    Leading and trailing whitespace is ignored before the comparison, so
    blank lines ahead of the delimiter do not affect the result.
    """
    opening = content.strip()[:3]
    return opening == '---'
126+
127+
128+
def parse_simple_yaml(yaml_content: str) -> Dict[str, str]:
    """Parse flat ``key: value`` YAML lines into a dict of strings.

    This is deliberately minimal: it understands one mapping entry per line
    and nothing else (no nesting, lists or multi-line scalars). Blank lines,
    ``#`` comment lines and lines without a colon are skipped.

    Quoted values are unquoted following YAML rules:
      * double-quoted: ``\\"`` becomes ``"`` and ``\\\\`` becomes ``\\``;
      * single-quoted: a doubled ``''`` becomes ``'``.

    Args:
        yaml_content: The text between the front matter delimiters.

    Returns:
        Mapping of key to value in order of appearance; a repeated key keeps
        its last value.
    """
    result: Dict[str, str] = {}
    for raw_line in yaml_content.split('\n'):
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue

        # Split on the first colon only so values such as URLs stay intact.
        key, separator, value = line.partition(':')
        if not separator:
            continue
        key = key.strip()
        value = value.strip()

        # Require two characters so a lone quote is not treated as "quoted".
        if len(value) >= 2 and value[0] == value[-1] == '"':
            # Single pass so that an escaped backslash followed by a quote
            # is not unescaped twice.
            value = re.sub(r'\\(["\\])', r'\1', value[1:-1])
        elif len(value) >= 2 and value[0] == value[-1] == "'":
            value = value[1:-1].replace("''", "'")

        result[key] = value
    return result
144+
145+
146+
def parse_existing_front_matter(content: str) -> Tuple[Dict[str, str], str]:
    """Split ``content`` into its front matter mapping and the remaining body.

    Front matter is recognised only when the first line is ``---`` and a
    later line is a matching ``---`` delimiter. If either delimiter is
    missing the document is treated as having no front matter and is
    returned untouched; this prevents a leading horizontal rule from causing
    the whole document to be parsed as YAML.

    Args:
        content: Full text of the markdown file.

    Returns:
        ``(front_matter, body)`` where ``front_matter`` is the parsed
        key/value mapping (empty when absent or unparseable) and ``body`` is
        the text after the closing delimiter, or ``content`` itself when no
        complete front matter block exists.
    """
    lines = content.split('\n')
    # str.split always yields at least one element, so lines[0] is safe.
    if lines[0].strip() != '---':
        return {}, content

    closing_index = next(
        (index for index in range(1, len(lines)) if lines[index].strip() == '---'),
        None,
    )
    if closing_index is None:
        # Opening delimiter without a closing one: not front matter.
        return {}, content

    front_matter_yaml = '\n'.join(lines[1:closing_index])
    front_matter_dict: Dict[str, str] = {}
    if front_matter_yaml.strip():
        try:
            front_matter_dict = parse_simple_yaml(front_matter_yaml)
        except Exception as e:
            # Best effort: an unreadable header is reported and treated as empty.
            print(f"Warning: Could not parse existing front matter: {e}", file=sys.stderr)
            front_matter_dict = {}

    remaining_content = '\n'.join(lines[closing_index + 1:])
    return front_matter_dict, remaining_content
177+
178+
179+
def create_front_matter(metadata: Dict[str, str], title: Optional[str], existing_front_matter: Optional[Dict[str, str]] = None) -> str:
    """Render a Jekyll front matter block from existing and extracted values.

    Existing front matter always wins: an extracted value is only added when
    its key is not already present. Keys are emitted in the order of
    ``existing_front_matter`` followed by any newly added keys in the fixed
    order title, proposal, author, sponsor, status, planned_version.

    String values are written as double-quoted YAML scalars with backslashes
    and double quotes escaped, so values containing either character remain
    valid YAML.

    Args:
        metadata: Extracted bullet metadata; only the five known keys are used.
        title: Extracted document title, or ``None``/empty to omit it.
        existing_front_matter: Previously present key/value pairs. It is not
            modified.

    Returns:
        The front matter block, starting and ending with ``---`` and without
        a trailing newline.
    """
    # Copy so the caller's dict is left untouched; dict insertion order then
    # gives "existing keys first, new keys after" for free.
    merged = dict(existing_front_matter or {})

    if title:
        merged.setdefault('title', title)
    for key in ('proposal', 'author', 'sponsor', 'status', 'planned_version'):
        if key in metadata:
            merged.setdefault(key, metadata[key])

    front_matter_lines = ['---']
    for key, value in merged.items():
        if isinstance(value, str):
            # Backslash must be escaped before the quote, otherwise the
            # backslash introduced for the quote would be doubled.
            escaped_value = value.replace('\\', '\\\\').replace('"', '\\"')
            front_matter_lines.append(f'{key}: "{escaped_value}"')
        else:
            front_matter_lines.append(f'{key}: {value}')
    front_matter_lines.append('---')

    return '\n'.join(front_matter_lines)
229+
230+
231+
def process_file(file_path: Path) -> bool:
    """Add missing front matter fields to one proposal markdown file.

    The file is split into any existing front matter and the body. The title
    and bullet metadata are extracted from the body only, so text inside an
    existing front matter block (for example a YAML comment) can never be
    mistaken for the document heading or a metadata bullet.

    Extracted values that differ from existing front matter values are
    reported on stderr; the existing value is kept. The file is rewritten
    only when at least one extracted field is absent from the existing
    front matter.

    Args:
        file_path: Path of the markdown file to process.

    Returns:
        ``True`` if the file was rewritten, ``False`` if nothing needed to
        change or the file could not be read or written (errors are printed
        to stderr).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, UnicodeError) as e:
        print(f"Error reading {file_path}: {e}", file=sys.stderr)
        return False

    existing_front_matter, content_without_front_matter = parse_existing_front_matter(content)

    metadata = extract_metadata_from_content(content_without_front_matter)
    title = extract_title_from_heading(content_without_front_matter)

    # Collect extracted values in the canonical reporting order.
    extracted = {}
    if title:
        extracted['title'] = title
    for key in ('proposal', 'author', 'sponsor', 'status', 'planned_version'):
        if key in metadata:
            extracted[key] = metadata[key]

    missing_fields = []
    for key, value in extracted.items():
        if key not in existing_front_matter:
            missing_fields.append(key)
        elif existing_front_matter[key] != value:
            conflict = f"{key}: existing='{existing_front_matter[key]}' vs extracted='{value}'"
            print(f"Warning: {file_path} has conflicting metadata - {conflict}", file=sys.stderr)

    # Nothing extracted, or everything extracted is already present.
    if not missing_fields:
        return False

    front_matter = create_front_matter(metadata, title, existing_front_matter)
    new_content = front_matter + '\n' + content_without_front_matter

    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(new_content)
    except (OSError, UnicodeError) as e:
        print(f"Error writing {file_path}: {e}", file=sys.stderr)
        return False
    return True
296+
297+
298+
def main():
    """Update front matter for every proposal markdown file in the repository.

    The proposals directory is located relative to this script
    (``<script dir>/../proposals``). The script path is resolved first so the
    lookup works regardless of the current working directory. Files under a
    ``templates`` directory inside the proposals tree, and files named
    ``index.md`` or ``README.md``, are skipped.

    Exits with status 1 if the proposals directory does not exist.
    """
    # Without resolve(), running the script from inside its own directory
    # yields Path('.') whose parent is still '.', pointing at the wrong root.
    script_dir = Path(__file__).resolve().parent
    repo_root = script_dir.parent
    proposals_dir = repo_root / 'proposals'

    if not proposals_dir.is_dir():
        print(f"Error: Proposals directory not found at {proposals_dir}", file=sys.stderr)
        sys.exit(1)

    non_proposal_names = {'index.md', 'README.md'}
    md_files = sorted(
        path
        for path in proposals_dir.rglob('*.md')
        # Only inspect path components below proposals_dir, so a checkout
        # located under some ".../templates/..." directory is not skipped.
        if 'templates' not in path.relative_to(proposals_dir).parts
        and path.name not in non_proposal_names
    )

    if not md_files:
        print("No proposal markdown files found to process")
        return

    modified_count = 0
    for file_path in md_files:
        if process_file(file_path):
            modified_count += 1

    if modified_count > 0:
        print(f"Updated front matter for {modified_count} files")
    else:
        print("No files needed front matter updates")


if __name__ == '__main__':
    main()

.github/workflows/jekyll-gh-pages.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ jobs:
4949
cmake --build build --target pdf
5050
cp build/hlsl.pdf ${{github.workspace}}/specs/
5151
cp build/html/* ${{github.workspace}}/specs/
52+
- name: Update proposal front matter
53+
run: python3 .github/update_proposal_front_matter.py
5254
- name: Build with Jekyll
5355
uses: actions/jekyll-build-pages@v1
5456
with:

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,3 +348,5 @@ MigrationBackup/
348348

349349
# Ionide (cross platform F# VS Code tools) working folder
350350
.ionide/
351+
352+
_site/

0 commit comments

Comments
 (0)