Skip to content

Commit dbf34ba

Browse files
authored
Merge pull request #3133 from Blargian/formats_action
Add Github action for autogenerating table of contents json files
2 parents 06d5dfa + 9242ee1 commit dbf34ba

File tree

6 files changed

+230
-43
lines changed

6 files changed

+230
-43
lines changed
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# This GitHub Action is used for triggering updates of
# the toc.json files present in any directory that
# needs an automatically generated table of contents.

name: Generate Table of Contents files

env:
  # Force the stdout and stderr streams to be unbuffered
  PYTHONUNBUFFERED: 1

on:
  pull_request:
    types:
      - synchronize
      - reopened
      - opened

# Least privilege: the only write this workflow performs is pushing the
# generated table of contents files.
permissions:
  contents: write

jobs:
  generate_toc_formats:
    runs-on: ubuntu-latest
    steps:
      # Step 1: Check out the repository
      - name: Check out repository
        uses: actions/checkout@v3

      # Step 2 - Setup Python
      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: '3.x'

      # Step 3: Install Python dependencies
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r 'scripts/table-of-contents-generator/requirements.txt'

      # Step 4 - Pull main repo docs, run script to generate TOCs:
      # The step needs an id so that Step 5 can read its outcome, and
      # continue-on-error so that Step 5 still runs when generation fails.
      - name: Generate TOCs
        id: toc_gen
        continue-on-error: true
        run: |
          yarn prep-from-master
          python -u ./scripts/table-of-contents-generator/toc_gen.py --dir="docs/en/interfaces/formats" --single-toc --out="table-of-contents-files" --ignore "_snippets"

      # Step 5 - Fail the workflow if script returns exit code 1
      - name: Check exit code
        run: |
          if [[ "${{ steps.toc_gen.outcome }}" == "failure" ]]; then
            echo "Ran into trouble generating a table of contents. See the logs for details."
            exit 1
          fi

      # Step 6 - Check if anything was actually updated
      # `git status --porcelain` also reports untracked files, which
      # `git diff` does not, so newly generated TOC files are detected too.
      - name: Check for Changes
        id: check_changes
        run: |
          git status -u
          if [[ -n "$(git status --porcelain -- table-of-contents-files)" ]]; then
            echo "Changes detected."
            echo "has_changes=true" >> "$GITHUB_OUTPUT"
          else
            echo "No changes detected."
            echo "has_changes=false" >> "$GITHUB_OUTPUT"
          fi
          # Action inputs are not shell-expanded, so the commit timestamp
          # has to be computed here and passed on as a step output.
          echo "timestamp=$(date '+%Y-%m-%d %H:%M:%S')" >> "$GITHUB_OUTPUT"

      # Step 7 - Commit and Push generated Table Of Contents files
      - uses: stefanzweifel/git-auto-commit-action@v5
        if: steps.check_changes.outputs.has_changes == 'true'
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          commit_message: "Autogenerate table of contents files from GitHub action - ${{ steps.check_changes.outputs.timestamp }}"
          file_pattern: 'table-of-contents-files/*'
          create_branch: true

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,6 @@ docs/en/cloud/manage/api/services-api-reference.md
4949
.vscode
5050
.aspell.en.prepl
5151
*.md.bak
52+
53+
# Don't ignore generated table of contents files
54+
!toc.json

scripts/autogenerate_table_of_contents.py

Lines changed: 0 additions & 43 deletions
This file was deleted.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
PyYAML==6.0.2
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
#!/usr/bin/env python3
2+
3+
"""
4+
This script can be used to automatically generate a table of contents (JSON file) from the markdown files in a directory,
5+
or multiple directories.
6+
"""
7+
8+
import json
9+
import os
10+
import argparse
11+
import sys
12+
from collections import defaultdict
13+
import yaml
14+
15+
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the table of contents generator.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) the arguments are taken from ``sys.argv[1:]``.

    Returns:
        A namespace with ``single_toc`` (bool), ``out`` (str or None),
        ``dir`` (str or None) and ``ignore`` (list of str; empty when the
        flag is not given).
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Script to generate .json table of contents from YAML frontmatter title, description and slug",
    )
    parser.add_argument(
        "--single-toc",
        action="store_true",
        help="Generates a single TOC for all files in all sub-directories of provided directory. By default, generates TOC per folder.",
    )
    parser.add_argument(
        "--out",
        default=None,
        help="Path to output the resulting table of contents file to (by default it is output to the provided directory - file is named according to --dir)",
    )
    parser.add_argument(
        "--dir",
        help="Path to a folder containing markdown (.md, .mdx) documents containing YAML with title, description, slug.",
    )
    parser.add_argument(
        "--ignore",
        metavar="S",
        type=str,
        nargs="+",
        # Default to an empty list instead of None so that consumers can
        # iterate and do membership tests without a None check.
        default=[],
        help="Directory names to ignore. E.g --ignore _snippets images",
    )
    return parser.parse_args(argv)
37+
38+
def extract_title_description_slug(filename):
    """Read the YAML frontmatter of a markdown file and return it as a dict.

    The frontmatter is the text between the first two ``---`` lines of the
    file. A warning is printed for each of ``title``, ``description`` and
    ``slug`` that the frontmatter does not define.

    Args:
        filename: Path of the markdown (.md, .mdx) file to read.

    Returns:
        The parsed frontmatter mapping (all keys, not only the three named
        above), or an empty dict when the file has no frontmatter or the
        frontmatter is not a mapping.

    Raises:
        SystemExit: With status 1 when the file cannot be read or its
            frontmatter is not valid YAML.
    """
    expected_fields = ("title", "description", "slug")
    delimiter = "---\n"
    frontmatter_data = {}

    try:
        with open(filename, "r", encoding="utf-8") as f:
            content = f.read()
    except (OSError, UnicodeDecodeError) as e:
        print(f"Ran into a problem reading frontmatter: {e}")
        sys.exit(1)

    # find the first frontmatter tag
    frontmatter_start = content.find(delimiter)
    if frontmatter_start != -1:
        body_start = frontmatter_start + len(delimiter)
        # find the second frontmatter tag
        frontmatter_end = content.find(delimiter, body_start)
        if frontmatter_end != -1:
            try:
                parsed = yaml.safe_load(content[body_start:frontmatter_end])
            except yaml.YAMLError as e:
                print(f"Ran into a problem parsing frontmatter in {filename}: {e}")
                sys.exit(1)
            # An empty block parses to None and a scalar/list block is not
            # usable as a TOC entry; both are treated as "no frontmatter".
            if isinstance(parsed, dict):
                frontmatter_data = parsed

    missing_fields = [
        field for field in expected_fields if field not in frontmatter_data
    ]
    if missing_fields:
        print(f"Warning: {filename} is missing some fields:")
        for field in missing_fields:
            print(f"- {field}")

    return frontmatter_data
69+
def walk_dirs(root_dir, ignore_dirs=None):
    """Yield ``root_dir`` and each directory below it, skipping ignored ones.

    Args:
        root_dir: Directory to start walking from.
        ignore_dirs: Optional iterable of directory-name prefixes. A
            directory whose name starts with any of them (an exact match
            included) is neither yielded nor descended into. ``None`` or an
            empty iterable ignores nothing.

    Yields:
        The path of every directory visited, ``root_dir`` first.
    """
    # str.startswith accepts a tuple of prefixes; a prefix test also covers
    # the exact-name case, so no separate membership test is needed.
    ignored_prefixes = tuple(ignore_dirs or ())
    for root, dirs, _files in os.walk(root_dir):
        # Modify the 'dirs' list in-place so os.walk does not descend into
        # the ignored directories.
        dirs[:] = [d for d in dirs if not d.startswith(ignored_prefixes)]
        yield root
75+
76+
def write_to_file(json_items, directory, output=None):
    """Write ``json_items`` as indented JSON to a table of contents file.

    Args:
        json_items: JSON-serialisable table of contents entries.
        directory: Path of the TOC file to write, e.g.
            ``docs/en/interfaces/formats/toc.json`` (despite the parameter
            name this is a file path, not a folder).
        output: Optional folder to collect TOC files in. When given, the
            path separators in ``directory`` are replaced by underscores and
            the file is written directly inside ``output``, e.g.
            ``<output>/docs_en_interfaces_formats_toc.json``. When ``None``
            the file is written to ``directory`` itself.

    Failures to create the folder or write the file are reported on stdout
    and are not raised.
    """
    import errno

    if output is not None:
        # output to the given path the toc.json file
        # If dir='docs/en/interfaces/formats' the file is called docs_en_interfaces_formats_toc.json
        flat_name = directory.replace(os.sep, "/").replace("/", "_")
        output_path = os.path.join(output, flat_name)
    else:
        output_path = directory

    try:
        parent = os.path.dirname(output_path)
        if parent:
            # A bare file name has no parent component and os.makedirs("")
            # raises, so only create directories when there is one.
            os.makedirs(parent, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            # default=str: YAML frontmatter may contain dates, which the
            # json module cannot serialise on its own.
            json.dump(json_items, f, indent=4, default=str)
            f.write('\n')
        print(f"Wrote {output_path}")
    except OSError as e:
        if e.errno == errno.EISDIR:
            print(f"Output path is a directory: {e}")
        else:
            print(f"An error occurred writing {output_path}: {e}")
95+
def write_file(json_items, args, directory):
    """Write the table of contents of ``directory`` as ``toc.json``.

    Args:
        json_items: Table of contents entries to write.
        args: Parsed command-line arguments; ``args.out`` selects the
            output folder and may be ``None``.
        directory: Folder the table of contents belongs to.
    """
    print(args)
    toc_path = directory+"/toc.json"
    # write_to_file's ``output`` parameter defaults to None, so forwarding
    # args.out unconditionally covers both the "--out given" and the
    # "--out omitted" case.
    write_to_file(json_items, toc_path, args.out)
101+
102+
def main():
    """Generate table of contents JSON file(s) for the folder given by --dir.

    Walks the folder (skipping ``--ignore`` directories), reads the YAML
    frontmatter of every ``.md``/``.mdx`` file except ``index.md`` and writes
    the entries sorted by title:

    * default: one ``toc.json`` per folder that has at least one entry;
    * ``--single-toc``: one file for the whole tree, named after ``--dir``.

    Raises:
        SystemExit: Status 1 when ``--dir`` is missing or does not exist, or
            when ``--single-toc`` finds no entries; status 0 after a
            ``--single-toc`` run that found entries.
    """

    def title_key(item):
        # Entries without a title sort first instead of raising TypeError
        # when None is compared with a string.
        return str(item.get("title") or "")

    # Extract script arguments
    args = parse_args()
    root_dir = args.dir
    if root_dir is None:
        print("Please provide a directory with argument --dir")
        sys.exit(1)
    if not os.path.lexists(root_dir):
        print("Path provided does not exist")
        sys.exit(1)

    all_items = []  # entries of every directory, used for --single-toc

    # args.ignore is None when --ignore is not passed on the command line.
    for directory in walk_dirs(root_dir, args.ignore or []):  # Walk directories
        dir_items = []  # entries of this directory only

        # Sorted so that entries with equal titles keep a stable order and
        # the generated file does not change between runs.
        for filename in sorted(os.listdir(directory)):
            full_path = os.path.join(directory, filename)
            if not os.path.isfile(full_path):
                continue
            # index.md is ignored as we expect this to be the page for the table of contents
            if not filename.endswith((".md", ".mdx")) or filename == "index.md":
                continue
            result = extract_title_description_slug(full_path)
            # Files without frontmatter yield an empty dict; skip them
            # rather than emitting empty TOC entries.
            if result:
                dir_items.append(result)

        if args.single_toc:
            all_items.extend(dir_items)
        elif dir_items:
            # output to the specified directory if arg --out is provided
            write_file(sorted(dir_items, key=title_key), args, directory)
        else:
            # don't write toc.json for empty folders
            print("Ran into an issue trying to extract YAML: empty result")

    if args.single_toc:
        # don't write toc.json when nothing was found
        if all_items:
            # Name the single TOC after the requested root directory, not
            # after whichever sub-directory happened to be walked last.
            write_file(sorted(all_items, key=title_key), args, root_dir)
            sys.exit(0)
        else:
            sys.exit(1)


# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
152+

table-of-contents-files/docs_en_interfaces_formats_toc.json

Whitespace-only changes.

0 commit comments

Comments
 (0)