Skip to content

Commit 31064bf

Browse files
committed
Various changes, experiments and updates
1 parent 008e490 commit 31064bf

File tree

5 files changed

+137
-67
lines changed

5 files changed

+137
-67
lines changed

.github/workflows/table_of_contents.yml

Lines changed: 50 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -19,43 +19,61 @@ jobs:
1919
generate_toc_formats:
2020
runs-on: ubuntu-latest
2121
steps:
22-
- uses: actions/checkout@v3
22+
# Step 1: Check out the repository
23+
- name: Check out repository
24+
uses: actions/checkout@v3
25+
with:
26+
repository: ClickHouse/clickhouse-docs
27+
ref: ${{ github.ref }} # checkout the current branch
2328

24-
# Step 1 - Cache directory contents
25-
- name: Cache directory contents # Generating the TOC if there are no files added/removed is wasteful
26-
uses: actions/cache@v3
29+
# Step 2 - Setup Python
30+
- name: Set up Python
31+
uses: actions/setup-python@v3
2732
with:
28-
path: |
29-
docs/en/interfaces/formats
30-
key: toc-cache-${{ hashFiles('docs/en/interfaces/formats/**')}}
31-
32-
# Step 2 - Check if Cache was hit (files have not changed) generate the TOC
33-
- name: Generate Format Table Of Contents
34-
if: steps.Cache.outputs.cache-hit != 'true' # If there's no changes
35-
id: toc_gen
33+
python-version: '3.x'
34+
35+
# Step 3: Install Python dependencies
36+
- name: Install dependencies
3637
run: |
37-
# Step 2.1 - Setup Python
38-
- name: Set up Python
39-
uses: actions/setup-python@v3
40-
with:
41-
python-version: '3.x'
42-
43-
# Step 2.2: Install Python dependencies
44-
- name: Install dependencies
45-
run: |
46-
python -m pip install --upgrade pip
47-
pip install -r 'scripts/knowledgebase-checker/requirements.txt'
48-
49-
# Step 2.3: Run scripts to generate TOCs:
50-
- name: Generate TOCs
51-
run: |
52-
./scripts/table-of-contents-generator/toc_gen.py --kb-dir="docs/en/interfaces/formats" --single-toc
53-
continue-on-error: true
54-
55-
# Step 6: Fail the build if any script returns exit code 1
38+
python -m pip install --upgrade pip
39+
pip install -r 'scripts/table-of-contents-generator/requirements.txt'
40+
41+
# Step 4 - Pull main repo docs, run script to generate TOCs:
42+
- name: Generate TOCs
  # The "Check exit code" step reads steps.toc_gen.outcome, so this step needs that id.
  id: toc_gen
  # Keep the job running on failure so the next step can print a readable
  # message and fail the workflow itself.
  continue-on-error: true
  run: |
    yarn prep-from-master
    python -u ./scripts/table-of-contents-generator/toc_gen.py --dir="docs/en/interfaces/formats" --single-toc --out="table-of-contents-files" --ignore "_snippets"
46+
47+
# Step 5 - Fail the workflow if script returns exit code 1
5648
- name: Check exit code
5749
run: |
5850
if [[ "${{ steps.toc_gen.outcome }}" == "failure" ]]; then
5951
echo "Ran into trouble generating a table of contents. See the logs for details."
6052
exit 1
61-
fi
53+
fi
54+
55+
# Step 6 - Check if anything was actually updated
56+
- name: Check for Changes
  id: check_changes
  run: |
    git status -u
    # `git diff` only reports modifications to tracked files, so a newly
    # generated TOC file would go unnoticed. `git status --porcelain` lists
    # untracked files as well. The check is limited to the directory that the
    # commit step stages, so unrelated working-tree changes cannot trigger an
    # empty commit.
    if [[ -n "$(git status --porcelain -- table-of-contents-files)" ]]; then
      echo "Changes detected."
      echo "has_changes=true" >> "$GITHUB_OUTPUT"
    else
      echo "No changes detected."
      echo "has_changes=false" >> "$GITHUB_OUTPUT"
    fi
67+
68+
# Step 7 - Commit and Push generated Table Of Contents files
69+
- name: Commit and Push Changes
70+
if: steps.check_changes.outputs.has_changes == 'true'
71+
run: |
72+
# configure the user
73+
git config --global user.name "${{ github.actor }}"
74+
git config --global user.email "${{ github.actor}}@users.noreply.github.com"
75+
# standard git flow
76+
git checkout
77+
git add table-of-contents-files/*
78+
git commit -m "Autogenerate table of contents files from GitHub action - $(date '+%Y-%m-%d %H:%M:%S')"
79+
git push origin HEAD:update_table_of_contents

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,6 @@ docs/en/cloud/manage/api/services-api-reference.md
4949
.vscode
5050
.aspell.en.prepl
5151
*.md.bak
52+
53+
# Don't ignore generated table of contents files
54+
!toc.json
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
PyYAML==6.0.2

scripts/table-of-contents-generator/toc_gen.py

100644100755
Lines changed: 83 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
1+
#!/usr/bin/env python3
2+
13
"""
24
This script can be used to automatically generate a table of contents (JSON file) from the markdown files in a directory,
35
or multiple directories.
46
"""
57

6-
#!/usr/bin/env python3
7-
88
import json
99
import os
1010
import argparse
1111
import sys
12+
from collections import defaultdict
13+
import yaml
1214

1315
def parse_args() -> argparse.Namespace:
1416
parser = argparse.ArgumentParser(
@@ -20,43 +22,82 @@ def parse_args() -> argparse.Namespace:
2022
action="store_true",
2123
help="Generates a single TOC for all files in all sub-directories of provided directory. By default, generates TOC per folder.",
2224
)
25+
parser.add_argument(
26+
"--out",
27+
default=None,
28+
help="Path to output the resulting table of contents file to (by default it is output to the provided directory - file is named according to --dir)"
29+
)
2330
parser.add_argument(
2431
"--dir",
2532
help="Path to a folder containing markdown (.md, .mdx) documents containing YAML with title, description, slug."
2633
)
34+
parser.add_argument('--ignore', metavar='S', type=str, nargs='+',
35+
help='Directory names to ignore. E.g --ignore _snippets images')
2736
return parser.parse_args()
2837

2938
def extract_title_description_slug(filename):
    """
    Read the YAML frontmatter of a markdown file and return it as a dict.

    The frontmatter is the text between the first two '---' marker lines found
    in the file. A warning is printed for each of the fields title,
    description and slug that is absent or empty.

    :param filename: path to the markdown (.md, .mdx) file to read.
    :return: the parsed frontmatter mapping, or None when the file has no
        complete frontmatter block or the block is empty / not a mapping.
        Callers skip None results.
    Exits the process with status 1 when the file cannot be read or the
    frontmatter is not valid YAML.
    """
    required_fields = ("title", "description", "slug")
    marker = '---\n'

    try:
        with open(filename, "r", encoding="utf-8") as f:
            content = f.read()
    except OSError as e:
        print(f"Ran into a problem reading frontmatter: {e}")
        sys.exit(1)

    # NOTE(review): the markers are searched anywhere in the file, so two
    # horizontal rules in a file without frontmatter are treated as one.
    frontmatter_start = content.find(marker)
    if frontmatter_start == -1:
        return None
    body_start = frontmatter_start + len(marker)
    frontmatter_end = content.find(marker, body_start)
    if frontmatter_end == -1:
        return None

    try:
        frontmatter_data = yaml.safe_load(content[body_start:frontmatter_end])
    except yaml.YAMLError as e:
        print(f"Ran into a problem parsing frontmatter in {filename}: {e}")
        sys.exit(1)

    # safe_load returns None for an empty block and may return a scalar or a
    # list for malformed frontmatter; only a non-empty mapping is usable.
    if not isinstance(frontmatter_data, dict) or not frontmatter_data:
        return None

    missing_fields = [
        field for field in required_fields if not frontmatter_data.get(field)
    ]
    if missing_fields:
        print(f"Warning: {filename} is missing some fields:")
        for field in missing_fields:
            print(f"- {field}")

    return frontmatter_data
69+
def walk_dirs(root_dir, ignore_dirs=None):
    """
    Yield every directory under root_dir (root_dir itself included), skipping
    ignored directories and everything below them.

    :param root_dir: directory at which to start walking.
    :param ignore_dirs: iterable of directory-name prefixes to skip, e.g.
        ["_snippets", "images"]. None (the value argparse supplies when
        --ignore is not given) or an empty list ignores nothing.
    :return: generator of directory paths as produced by os.walk.
    """
    # str.startswith accepts a tuple of prefixes; an empty tuple never matches.
    # A prefix match also covers an exact name match.
    ignored_prefixes = tuple(ignore_dirs or ())
    for root, dirs, _files in os.walk(root_dir):
        # Prune 'dirs' in place so os.walk does not descend into ignored directories
        dirs[:] = [d for d in dirs if not d.startswith(ignored_prefixes)]
        yield root
4875

49-
def write_to_file(json_array, output_path):
76+
def write_to_file(json_items, directory, output=None):
    """
    Serialize json_items as indented JSON, followed by a newline, to a file.

    :param json_items: JSON-serializable table of contents entries.
    :param directory: path of the file to write, e.g.
        'docs/en/interfaces/formats/toc.json' (despite the name, this is a
        file path, not a directory).
    :param output: optional directory to write into instead. The file name is
        then 'directory' with every "/" replaced by "_", e.g.
        docs_en_interfaces_formats_toc.json.

    Missing parent directories are created. OS errors are reported on stdout
    and are not raised.
    """
    if output is not None:
        output_path = os.path.join(output, directory.replace("/", "_"))
    else:
        output_path = directory
    try:
        parent_dir = os.path.dirname(output_path)
        # A bare filename has no parent component and os.makedirs("") raises,
        # so only create directories when there is something to create.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(json_items, f, indent=4)
            f.write('\n')
        print(f"Wrote {output_path}")
    except OSError as e:
        # IsADirectoryError is the EISDIR case (errno 21 on Linux):
        # output_path itself is an existing directory.
        if isinstance(e, IsADirectoryError):
            print(f"Directory already exists: {e}")
        else:
            print(f"An error occurred creating directory: {e}")
95+
def write_file(json_items, args, directory):
    """
    Write the table of contents for 'directory' as a toc.json file.

    :param json_items: JSON-serializable table of contents entries.
    :param args: parsed command-line arguments; only args.out is read.
    :param directory: directory the entries were collected from.

    When args.out is set the file is written into that directory, otherwise
    it is written to '<directory>/toc.json'.
    """
    # write_to_file treats output=None as "write to the given path", so
    # args.out can be passed straight through whether or not it was provided.
    write_to_file(json_items, directory + "/toc.json", args.out)
60101

61102
def main():
62103

@@ -66,13 +107,15 @@ def main():
66107
if root_dir is None:
67108
print("Please provide a directory with argument --dir")
68109
sys.exit(1)
69-
70-
if args.single_toc:
110+
if os.path.lexists(root_dir) is False:
111+
print("Path provided does not exist")
112+
sys.exit(1)
113+
if args.single_toc is True:
71114
json_items = [] # single list for all directories
72115

73-
for directory in walk_dirs(root_dir): # Walk directories
116+
for directory in walk_dirs(root_dir, args.ignore): # Walk directories
74117

75-
if not args.single_toc:
118+
if args.single_toc is False:
76119
json_items = [] # new list for each directory
77120

78121
for filename in os.listdir(directory): # for each directory
@@ -85,19 +128,24 @@ def main():
85128
result = extract_title_description_slug(full_path)
86129
if result is not None:
87130
json_items.append(result)
88-
89-
if not args.single_toc:
90-
json_array = sorted(json_items, key=lambda x: x.get("title"))
91-
131+
if args.single_toc is False:
132+
# don't write toc.json for empty folders
133+
if len(json_items) != 0:
134+
json_items = sorted(json_items, key=lambda x: x.get("title"))
135+
# output to the specified directory if arg --out is provided
136+
write_file(json_items, args, directory)
137+
else:
138+
print("Ran into an issue trying to extract YAML: empty result")
139+
140+
if args.single_toc is True:
92141
# don't write toc.json for empty folders
93142
if len(json_items) != 0:
94-
write_to_file(json_items, directory+"/toc.json")
95-
96-
if args.single_toc:
97-
json_array = sorted(json_items, key=lambda x: x.get("title"))
98-
# don't write toc.json for empty folders
99-
if len(json_items) != 0:
100-
write_to_file(json_items, root_dir+"/toc.json")
143+
json_array = sorted(json_items, key=lambda x: x.get("title"))
144+
# output to the specified directory if arg --out is provided
145+
write_file(json_items, args, directory)
146+
sys.exit(0)
147+
else:
148+
sys.exit(1)
101149

102150
if __name__ == "__main__":
103151
main()

table-of-contents-files/docs_en_interfaces_formats_toc.json

Whitespace-only changes.

0 commit comments

Comments
 (0)