Skip to content

Commit a599af3

Browse files
committed
New Python script to count Learning Path content
1 parent 861d835 commit a599af3

File tree

1 file changed

+307
-0
lines changed

1 file changed

+307
-0
lines changed

tools/count-content.py

Lines changed: 307 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,307 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Script to count learning paths in each category and install guides.
4+
Excludes learning paths marked as draft: true.
5+
Accounts for cross-platform learning paths shared between multiple categories.
6+
Writes results to a markdown file.
7+
"""
8+
9+
import os
10+
import sys
11+
import re
12+
import yaml
13+
import argparse
14+
from collections import defaultdict
15+
from datetime import datetime
16+
17+
def extract_front_matter(file_path):
    """Extract the YAML front matter from a markdown file.

    The front matter is the text between a ``---`` line at the very start of
    the file and the next ``---`` marker.

    Args:
        file_path: Path of the markdown file to read.

    Returns:
        dict: The parsed front matter. An empty dict is returned when the
        file cannot be read, has no front matter, fails to parse, or when the
        front matter is not a YAML mapping (for example, when it is empty),
        so callers can always use ``.get`` on the result.
    """
    try:
        # Decode explicitly instead of relying on the platform default
        # encoding. "utf-8-sig" also accepts files that start with a
        # byte-order mark, which would otherwise defeat the ^--- match below.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            content = f.read()
    except (OSError, ValueError) as e:
        # ValueError covers UnicodeDecodeError as well as malformed paths.
        print(f"Error reading {file_path}: {e}", file=sys.stderr)
        return {}

    # Look for front matter between --- markers. Without re.MULTILINE the
    # ^ anchor only matches at the beginning of the file.
    match = re.search(r'^---\s*\n(.*?)\n---', content, re.DOTALL)
    if not match:
        return {}

    try:
        front_matter = yaml.safe_load(match.group(1))
    except Exception as e:
        # Deliberately broad: parsing is best-effort, and one malformed file
        # must not abort the whole count. The failure is still reported.
        print(f"Error parsing YAML in {file_path}: {e}", file=sys.stderr)
        return {}

    # safe_load returns None for empty input and may return a scalar or a
    # list; only a mapping is usable as front matter.
    return front_matter if isinstance(front_matter, dict) else {}
35+
36+
def is_draft(index_file_path):
    """Check whether a learning path is marked as a draft.

    Only a file named ``_index.md`` is considered; it decides the draft
    status of the entire Learning Path.

    Args:
        index_file_path: Path to the file to inspect.

    Returns:
        bool: True only when the file is named ``_index.md`` and its front
        matter contains ``draft`` set to the boolean ``True``.
    """
    # Compare the final path component instead of a '/'-prefixed suffix so
    # that paths built with the platform separator (and a bare "_index.md")
    # are recognised too.
    if os.path.basename(index_file_path) != '_index.md':
        return False

    front_matter = extract_front_matter(index_file_path)
    # Guard against a non-mapping result (e.g. empty front matter parsed as
    # None) so this check cannot raise AttributeError.
    if not isinstance(front_matter, dict):
        return False
    return front_matter.get('draft', False) is True
47+
48+
def get_shared_categories(index_file_path):
    """Get the categories a learning path is shared with.

    Args:
        index_file_path: Path to the learning path's ``_index.md`` file.

    Returns:
        list: The ``shared_between`` entries from the front matter when
        ``shared_path`` is truthy. A single scalar string is wrapped in a
        one-element list. An empty list is returned when the path is not
        shared, when ``shared_between`` is missing or empty, or when it has
        an unexpected type.
    """
    front_matter = extract_front_matter(index_file_path)
    if not isinstance(front_matter, dict):
        return []
    if not front_matter.get('shared_path', False):
        return []

    shared_between = front_matter.get('shared_between')
    if isinstance(shared_between, str):
        # A scalar value would otherwise be iterated character by character
        # by the caller.
        return [shared_between]
    if isinstance(shared_between, (list, tuple)):
        return list(shared_between)
    # Missing key, empty value (None) or an unsupported type.
    return []
54+
55+
def is_learning_path(directory):
    """Return True when ``directory`` directly contains an ``_index.md`` file."""
    return os.path.isfile(os.path.join(directory, "_index.md"))
59+
60+
def count_learning_paths(base_dir, debug_category=None):
    """Count learning paths in each category, excluding drafts.

    Every subdirectory of ``<base_dir>/content/learning-paths/<category>``
    that contains an ``_index.md`` file is treated as a learning path.
    Drafts are tallied separately and are not included in the direct counts.
    Learning paths in the ``cross-platform`` category are additionally
    credited to every category returned by ``get_shared_categories``.

    Args:
        base_dir: Project root containing the ``content`` directory.
        debug_category: Optional category name; when given, a detailed
            listing for that category is printed.

    Returns:
        tuple: ``(total_counts, direct_counts, shared_counts,
        drafts_by_category, draft_paths, debug_info)`` where

        - ``total_counts`` maps category -> direct + shared (drafts excluded),
        - ``direct_counts`` maps category -> non-draft paths in its directory,
        - ``shared_counts`` maps category -> paths shared from cross-platform,
        - ``drafts_by_category`` maps category -> number of draft paths,
        - ``draft_paths`` is a list of ``"<category>/<name>"`` strings,
        - ``debug_info`` maps ``'direct_paths'``, ``'shared_paths'``,
          ``'draft_paths'`` and ``'all_dirs'`` to per-category name lists.
    """
    direct_counts = defaultdict(int)
    shared_counts = defaultdict(int)
    drafts_by_category = defaultdict(int)
    draft_paths = []
    debug_info = {
        'direct_paths': defaultdict(list),
        'shared_paths': defaultdict(list),
        'draft_paths': defaultdict(list),
        'all_dirs': defaultdict(list)
    }

    learning_paths_dir = os.path.join(base_dir, "content", "learning-paths")
    if os.path.isdir(learning_paths_dir):
        # Sorted traversal keeps the results independent of the order in
        # which the file system happens to list entries.
        for category in sorted(os.listdir(learning_paths_dir)):
            category_path = os.path.join(learning_paths_dir, category)
            if not os.path.isdir(category_path):
                continue

            for item in sorted(os.listdir(category_path)):
                item_path = os.path.join(category_path, item)
                if not os.path.isdir(item_path):
                    continue
                debug_info['all_dirs'][category].append(item)

                if not is_learning_path(item_path):
                    print(f"Warning: Directory {item_path} does not have an _index.md file")
                    continue

                index_file = os.path.join(item_path, "_index.md")
                if is_draft(index_file):
                    drafts_by_category[category] += 1
                    draft_paths.append(f"{category}/{item}")
                    debug_info['draft_paths'][category].append(item)
                    continue

                direct_counts[category] += 1
                debug_info['direct_paths'][category].append(item)

                # Only cross-platform learning paths can be shared with
                # other categories.
                if category == "cross-platform":
                    for shared_category in get_shared_categories(index_file):
                        shared_counts[shared_category] += 1
                        debug_info['shared_paths'][shared_category].append(item)

    # Combine direct and shared counts
    total_counts = defaultdict(int)
    for category in set(direct_counts) | set(shared_counts):
        total_counts[category] = direct_counts.get(category, 0) + shared_counts.get(category, 0)

    if debug_category:
        _print_category_debug(learning_paths_dir, debug_category, debug_info)

    return total_counts, direct_counts, shared_counts, drafts_by_category, draft_paths, debug_info


def _print_category_debug(learning_paths_dir, debug_category, debug_info):
    """Print the detailed directory, direct, shared and draft listing for one category."""
    all_dirs = debug_info['all_dirs'].get(debug_category, [])
    direct = debug_info['direct_paths'].get(debug_category, [])
    shared = debug_info['shared_paths'].get(debug_category, [])
    drafts = debug_info['draft_paths'].get(debug_category, [])

    print(f"\nDetailed information for category: {debug_category}")

    sections = (
        ("All directories", all_dirs),
        ("Direct Learning Paths", direct),
        ("Shared Learning Paths from cross-platform", shared),
        ("Draft Learning Paths", drafts),
    )
    for title, names in sections:
        print(f"\n{title} ({len(names)}):")
        for name in sorted(names):
            print(f" - {name}")

    # Show the raw draft declaration of every path that was counted as a draft
    for name in sorted(drafts):
        full_path = os.path.join(learning_paths_dir, debug_category, name, "_index.md")
        if os.path.exists(full_path):
            # Debug output only: undecodable bytes are replaced rather than
            # aborting the report.
            with open(full_path, 'r', encoding='utf-8', errors='replace') as f:
                content = f.read()
            print(f"\nChecking draft status for {name}:")
            print(f" Has 'draft: true': {'draft: true' in content}")
            # Extract the first line that contains a draft declaration
            match = re.search(r'(.*draft:.*)', content)
            if match:
                print(f" Draft line: {match.group(1).strip()}")

    # Find directories that are not counted as learning paths
    not_learning_paths = set(all_dirs) - set(direct) - set(drafts)
    if not_learning_paths:
        print(f"\nDirectories not counted as Learning Paths ({len(not_learning_paths)}):")
        for name in sorted(not_learning_paths):
            full_path = os.path.join(learning_paths_dir, debug_category, name)
            print(f" - {name} (Has _index.md: {os.path.exists(os.path.join(full_path, '_index.md'))})")
184+
185+
def count_install_guides(base_dir):
    """
    Return the number of install guides under ``<base_dir>/content/install-guides``.

    An entry counts as one guide when it is either:
    - a ``.md`` file located directly in the directory (``_index.md`` excluded), or
    - a subdirectory other than ``_images`` (however many files it holds).

    Returns 0 when the install-guides directory does not exist.
    """
    guides_root = os.path.join(base_dir, "content", "install-guides")
    if not os.path.exists(guides_root):
        return 0

    def counts_as_guide(entry):
        entry_path = os.path.join(guides_root, entry)
        if os.path.isfile(entry_path):
            # Single-file guide; the section index is not a guide itself.
            return entry.endswith('.md') and entry != '_index.md'
        # Multi-file guide kept in its own directory; _images holds assets.
        return os.path.isdir(entry_path) and entry != '_images'

    return sum(1 for entry in os.listdir(guides_root) if counts_as_guide(entry))
209+
210+
def write_markdown_report(total_counts, direct_counts, shared_counts, drafts_by_category,
                          install_guides_count, output_file):
    """Write the content summary to a markdown file.

    Args:
        total_counts: Mapping of category -> direct + shared learning paths
            (drafts excluded).
        direct_counts: Mapping of category -> non-draft learning paths stored
            in that category's directory.
        shared_counts: Mapping of category -> learning paths shared from
            cross-platform.
        drafts_by_category: Mapping of category -> draft learning paths.
        install_guides_count: Number of install guides.
        output_file: Path of the markdown file to (over)write.

    Any mapping may be a plain dict; missing categories are treated as 0.
    """
    today = datetime.now().strftime("%B %d, %Y")

    # Calculate totals
    unique_learning_paths = sum(direct_counts.values())
    total_with_shared = sum(total_counts.values())
    total_drafts = sum(drafts_by_category.values())

    # Include categories that only contain drafts; they are absent from
    # total_counts but still contribute to the drafts total.
    categories = sorted(set(total_counts) | set(drafts_by_category))

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("# Arm Learning Paths Content Summary\n\n")
        f.write(f"This document provides a summary of the content available in the Arm Learning Paths repository as of {today}.\n\n")

        # Learning Paths by Category table
        f.write("## Learning Paths by Category\n\n")
        f.write("The table below shows the breakdown of Learning Paths by category, including both direct and shared content:\n\n")
        f.write("| Category | Total | Published | Direct | Shared | Drafts |\n")
        f.write("|----------|-------|-----------|--------|--------|--------|\n")

        for category in categories:
            # Special case for IoT to ensure correct capitalization
            if category == "iot":
                category_name = "IoT"
            else:
                category_name = category.replace('-', ' ').title()

            direct = direct_counts.get(category, 0)
            shared = shared_counts.get(category, 0)
            drafts = drafts_by_category.get(category, 0)

            # The incoming total already excludes drafts, so it is the
            # published figure; the overall total adds the drafts back.
            published_count = total_counts.get(category, 0)
            total = published_count + drafts

            f.write(f"| {category_name} | {total} | {published_count} | {direct} | {shared} | {drafts} |\n")

        # Install Guides table
        f.write("\n## Install Guides\n\n")
        f.write("| Content Type | Count |\n")
        f.write("|--------------|-------|\n")
        f.write(f"| Install Guides | {install_guides_count} |\n")

        # Summary Totals table
        f.write("\n## Summary Totals\n\n")
        f.write("| Metric | Count |\n")
        f.write("|--------|-------|\n")
        f.write(f"| Total Learning Paths (unique) | {unique_learning_paths} |\n")
        f.write(f"| Total Learning Paths (including shared) | {total_with_shared} |\n")
        f.write(f"| Total Learning Paths (drafts) | {total_drafts} |\n")
        f.write(f"| Total Published Content (unique Learning Paths + Install Guides) | {unique_learning_paths + install_guides_count} |\n")

        # Notes section
        f.write("\n## Notes\n\n")
        f.write("- **Direct**: Learning Paths that are directly in the category's directory\n")
        f.write("- **Shared**: Learning Paths from the cross-platform directory that are shared with this category\n")
        f.write("- **Drafts**: Learning Paths marked with `draft: true` that are not published\n")
        f.write("- The \"Total Learning Paths (unique)\" counts each Learning Path once, regardless of how many categories it appears in\n")
        f.write("- The \"Total Learning Paths (including shared)\" counts Learning Paths in each category they appear in\n")
263+
264+
def main():
    """Count the repository content, write the markdown report and print a summary.

    The current working directory is used as the project root; the report is
    written to ``content_summary.md`` inside it. The optional ``--debug``
    argument names a category for which detailed output is printed.
    """
    # Set up command line arguments
    parser = argparse.ArgumentParser(description='Count learning paths and install guides in the repository.')
    parser.add_argument('--debug', dest='debug_category',
                        help='Enable debug output for a specific category')
    args = parser.parse_args()

    base_dir = os.getcwd()  # Assuming script is run from the project root
    output_file = os.path.join(base_dir, "content_summary.md")

    # Without a content directory every count is zero; say so instead of
    # silently producing an empty report.
    if not os.path.isdir(os.path.join(base_dir, "content")):
        print(f"Warning: no 'content' directory found in {base_dir}; "
              "run this script from the project root", file=sys.stderr)

    # The per-category debug data is printed by count_learning_paths itself
    # and is not needed here.
    (total_counts, direct_counts, shared_counts,
     drafts_by_category, draft_paths, _) = count_learning_paths(base_dir, args.debug_category)
    install_guides_count = count_install_guides(base_dir)

    # Write results to markdown file
    write_markdown_report(total_counts, direct_counts, shared_counts, drafts_by_category,
                          install_guides_count, output_file)

    print(f"\nContent summary written to {output_file}")

    # Also print a brief summary to the console
    unique_learning_paths = sum(direct_counts.values())
    total_with_shared = sum(total_counts.values())
    total_drafts = sum(drafts_by_category.values())

    print("\nBrief Summary:")
    print(f"- Learning Paths (unique): {unique_learning_paths}")
    print(f"- Learning Paths (with shared): {total_with_shared}")
    print(f"- Install Guides: {install_guides_count}")
    print(f"- Total Published Content: {unique_learning_paths + install_guides_count}")
    print(f"- Draft Learning Paths: {total_drafts}")

    # Print draft paths for debugging
    if draft_paths:
        print("\nDraft Learning Paths:")
        for path in sorted(draft_paths):
            print(f"- {path}")


# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)