|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Script to count learning paths in each category and install guides. |
| 4 | +Excludes learning paths marked as draft: true. |
| 5 | +Accounts for cross-platform learning paths shared between multiple categories. |
| 6 | +Writes results to a markdown file. |
| 7 | +""" |
| 8 | + |
| 9 | +import os |
| 10 | +import sys |
| 11 | +import re |
| 12 | +import yaml |
| 13 | +import argparse |
| 14 | +from collections import defaultdict |
| 15 | +from datetime import datetime |
| 16 | + |
def extract_front_matter(file_path):
    """Extract the YAML front matter of a markdown file as a dictionary.

    The front matter is the text between a ``---`` line at the very start
    of the file and the next ``---`` line.

    Args:
        file_path: Path of the markdown file to read.

    Returns:
        The parsed front matter mapping. An empty dict is returned when the
        file cannot be read, has no front matter, contains invalid YAML, or
        the YAML is not a mapping (for example it is empty or a bare list),
        so callers can always call ``.get()`` on the result.
    """
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default locale encoding.
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError) as e:
        # ValueError also covers UnicodeDecodeError for non-UTF-8 files.
        print(f"Error reading {file_path}: {e}", file=sys.stderr)
        return {}

    # Without re.MULTILINE, ^ only matches at the very start of the file.
    match = re.search(r'^---\s*\n(.*?)\n---', content, re.DOTALL)
    if not match:
        return {}

    try:
        front_matter = yaml.safe_load(match.group(1))
    except Exception as e:
        # Best effort: a malformed file must not abort the whole count.
        print(f"Error parsing YAML in {file_path}: {e}", file=sys.stderr)
        return {}

    # safe_load yields None for empty input and may yield scalars or lists.
    return front_matter if isinstance(front_matter, dict) else {}
| 35 | + |
def is_draft(index_file_path):
    """Check whether a learning path is marked as a draft.

    Only a file named ``_index.md`` decides the draft status of a whole
    Learning Path; any other file name is never treated as a draft.

    Args:
        index_file_path: Path to the candidate ``_index.md`` file.

    Returns:
        True only when the file is named ``_index.md`` and its front matter
        contains ``draft: true`` as a YAML boolean; False otherwise.
    """
    # Compare the base name instead of a '/'-suffix so that paths built with
    # os.path.join are also recognised on platforms that use '\\'.
    if os.path.basename(index_file_path) != '_index.md':
        return False

    front_matter = extract_front_matter(index_file_path)
    if not isinstance(front_matter, dict):
        return False
    # Identity check: truthy strings such as "yes" do not count as a draft.
    return front_matter.get('draft', False) is True
| 47 | + |
def get_shared_categories(index_file_path):
    """Get the categories a learning path is shared with.

    Args:
        index_file_path: Path to the learning path's ``_index.md`` file.

    Returns:
        A list of category names taken from the ``shared_between`` front
        matter key. The list is empty unless ``shared_path`` is truthy and
        ``shared_between`` holds a string or a list.
    """
    front_matter = extract_front_matter(index_file_path)
    if not isinstance(front_matter, dict) or not front_matter.get('shared_path', False):
        return []

    shared = front_matter.get('shared_between')
    if isinstance(shared, str):
        # A scalar such as "shared_between: iot" would otherwise be iterated
        # character by character by callers.
        return [shared]
    if isinstance(shared, (list, tuple)):
        return list(shared)
    # Missing, null, or an unsupported YAML type: nothing to share with.
    return []
| 54 | + |
def is_learning_path(directory):
    """Return True when *directory* contains a regular ``_index.md`` file."""
    return os.path.isfile(os.path.join(directory, "_index.md"))
| 59 | + |
def _print_listing(title, names):
    """Print a titled, sorted bullet list of directory names."""
    print(f"\n{title} ({len(names)}):")
    for name in sorted(names):
        print(f" - {name}")


def _print_debug_report(learning_paths_dir, debug_category, debug_info):
    """Print the detailed per-directory breakdown for one category."""
    all_dirs = debug_info['all_dirs'].get(debug_category, [])
    direct = debug_info['direct_paths'].get(debug_category, [])
    shared = debug_info['shared_paths'].get(debug_category, [])
    drafts = debug_info['draft_paths'].get(debug_category, [])

    print(f"\nDetailed information for category: {debug_category}")
    _print_listing("All directories", all_dirs)
    _print_listing("Direct Learning Paths", direct)
    _print_listing("Shared Learning Paths from cross-platform", shared)
    _print_listing("Draft Learning Paths", drafts)

    # Show the raw draft declaration of every path that was counted as draft.
    for path in sorted(drafts):
        full_path = os.path.join(learning_paths_dir, debug_category, path, "_index.md")
        if not os.path.exists(full_path):
            continue
        # errors='replace' keeps the debug output alive on odd encodings.
        with open(full_path, 'r', encoding='utf-8', errors='replace') as f:
            content = f.read()
        print(f"\nChecking draft status for {path}:")
        print(f" Has 'draft: true': {'draft: true' in content}")
        match = re.search(r'(.*draft:.*)', content)
        if match:
            print(f" Draft line: {match.group(1).strip()}")

    # Directories that are neither published nor draft learning paths.
    not_learning_paths = set(all_dirs) - set(direct) - set(drafts)
    if not_learning_paths:
        print(f"\nDirectories not counted as Learning Paths ({len(not_learning_paths)}):")
        for path in sorted(not_learning_paths):
            full_path = os.path.join(learning_paths_dir, debug_category, path)
            print(f" - {path} (Has _index.md: {os.path.exists(os.path.join(full_path, '_index.md'))})")


def count_learning_paths(base_dir, debug_category=None):
    """Count learning paths in each category, excluding drafts.

    Every sub-directory of ``<base_dir>/content/learning-paths/<category>``
    that contains an ``_index.md`` is one learning path. Paths in the
    ``cross-platform`` category are additionally credited to each category
    returned by ``get_shared_categories``.

    Args:
        base_dir: Repository root containing ``content/learning-paths``.
        debug_category: Optional category name; when given, a detailed
            breakdown for that category is printed to stdout.

    Returns:
        A tuple ``(total_counts, direct_counts, shared_counts,
        drafts_by_category, draft_paths, debug_info)``:
        the first four are ``defaultdict(int)`` keyed by category (totals
        are direct + shared and exclude drafts), ``draft_paths`` is a list
        of ``"category/item"`` strings, and ``debug_info`` maps
        ``'direct_paths'``, ``'shared_paths'``, ``'draft_paths'`` and
        ``'all_dirs'`` to ``defaultdict(list)`` of directory names.
    """
    direct_counts = defaultdict(int)
    shared_counts = defaultdict(int)
    drafts_by_category = defaultdict(int)
    draft_paths = []
    debug_info = {
        'direct_paths': defaultdict(list),
        'shared_paths': defaultdict(list),
        'draft_paths': defaultdict(list),
        'all_dirs': defaultdict(list)
    }

    learning_paths_dir = os.path.join(base_dir, "content", "learning-paths")

    def scan(category, collect_shared):
        """Tally the learning paths found directly inside one category."""
        category_path = os.path.join(learning_paths_dir, category)
        # Sorted so the collected lists are reproducible across platforms.
        for item in sorted(os.listdir(category_path)):
            item_path = os.path.join(category_path, item)
            if not os.path.isdir(item_path):
                continue
            debug_info['all_dirs'][category].append(item)

            if not is_learning_path(item_path):
                # Diagnostics go to stderr, like the front matter errors.
                print(f"Warning: Directory {item_path} does not have an _index.md file",
                      file=sys.stderr)
                continue

            index_file = os.path.join(item_path, "_index.md")
            if is_draft(index_file):
                drafts_by_category[category] += 1
                draft_paths.append(f"{category}/{item}")
                debug_info['draft_paths'][category].append(item)
                continue

            direct_counts[category] += 1
            debug_info['direct_paths'][category].append(item)

            if collect_shared:
                shared = get_shared_categories(index_file)
                if isinstance(shared, str):
                    # Guard against a scalar being iterated per character.
                    shared = [shared]
                for shared_category in shared or []:
                    shared_counts[shared_category] += 1
                    debug_info['shared_paths'][shared_category].append(item)

    # isdir (not exists) so a stray file with that name cannot crash listdir.
    if os.path.isdir(learning_paths_dir):
        # Regular categories first, then cross-platform with its sharing.
        for category in sorted(os.listdir(learning_paths_dir)):
            if category == "cross-platform":
                continue
            if os.path.isdir(os.path.join(learning_paths_dir, category)):
                scan(category, collect_shared=False)

        if os.path.isdir(os.path.join(learning_paths_dir, "cross-platform")):
            scan("cross-platform", collect_shared=True)

    # Combine direct and shared counts.
    total_counts = defaultdict(int)
    for category in set(direct_counts) | set(shared_counts):
        total_counts[category] = direct_counts.get(category, 0) + shared_counts.get(category, 0)

    if debug_category:
        _print_debug_report(learning_paths_dir, debug_category, debug_info)

    return total_counts, direct_counts, shared_counts, drafts_by_category, draft_paths, debug_info
| 184 | + |
def count_install_guides(base_dir):
    """
    Return the number of install guides under ``<base_dir>/content/install-guides``.

    Counting rules:
    - every ``.md`` file placed directly in that directory is one guide,
      except ``_index.md``
    - every subdirectory is one guide no matter how many files it holds,
      except ``_images``
    - a missing install-guides directory yields 0
    """
    guides_root = os.path.join(base_dir, "content", "install-guides")
    if not os.path.exists(guides_root):
        return 0

    def is_guide(entry):
        entry_path = os.path.join(guides_root, entry)
        if os.path.isfile(entry_path):
            # Single-file guide; the section index page is not a guide.
            return entry.endswith('.md') and entry != '_index.md'
        # Multi-page guide directory; the shared image folder is not a guide.
        return os.path.isdir(entry_path) and entry != '_images'

    return sum(1 for entry in os.listdir(guides_root) if is_guide(entry))
| 209 | + |
def write_markdown_report(total_counts, direct_counts, shared_counts, drafts_by_category, install_guides_count, output_file):
    """Write the content summary to a markdown file.

    Args:
        total_counts: Mapping of category -> published learning paths
            (direct + shared, drafts already excluded).
        direct_counts: Mapping of category -> published learning paths
            located directly in the category.
        shared_counts: Mapping of category -> published cross-platform
            learning paths shared with the category.
        drafts_by_category: Mapping of category -> draft learning paths.
        install_guides_count: Number of install guides.
        output_file: Path of the markdown file to create or overwrite.

    In the per-category table, "Published" is the incoming total (which
    already excludes drafts) and "Total" is published plus drafts. Drafts
    are therefore not subtracted a second time, and categories that contain
    only drafts still get a row.
    """
    today = datetime.now().strftime("%B %d, %Y")

    # Calculate totals
    unique_learning_paths = sum(direct_counts.values())
    total_with_shared = sum(total_counts.values())
    total_drafts = sum(drafts_by_category.values())

    lines = [
        "# Arm Learning Paths Content Summary\n\n",
        f"This document provides a summary of the content available in the Arm Learning Paths repository as of {today}.\n\n",
        "## Learning Paths by Category\n\n",
        "The table below shows the breakdown of Learning Paths by category, including both direct and shared content:\n\n",
        "| Category | Total | Published | Direct | Shared | Drafts |\n",
        "|----------|-------|-----------|--------|--------|--------|\n",
    ]

    # Include categories that only hold drafts so their rows are not lost.
    draft_only = {name for name, number in drafts_by_category.items() if number}
    for category in sorted(set(total_counts) | draft_only):
        # Special case for IoT to ensure correct capitalization
        if category == "iot":
            category_name = "IoT"
        else:
            category_name = category.replace('-', ' ').title()

        # .get() avoids inserting keys into the caller's defaultdicts.
        published_count = total_counts.get(category, 0)
        draft_count = drafts_by_category.get(category, 0)
        overall_count = published_count + draft_count

        lines.append(
            f"| {category_name} | {overall_count} | {published_count} | "
            f"{direct_counts.get(category, 0)} | {shared_counts.get(category, 0)} | {draft_count} |\n"
        )

    lines += [
        # Install Guides table
        "\n## Install Guides\n\n",
        "| Content Type | Count |\n",
        "|--------------|-------|\n",
        f"| Install Guides | {install_guides_count} |\n",
        # Summary Totals table
        "\n## Summary Totals\n\n",
        "| Metric | Count |\n",
        "|--------|-------|\n",
        f"| Total Learning Paths (unique) | {unique_learning_paths} |\n",
        f"| Total Learning Paths (including shared) | {total_with_shared} |\n",
        f"| Total Learning Paths (drafts) | {total_drafts} |\n",
        f"| Total Published Content (unique Learning Paths + Install Guides) | {unique_learning_paths + install_guides_count} |\n",
        # Notes section
        "\n## Notes\n\n",
        "- **Total**: Published plus draft Learning Paths in the category\n",
        "- **Published**: Direct plus Shared Learning Paths, excluding drafts\n",
        "- **Direct**: Learning Paths that are directly in the category's directory\n",
        "- **Shared**: Learning Paths from the cross-platform directory that are shared with this category\n",
        "- **Drafts**: Learning Paths marked with `draft: true` that are not published\n",
        "- The \"Total Learning Paths (unique)\" counts each Learning Path once, regardless of how many categories it appears in\n",
        "- The \"Total Learning Paths (including shared)\" counts Learning Paths in each category they appear in\n",
    ]

    # Explicit UTF-8 keeps the report identical across platforms.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write("".join(lines))
| 263 | + |
def main():
    """Count the repository content, write the report, and print a summary.

    The current working directory is used as the project root, and the
    report is written there as ``content_summary.md``. The optional
    ``--debug CATEGORY`` argument is forwarded to ``count_learning_paths``.
    """
    parser = argparse.ArgumentParser(description='Count learning paths and install guides in the repository.')
    parser.add_argument(
        '--debug',
        dest='debug_category',
        help='Enable debug output for a specific category',
    )
    options = parser.parse_args()

    # The script is expected to be launched from the project root.
    project_root = os.getcwd()
    report_path = os.path.join(project_root, "content_summary.md")

    (total_counts, direct_counts, shared_counts,
     drafts_by_category, draft_paths, _debug_info) = count_learning_paths(
        project_root, options.debug_category)
    guides = count_install_guides(project_root)

    write_markdown_report(
        total_counts,
        direct_counts,
        shared_counts,
        drafts_by_category,
        guides,
        report_path,
    )
    print(f"\nContent summary written to {report_path}")

    # Short console recap of the same figures.
    unique_paths = sum(direct_counts.values())
    paths_with_shared = sum(total_counts.values())
    draft_total = sum(drafts_by_category.values())

    print("\nBrief Summary:")
    print(f"- Learning Paths (unique): {unique_paths}")
    print(f"- Learning Paths (with shared): {paths_with_shared}")
    print(f"- Install Guides: {guides}")
    print(f"- Total Published Content: {unique_paths + guides}")
    print(f"- Draft Learning Paths: {draft_total}")

    # List the individual drafts, if there are any.
    if not draft_paths:
        return
    print("\nDraft Learning Paths:")
    for draft in sorted(draft_paths):
        print(f"- {draft}")
| 305 | + |
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments