|
| 1 | +#!/usr/bin/env python3 |
| 2 | +import os |
| 3 | +import re |
| 4 | +from pathlib import Path |
| 5 | + |
| 6 | +# ---------------------------- |
| 7 | +# config |
| 8 | +# ---------------------------- |
| 9 | + |
# Bare directory names that the tree walks never descend into.
SKIP_DIR_NAMES = {
    '.git',
    '.github',
    '.gitlab',
    '.idea',
    '.vscode',
    '__pycache__',
    'build',
    'build-debug',
    'build-release',
    'cmake-build-debug',
    'cmake-build-release',
    'install',
    'out',
    'dist',
    'blt',
}

# File suffixes that count as "source files" for the on-disk scan and for
# filtering the paths extracted from CMake files.
SRC_EXTENSIONS = {'.cpp', '.hpp'}

# CMake file names of interest.
# NOTE: '.cmake' is a suffix rather than a complete file name, and nothing in
# the code visible in this file reads this constant.
CMAKE_FILE_NAMES = {'CMakeLists.txt', '.cmake'}
| 22 | + |
| 23 | + |
| 24 | +# ---------------------------- |
| 25 | +# helpers |
| 26 | +# ---------------------------- |
| 27 | + |
def is_skipped_dir(dirname: str) -> bool:
    """Tell whether ``dirname`` is excluded from directory traversal.

    The check is an exact match of the bare directory name (not a path)
    against ``SKIP_DIR_NAMES``.
    """
    excluded = dirname in SKIP_DIR_NAMES
    return excluded
| 31 | + |
| 32 | + |
def collect_all_cpp_files(repo_root: Path) -> set[Path]:
    """Gather every source file found under ``repo_root``.

    A file qualifies when its suffix is in ``SRC_EXTENSIONS``. Directories
    for which ``is_skipped_dir`` is true are not descended into. The returned
    paths are relative to ``repo_root``.
    """
    found: set[Path] = set()

    for current_dir, subdirs, names in os.walk(repo_root):
        # Slice-assign so os.walk sees the pruned list and skips those subtrees.
        subdirs[:] = [name for name in subdirs if not is_skipped_dir(name)]

        here = Path(current_dir)
        found.update(
            (here / name).relative_to(repo_root)
            for name in names
            if (here / name).suffix in SRC_EXTENSIONS
        )

    return found
| 50 | + |
| 51 | + |
def find_all_cmake_files(repo_root: Path) -> list[Path]:
    """Locate the CMake files under ``repo_root``.

    A file is picked up when it is named ``CMakeLists.txt`` or its name ends
    with ``.cmake``. Directories for which ``is_skipped_dir`` is true are not
    descended into. Paths are returned in the order ``os.walk`` yields them,
    each joined onto the directory it was found in.
    """
    results: list[Path] = []

    for current_dir, subdirs, names in os.walk(repo_root):
        # In-place pruning keeps os.walk out of the skipped subtrees.
        subdirs[:] = [name for name in subdirs if not is_skipped_dir(name)]

        base = Path(current_dir)
        results.extend(
            base / name
            for name in names
            if name.endswith(".cmake") or name == "CMakeLists.txt"
        )

    return results
| 65 | + |
| 66 | + |
def tokenize_cmake_sources(cmake_text: str) -> list[str]:
    """Extract tokens that look like C++ source/header paths from CMake text.

    This is a lightweight scan, not a CMake parser:

    - Everything from the first ``#`` on a line is dropped as a comment.
      This is naive: a ``#`` inside a quoted string is also treated as a
      comment start, and bracket comments are not recognised.
    - ``${CMAKE_CURRENT_SOURCE_DIR}/`` and ``${CMAKE_CURRENT_LIST_DIR}/``
      prefixes are rewritten to ``./`` so the remainder is returned as a
      relative path instead of a bogus absolute one starting with ``/``.
      Other variable references are not expanded.
    - A token is a run of path characters ending in ``.cpp``, ``.cc``,
      ``.cxx``, ``.C`` or ``.hpp`` where the extension is not followed by
      another identifier character (so ``foo.ccache`` is not reported as
      ``foo.cc``).

    Args:
        cmake_text: Full text of a CMake file.

    Returns:
        The matching path tokens in order of appearance (duplicates kept).
    """
    # Strip comments; split() returns the whole line when there is no '#'.
    text = "\n".join(
        line.split('#', 1)[0] for line in cmake_text.splitlines()
    )

    # Treat "current directory" variables as '.', so that
    # '${CMAKE_CURRENT_SOURCE_DIR}/src/a.cpp' becomes './src/a.cpp'.
    text = re.sub(
        r'\$\{CMAKE_CURRENT_(?:SOURCE|LIST)_DIR\}/',
        './',
        text,
    )

    # The trailing negative lookahead forces the extension to end the word.
    pattern = r'[A-Za-z0-9_./\\+-]+\.(?:cpp|cc|cxx|C|hpp)(?![A-Za-z0-9_])'
    return re.findall(pattern, text)
| 87 | + |
| 88 | + |
def normalize_and_filter(tokens: list[str], cmake_dir: Path, repo_root: Path) -> set[Path]:
    """Turn path tokens into repo-relative paths of existing source files.

    Each token is interpreted as follows:

    - a relative token is resolved against ``cmake_dir`` (the directory of
      the CMake file it came from);
    - an absolute token is resolved as-is.

    A token is kept only if the resolved path lies under ``repo_root``, has a
    suffix in ``SRC_EXTENSIONS`` and is an existing regular file. Everything
    else is silently dropped.

    Args:
        tokens: Candidate path strings extracted from a CMake file.
        cmake_dir: Directory that relative tokens are resolved against.
        repo_root: Root that the returned paths are made relative to.

    Returns:
        Set of paths relative to the resolved ``repo_root``.
    """
    # Resolve the root once; it does not change between tokens.
    root = repo_root.resolve()

    out: set[Path] = set()
    for tok in tokens:
        raw = Path(tok)
        cand = (raw if raw.is_absolute() else cmake_dir / raw).resolve()

        # Check the suffix first: it needs no filesystem access. is_file()
        # (rather than exists()) rejects a directory named like a source.
        if cand.suffix not in SRC_EXTENSIONS or not cand.is_file():
            continue

        try:
            out.add(cand.relative_to(root))
        except ValueError:
            # Resolved path is outside repo_root.
            continue

    return out
| 117 | + |
| 118 | + |
def _print_path_section(title: str, paths: list[Path]) -> None:
    """Print ``title``, then one POSIX-style path per line or ``(none)``."""
    print(title)
    if paths:
        for p in paths:
            print(p.as_posix())
    else:
        print("(none)")


def main():
    """Compare source files on disk with those referenced from CMake files.

    The current working directory is taken as the repository root. The
    report, printed to stdout, lists the files found on disk that no CMake
    file mentions, and the files mentioned in CMake that the disk scan did
    not return.
    """
    repo_root = Path.cwd().resolve()

    # 1. gather cpp files from disk
    fs_cpp = collect_all_cpp_files(repo_root)

    # 2. gather cpp files from cmake
    cmake_cpp: set[Path] = set()
    for cmake_path in find_all_cmake_files(repo_root):
        try:
            # Explicit encoding keeps the result independent of the locale;
            # undecodable bytes are replaced instead of aborting the file.
            txt = cmake_path.read_text(encoding="utf-8", errors="replace")
        except OSError as e:
            print(f"Warning: could not read {cmake_path}: {e}")
            continue

        tokens = tokenize_cmake_sources(txt)
        cmake_cpp |= normalize_and_filter(tokens, cmake_path.parent, repo_root)

    # 3. diff
    unused_cpp = sorted(fs_cpp - cmake_cpp)
    # NOTE: normalize_and_filter only returns paths that exist, so this set
    # holds referenced files the disk scan did not return (for example files
    # inside skipped directories), not files that are truly absent.
    missing_on_disk = sorted(cmake_cpp - fs_cpp)  # sanity check

    # 4. report
    print("=== Summary ===")
    print(f"Total source files on disk: {len(fs_cpp)}")
    print(f"Total source files referenced in CMake: {len(cmake_cpp)}")
    print()

    _print_path_section(
        "=== Present on disk but NOT referenced in any CMake file ===",
        unused_cpp,
    )

    print()
    _print_path_section(
        "=== Referenced in CMake but file not found on disk (possible stale entry) ===",
        missing_on_disk,
    )


if __name__ == "__main__":
    main()
0 commit comments