Skip to content

Commit c508d6e

Browse files
authored
remove unused source/header files (#3896)
Removes files that were no longer in use
1 parent 1e12d1f commit c508d6e

File tree

4 files changed

+166
-1601
lines changed

4 files changed

+166
-1601
lines changed
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
#!/usr/bin/env python3
2+
import os
3+
import re
4+
from pathlib import Path
5+
6+
# ----------------------------
7+
# config
8+
# ----------------------------
9+
10+
# Directory names that are pruned from the tree walk (matched against the
# bare directory name, not the full path).
SKIP_DIR_NAMES = {
    # version control / editor / tooling metadata
    '.git',
    '.github',
    '.gitlab',
    '.idea',
    '.vscode',
    '__pycache__',
    # build and install output
    'build',
    'build-debug',
    'build-release',
    'cmake-build-debug',
    'cmake-build-release',
    'install',
    'out',
    'dist',
    # other
    'blt',
}

# File suffixes that count as "source files" for the on-disk scan.
SRC_EXTENSIONS = {
    '.cpp',
    '.hpp',
}

# CMake file names to parse. Note that '.cmake' is a suffix, not a complete
# file name; it is listed so that foo.cmake-style files can be covered too.
# NOTE(review): no function in the visible part of this file reads this set.
CMAKE_FILE_NAMES = {
    'CMakeLists.txt',
    '.cmake',
}
22+
23+
24+
# ----------------------------
25+
# helpers
26+
# ----------------------------
27+
28+
def is_skipped_dir(dirname: str) -> bool:
    """Tell whether *dirname* is on the skip list and must not be walked into."""
    on_skip_list = dirname in SKIP_DIR_NAMES
    return on_skip_list
31+
32+
33+
def collect_all_cpp_files(repo_root: Path) -> set[Path]:
    """
    Scan the tree below repo_root for source files.

    A file counts when its suffix is in SRC_EXTENSIONS. Directories for which
    is_skipped_dir() is true are not entered. The returned paths are relative
    to repo_root.
    """
    found: set[Path] = set()

    for current_dir, subdirs, names in os.walk(repo_root):
        # Slice-assign so os.walk sees the pruned list and skips those subtrees.
        subdirs[:] = [name for name in subdirs if not is_skipped_dir(name)]

        here = Path(current_dir)
        found.update(
            candidate.relative_to(repo_root)
            for candidate in (here / name for name in names)
            if candidate.suffix in SRC_EXTENSIONS
        )

    return found
50+
51+
52+
def find_all_cmake_files(repo_root: Path) -> list[Path]:
    """
    Locate every CMake file below repo_root.

    A file qualifies when it is named exactly CMakeLists.txt or its name ends
    in .cmake. Directories for which is_skipped_dir() is true are not entered.
    Paths are returned in os.walk order, joined onto the walked directory.
    """
    found: list[Path] = []

    for current_dir, subdirs, names in os.walk(repo_root):
        # Prune in place so the walk never enters skipped directories.
        subdirs[:] = [name for name in subdirs if not is_skipped_dir(name)]

        base = Path(current_dir)
        found.extend(
            base / name
            for name in names
            if name.endswith(".cmake") or name == "CMakeLists.txt"
        )

    return found
65+
66+
67+
def tokenize_cmake_sources(cmake_text: str) -> list[str]:
    """
    Super light tokenizer for CMake text.

    - strips line comments (everything from the first '#' to end of line)
    - returns, in order of appearance, every token that looks like a path to
      a source or header file (.cpp/.cc/.cxx/.C/.hpp)

    The text is not parsed as CMake: a '#' inside a quoted string is still
    treated as a comment start, and variable references such as ${VAR} are
    not expanded (only the path-like tail after them is captured).
    """
    # Remove comments line by line; partition() returns the whole line as the
    # first element when there is no '#'.
    text = "\n".join(
        line.partition('#')[0] for line in cmake_text.splitlines()
    )

    # Path-like run of characters ending in a recognised extension.
    # '.hpp' is included so headers listed in CMake are recognised as well.
    # The negative lookahead stops the extension from matching as a prefix of
    # a longer word (e.g. "foo.cppx" must not yield "foo.cpp").
    pattern = r'([A-Za-z0-9_./\\+-]+\.(?:cpp|cc|cxx|C|hpp)(?![A-Za-z0-9_]))'
    return re.findall(pattern, text)
87+
88+
89+
def normalize_and_filter(tokens: list[str], cmake_dir: Path, repo_root: Path) -> set[Path]:
    """
    Convert token strings to normalized Paths relative to repo_root.

    Handles:
      - relative paths like src/foo.cpp (interpreted relative to cmake_dir)
      - absolute paths under repo root
    Ignores anything that resolves outside repo_root, whose suffix is not in
    SRC_EXTENSIONS, or that is not an existing regular file.
    """
    # Resolve the root once instead of on every iteration.
    root = repo_root.resolve()

    out: set[Path] = set()
    for tok in tokens:
        raw = Path(tok)

        # Relative tokens are anchored at the directory of the CMake file.
        cand = (raw if raw.is_absolute() else cmake_dir / raw).resolve()

        try:
            rel = cand.relative_to(root)
        except ValueError:
            # file is outside repo_root
            continue

        # is_file() rather than exists(): a directory that happens to be
        # named like a source file must not count as a referenced source.
        if cand.suffix in SRC_EXTENSIONS and cand.is_file():
            out.add(rel)

    return out
117+
118+
119+
def _print_path_section(title: str, paths: list[Path]) -> None:
    """Print a titled list of paths in POSIX form, or "(none)" when empty."""
    print(title)
    if paths:
        for p in paths:
            print(p.as_posix())
    else:
        print("(none)")


def main():
    """
    Compare source files on disk with those referenced from CMake files.

    Uses the current working directory as the repository root, then prints a
    summary, the files present on disk but not referenced by any CMake file,
    and the files referenced by CMake but absent from the on-disk scan.
    """
    repo_root = Path(os.getcwd()).resolve()

    # 1. gather cpp files from disk
    fs_cpp = collect_all_cpp_files(repo_root)

    # 2. gather cpp files from cmake
    cmake_cpp: set[Path] = set()
    for cmake_path in find_all_cmake_files(repo_root):
        try:
            # Decode explicitly as UTF-8 so the result does not depend on the
            # platform locale; undecodable bytes are replaced rather than
            # causing the whole file (and all its sources) to be skipped.
            txt = cmake_path.read_text(encoding="utf-8", errors="replace")
        except OSError as e:
            print(f"Warning: could not read {cmake_path}: {e}")
            continue

        tokens = tokenize_cmake_sources(txt)
        cmake_cpp |= normalize_and_filter(tokens, cmake_path.parent, repo_root)

    # 3. diff
    unused_cpp = sorted(fs_cpp - cmake_cpp)
    # Sanity check. NOTE(review): normalize_and_filter already drops paths
    # that do not exist, so this can presumably only list files that exist
    # but were excluded from the on-disk scan (e.g. inside skipped dirs).
    missing_on_disk = sorted(cmake_cpp - fs_cpp)

    # 4. report
    print("=== Summary ===")
    print(f"Total source files on disk: {len(fs_cpp)}")
    print(f"Total source files referenced in CMake: {len(cmake_cpp)}")
    print()

    _print_path_section(
        "=== Present on disk but NOT referenced in any CMake file ===",
        unused_cpp,
    )

    print()
    _print_path_section(
        "=== Referenced in CMake but file not found on disk (possible stale entry) ===",
        missing_on_disk,
    )


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)