Skip to content

Commit 2dafa5f

Browse files
committed
fix: deep audit round 2 — gitignore, git -z, stoplists, dead code
1 parent 047d462 commit 2dafa5f

File tree

20 files changed

+152
-76
lines changed

20 files changed

+152
-76
lines changed

pyproject.toml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ optional-dependencies.dev = [
6262
"coverage>=7.0,<8.0",
6363
"hypothesis>=6.0,<7.0",
6464
"import-linter>=2.0,<3.0",
65-
"isort>=5.12,<9.0",
6665
"lxml>=5.0,<7.0",
6766
"mistune>=3.0,<4.0",
6867
"mutmut>=3.5,<4.0",
@@ -132,10 +131,6 @@ lint.ignore = [ "E501" ] # Line length handled by black
132131
lint.per-file-ignores."whitelist_vulture.py" = [ "F401", "F821" ]
133132
lint.mccabe.max-complexity = 15
134133

135-
[tool.isort]
136-
profile = "black"
137-
line_length = 130
138-
139134
[tool.pyproject-fmt]
140135
column_width = 130
141136
keep_full_version = true

src/treemapper/diffctx/edges/config/docker.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,16 @@ def _resolve_docker_path(base_dir: Path, rel_path: str) -> Path:
3939
rel_path = rel_path.strip().strip("'\"")
4040
rel_path = _strip_dot_slash(rel_path)
4141
normalized = base_dir / rel_path
42-
if ".." in normalized.parts:
43-
return base_dir
44-
return normalized
42+
if ".." not in normalized.parts:
43+
return normalized
44+
try:
45+
resolved = normalized.resolve()
46+
base_resolved = base_dir.resolve()
47+
if resolved.is_relative_to(base_resolved):
48+
return resolved
49+
except (OSError, ValueError):
50+
pass
51+
return base_dir
4552

4653

4754
def _collect_docker_refs(docker_files: list[Path]) -> set[str]:

src/treemapper/diffctx/edges/config/helm.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,6 @@
1010
_HELM_VALUES_RE = re.compile(r"\{\{-?\s*\.Values\.([a-zA-Z0-9_.]+)")
1111
_HELM_INCLUDE_RE = re.compile(r'\{\{\s*(?:include|template)\s+"([^"]+)"')
1212
_HELM_DEFINE_RE = re.compile(r'\{\{-?\s*define\s+"([^"]+)"')
13-
_HELM_RELEASE_RE = re.compile(r"\{\{\s*\.Release\.(\w+)\s*\}\}")
14-
_HELM_CHART_RE = re.compile(r"\{\{\s*\.Chart\.(\w+)\s*\}\}")
15-
_HELM_FILES_RE = re.compile(r'\{\{\s*\.Files\.(?:Get|Glob)\s+"([^"]+)"')
1613

1714
_YAML_KEY_PATH_RE = re.compile(r"^(\s*)([a-zA-Z_][a-zA-Z0-9_-]*):", re.MULTILINE)
1815

src/treemapper/diffctx/edges/history/cochange.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ def build(self, fragments: list[Fragment], repo_root: Path | None = None) -> Edg
3232
weight = min(self.weight, 0.1 * math.log(1 + count))
3333
for fid1 in path_to_frags.get(p1, []):
3434
for fid2 in path_to_frags.get(p2, []):
35+
if fid1 == fid2:
36+
continue
3537
edges[(fid1, fid2)] = max(edges.get((fid1, fid2), 0.0), weight)
3638
edges[(fid2, fid1)] = max(edges.get((fid2, fid1), 0.0), weight)
3739

src/treemapper/diffctx/edges/semantic/go.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,28 @@
4646
}
4747
)
4848
_GO_TYPE_REF_RE = re.compile(r"\*?([A-Z]\w*)\b")
49+
_GO_COMMON_TYPES = frozenset(
50+
{
51+
"Bool",
52+
"String",
53+
"Error",
54+
"Reader",
55+
"Writer",
56+
"Handler",
57+
"Server",
58+
"Client",
59+
"Request",
60+
"Response",
61+
"Context",
62+
"Logger",
63+
"Config",
64+
"Options",
65+
"Result",
66+
"Status",
67+
"Mutex",
68+
"Group",
69+
}
70+
)
4971
_GO_PKG_CALL_RE = re.compile(r"\b(\w+)\.([A-Z]\w*)")
5072
_GO_EMBED_RE = re.compile(r"//go:embed\s+(\S+)", re.MULTILINE)
5173
_GO_PKG_DECL_RE = re.compile(r"^package\s+(\w+)", re.MULTILINE)
@@ -73,7 +95,9 @@ def _extract_definitions(content: str) -> tuple[set[str], set[str]]:
7395

7496
def _extract_references(content: str) -> tuple[set[str], set[str], set[tuple[str, str]]]:
7597
func_calls = {m.group(1) for m in _GO_FUNC_CALL_RE.finditer(content) if m.group(1) not in _GO_KEYWORDS}
76-
type_refs = {m.group(1) for m in _GO_TYPE_REF_RE.finditer(content) if m.group(1)[0].isupper()}
98+
type_refs = {
99+
m.group(1) for m in _GO_TYPE_REF_RE.finditer(content) if m.group(1)[0].isupper() and m.group(1) not in _GO_COMMON_TYPES
100+
}
77101
pkg_calls = {(m.group(1), m.group(2)) for m in _GO_PKG_CALL_RE.finditer(content)}
78102
return func_calls, type_refs, pkg_calls
79103

src/treemapper/diffctx/edges/semantic/rust.py

Lines changed: 70 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
_RUST_USE_RE = re.compile(r"^\s*use\s+(?:crate::)?([a-zA-Z_]\w*(?:::[a-zA-Z_]\w*)*)", re.MULTILINE)
1212
_RUST_USE_BRACED_RE = re.compile(r"use\s+(?:crate::)?([\w:]+)::\{([^}]+)\}", re.MULTILINE)
1313
_RUST_MOD_RE = re.compile(r"^\s*(?:pub(?:\([^)]*\))?\s+)?mod\s+([a-z_][a-z0-9_]*)\s*[;{]", re.MULTILINE)
14-
_RUST_EXTERN_CRATE_RE = re.compile(r"^\s*extern\s+crate\s+([a-z_][a-z0-9_]*)", re.MULTILINE)
1514

1615
_RUST_FN_RE = re.compile(r"^\s*(?:pub(?:\([^)]*\))?\s+)?(?:async\s+)?fn\s+([a-z_][a-z0-9_]*)", re.MULTILINE)
1716
_RUST_STRUCT_RE = re.compile(r"^\s*(?:pub(?:\([^)]*\))?\s+)?struct\s+([A-Z]\w*)", re.MULTILINE)
@@ -24,6 +23,63 @@
2423
_RUST_FN_CALL_RE = re.compile(r"(?<!\w)([a-z_][a-z0-9_]*)\s?!?\s?\(")
2524
_RUST_PATH_CALL_RE = re.compile(r"([a-z_][a-z0-9_]*)::([a-z_][a-z0-9_]*|[A-Z]\w*)")
2625

26+
_RUST_COMMON_TYPES = frozenset(
27+
{
28+
"String",
29+
"Vec",
30+
"Option",
31+
"Result",
32+
"Box",
33+
"Arc",
34+
"Rc",
35+
"Some",
36+
"None",
37+
"Ok",
38+
"Err",
39+
"Self",
40+
"HashMap",
41+
"HashSet",
42+
"BTreeMap",
43+
"BTreeSet",
44+
"Cow",
45+
"Pin",
46+
"PhantomData",
47+
}
48+
)
49+
50+
_RUST_BUILTIN_MACROS = frozenset(
51+
{
52+
"println",
53+
"print",
54+
"eprintln",
55+
"eprint",
56+
"format",
57+
"vec",
58+
"assert",
59+
"assert_eq",
60+
"assert_ne",
61+
"debug_assert",
62+
"debug_assert_eq",
63+
"debug_assert_ne",
64+
"panic",
65+
"todo",
66+
"unimplemented",
67+
"unreachable",
68+
"cfg",
69+
"env",
70+
"file",
71+
"line",
72+
"column",
73+
"stringify",
74+
"concat",
75+
"include",
76+
"include_str",
77+
"include_bytes",
78+
"write",
79+
"writeln",
80+
}
81+
)
82+
2783
_RUST_KEYWORDS = frozenset(
2884
{
2985
"if",
@@ -109,8 +165,12 @@ def _extract_definitions(content: str) -> tuple[set[str], set[str]]:
109165

110166

111167
def _extract_references(content: str) -> tuple[set[str], set[str], set[tuple[str, str]]]:
112-
type_refs = {m.group(1) for m in _RUST_TYPE_REF_RE.finditer(content)}
113-
fn_calls = {m.group(1) for m in _RUST_FN_CALL_RE.finditer(content) if m.group(1) not in _RUST_KEYWORDS}
168+
type_refs = {m.group(1) for m in _RUST_TYPE_REF_RE.finditer(content) if m.group(1) not in _RUST_COMMON_TYPES}
169+
fn_calls = {
170+
m.group(1)
171+
for m in _RUST_FN_CALL_RE.finditer(content)
172+
if m.group(1) not in _RUST_KEYWORDS and m.group(1) not in _RUST_BUILTIN_MACROS
173+
}
114174
path_calls = {(m.group(1), m.group(2)) for m in _RUST_PATH_CALL_RE.finditer(content)}
115175
return type_refs, fn_calls, path_calls
116176

@@ -204,10 +264,12 @@ def _link_fragment(
204264
) -> None:
205265
name_to_frags, mod_to_frags, type_defs, fn_defs = indices
206266

267+
type_refs, fn_calls, path_calls = _extract_references(rf.content)
268+
207269
self._link_uses(rf, mod_to_frags, name_to_frags, edges)
208270
self._link_declared_mods(rf, name_to_frags, edges)
209-
self._link_refs(rf, type_defs, fn_defs, edges)
210-
self._link_path_calls(rf, mod_to_frags, edges)
271+
self._link_refs(rf, type_refs, fn_calls, type_defs, fn_defs, edges)
272+
self._link_path_calls(rf, path_calls, mod_to_frags, edges)
211273
self._link_same_crate(rf, rust_frags, edges)
212274

213275
def _link_uses(
@@ -247,12 +309,12 @@ def _link_declared_mods(
247309
def _link_refs(
248310
self,
249311
rf: Fragment,
312+
type_refs: set[str],
313+
fn_calls: set[str],
250314
type_defs: dict[str, list[FragmentId]],
251315
fn_defs: dict[str, list[FragmentId]],
252316
edges: EdgeDict,
253317
) -> None:
254-
type_refs, fn_calls, _ = _extract_references(rf.content)
255-
256318
for type_ref in type_refs:
257319
for fid in type_defs.get(type_ref.lower(), []):
258320
if fid != rf.id:
@@ -266,10 +328,10 @@ def _link_refs(
266328
def _link_path_calls(
267329
self,
268330
rf: Fragment,
331+
path_calls: set[tuple[str, str]],
269332
mod_to_frags: dict[str, list[FragmentId]],
270333
edges: EdgeDict,
271334
) -> None:
272-
_, _, path_calls = _extract_references(rf.content)
273335
for mod_name, _symbol in path_calls:
274336
for fid in mod_to_frags.get(mod_name.lower(), []):
275337
if fid != rf.id:

src/treemapper/diffctx/fragments.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
GENERIC_MAX_LINES,
2222
INDENT_EXTENSIONS,
2323
MIN_FRAGMENT_LINES,
24-
MIN_FRAGMENT_WORDS,
2524
YAML_EXTENSIONS,
2625
)
2726
from .parsers.base import compute_bracket_balance as _compute_bracket_balance # noqa: F401
@@ -38,7 +37,6 @@
3837
"GENERIC_MAX_LINES",
3938
"INDENT_EXTENSIONS",
4039
"MIN_FRAGMENT_LINES",
41-
"MIN_FRAGMENT_WORDS",
4240
"YAML_EXTENSIONS",
4341
"_compute_bracket_balance",
4442
"_find_balanced_end_line",

src/treemapper/diffctx/git.py

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from .types import DiffHunk
88

99
_HUNK_RE = re.compile(r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@")
10-
_RANGE_RE = re.compile(r"^\s*(\S+?)(\.\.\.?)(\S+?)\s*$") # NOSONAR(S5852)
10+
_RANGE_RE = re.compile(r"^\s*(\S+?)(\.\.\.?)(\S*?)\s*$") # NOSONAR(S5852)
1111

1212

1313
class GitError(Exception):
@@ -96,14 +96,13 @@ def parse_diff(repo_root: Path, diff_range: str) -> list[DiffHunk]:
9696
return hunks
9797

9898

99+
def _run_git_z(repo_root: Path, args: list[str]) -> list[str]:
100+
output = run_git(repo_root, args)
101+
return [p for p in output.split("\0") if p]
102+
103+
99104
def get_changed_files(repo_root: Path, diff_range: str) -> list[Path]:
100-
output = run_git(repo_root, ["diff", "--name-only", "-M", diff_range])
101-
files: list[Path] = []
102-
for line in output.splitlines():
103-
line = line.strip()
104-
if line:
105-
files.append(repo_root / line)
106-
return files
105+
return [repo_root / p for p in _run_git_z(repo_root, ["diff", "--name-only", "-M", "-z", diff_range])]
107106

108107

109108
def split_diff_range(diff_range: str) -> tuple[str | None, str | None]:
@@ -121,17 +120,24 @@ def get_untracked_files(repo_root: Path) -> list[Path]:
121120

122121

123122
def get_deleted_files(repo_root: Path, diff_range: str) -> set[Path]:
124-
output = run_git(repo_root, ["diff", "--diff-filter=D", "--name-only", "-M", diff_range])
125-
return {(repo_root / line.strip()).resolve() for line in output.splitlines() if line.strip()}
123+
return {
124+
(repo_root / p).resolve()
125+
for p in _run_git_z(repo_root, ["diff", "--diff-filter=D", "--name-only", "-M", "-z", diff_range])
126+
}
126127

127128

128129
def get_renamed_old_paths(repo_root: Path, diff_range: str) -> set[Path]:
129-
output = run_git(repo_root, ["diff", "--diff-filter=R", "--name-status", "-M", diff_range])
130+
output = run_git(repo_root, ["diff", "--diff-filter=R", "--name-status", "-M", "-z", diff_range])
131+
parts = output.split("\0")
130132
old_paths: set[Path] = set()
131-
for line in output.splitlines():
132-
parts = line.strip().split("\t")
133-
if len(parts) >= 3 and parts[0].startswith("R"):
134-
old_paths.add((repo_root / parts[1]).resolve())
133+
i = 0
134+
while i < len(parts):
135+
if parts[i].startswith("R"):
136+
if i + 2 < len(parts) and parts[i + 1]:
137+
old_paths.add((repo_root / parts[i + 1]).resolve())
138+
i += 3
139+
else:
140+
i += 1
135141
return old_paths
136142

137143

src/treemapper/diffctx/parsers/base.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
_GENERIC_MAX_LINES = 200
1313
GENERIC_MAX_LINES = _GENERIC_MAX_LINES
1414
_GENERIC_MAX_EXTENSION = 100
15-
_MIN_FRAGMENT_WORDS = 10
16-
MIN_FRAGMENT_WORDS = _MIN_FRAGMENT_WORDS
1715

1816
_BRACKET_PAIRS = {"{": "}", "[": "]", "(": ")"}
1917

@@ -91,8 +89,7 @@ def _is_comment_or_blank(line: str) -> bool:
9189

9290

9391
def _is_top_level_close(line: str) -> bool:
94-
stripped = line.strip()
95-
return stripped == "}" or stripped == "};" or stripped.startswith("}")
92+
return line.strip().startswith("}")
9693

9794

9895
def _find_first_balanced_point(lines: list[str], start_idx: int, target_end_idx: int) -> int | None:

src/treemapper/diffctx/parsers/markdown.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def fragment(self, path: Path, content: str) -> list[Fragment]:
5757
if frag:
5858
fragments.append(frag)
5959

60-
return fragments if fragments else []
60+
return fragments
6161

6262
def _find_all_headings(self, lines: list[str]) -> list[tuple[int, int]]:
6363
headings: list[tuple[int, int]] = []

0 commit comments

Comments
 (0)