11from __future__ import annotations
22
3+ import logging
34import re
45from pathlib import Path
56
1819_JENKINS_SH_RE = re .compile (r"sh\s*(?:\(['\"]|['\"])(.+?)['\"]\)?" , re .MULTILINE | re .DOTALL )
1920_JENKINS_SCRIPT_RE = re .compile (r"script\s*\{([^}]+)\}" , re .MULTILINE | re .DOTALL )
2021
21- _SCRIPT_CALL_RE = re .compile (r"(?:bash|sh|python|python3|node|npm|yarn|pnpm|make|go|cargo|dotnet|mvn|gradle)\s+([^\s;&|]+)" )
22- _FILE_REF_RE = re .compile (r"(?:\.\/|scripts\/|bin\/|tools\/)([a-zA-Z0-9_.-]+(?:\.(?:sh|py|js|ts|rb))?)" )
22+ _SCRIPT_CALL_RE = re .compile (
23+ r"(?:bash|sh|python|python3|node|npm|yarn|pnpm|make|go|cargo|dotnet|mvn|gradle|pytest|ruff|mypy|black|isort|flake8)\s+([^\s;&|]+)"
24+ )
25+ _FILE_REF_RE = re .compile (r"(?:\.\/|scripts\/|bin\/|tools\/|src\/|tests\/)([a-zA-Z0-9_.-]+(?:\.(?:sh|py|js|ts|rb))?)" )
2326
2427
2528def _is_github_actions (path : Path ) -> bool :
@@ -49,6 +52,14 @@ def _is_azure_pipelines(path: Path) -> bool:
4952 return name in {"azure-pipelines.yml" , "azure-pipelines.yaml" } or name .startswith ("azure-pipeline" )
5053
5154
55+ def _is_tox (path : Path ) -> bool :
56+ return path .name .lower () == "tox.ini"
57+
58+
59+ def _is_nox (path : Path ) -> bool :
60+ return path .name .lower () == "noxfile.py"
61+
62+
5263def _is_ci_file (path : Path ) -> bool :
5364 return any (
5465 [
@@ -58,6 +69,8 @@ def _is_ci_file(path: Path) -> bool:
5869 _is_circleci (path ),
5970 _is_travis (path ),
6071 _is_azure_pipelines (path ),
72+ _is_tox (path ),
73+ _is_nox (path ),
6174 ]
6275 )
6376
@@ -112,6 +125,63 @@ def _extract_jenkins_refs(content: str) -> set[str]:
112125 return refs
113126
114127
128+ def _extract_tox_refs (content : str ) -> set [str ]:
129+ refs : set [str ] = set ()
130+ # Extract deps
131+ for match in re .finditer (r"^\s*deps\s*=\s*(.+)$" , content , re .MULTILINE ):
132+ deps = match .group (1 ).split ()
133+ refs .update (d .strip () for d in deps if d .strip ())
134+
135+ # Extract commands
136+ for match in re .finditer (r"^\s*commands\s*=\s*(.+)$" , content , re .MULTILINE ):
137+ cmd = match .group (1 )
138+ # Split by whitespace to find potential paths
139+ # Examples:
140+ # commands = pytest {posargs}
141+ # commands = ruff check src/
142+ # commands = python -m pytest tests/
143+ parts = cmd .split ()
144+ for p in parts :
145+ # Strip quoting
146+ p = p .strip ("'\" " )
147+ # Remove tox specific vars like {posargs}
148+ p = re .sub (r"\{[^}]+\}" , "" , p )
149+
150+ if not p or p .startswith ("-" ):
151+ continue
152+
153+ # Heuristic: if it looks like a path (contains / or .py/.ini etc)
154+ # or if we are permissive and just add everything that looks like an ident
155+ # Given discover_files_by_refs filters candidates, being permissive is safer.
156+ refs .add (p )
157+
158+ refs .update (_extract_script_refs (cmd ))
159+
160+ return refs
161+
162+
163+ def _extract_nox_refs (content : str ) -> set [str ]:
164+ refs : set [str ] = set ()
165+ # Extract session.run calls
166+ # session.run("cmd", "arg1", ...)
167+ for match in re .finditer (r"session\.run\(([^)]+)\)" , content ):
168+ args_str = match .group (1 )
169+ # Simple extraction of string literals
170+ args = re .findall (r'["\']([^"\']+)["\']' , args_str )
171+ for arg in args :
172+ refs .add (arg )
173+ refs .update (_extract_script_refs (arg ))
174+
175+ # Extract session.install calls
176+ for match in re .finditer (r"session\.install\(([^)]+)\)" , content ):
177+ args_str = match .group (1 )
178+ args = re .findall (r'["\']([^"\']+)["\']' , args_str )
179+ for arg in args :
180+ refs .add (arg )
181+
182+ return refs
183+
184+
115185class CICDEdgeBuilder (EdgeBuilder ):
116186 weight = 0.55
117187 script_weight = 0.60
@@ -135,19 +205,29 @@ def discover_related_files(
135205 except (OSError , UnicodeDecodeError ):
136206 continue
137207
208+ local_refs = set ()
138209 if _is_github_actions (ci ):
139- refs .update (_extract_gha_refs (content ))
210+ local_refs .update (_extract_gha_refs (content ))
140211 elif _is_gitlab_ci (ci ):
141- refs .update (_extract_gitlab_refs (content ))
212+ local_refs .update (_extract_gitlab_refs (content ))
142213 elif _is_jenkinsfile (ci ):
143- refs .update (_extract_jenkins_refs (content ))
214+ local_refs .update (_extract_jenkins_refs (content ))
215+ elif _is_tox (ci ):
216+ local_refs .update (_extract_tox_refs (content ))
217+ elif _is_nox (ci ):
218+ local_refs .update (_extract_nox_refs (content ))
144219 else :
145- refs .update (_extract_script_refs (content ))
220+ local_refs .update (_extract_script_refs (content ))
146221
147222 if any (cmd in content .lower () for cmd in ["npm" , "yarn" , "pnpm" ]):
148- refs .add ("package.json" )
223+ local_refs .add ("package.json" )
224+
225+ logging .debug ("CICD refs for %s: %s" , ci .name , local_refs )
226+ refs .update (local_refs )
149227
150- return discover_files_by_refs (refs , changed_files , all_candidate_files )
228+ discovered = discover_files_by_refs (refs , changed_files , all_candidate_files )
229+ logging .debug ("CICD discovered for %s: %s" , [c .name for c in ci_files ], [d .name for d in discovered ])
230+ return discovered
151231
152232 def build (self , fragments : list [Fragment ], repo_root : Path | None = None ) -> EdgeDict :
153233 ci_frags = [f for f in fragments if _is_ci_file (f .path )]
@@ -171,6 +251,10 @@ def _extract_refs(self, ci: Fragment) -> set[str]:
171251 return _extract_gitlab_refs (ci .content )
172252 if _is_jenkinsfile (ci .path ):
173253 return _extract_jenkins_refs (ci .content )
254+ if _is_tox (ci .path ):
255+ return _extract_tox_refs (ci .content )
256+ if _is_nox (ci .path ):
257+ return _extract_nox_refs (ci .content )
174258 return _extract_script_refs (ci .content )
175259
176260 def _link_refs (self , ci_id : FragmentId , refs : set [str ], idx : FragmentIndex , edges : EdgeDict ) -> None :
0 commit comments