Skip to content

Commit 4ef07fd

Browse files
committed
Python extractor: in overlay mode, traverse only changed files
1 parent 723b15f commit 4ef07fd

File tree

1 file changed

+20
-0
lines changed

1 file changed

+20
-0
lines changed

python/extractor/semmle/traverser.py

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
import re
66
import os.path
7+
import json
78

89
from semmle.path_filters import filter_from_pattern
910
from semmle.util import Extractable, PY_EXTENSIONS, isdir, islink, listdir
@@ -30,6 +31,13 @@ def __init__(self, options, modulenames, logger):
3031
if not os.path.exists(p) and not options.ignore_missing_modules:
3132
raise FileNotFoundError("'%s' does not exist." % p)
3233
self.paths.add(p)
34+
# During overlay extraction, only traverse the files that were changed.
35+
self.overlay_changes = None
36+
if 'CODEQL_EXTRACTOR_PYTHON_OVERLAY_CHANGES' in os.environ:
37+
with open(os.environ['CODEQL_EXTRACTOR_PYTHON_OVERLAY_CHANGES'], 'r', encoding='utf-8') as f:
38+
data = json.load(f)
39+
changed_paths = data.get('changes', [])
40+
self.overlay_changes = { os.path.abspath(p) for p in changed_paths }
3341
self.exclude_paths = set([ os.path.abspath(f) for f in options.exclude_file ])
3442
self.exclude = exclude_filter_from_options(options)
3543
self.filter = filter_from_options_and_environment(options)
@@ -48,11 +56,20 @@ def __iter__(self):
4856
if mod is None:
4957
self.logger.error("No module named '%s'.", name)
5058
raise ExtractorFailure()
59+
if self.overlay_changes is not None and mod.path not in self.overlay_changes:
60+
self.logger.debug("Skipping module '%s' as it was not changed in overlay extraction.", name)
61+
continue
5162
yield mod.get_extractable()
5263
for path in self.paths:
64+
if self.overlay_changes is not None and path not in self.overlay_changes:
65+
self.logger.debug("Skipping path '%s' as it was not changed in overlay extraction.", path)
66+
continue
5367
yield Extractable.from_path(path)
5468
for path in self.recurse_files:
5569
for modpath in self._treewalk(path):
70+
if self.overlay_changes is not None and modpath not in self.overlay_changes:
71+
self.logger.debug("Skipping file '%s' as it was not changed in overlay extraction.", modpath)
72+
continue
5673
yield Extractable.from_path(modpath)
5774
for name in self.recurse_packages:
5875
mod = self.finder.find(name)
@@ -66,6 +83,9 @@ def __iter__(self):
6683
self.logger.error("Package '%s' does not have a path.", name)
6784
raise ExtractorFailure()
6885
for modpath in self._treewalk(path):
86+
if self.overlay_changes is not None and modpath not in self.overlay_changes:
87+
self.logger.debug("Skipping package '%s' as it was not changed in overlay extraction.", modpath)
88+
continue
6989
yield Extractable.from_path(modpath)
7090

7191
def _treewalk(self, path):

0 commit comments

Comments
 (0)