Skip to content

Commit 181e8b1

Browse files
committed
Use .gitignore as part of the excluded file list
When using Bandit to scan projects based on Git source control, it would be beneficial to ignore files based on the patterns in the .gitignore file. Today, Bandit has some default excludes that get overridden if a user passes in other excludes. This is a bit confusing to the end user. But it also serves a purpose similar to .gitignore in that the paths excluded by default are typically included in a .gitignore. This change makes use of an existing dependency, GitPython, to process a .gitignore file. Note, it will only check for .gitignore files in top-level directories specified on the Bandit command line as targets. It does not recursively look for .gitignore files. This is done for a couple of reasons. Firstly, recursively searching for .gitignore files and loading them via GitPython would be complex to add to Bandit's existing file discovery. Secondly, the performance of Bandit might suffer greatly if GitPython is used recursively. GitPython is a wrapper around calling various git commands, so there is a dependency that the user has Git installed. There are also operating-system limits on argument sizes, which this code now accounts for (by splitting the file list into chunks) to ensure the command doesn't fail. Signed-off-by: Eric Brown <[email protected]>
1 parent 8fa5788 commit 181e8b1

File tree

3 files changed

+67
-3
lines changed

3 files changed

+67
-3
lines changed

bandit/cli/main.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,8 @@ def main():
340340
help="comma-separated list of paths (glob patterns "
341341
"supported) to exclude from scan "
342342
"(note that these are in addition to the excluded "
343-
"paths provided in the config file) (default: "
343+
"paths provided in the config file and any files "
344+
"matching patterns defined in .gitignore) (default: "
344345
+ ",".join(constants.EXCLUDE)
345346
+ ")",
346347
)

bandit/core/manager.py

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import tokenize
1414
import traceback
1515

16+
import git
1617
from rich import progress
1718

1819
from bandit.core import constants as b_constants
@@ -197,6 +198,55 @@ def output_results(
197198
f"'{output_format}' formatter: {str(e)}"
198199
)
199200

201+
def _arg_max_args(self, args, arg_max=None):
    """
    Split up the iterable `args` into a list of lists where each
    list needs at most half of ARG_MAX bytes on a command line.

    :param args: iterable of argument strings (a list, set, `map`
        object, ...); it is consumed exactly once
    :param arg_max: optional byte limit to use instead of asking the
        operating system; when None the limit is read from
        os.sysconf("SC_ARG_MAX")
    :return: list of non-empty lists of arguments, in iteration
        order; an argument that is larger than the limit on its own
        is returned alone in its own list rather than being dropped
    """
    if arg_max is None:
        try:
            arg_max = os.sysconf("SC_ARG_MAX")
        except (AttributeError, ValueError, OSError):
            # os.sysconf does not exist on Windows, and the name may
            # be unknown on other platforms.
            arg_max = -1
        if arg_max <= 0:
            # sysconf reports -1 when the limit is indeterminate.
            # NOTE(review): 32767 is the documented CreateProcess
            # command line limit on Windows; used here as a
            # conservative fallback - confirm it suits all targets.
            arg_max = 32767

    # Only use half of the limit: the environment and the command
    # itself share the same space as the arguments.
    limit = arg_max // 2

    result = []
    sublist = []
    count = 0
    for arg in args:
        # Measure encoded bytes (not characters) plus the NUL
        # terminator each argument needs.
        arg_len = len(os.fsencode(arg)) + 1
        # Never flush an empty sublist: that would make the caller
        # run the command without any arguments.
        if sublist and count + arg_len > limit:
            result.append(sublist)
            sublist = []
            count = 0
        sublist.append(arg)
        count += arg_len
    if sublist:
        result.append(sublist)
    return result
222+
223+
def _exclude_gitignore(self, repo, new_files, newly_excluded):
    """
    Exclude files matching patterns in .gitignore if found.

    Both sets are modified in place: every file that `repo` reports
    as ignored is removed from `new_files` and added to
    `newly_excluded`.

    :param repo: object providing `ignored(*paths)` (a git.Repo in
        the caller), or None when the target is not a Git repository,
        in which case nothing is changed
    :param new_files: set of discovered file paths
    :param newly_excluded: set of file paths excluded so far
    """
    if repo is None:
        return

    # Git will only compare absolute paths in the processing of a
    # .gitignore file, so remember which original path(s) each
    # absolute path came from. Building this up front also takes a
    # snapshot of new_files before it is mutated below.
    by_abspath = {}
    for fname in new_files:
        by_abspath.setdefault(os.path.abspath(fname), []).append(fname)

    # Split list into chunks if size is larger than the
    # operating system's ARG_MAX
    for chunk in self._arg_max_args(list(by_abspath)):
        if not chunk:
            # Never ask git about an empty path list.
            continue
        try:
            ignored_paths = repo.ignored(*chunk)
        except (git.exc.GitCommandError, git.exc.GitCommandNotFound) as e:
            # Best effort: a failing chunk only means its files are
            # not filtered by .gitignore; keep processing the rest.
            LOG.warning(e)
            continue

        # Translate the absolute paths git reported back into the
        # paths that are actually stored in new_files.
        ignored_files = [
            fname
            for path in ignored_paths
            for fname in by_abspath.get(path, ())
        ]
        new_files.difference_update(ignored_files)
        newly_excluded.update(ignored_files)
249+
200250
def discover_files(self, targets, recursive=False, excluded_paths=""):
201251
"""Add tests directly and from a directory to the test set
202252
@@ -224,12 +274,25 @@ def discover_files(self, targets, recursive=False, excluded_paths=""):
224274
for fname in targets:
225275
# if this is a directory and recursive is set, find all files
226276
if os.path.isdir(fname):
277+
repo = None
278+
try:
279+
repo = git.Repo(fname)
280+
except git.exc.InvalidGitRepositoryError:
281+
LOG.debug("No Git repository found")
282+
except git.exc.GitCommandNotFound:
283+
LOG.debug("Git command not found")
284+
except git.exc.NoSuchPathError:
285+
LOG.debug("No such path to a git repo")
286+
227287
if recursive:
228288
new_files, newly_excluded = _get_files_from_dir(
229289
fname,
230290
included_globs=included_globs,
231291
excluded_path_strings=excluded_path_globs,
232292
)
293+
294+
self._exclude_gitignore(repo, new_files, newly_excluded)
295+
233296
files_list.update(new_files)
234297
excluded_files.update(newly_excluded)
235298
else:
@@ -238,7 +301,6 @@ def discover_files(self, targets, recursive=False, excluded_paths=""):
238301
"scan contents",
239302
fname,
240303
)
241-
242304
else:
243305
# if the user explicitly mentions a file on command line,
244306
# we'll scan it, regardless of whether it's in the included

doc/source/man/bandit.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ OPTIONS
6262
comma-separated list of paths (glob patterns
6363
supported) to exclude from scan (note that these are
6464
in addition to the excluded paths provided in the
65-
config file) (default:
65+
config file and any files matching patterns defined in
66+
.gitignore) (default:
6667
.svn,CVS,.bzr,.hg,.git,__pycache__,.tox,.eggs,*.egg)
6768
-b BASELINE, --baseline BASELINE
6869
path of a baseline report to compare against (only

0 commit comments

Comments
 (0)