Skip to content

Commit 9fb7f91

Browse files
committed
Merge branch 'dev' of github.com:dxa4481/truffleHog
2 parents 37f9620 + fe0d537 commit 9fb7f91

File tree

3 files changed

+69
-9
lines changed

3 files changed

+69
-9
lines changed

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
# Truffle Hog
1+
# truffleHog
22
Searches through git repositories for secrets, digging deep into commit history and branches. This is effective at finding secrets accidentally committed.
33

44
## NEW
5-
Trufflehog previously functioned by running entropy checks on git diffs. This functionality still exists, but high signal regex checks have been added, and the ability to surpress entropy checking has also been added.
5+
truffleHog previously functioned by running entropy checks on git diffs. This functionality still exists, but high signal regex checks have been added, along with the ability to suppress entropy checking.
66

77
These features help cut down on noise, and make the tool easier to shove into a devops pipeline.
88

@@ -36,10 +36,10 @@ Things like subdomain enumeration, s3 bucket detection, and other useful regexes
3636

3737
Feel free to also contribute high signal regexes upstream that you think will benefit the community. Things like Azure keys, Twilio keys, Google Compute keys, are welcome, provided a high signal regex can be constructed.
3838

39-
Trufflehog's base rule set sources from https://github.com/dxa4481/truffleHogRegexes/blob/master/truffleHogRegexes/regexes.json
39+
truffleHog's base rule set sources from https://github.com/dxa4481/truffleHogRegexes/blob/master/truffleHogRegexes/regexes.json
4040

4141
## How it works
42-
This module will go through the entire commit history of each branch, and check each diff from each commit, and check for secrets. This is both by regex and by entropy. For entropy checks, trufflehog will evaluate the shannon entropy for both the base64 char set and hexidecimal char set for every blob of text greater than 20 characters comprised of those character sets in each diff. If at any point a high entropy string >20 characters is detected, it will print to the screen.
42+
This module will go through the entire commit history of each branch, and check each diff from each commit, and check for secrets. This is both by regex and by entropy. For entropy checks, truffleHog will evaluate the Shannon entropy for both the base64 char set and hexadecimal char set for every blob of text greater than 20 characters comprised of those character sets in each diff. If at any point a high entropy string >20 characters is detected, it will print to the screen.
4343

4444
## Help
4545

test_all.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
import unittest
22
import os
3+
import sys
4+
import json
5+
import io
36
from truffleHog import truffleHog
7+
from mock import patch
8+
from mock import MagicMock
49

510

611
class TestStringMethods(unittest.TestCase):
@@ -22,5 +27,44 @@ def test_unicode_expection(self):
2227
except UnicodeEncodeError:
2328
self.fail("Unicode print error")
2429

30+
def test_return_correct_commit_hash(self):
    """Check that a finding is reported under the commit that introduced it.

    Runs find_strings() against the upstream repository with JSON output
    enabled, captures everything printed to stdout, parses each non-blank
    line as JSON, and asserts that exactly one finding carries the hash
    and message of the known secret-inserting commit.
    """
    # The scan starts at commit d15627104d07846ac2914a976e8e347a663bbd9b,
    # which is immediately followed by a secret inserting commit:
    # https://github.com/dxa4481/truffleHog/commit/9ed54617547cfca783e0f81f8dc5c927e3d1e345
    since_commit = 'd15627104d07846ac2914a976e8e347a663bbd9b'
    commit_w_secret = '9ed54617547cfca783e0f81f8dc5c927e3d1e345'
    expected_commit_message = 'OH no a secret'

    # Text buffer on Python 3, bytes buffer on older interpreters.
    capture = io.StringIO() if sys.version_info >= (3,) else io.BytesIO()

    # Swap stdout for the buffer only for the duration of the scan; the
    # finally clause restores it even if the scan raises.
    original_stdout = sys.stdout
    sys.stdout = capture
    try:
        truffleHog.find_strings(
            "https://github.com/dxa4481/truffleHog.git",
            since_commit=since_commit,
            printJson=True,
            surpress_output=False
        )
    finally:
        sys.stdout = original_stdout

    # One JSON document is expected per non-blank output line.
    findings = []
    for line in capture.getvalue().split('\n'):
        if line.strip():
            findings.append(json.loads(line))

    matching = [f for f in findings if f['commitHash'] == commit_w_secret]
    self.assertEqual(1, len(matching))
    self.assertEqual(commit_w_secret, matching[0]['commitHash'])
    # Additionally, cross-validate that the commit message matches.
    self.assertEqual(expected_commit_message, matching[0]['commit'].strip())
60+
61+
@patch('truffleHog.truffleHog.clone_git_repo')
@patch('truffleHog.truffleHog.Repo')
def test_branch(self, repo_const_mock, clone_git_repo):
    """Check that the requested branch name is passed to the origin fetch.

    clone_git_repo and Repo are both patched inside the truffleHog
    module, so find_strings() operates on a MagicMock repository and the
    fetch call can be inspected afterwards.
    """
    fake_repo = MagicMock()
    repo_const_mock.return_value = fake_repo

    truffleHog.find_strings("test_repo", branch="testbranch")

    fetch_mock = fake_repo.remotes.origin.fetch
    fetch_mock.assert_called_once_with("testbranch")
68+
2569
if __name__ == '__main__':
2670
unittest.main()

truffleHog/truffleHog.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,14 @@ def main():
2727
parser.add_argument("--entropy", dest="do_entropy", help="Enable entropy checks")
2828
parser.add_argument("--since_commit", dest="since_commit", help="Only scan from a given commit hash")
2929
parser.add_argument("--max_depth", dest="max_depth", help="The max commit depth to go back when searching for secrets")
30+
parser.add_argument("--branch", dest="branch", help="Name of the branch to be scanned")
3031
parser.add_argument('git_url', type=str, help='URL for secret searching')
3132
parser.set_defaults(regex=False)
3233
parser.set_defaults(rules={})
3334
parser.set_defaults(max_depth=1000000)
3435
parser.set_defaults(since_commit=None)
3536
parser.set_defaults(entropy=True)
37+
parser.set_defaults(branch=None)
3638
args = parser.parse_args()
3739
rules = {}
3840
if args.rules:
@@ -48,7 +50,7 @@ def main():
4850
for regex in rules:
4951
regexes[regex] = rules[regex]
5052
do_entropy = str2bool(args.do_entropy)
51-
output = find_strings(args.git_url, args.since_commit, args.max_depth, args.output_json, args.do_regex, do_entropy, surpress_output=False)
53+
output = find_strings(args.git_url, args.since_commit, args.max_depth, args.output_json, args.do_regex, do_entropy, surpress_output=False, branch=args.branch)
5254
project_path = output["project_path"]
5355
shutil.rmtree(project_path, onerror=del_rw)
5456
if output["foundIssues"]:
@@ -183,7 +185,7 @@ def find_entropy(printableDiff, commit_time, branch_name, prev_commit, blob, com
183185
entropicDiff['diff'] = blob.diff.decode('utf-8', errors='replace')
184186
entropicDiff['stringsFound'] = stringsFound
185187
entropicDiff['printDiff'] = printableDiff
186-
entropicDiff['commitHash'] = commitHash
188+
entropicDiff['commitHash'] = prev_commit.hexsha
187189
entropicDiff['reason'] = "High Entropy"
188190
return entropicDiff
189191

@@ -207,7 +209,7 @@ def regex_check(printableDiff, commit_time, branch_name, prev_commit, blob, comm
207209
foundRegex['stringsFound'] = found_strings
208210
foundRegex['printDiff'] = found_diff
209211
foundRegex['reason'] = key
210-
foundRegex['commitHash'] = commitHash
212+
foundRegex['commitHash'] = prev_commit.hexsha
211213
regex_matches.append(foundRegex)
212214
return regex_matches
213215

@@ -240,14 +242,19 @@ def handle_results(output, output_dir, foundIssues):
240242
output["foundIssues"].append(result_path)
241243
return output
242244

243-
def find_strings(git_url, since_commit=None, max_depth=1000000, printJson=False, do_regex=False, do_entropy=True, surpress_output=True, custom_regexes={}):
245+
def find_strings(git_url, since_commit=None, max_depth=1000000, printJson=False, do_regex=False, do_entropy=True, surpress_output=True, custom_regexes={}, branch=None):
244246
output = {"foundIssues": []}
245247
project_path = clone_git_repo(git_url)
246248
repo = Repo(project_path)
247249
already_searched = set()
248250
output_dir = tempfile.mkdtemp()
249251

250-
for remote_branch in repo.remotes.origin.fetch():
252+
if branch:
253+
branches = repo.remotes.origin.fetch(branch)
254+
else:
255+
branches = repo.remotes.origin.fetch()
256+
257+
for remote_branch in branches:
251258
since_commit_reached = False
252259
branch_name = remote_branch.name
253260
prev_commit = None
@@ -281,7 +288,16 @@ def find_strings(git_url, since_commit=None, max_depth=1000000, printJson=False,
281288
output = handle_results(output, output_dir, foundIssues)
282289
output["project_path"] = project_path
283290
output["clone_uri"] = git_url
291+
output["issues_path"] = output_dir
284292
return output
285293

294+
def clean_up(output):
    """Remove the temporary directories recorded in a find_strings() result.

    Args:
        output: dict as returned by find_strings(). The optional keys
            "project_path" (the cloned repository) and "issues_path"
            (the directory holding result files) are read. A key that is
            missing, empty, or does not name an existing directory is
            skipped.

    Returns:
        None.
    """
    project_path = output.get("project_path")
    if project_path and os.path.isdir(project_path):
        # Use the same error handler main() passes when it deletes the
        # clone, so both removal paths behave alike.
        # NOTE(review): del_rw presumably clears read-only entries that
        # rmtree cannot delete directly - confirm against its definition.
        shutil.rmtree(project_path, onerror=del_rw)

    issues_path = output.get("issues_path")
    if issues_path and os.path.isdir(issues_path):
        shutil.rmtree(issues_path)
301+
286302
if __name__ == "__main__":
287303
main()

0 commit comments

Comments
 (0)