Skip to content

Commit ce3f2ae

Browse files
authored
Add support for defining sources via a config file (#4172)
* Add config.proto
* Make protos
* Add ConfigurableSource implementation
* Parse config protoyaml into configured sources
* Refactor setting postman keywords into a helper method on the engine
* Add scan subcommand and plumb it all together
* Refactor ConfigurableSource to ConfiguredSource
* Export AhoCorasickCoreKeywords and return the set
* Disallow source configurations for non-scan subcommands
* Fix stdin scan compilation errors
* Rename command to multi-scan
* Add config file documentation
* Add more documentation for ConfiguredSource
* Replace initFunc closure with an embedded anonymous struct
* Assign directly to refs
* Fix typo
1 parent 603767f commit ce3f2ae

File tree

10 files changed

+669
-40
lines changed

10 files changed

+669
-40
lines changed

README.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,7 @@ TruffleHog has a sub-command for each source of data that you may want to scan:
418418
- jenkins
419419
- elasticsearch
420420
- stdin
421+
- multi-scan
421422

422423
Each subcommand can have its own options, which you can list by passing the `--help` flag to that subcommand:
423424

@@ -481,6 +482,33 @@ For example, to scan a `git` repository, start with
481482
trufflehog git https://github.com/trufflesecurity/trufflehog.git
482483
```
483484

485+
## Configuration
486+
487+
TruffleHog supports defining [custom regex detectors](#regex-detector-alpha)
488+
and multiple sources in a configuration file provided via the `--config` flag.
489+
The regex detectors can be used with any subcommand, while the sources defined
490+
in configuration are only for the `multi-scan` subcommand.
491+
492+
The configuration format for sources can be found on Truffle Security's
493+
[source configuration documentation page](https://docs.trufflesecurity.com/scan-data-for-secrets).
494+
495+
Example GitHub source configuration and [options reference](https://docs.trufflesecurity.com/github#Fvm1I):
496+
497+
```yaml
498+
sources:
499+
- connection:
500+
'@type': type.googleapis.com/sources.GitHub
501+
repositories:
502+
- https://github.com/trufflesecurity/test_keys.git
503+
unauthenticated: {}
504+
name: example config scan
505+
type: SOURCE_TYPE_GITHUB
506+
verify: true
507+
```
508+
509+
You may define multiple source entries under the `sources` key (see above), and
510+
TruffleHog will scan all of the sources concurrently.
511+
484512
## S3
485513

486514
The S3 source supports assuming IAM roles for scanning in addition to IAM users. This makes it easier for users to scan multiple AWS accounts without needing to rely on hardcoded credentials for each account.

main.go

Lines changed: 83 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,7 @@ var (
255255
huggingfaceIncludePrs = huggingfaceScan.Flag("include-prs", "Include pull requests in scan.").Bool()
256256

257257
stdinInputScan = cli.Command("stdin", "Find credentials from stdin.")
258+
multiScanScan = cli.Command("multi-scan", "Find credentials in multiple sources defined in configuration.")
258259

259260
analyzeCmd = analyzer.Command(cli)
260261
usingTUI = false
@@ -515,7 +516,8 @@ func run(state overseer.State) {
515516
verificationCacheMetrics := verificationcache.InMemoryMetrics{}
516517

517518
engConf := engine.Config{
518-
Concurrency: *concurrency,
519+
Concurrency: *concurrency,
520+
ConfiguredSources: conf.Sources,
519521
// The engine must always be configured with the list of
520522
// default detectors, which can be further filtered by the
521523
// user. The filters are applied by the engine and are only
@@ -540,6 +542,16 @@ func run(state overseer.State) {
540542
engConf.VerificationResultCache = simple.NewCache[detectors.Result]()
541543
}
542544

545+
// Check that there are no sources defined for subcommands other than multi-scan. If
546+
// there are, return an error as it is ambiguous what the user is
547+
// trying to do.
548+
if cmd != multiScanScan.FullCommand() && len(conf.Sources) > 0 {
549+
logFatal(
550+
fmt.Errorf("ambiguous configuration"),
551+
"sources should only be defined in configuration for the 'multi-scan' command",
552+
)
553+
}
554+
543555
if *compareDetectionStrategies {
544556
if err := compareScans(ctx, cmd, engConf); err != nil {
545557
logFatal(err, "error comparing detection strategies")
@@ -702,7 +714,7 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
702714
}
703715
}()
704716

705-
var ref sources.JobProgressRef
717+
var refs []sources.JobProgressRef
706718
switch cmd {
707719
case gitScan.FullCommand():
708720
gitCfg := sources.GitConfig{
@@ -715,8 +727,10 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
715727
Bare: *gitScanBare,
716728
ExcludeGlobs: *gitScanExcludeGlobs,
717729
}
718-
if ref, err = eng.ScanGit(ctx, gitCfg); err != nil {
730+
if ref, err := eng.ScanGit(ctx, gitCfg); err != nil {
719731
return scanMetrics, fmt.Errorf("failed to scan Git: %v", err)
732+
} else {
733+
refs = []sources.JobProgressRef{ref}
720734
}
721735
case githubScan.FullCommand():
722736
filter, err := common.FilterFromFiles(*githubScanIncludePaths, *githubScanExcludePaths)
@@ -745,8 +759,10 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
745759
Filter: filter,
746760
AuthInUrl: *githubAuthInUrl,
747761
}
748-
if ref, err = eng.ScanGitHub(ctx, cfg); err != nil {
762+
if ref, err := eng.ScanGitHub(ctx, cfg); err != nil {
749763
return scanMetrics, fmt.Errorf("failed to scan Github: %v", err)
764+
} else {
765+
refs = []sources.JobProgressRef{ref}
750766
}
751767
case githubExperimentalScan.FullCommand():
752768
cfg := sources.GitHubExperimentalConfig{
@@ -756,8 +772,10 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
756772
CollisionThreshold: *githubExperimentalCollisionThreshold,
757773
DeleteCachedData: *githubExperimentalDeleteCache,
758774
}
759-
if ref, err = eng.ScanGitHubExperimental(ctx, cfg); err != nil {
775+
if ref, err := eng.ScanGitHubExperimental(ctx, cfg); err != nil {
760776
return scanMetrics, fmt.Errorf("failed to scan using Github Experimental: %v", err)
777+
} else {
778+
refs = []sources.JobProgressRef{ref}
761779
}
762780
case gitlabScan.FullCommand():
763781
filter, err := common.FilterFromFiles(*gitlabScanIncludePaths, *gitlabScanExcludePaths)
@@ -774,8 +792,10 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
774792
Filter: filter,
775793
AuthInUrl: *gitlabAuthInUrl,
776794
}
777-
if ref, err = eng.ScanGitLab(ctx, cfg); err != nil {
795+
if ref, err := eng.ScanGitLab(ctx, cfg); err != nil {
778796
return scanMetrics, fmt.Errorf("failed to scan GitLab: %v", err)
797+
} else {
798+
refs = []sources.JobProgressRef{ref}
779799
}
780800
case filesystemScan.FullCommand():
781801
if len(*filesystemDirectories) > 0 {
@@ -789,8 +809,10 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
789809
IncludePathsFile: *filesystemScanIncludePaths,
790810
ExcludePathsFile: *filesystemScanExcludePaths,
791811
}
792-
if ref, err = eng.ScanFileSystem(ctx, cfg); err != nil {
812+
if ref, err := eng.ScanFileSystem(ctx, cfg); err != nil {
793813
return scanMetrics, fmt.Errorf("failed to scan filesystem: %v", err)
814+
} else {
815+
refs = []sources.JobProgressRef{ref}
794816
}
795817
case s3Scan.FullCommand():
796818
cfg := sources.S3Config{
@@ -803,8 +825,10 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
803825
CloudCred: *s3ScanCloudEnv,
804826
MaxObjectSize: int64(*s3ScanMaxObjectSize),
805827
}
806-
if ref, err = eng.ScanS3(ctx, cfg); err != nil {
828+
if ref, err := eng.ScanS3(ctx, cfg); err != nil {
807829
return scanMetrics, fmt.Errorf("failed to scan S3: %v", err)
830+
} else {
831+
refs = []sources.JobProgressRef{ref}
808832
}
809833
case syslogScan.FullCommand():
810834
cfg := sources.SyslogConfig{
@@ -815,16 +839,22 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
815839
KeyPath: *syslogTLSKey,
816840
Concurrency: *concurrency,
817841
}
818-
if ref, err = eng.ScanSyslog(ctx, cfg); err != nil {
842+
if ref, err := eng.ScanSyslog(ctx, cfg); err != nil {
819843
return scanMetrics, fmt.Errorf("failed to scan syslog: %v", err)
844+
} else {
845+
refs = []sources.JobProgressRef{ref}
820846
}
821847
case circleCiScan.FullCommand():
822-
if ref, err = eng.ScanCircleCI(ctx, *circleCiScanToken); err != nil {
848+
if ref, err := eng.ScanCircleCI(ctx, *circleCiScanToken); err != nil {
823849
return scanMetrics, fmt.Errorf("failed to scan CircleCI: %v", err)
850+
} else {
851+
refs = []sources.JobProgressRef{ref}
824852
}
825853
case travisCiScan.FullCommand():
826-
if ref, err = eng.ScanTravisCI(ctx, *travisCiScanToken); err != nil {
854+
if ref, err := eng.ScanTravisCI(ctx, *travisCiScanToken); err != nil {
827855
return scanMetrics, fmt.Errorf("failed to scan TravisCI: %v", err)
856+
} else {
857+
refs = []sources.JobProgressRef{ref}
828858
}
829859
case gcsScan.FullCommand():
830860
cfg := sources.GCSConfig{
@@ -840,17 +870,21 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
840870
Concurrency: *concurrency,
841871
MaxObjectSize: int64(*gcsMaxObjectSize),
842872
}
843-
if ref, err = eng.ScanGCS(ctx, cfg); err != nil {
873+
if ref, err := eng.ScanGCS(ctx, cfg); err != nil {
844874
return scanMetrics, fmt.Errorf("failed to scan GCS: %v", err)
875+
} else {
876+
refs = []sources.JobProgressRef{ref}
845877
}
846878
case dockerScan.FullCommand():
847879
cfg := sources.DockerConfig{
848880
BearerToken: *dockerScanToken,
849881
Images: *dockerScanImages,
850882
UseDockerKeychain: *dockerScanToken == "",
851883
}
852-
if ref, err = eng.ScanDocker(ctx, cfg); err != nil {
884+
if ref, err := eng.ScanDocker(ctx, cfg); err != nil {
853885
return scanMetrics, fmt.Errorf("failed to scan Docker: %v", err)
886+
} else {
887+
refs = []sources.JobProgressRef{ref}
854888
}
855889
case postmanScan.FullCommand():
856890
// handle deprecated flag
@@ -886,8 +920,10 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
886920
WorkspacePaths: *postmanWorkspacePaths,
887921
EnvironmentPaths: *postmanEnvironmentPaths,
888922
}
889-
if ref, err = eng.ScanPostman(ctx, cfg); err != nil {
923+
if ref, err := eng.ScanPostman(ctx, cfg); err != nil {
890924
return scanMetrics, fmt.Errorf("failed to scan Postman: %v", err)
925+
} else {
926+
refs = []sources.JobProgressRef{ref}
891927
}
892928
case elasticsearchScan.FullCommand():
893929
cfg := sources.ElasticsearchConfig{
@@ -902,8 +938,10 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
902938
SinceTimestamp: *elasticsearchSinceTimestamp,
903939
BestEffortScan: *elasticsearchBestEffortScan,
904940
}
905-
if ref, err = eng.ScanElasticsearch(ctx, cfg); err != nil {
941+
if ref, err := eng.ScanElasticsearch(ctx, cfg); err != nil {
906942
return scanMetrics, fmt.Errorf("failed to scan Elasticsearch: %v", err)
943+
} else {
944+
refs = []sources.JobProgressRef{ref}
907945
}
908946
case jenkinsScan.FullCommand():
909947
cfg := engine.JenkinsConfig{
@@ -912,8 +950,10 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
912950
Username: *jenkinsUsername,
913951
Password: *jenkinsPassword,
914952
}
915-
if ref, err = eng.ScanJenkins(ctx, cfg); err != nil {
953+
if ref, err := eng.ScanJenkins(ctx, cfg); err != nil {
916954
return scanMetrics, fmt.Errorf("failed to scan Jenkins: %v", err)
955+
} else {
956+
refs = []sources.JobProgressRef{ref}
917957
}
918958
case huggingfaceScan.FullCommand():
919959
if *huggingfaceEndpoint != "" {
@@ -945,13 +985,26 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
945985
IncludePrs: *huggingfaceIncludePrs,
946986
Concurrency: *concurrency,
947987
}
948-
if ref, err = eng.ScanHuggingface(ctx, cfg); err != nil {
988+
if ref, err := eng.ScanHuggingface(ctx, cfg); err != nil {
949989
return scanMetrics, fmt.Errorf("failed to scan HuggingFace: %v", err)
990+
} else {
991+
refs = []sources.JobProgressRef{ref}
992+
}
993+
case multiScanScan.FullCommand():
994+
if *configFilename == "" {
995+
return scanMetrics, fmt.Errorf("missing required flag: --config")
996+
}
997+
if rs, err := eng.ScanConfig(ctx, cfg.ConfiguredSources...); err != nil {
998+
return scanMetrics, fmt.Errorf("failed to scan via config: %w", err)
999+
} else {
1000+
refs = rs
9501001
}
9511002
case stdinInputScan.FullCommand():
9521003
cfg := sources.StdinConfig{}
953-
if ref, err = eng.ScanStdinInput(ctx, cfg); err != nil {
1004+
if ref, err := eng.ScanStdinInput(ctx, cfg); err != nil {
9541005
return scanMetrics, fmt.Errorf("failed to scan stdin input: %v", err)
1006+
} else {
1007+
refs = []sources.JobProgressRef{ref}
9551008
}
9561009
default:
9571010
return scanMetrics, fmt.Errorf("invalid command: %s", cmd)
@@ -962,13 +1015,19 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
9621015
return scanMetrics, fmt.Errorf("engine failed to finish execution: %v", err)
9631016
}
9641017

965-
// Print any errors reported during the scan.
966-
if errs := ref.Snapshot().Errors; len(errs) > 0 {
967-
errMsgs := make([]string, len(errs))
968-
for i := 0; i < len(errs); i++ {
969-
errMsgs[i] = errs[i].Error()
1018+
// Print any non-fatal errors reported during the scan.
1019+
for _, ref := range refs {
1020+
if errs := ref.Snapshot().Errors; len(errs) > 0 {
1021+
errMsgs := make([]string, len(errs))
1022+
for i := 0; i < len(errs); i++ {
1023+
errMsgs[i] = errs[i].Error()
1024+
}
1025+
ctx.Logger().Error(nil, "encountered errors during scan",
1026+
"job", ref.JobID,
1027+
"source_name", ref.SourceName,
1028+
"errors", errMsgs,
1029+
)
9701030
}
971-
ctx.Logger().Error(nil, "encountered errors during scan", "errors", errMsgs)
9721031
}
9731032

9741033
if *printAvgDetectorTime {

0 commit comments

Comments
 (0)