Skip to content

Commit e8860ac

Browse files
perf(engine): optimize Terraform parser with directory caching and LOC-based memory calculation (#7864)
* add log inspector * add more debug and a better tf handling for tests * add information on each file/folder analyzed for better memory calculation * add loc for better memory computation * initialize FileStats properly for returnAnalyzedPaths * add relevant tests for the new supported behaviour * add failedqueries report to secrets inspector * update kics github action to version 2.1.17
1 parent 08d60b3 commit e8860ac

File tree

13 files changed

+447
-34
lines changed

13 files changed

+447
-34
lines changed

.github/workflows/kics-gh-action.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
steps:
1212
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
1313
- name: Run KICS Scan
14-
uses: checkmarx/kics-github-action@71454548efb714daa457caae25c01d64cc0be9d2 # v2.1.13
14+
uses: checkmarx/kics-github-action@e01759d524f8abd5bd650d3d5bd4b96d46ebbc1d # v2.1.17
1515
with:
1616
token: ${{ secrets.GITHUB_TOKEN }}
1717
path: "./Dockerfile"

e2e/testcases/e2e-cli-056_scan_timeout.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@ func init() { //nolint
1919
WantStatus: []int{50, 50, 126},
2020
Validation: func(outputText string) bool {
2121
matchTimeoutLog, _ := regexp.MatchString("Query execution timeout=(0|1|12)s", outputText)
22-
return matchTimeoutLog
22+
// Check for validation error (for invalid timeout=0)
23+
matchValidationError, _ := regexp.MatchString("invalid argument --timeout: value must be greater than 0", outputText)
24+
return matchTimeoutLog || matchValidationError
2325
},
2426
}
2527

internal/console/assets/scan-flags.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,8 @@
175175
"flagType": "int",
176176
"shorthandFlag": "",
177177
"defaultValue": "60",
178-
"usage": "number of seconds the query has to execute before being canceled"
178+
"usage": "number of seconds the query has to execute before being canceled",
179+
"validation": "validateTimeoutFlag"
179180
},
180181
"type": {
181182
"flagType": "multiStr",

internal/console/flags/validate.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ var flagValidationFuncs = flagValidationFuncsMap{
1111
"allQueriesID": allQueriesID,
1212
"validateWorkersFlag": validateWorkersFlag,
1313
"validatePath": validatePath,
14+
"validateTimeoutFlag": validateTimeoutFlag,
1415
}
1516

1617
func isQueryID(id string) bool {

internal/console/flags/validate_multi_str.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,11 @@ func validateWorkersFlag(flagName string) error {
8181
}
8282
return nil
8383
}
84+
85+
func validateTimeoutFlag(flagName string) error {
86+
timeout := GetIntFlag(flagName)
87+
if timeout <= 0 {
88+
return fmt.Errorf("invalid argument --%s: value must be greater than 0", flagName)
89+
}
90+
return nil
91+
}

pkg/analyzer/analyzer.go

Lines changed: 70 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,13 @@ type analyzerInfo struct {
155155
fallbackMinifiedFileLOC int
156156
}
157157

158+
// fileTypeInfo contains file path, detected platform type, and LOC count
159+
type fileTypeInfo struct {
160+
filePath string
161+
fileType string
162+
locCount int
163+
}
164+
158165
// Analyzer keeps all the relevant info for the function Analyze
159166
type Analyzer struct {
160167
Paths []string
@@ -318,13 +325,15 @@ func Analyze(a *Analyzer) (model.AnalyzedPaths, error) {
318325
Types: make([]string, 0),
319326
Exc: make([]string, 0),
320327
ExpectedLOC: 0,
328+
FileStats: make(map[string]model.FileStatistics),
321329
}
322330

323331
var files []string
324332
var wg sync.WaitGroup
325333
// results is the channel shared by the workers that contains the types found
326334
results := make(chan string)
327335
locCount := make(chan int)
336+
fileInfo := make(chan fileTypeInfo)
328337
ignoreFiles := make([]string, 0)
329338
projectConfigFiles := make([]string, 0)
330339
done := make(chan bool)
@@ -374,7 +383,7 @@ func Analyze(a *Analyzer) (model.AnalyzedPaths, error) {
374383
filePath: file,
375384
fallbackMinifiedFileLOC: a.FallbackMinifiedFileLOC,
376385
}
377-
go a.worker(results, unwanted, locCount, &wg)
386+
go a.worker(results, unwanted, locCount, fileInfo, &wg)
378387
}
379388

380389
go func() {
@@ -383,27 +392,35 @@ func Analyze(a *Analyzer) (model.AnalyzedPaths, error) {
383392
close(unwanted)
384393
close(results)
385394
close(locCount)
395+
close(fileInfo)
386396
}()
387397
wg.Wait()
388398
done <- true
389399
}()
390400

391-
availableTypes, unwantedPaths, loc := computeValues(results, unwanted, locCount, done)
401+
availableTypes, unwantedPaths, loc, fileStats := computeValues(results, unwanted, locCount, fileInfo, done)
392402
multiPlatformTypeCheck(&availableTypes)
393403
unwantedPaths = append(unwantedPaths, ignoreFiles...)
394404
unwantedPaths = append(unwantedPaths, projectConfigFiles...)
395405
returnAnalyzedPaths.Types = availableTypes
396406
returnAnalyzedPaths.Exc = unwantedPaths
397407
returnAnalyzedPaths.ExpectedLOC = loc
408+
returnAnalyzedPaths.FileStats = fileStats
398409
// stop metrics for file analyzer
399410
metrics.Metric.Stop()
400411
return returnAnalyzedPaths, nil
401412
}
402413

403414
// worker determines the type of the file by ext (dockerfile and terraform)/content and
404-
// writes the answer to the results channel
415+
// writes the answer to the results channel and file info for statistics
405416
// if no types were found, the worker will write the path of the file in the unwanted channel
406-
func (a *analyzerInfo) worker(results, unwanted chan<- string, locCount chan<- int, wg *sync.WaitGroup) { //nolint: gocyclo
417+
func (a *analyzerInfo) worker( //nolint: gocyclo
418+
results,
419+
unwanted chan<- string,
420+
locCount chan<- int,
421+
fileInfo chan<- fileTypeInfo,
422+
wg *sync.WaitGroup,
423+
) {
407424
defer func() {
408425
if err := recover(); err != nil {
409426
log.Warn().Msgf("Recovered from analyzing panic for file %s with error: %#v", a.filePath, err.(error).Error())
@@ -422,12 +439,14 @@ func (a *analyzerInfo) worker(results, unwanted chan<- string, locCount chan<- i
422439
if a.isAvailableType(dockerfile) {
423440
results <- dockerfile
424441
locCount <- linesCount
442+
fileInfo <- fileTypeInfo{filePath: a.filePath, fileType: dockerfile, locCount: linesCount}
425443
}
426444
// Dockerfile (indirect identification)
427445
case "possibleDockerfile", ".ubi8", ".debian":
428446
if a.isAvailableType(dockerfile) && isDockerfile(a.filePath) {
429447
results <- dockerfile
430448
locCount <- linesCount
449+
fileInfo <- fileTypeInfo{filePath: a.filePath, fileType: dockerfile, locCount: linesCount}
431450
} else {
432451
unwanted <- a.filePath
433452
}
@@ -436,30 +455,34 @@ func (a *analyzerInfo) worker(results, unwanted chan<- string, locCount chan<- i
436455
if a.isAvailableType(terraform) {
437456
results <- terraform
438457
locCount <- linesCount
458+
fileInfo <- fileTypeInfo{filePath: a.filePath, fileType: terraform, locCount: linesCount}
439459
}
440460
// Bicep
441461
case ".bicep":
442462
if a.isAvailableType(bicep) {
443463
results <- arm
444464
locCount <- linesCount
465+
fileInfo <- fileTypeInfo{filePath: a.filePath, fileType: arm, locCount: linesCount}
445466
}
446467
// GRPC
447468
case ".proto":
448469
if a.isAvailableType(grpc) {
449470
results <- grpc
450471
locCount <- linesCount
472+
fileInfo <- fileTypeInfo{filePath: a.filePath, fileType: grpc, locCount: linesCount}
451473
}
452474
// It could be Ansible Config or Ansible Inventory
453475
case ".cfg", ".conf", ".ini":
454476
if a.isAvailableType(ansible) {
455477
results <- ansible
456478
locCount <- linesCount
479+
fileInfo <- fileTypeInfo{filePath: a.filePath, fileType: ansible, locCount: linesCount}
457480
}
458481
/* It could be Ansible, Buildah, CICD, CloudFormation, Crossplane, OpenAPI, Azure Resource Manager
459482
Docker Compose, Knative, Kubernetes, Pulumi, ServerlessFW or Google Deployment Manager.
460483
We also have FHIR's case which will be ignored since it's not a platform file.*/
461484
case yaml, yml, json, sh:
462-
a.checkContent(results, unwanted, locCount, linesCount, ext)
485+
a.checkContent(results, unwanted, locCount, fileInfo, linesCount, ext)
463486
}
464487
}
465488
}
@@ -500,7 +523,14 @@ func needsOverride(check bool, returnType, key, ext string) bool {
500523

501524
// checkContent will determine the file type by content when worker was unable to
502525
// determine by ext, if no type was determined checkContent adds it to unwanted channel
503-
func (a *analyzerInfo) checkContent(results, unwanted chan<- string, locCount chan<- int, linesCount int, ext string) {
526+
func (a *analyzerInfo) checkContent(
527+
results,
528+
unwanted chan<- string,
529+
locCount chan<- int,
530+
fileInfo chan<- fileTypeInfo,
531+
linesCount int,
532+
ext string,
533+
) {
504534
typesFlag := a.typesFlag
505535
excludeTypesFlag := a.excludeTypesFlag
506536
// get file content with UTF-16/UTF-8 detection
@@ -558,6 +588,7 @@ func (a *analyzerInfo) checkContent(results, unwanted chan<- string, locCount ch
558588

559589
results <- returnType
560590
locCount <- linesCount
591+
fileInfo <- fileTypeInfo{filePath: a.filePath, fileType: returnType, locCount: linesCount}
561592
}
562593

563594
func checkReturnType(path, returnType, ext string, content []byte) string {
@@ -661,10 +692,21 @@ func checkForAnsibleHost(yamlContent model.Document) bool {
661692

662693
// computeValues computes expected Lines of Code to be scanned from locCount channel
663694
// and creates the types and unwanted slices from the channels removing any duplicates
664-
func computeValues(types, unwanted chan string, locCount chan int, done chan bool) (typesS, unwantedS []string, locTotal int) {
695+
// also collects file statistics for memory calculation
696+
func computeValues(
697+
types,
698+
unwanted chan string,
699+
locCount chan int,
700+
fileInfo chan fileTypeInfo,
701+
done chan bool,
702+
) (typesS, unwantedS []string, locTotal int, stats map[string]model.FileStatistics) {
665703
var val int
666704
unwantedSlice := make([]string, 0)
667705
typeSlice := make([]string, 0)
706+
stats = make(map[string]model.FileStatistics)
707+
708+
platformFilesInfo := make(map[string][]fileTypeInfo)
709+
668710
for {
669711
select {
670712
case i := <-locCount:
@@ -677,8 +719,28 @@ func computeValues(types, unwanted chan string, locCount chan int, done chan boo
677719
if !utils.Contains(i, typeSlice) {
678720
typeSlice = append(typeSlice, i)
679721
}
722+
case info := <-fileInfo:
723+
platformFilesInfo[info.fileType] = append(platformFilesInfo[info.fileType], info)
680724
case <-done:
681-
return typeSlice, unwantedSlice, val
725+
for platformType, filesInfo := range platformFilesInfo {
726+
dirMap := make(map[string]int)
727+
totalLOC := 0
728+
729+
for _, fileInfo := range filesInfo {
730+
dir := filepath.Dir(fileInfo.filePath)
731+
dirMap[dir]++
732+
totalLOC += fileInfo.locCount
733+
}
734+
735+
stats[platformType] = model.FileStatistics{
736+
FileCount: len(filesInfo),
737+
DirectoryCount: len(dirMap),
738+
FilesByDir: dirMap,
739+
TotalLOC: totalLOC,
740+
}
741+
}
742+
743+
return typeSlice, unwantedSlice, val, stats
682744
}
683745
}
684746
}

pkg/analyzer/analyzer_test.go

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -588,3 +588,132 @@ func TestAnalyzer_Analyze(t *testing.T) {
588588
})
589589
}
590590
}
591+
592+
func TestAnalyzer_FileStats(t *testing.T) {
593+
tests := []struct {
594+
name string
595+
paths []string
596+
typesFromFlag []string
597+
excludeTypesFromFlag []string
598+
wantPlatformStats map[string]platformFileStats
599+
gitIgnoreFileName string
600+
excludeGitIgnore bool
601+
MaxFileSize int
602+
}{
603+
{
604+
name: "file_stats_nested_structure_with_multiple_platforms",
605+
paths: []string{filepath.FromSlash("../../test/fixtures/analyzer_test/helm")},
606+
typesFromFlag: []string{""},
607+
excludeTypesFromFlag: []string{""},
608+
wantPlatformStats: map[string]platformFileStats{
609+
"kubernetes": {
610+
fileCount: 3,
611+
dirCount: 2,
612+
totalLOC: 118,
613+
},
614+
},
615+
gitIgnoreFileName: "",
616+
excludeGitIgnore: true,
617+
MaxFileSize: -1,
618+
},
619+
{
620+
name: "file_stats_multiple_platforms_nested_directories",
621+
paths: []string{filepath.FromSlash("../../test/fixtures/analyzer_test")},
622+
typesFromFlag: []string{""},
623+
excludeTypesFromFlag: []string{""},
624+
wantPlatformStats: map[string]platformFileStats{
625+
"terraform": {
626+
fileCount: 1,
627+
dirCount: 1,
628+
totalLOC: 10,
629+
},
630+
"kubernetes": {
631+
fileCount: 4,
632+
dirCount: 3,
633+
totalLOC: 131,
634+
},
635+
"dockerfile": {
636+
fileCount: 1,
637+
dirCount: 1,
638+
totalLOC: 3,
639+
},
640+
},
641+
gitIgnoreFileName: "",
642+
excludeGitIgnore: true,
643+
MaxFileSize: -1,
644+
},
645+
{
646+
name: "file_stats_with_type_filter",
647+
paths: []string{filepath.FromSlash("../../test/fixtures/analyzer_test")},
648+
typesFromFlag: []string{"terraform", "kubernetes"},
649+
excludeTypesFromFlag: []string{""},
650+
wantPlatformStats: map[string]platformFileStats{
651+
"terraform": {
652+
fileCount: 1,
653+
dirCount: 1,
654+
totalLOC: 10,
655+
},
656+
"kubernetes": {
657+
fileCount: 6,
658+
dirCount: 3,
659+
totalLOC: 156,
660+
},
661+
},
662+
gitIgnoreFileName: "",
663+
excludeGitIgnore: true,
664+
MaxFileSize: -1,
665+
},
666+
}
667+
668+
for _, tt := range tests {
669+
t.Run(tt.name, func(t *testing.T) {
670+
exc := []string{""}
671+
672+
analyzer := &Analyzer{
673+
Paths: tt.paths,
674+
Types: tt.typesFromFlag,
675+
ExcludeTypes: tt.excludeTypesFromFlag,
676+
Exc: exc,
677+
ExcludeGitIgnore: tt.excludeGitIgnore,
678+
GitIgnoreFileName: tt.gitIgnoreFileName,
679+
MaxFileSize: tt.MaxFileSize,
680+
}
681+
682+
got, err := Analyze(analyzer)
683+
require.NoError(t, err)
684+
685+
require.NotNil(t, got.FileStats, "FileStats should not be nil")
686+
687+
for platform, expectedStats := range tt.wantPlatformStats {
688+
platformStats, exists := got.FileStats[platform]
689+
require.True(t, exists, "FileStats should contain platform: %s", platform)
690+
691+
require.Equal(t, expectedStats.fileCount, platformStats.FileCount,
692+
"wrong file count for platform %s", platform)
693+
694+
require.Equal(t, expectedStats.dirCount, platformStats.DirectoryCount,
695+
"wrong directory count for platform %s", platform)
696+
697+
require.Equal(t, expectedStats.totalLOC, platformStats.TotalLOC,
698+
"wrong total LOC for platform %s", platform)
699+
700+
require.NotNil(t, platformStats.FilesByDir, "FilesByDir should not be nil")
701+
require.Equal(t, expectedStats.dirCount, len(platformStats.FilesByDir),
702+
"wrong FilesByDir entries for platform %s", platform)
703+
704+
totalFilesFromDirs := 0
705+
for _, fileCount := range platformStats.FilesByDir {
706+
totalFilesFromDirs += fileCount
707+
}
708+
require.Equal(t, platformStats.FileCount, totalFilesFromDirs,
709+
"file count sum mismatch for platform %s", platform)
710+
}
711+
})
712+
}
713+
}
714+
715+
type platformFileStats struct {
716+
fileCount int
717+
dirCount int
718+
totalLOC int
719+
}

0 commit comments

Comments
 (0)