@@ -155,6 +155,13 @@ type analyzerInfo struct {
155155 fallbackMinifiedFileLOC int
156156}
157157
158+ // fileTypeInfo is the per-file record a worker sends on the fileInfo channel: file path, detected platform type, and LOC count
159+ type fileTypeInfo struct {
160+ filePath string // path of the analyzed file; computeValues counts directories from filepath.Dir of this value
161+ fileType string // platform type detected for the file (e.g. dockerfile, terraform); key under which statistics are grouped
162+ locCount int // lines counted for the file; summed into the per-platform TotalLOC
163+ }
164+
158165// Analyzer keeps all the relevant info for the function Analyze
159166type Analyzer struct {
160167 Paths []string
@@ -318,13 +325,15 @@ func Analyze(a *Analyzer) (model.AnalyzedPaths, error) {
318325 Types : make ([]string , 0 ),
319326 Exc : make ([]string , 0 ),
320327 ExpectedLOC : 0 ,
328+ FileStats : make (map [string ]model.FileStatistics ),
321329 }
322330
323331 var files []string
324332 var wg sync.WaitGroup
325333 // results is the channel shared by the workers that contains the types found
326334 results := make (chan string )
327335 locCount := make (chan int )
336+ fileInfo := make (chan fileTypeInfo )
328337 ignoreFiles := make ([]string , 0 )
329338 projectConfigFiles := make ([]string , 0 )
330339 done := make (chan bool )
@@ -374,7 +383,7 @@ func Analyze(a *Analyzer) (model.AnalyzedPaths, error) {
374383 filePath : file ,
375384 fallbackMinifiedFileLOC : a .FallbackMinifiedFileLOC ,
376385 }
377- go a .worker (results , unwanted , locCount , & wg )
386+ go a .worker (results , unwanted , locCount , fileInfo , & wg )
378387 }
379388
380389 go func () {
@@ -383,27 +392,35 @@ func Analyze(a *Analyzer) (model.AnalyzedPaths, error) {
383392 close (unwanted )
384393 close (results )
385394 close (locCount )
395+ close (fileInfo )
386396 }()
387397 wg .Wait ()
388398 done <- true
389399 }()
390400
391- availableTypes , unwantedPaths , loc := computeValues (results , unwanted , locCount , done )
401+ availableTypes , unwantedPaths , loc , fileStats := computeValues (results , unwanted , locCount , fileInfo , done )
392402 multiPlatformTypeCheck (& availableTypes )
393403 unwantedPaths = append (unwantedPaths , ignoreFiles ... )
394404 unwantedPaths = append (unwantedPaths , projectConfigFiles ... )
395405 returnAnalyzedPaths .Types = availableTypes
396406 returnAnalyzedPaths .Exc = unwantedPaths
397407 returnAnalyzedPaths .ExpectedLOC = loc
408+ returnAnalyzedPaths .FileStats = fileStats
398409 // stop metrics for file analyzer
399410 metrics .Metric .Stop ()
400411 return returnAnalyzedPaths , nil
401412}
402413
403414// worker determines the type of the file by ext (dockerfile and terraform)/content and
404- // writes the answer to the results channel
415+ // writes the answer to the results channel and the file info (path, type, LOC count) to the fileInfo channel for statistics
405416// if no types were found, the worker will write the path of the file in the unwanted channel
406- func (a * analyzerInfo ) worker (results , unwanted chan <- string , locCount chan <- int , wg * sync.WaitGroup ) { //nolint: gocyclo
417+ func (a * analyzerInfo ) worker ( //nolint: gocyclo
418+ results ,
419+ unwanted chan <- string ,
420+ locCount chan <- int ,
421+ fileInfo chan <- fileTypeInfo ,
422+ wg * sync.WaitGroup ,
423+ ) {
407424 defer func () {
408425 if err := recover (); err != nil {
409426 log .Warn ().Msgf ("Recovered from analyzing panic for file %s with error: %#v" , a .filePath , err .(error ).Error ())
@@ -422,12 +439,14 @@ func (a *analyzerInfo) worker(results, unwanted chan<- string, locCount chan<- i
422439 if a .isAvailableType (dockerfile ) {
423440 results <- dockerfile
424441 locCount <- linesCount
442+ fileInfo <- fileTypeInfo {filePath : a .filePath , fileType : dockerfile , locCount : linesCount }
425443 }
426444 // Dockerfile (indirect identification)
427445 case "possibleDockerfile" , ".ubi8" , ".debian" :
428446 if a .isAvailableType (dockerfile ) && isDockerfile (a .filePath ) {
429447 results <- dockerfile
430448 locCount <- linesCount
449+ fileInfo <- fileTypeInfo {filePath : a .filePath , fileType : dockerfile , locCount : linesCount }
431450 } else {
432451 unwanted <- a .filePath
433452 }
@@ -436,30 +455,34 @@ func (a *analyzerInfo) worker(results, unwanted chan<- string, locCount chan<- i
436455 if a .isAvailableType (terraform ) {
437456 results <- terraform
438457 locCount <- linesCount
458+ fileInfo <- fileTypeInfo {filePath : a .filePath , fileType : terraform , locCount : linesCount }
439459 }
440460 // Bicep
441461 case ".bicep" :
442462 if a .isAvailableType (bicep ) {
443463 results <- arm
444464 locCount <- linesCount
465+ fileInfo <- fileTypeInfo {filePath : a .filePath , fileType : arm , locCount : linesCount }
445466 }
446467 // GRPC
447468 case ".proto" :
448469 if a .isAvailableType (grpc ) {
449470 results <- grpc
450471 locCount <- linesCount
472+ fileInfo <- fileTypeInfo {filePath : a .filePath , fileType : grpc , locCount : linesCount }
451473 }
452474 // It could be Ansible Config or Ansible Inventory
453475 case ".cfg" , ".conf" , ".ini" :
454476 if a .isAvailableType (ansible ) {
455477 results <- ansible
456478 locCount <- linesCount
479+ fileInfo <- fileTypeInfo {filePath : a .filePath , fileType : ansible , locCount : linesCount }
457480 }
458481 /* It could be Ansible, Buildah, CICD, CloudFormation, Crossplane, OpenAPI, Azure Resource Manager
459482 Docker Compose, Knative, Kubernetes, Pulumi, ServerlessFW or Google Deployment Manager.
460483 We also have FHIR's case which will be ignored since it's not a platform file.*/
461484 case yaml , yml , json , sh :
462- a .checkContent (results , unwanted , locCount , linesCount , ext )
485+ a .checkContent (results , unwanted , locCount , fileInfo , linesCount , ext )
463486 }
464487 }
465488}
@@ -500,7 +523,14 @@ func needsOverride(check bool, returnType, key, ext string) bool {
500523
501524// checkContent will determine the file type by content when worker was unable to
502525// determine by ext, if no type was determined checkContent adds it to unwanted channel
503- func (a * analyzerInfo ) checkContent (results , unwanted chan <- string , locCount chan <- int , linesCount int , ext string ) {
526+ func (a * analyzerInfo ) checkContent (
527+ results ,
528+ unwanted chan <- string ,
529+ locCount chan <- int ,
530+ fileInfo chan <- fileTypeInfo ,
531+ linesCount int ,
532+ ext string ,
533+ ) {
504534 typesFlag := a .typesFlag
505535 excludeTypesFlag := a .excludeTypesFlag
506536 // get file content with UTF-16/UTF-8 detection
@@ -558,6 +588,7 @@ func (a *analyzerInfo) checkContent(results, unwanted chan<- string, locCount ch
558588
559589 results <- returnType
560590 locCount <- linesCount
591+ fileInfo <- fileTypeInfo {filePath : a .filePath , fileType : returnType , locCount : linesCount }
561592}
562593
563594func checkReturnType (path , returnType , ext string , content []byte ) string {
@@ -661,10 +692,21 @@ func checkForAnsibleHost(yamlContent model.Document) bool {
661692
662693// computeValues computes expected Lines of Code to be scanned from locCount channel
663694// and creates the types and unwanted slices from the channels removing any duplicates
664- func computeValues (types , unwanted chan string , locCount chan int , done chan bool ) (typesS , unwantedS []string , locTotal int ) {
695+ // it also aggregates, per platform type, file statistics (file count, directory count, files per directory, total LOC) for memory calculation
696+ func computeValues (
697+ types ,
698+ unwanted chan string ,
699+ locCount chan int ,
700+ fileInfo chan fileTypeInfo ,
701+ done chan bool ,
702+ ) (typesS , unwantedS []string , locTotal int , stats map [string ]model.FileStatistics ) {
665703 var val int
666704 unwantedSlice := make ([]string , 0 )
667705 typeSlice := make ([]string , 0 )
706+ stats = make (map [string ]model.FileStatistics )
707+
708+ platformFilesInfo := make (map [string ][]fileTypeInfo )
709+
668710 for {
669711 select {
670712 case i := <- locCount :
@@ -677,8 +719,28 @@ func computeValues(types, unwanted chan string, locCount chan int, done chan boo
677719 if ! utils .Contains (i , typeSlice ) {
678720 typeSlice = append (typeSlice , i )
679721 }
722+ case info := <- fileInfo :
723+ platformFilesInfo [info .fileType ] = append (platformFilesInfo [info .fileType ], info )
680724 case <- done :
681- return typeSlice , unwantedSlice , val
725+ for platformType , filesInfo := range platformFilesInfo {
726+ dirMap := make (map [string ]int )
727+ totalLOC := 0
728+
729+ for _ , fileInfo := range filesInfo {
730+ dir := filepath .Dir (fileInfo .filePath )
731+ dirMap [dir ]++
732+ totalLOC += fileInfo .locCount
733+ }
734+
735+ stats [platformType ] = model.FileStatistics {
736+ FileCount : len (filesInfo ),
737+ DirectoryCount : len (dirMap ),
738+ FilesByDir : dirMap ,
739+ TotalLOC : totalLOC ,
740+ }
741+ }
742+
743+ return typeSlice , unwantedSlice , val , stats
682744 }
683745 }
684746}
0 commit comments