Skip to content

Commit 20feb8b

Browse files
authored
Simplify Repo File Parsing (#167)
1 parent c67802d commit 20feb8b

File tree

2 files changed

+130
-174
lines changed

2 files changed

+130
-174
lines changed

scanner/scanner.go

Lines changed: 120 additions & 171 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,10 @@ package scanner
22

33
import (
44
"context"
5-
"errors"
65
"github.com/boostsecurityio/poutine/models"
76
"github.com/rs/zerolog/log"
87
"io/fs"
98
"os"
10-
"path"
119
"path/filepath"
1210
"strings"
1311

@@ -18,177 +16,104 @@ import (
1816

1917
// MAX_DEPTH caps how many GitLab CI configs the queue-draining loop of the
// GitLab parser collects before it stops.
const MAX_DEPTH = 150
2018

21-
type Scanner struct {
22-
Path string
23-
Package *models.PackageInsights
24-
ResolvedPurls map[string]bool
25-
}
19+
// parseFunc is the signature shared by the per-format parsers. It receives the
// scanner whose Package it should append to, the path of the matched file as
// produced by the directory walk, and that file's FileInfo.
type parseFunc func(*Scanner, string, fs.FileInfo) error
2620

27-
func NewScanner(path string) Scanner {
28-
return Scanner{
29-
Path: path,
30-
Package: &models.PackageInsights{},
31-
ResolvedPurls: map[string]bool{},
32-
}
33-
}
21+
// parseGithubActionsMetadata reads the file at filePath, unmarshals it as YAML
// into a models.GithubActionsMetadata whose Path is filePath relative to
// scanner.Path, and appends it to scanner.Package.GithubActionsMetadata when
// IsValid reports true.
//
// Errors from filepath.Rel and os.ReadFile are returned. A YAML error or an
// invalid document is only logged at debug level and yields nil. fileInfo is
// not used.
func parseGithubActionsMetadata(scanner *Scanner, filePath string, fileInfo fs.FileInfo) error {
22+
metadata := make([]models.GithubActionsMetadata, 0)
3423

35-
func (s *Scanner) Run(ctx context.Context, o *opa.Opa) error {
36-
err := s.parse()
24+
relPath, err := filepath.Rel(scanner.Path, filePath)
3725
if err != nil {
3826
return err
3927
}
4028

41-
return s.inventory(ctx, o)
42-
}
43-
44-
func (s *Scanner) inventory(ctx context.Context, o *opa.Opa) error {
45-
result := opa.InventoryResult{}
46-
err := o.Eval(ctx,
47-
"data.poutine.queries.inventory.result",
48-
map[string]interface{}{
49-
"packages": []interface{}{s.Package},
50-
},
51-
&result,
52-
)
29+
data, err := os.ReadFile(filePath)
5330
if err != nil {
5431
return err
5532
}
5633

57-
s.Package.BuildDependencies = result.BuildDependencies
58-
s.Package.PackageDependencies = result.PackageDependencies
34+
meta := models.GithubActionsMetadata{
35+
Path: relPath,
36+
}
37+
err = yaml.Unmarshal(data, &meta)
38+
if err != nil {
39+
log.Debug().Err(err).Str("file", relPath).Msg("failed to unmarshal yaml file")
40+
// Malformed YAML is logged and skipped, not reported as an error.
return nil
41+
}
42+
43+
if meta.IsValid() {
44+
metadata = append(metadata, meta)
45+
} else {
46+
log.Debug().Str("file", relPath).Msg("failed to parse github actions metadata")
47+
}
48+
49+
scanner.Package.GithubActionsMetadata = append(scanner.Package.GithubActionsMetadata, metadata...)
5950

6051
return nil
6152
}
6253

63-
func (s *Scanner) parse() error {
64-
var err error
65-
s.Package.GithubActionsMetadata, err = s.GithubActionsMetadata()
54+
// parseGithubWorkflows reads the file at filePath, unmarshals it as YAML into
// a models.GithubActionsWorkflow whose Path is filePath relative to
// scanner.Path, and appends it to scanner.Package.GithubActionsWorkflows when
// IsValid reports true.
//
// Errors from filepath.Rel and os.ReadFile are returned. A YAML error or an
// invalid workflow is only logged at debug level and yields nil. fileInfo is
// not used.
func parseGithubWorkflows(scanner *Scanner, filePath string, fileInfo fs.FileInfo) error {
55+
relPath, err := filepath.Rel(scanner.Path, filePath)
6656
if err != nil {
6757
return err
6858
}
6959

70-
s.Package.GithubActionsWorkflows, err = s.GithubWorkflows()
60+
data, err := os.ReadFile(filePath)
7161
if err != nil {
7262
return err
7363
}
7464

75-
s.Package.GitlabciConfigs, err = s.GitlabciConfigs()
65+
workflow := models.GithubActionsWorkflow{Path: relPath}
66+
err = yaml.Unmarshal(data, &workflow)
7667
if err != nil {
77-
return err
68+
log.Debug().Err(err).Str("file", relPath).Msg("failed to unmarshal yaml file")
69+
// Malformed YAML is logged and skipped, not reported as an error.
return nil
7870
}
7971

80-
s.Package.AzurePipelines, err = s.AzurePipelines()
81-
if err != nil {
82-
return err
72+
if workflow.IsValid() {
73+
scanner.Package.GithubActionsWorkflows = append(scanner.Package.GithubActionsWorkflows, workflow)
74+
} else {
75+
log.Debug().Str("file", relPath).Msg("failed to parse github actions workflow")
8376
}
8477

8578
return nil
8679
}
8780

88-
func (s *Scanner) GithubActionsMetadata() ([]models.GithubActionsMetadata, error) {
89-
metadata := make([]models.GithubActionsMetadata, 0)
90-
91-
err := filepath.Walk(s.Path,
92-
func(path string, info os.FileInfo, err error) error {
93-
if err != nil {
94-
return err
95-
}
96-
97-
if info.IsDir() && info.Name() == ".git" {
98-
return filepath.SkipDir
99-
}
100-
101-
if info.IsDir() || (info.Name() != "action.yml" && info.Name() != "action.yaml") {
102-
return nil
103-
}
104-
105-
rel_path, err := filepath.Rel(s.Path, path)
106-
if err != nil {
107-
return err
108-
}
109-
110-
data, err := os.ReadFile(path)
111-
if err != nil {
112-
return err
113-
}
114-
115-
meta := models.GithubActionsMetadata{
116-
Path: rel_path,
117-
}
118-
err = yaml.Unmarshal(data, &meta)
119-
if err != nil {
120-
log.Debug().Err(err).Str("file", rel_path).Msg("failed to unmarshal yaml file")
121-
return nil
122-
}
123-
124-
if meta.IsValid() {
125-
metadata = append(metadata, meta)
126-
} else {
127-
log.Debug().Str("file", rel_path).Msg("failed to parse github actions metadata")
128-
}
129-
130-
return nil
131-
},
132-
)
133-
134-
return metadata, err
135-
}
136-
137-
func (s *Scanner) GithubWorkflows() ([]models.GithubActionsWorkflow, error) {
138-
folder := filepath.Join(s.Path, ".github/workflows")
139-
files, err := os.ReadDir(folder)
81+
// parseAzurePipelines reads the file at filePath and unmarshals it as YAML
// into a models.AzurePipeline. When IsValid reports true, the pipeline's Path
// is set to filePath relative to scanner.Path and it is appended to
// scanner.Package.AzurePipelines.
//
// Errors from filepath.Rel and os.ReadFile are returned. A YAML error or an
// invalid pipeline is only logged at debug level and yields nil. fileInfo is
// not used.
func parseAzurePipelines(scanner *Scanner, filePath string, fileInfo fs.FileInfo) error {
82+
relPath, err := filepath.Rel(scanner.Path, filePath)
14083
if err != nil {
141-
if errors.Is(err, fs.ErrNotExist) {
142-
return []models.GithubActionsWorkflow{}, nil
143-
}
144-
return nil, err
84+
return err
14585
}
14686

147-
workflows := make([]models.GithubActionsWorkflow, 0, len(files))
148-
for _, file := range files {
149-
if file.IsDir() {
150-
continue
151-
}
152-
153-
path := path.Join(folder, file.Name())
154-
if !strings.HasSuffix(path, ".yml") && !strings.HasSuffix(path, ".yaml") {
155-
continue
156-
}
157-
rel_path, err := filepath.Rel(s.Path, path)
158-
if err != nil {
159-
return nil, err
160-
}
161-
162-
data, err := os.ReadFile(path)
163-
if err != nil {
164-
return nil, err
165-
}
87+
data, err := os.ReadFile(filePath)
88+
if err != nil {
89+
return err
90+
}
16691

167-
workflow := models.GithubActionsWorkflow{Path: rel_path}
168-
err = yaml.Unmarshal(data, &workflow)
169-
if err != nil {
170-
log.Debug().Err(err).Str("file", rel_path).Msg("failed to unmarshal yaml file")
171-
continue
172-
}
92+
pipeline := models.AzurePipeline{}
93+
err = yaml.Unmarshal(data, &pipeline)
94+
if err != nil {
95+
log.Debug().Err(err).Str("file", relPath).Msg("failed to unmarshal yaml file")
96+
// Malformed YAML is logged and skipped, not reported as an error.
return nil
97+
}
17398

174-
if workflow.IsValid() {
175-
workflows = append(workflows, workflow)
176-
} else {
177-
log.Debug().Str("file", rel_path).Msg("failed to parse github actions workflow")
178-
}
99+
if pipeline.IsValid() {
100+
pipeline.Path = relPath
101+
scanner.Package.AzurePipelines = append(scanner.Package.AzurePipelines, pipeline)
102+
} else {
103+
log.Debug().Str("file", relPath).Msg("failed to parse azure pipeline")
179104
}
180105

181-
return workflows, err
106+
return nil
182107
}
183108

184-
func (s *Scanner) GitlabciConfigs() ([]models.GitlabciConfig, error) {
109+
func parseGitlabCi(scanner *Scanner, filePath string, fileInfo fs.FileInfo) error {
185110
files := map[string]bool{}
186111
queue := []string{"/.gitlab-ci.yml"}
187112
configs := []models.GitlabciConfig{}
188113

189114
for len(queue) > 0 && len(configs) < MAX_DEPTH {
190115
repoPath := filepath.Join("/", queue[0])
191-
configPath := filepath.Join(s.Path, repoPath)
116+
configPath := filepath.Join(scanner.Path, repoPath)
192117
queue = queue[1:]
193118

194119
if files[repoPath] {
@@ -224,60 +149,84 @@ func (s *Scanner) GitlabciConfigs() ([]models.GitlabciConfig, error) {
224149
configs = append(configs, *config)
225150
}
226151

227-
return configs, nil
228-
}
229-
230-
var azurePipelineFileRegex = regexp.MustCompile(`\.?azure-pipelines(-.+)?\.ya?ml$`)
231-
232-
func (s *Scanner) AzurePipelines() ([]models.AzurePipeline, error) {
233-
pipelines := []models.AzurePipeline{}
234-
err := filepath.Walk(s.Path,
235-
func(path string, info os.FileInfo, err error) error {
236-
if err != nil {
237-
return err
238-
}
152+
scanner.Package.GitlabciConfigs = append(scanner.Package.GitlabciConfigs, configs...)
239153

240-
if info.IsDir() && info.Name() == ".git" {
241-
return filepath.SkipDir
242-
}
154+
return nil
155+
}
243156

244-
if info.IsDir() {
245-
return nil
246-
}
157+
// Scanner walks a directory tree and collects the CI configuration it finds
// into Package.
type Scanner struct {
158+
// Path is the root directory the walk starts from.
Path string
159+
// Package accumulates the results appended by the parse functions.
Package *models.PackageInsights
160+
// ResolvedPurls starts out empty in NewScanner; presumably it records
// purls that were already resolved — confirm against callers.
ResolvedPurls map[string]bool
161+
// ParseFuncs maps a pattern, tested against each file's path relative to
// Path, to the parser that is run on a match.
ParseFuncs map[*regexp.Regexp]parseFunc
162+
}
247163

248-
if !azurePipelineFileRegex.MatchString(info.Name()) {
249-
return nil
250-
}
164+
func NewScanner(path string) Scanner {
165+
return Scanner{
166+
Path: path,
167+
Package: &models.PackageInsights{},
168+
ResolvedPurls: map[string]bool{},
169+
ParseFuncs: map[*regexp.Regexp]parseFunc{
170+
regexp.MustCompile(`(\b|/)action\.ya?ml$`): parseGithubActionsMetadata,
171+
regexp.MustCompile(`^\.github/workflows/[^/]+\.ya?ml$`): parseGithubWorkflows,
172+
regexp.MustCompile(`\.?azure-pipelines(-.+)?\.ya?ml$`): parseAzurePipelines,
173+
regexp.MustCompile(`\.?gitlab-ci(-.+)?\.ya?ml$`): parseGitlabCi,
174+
},
175+
}
176+
}
251177

252-
rel_path, err := filepath.Rel(s.Path, path)
253-
if err != nil {
254-
return err
255-
}
256-
data, err := os.ReadFile(path)
257-
if err != nil {
258-
return err
259-
}
178+
// Run calls s.walkAndParse and, if that succeeds, returns the result of
// s.inventory(ctx, o). An error from walkAndParse is returned as is and
// inventory is not called.
func (s *Scanner) Run(ctx context.Context, o *opa.Opa) error {
179+
err := s.walkAndParse()
180+
if err != nil {
181+
return err
182+
}
260183

261-
pipeline := models.AzurePipeline{}
262-
err = yaml.Unmarshal(data, &pipeline)
263-
if err != nil {
264-
return err
265-
}
184+
return s.inventory(ctx, o)
185+
}
266186

267-
if pipeline.IsValid() {
268-
pipeline.Path = rel_path
269-
pipelines = append(pipelines, pipeline)
270-
} else {
271-
log.Debug().Str("file", rel_path).Msg("failed to parse azure pipeline")
187+
func (s *Scanner) walkAndParse() error {
188+
return filepath.Walk(s.Path, func(filePath string, info fs.FileInfo, err error) error {
189+
if err != nil {
190+
return err
191+
}
192+
if info.IsDir() && info.Name() == ".git" {
193+
return filepath.SkipDir
194+
}
195+
if info.IsDir() {
196+
return nil
197+
}
198+
relativePath, err := filepath.Rel(s.Path, filePath)
199+
if err != nil {
200+
log.Error().Err(err).Msg("error getting relative path")
201+
return err
202+
}
203+
for pattern, parseFunc := range s.ParseFuncs {
204+
if pattern.MatchString(relativePath) {
205+
if err := parseFunc(s, filePath, info); err != nil {
206+
log.Error().Err(err).Msg("error parsing file")
207+
// Decide whether to return error or continue processing other files
208+
}
272209
}
210+
}
211+
return nil
212+
})
213+
}
273214

274-
return nil
215+
// inventory evaluates the "data.poutine.queries.inventory.result" query
// through o, passing s.Package as the only element under the "packages" key,
// and copies BuildDependencies and PackageDependencies from the result onto
// s.Package. An error from o.Eval is returned unchanged.
func (s *Scanner) inventory(ctx context.Context, o *opa.Opa) error {
216+
result := opa.InventoryResult{}
217+
err := o.Eval(ctx,
218+
"data.poutine.queries.inventory.result",
219+
map[string]interface{}{
220+
"packages": []interface{}{s.Package},
275221
},
222+
&result,
276223
)
277-
278224
if err != nil {
279-
return nil, err
225+
return err
280226
}
281227

282-
return pipelines, nil
228+
s.Package.BuildDependencies = result.BuildDependencies
229+
s.Package.PackageDependencies = result.PackageDependencies
230+
231+
return nil
283232
}

0 commit comments

Comments
 (0)