Skip to content

Commit cf98131

Browse files
authored
improve datasource validation (goccy/go-yaml) (#3646)
1 parent 4411d82 commit cf98131

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+406
-291
lines changed

.golangci.yml

Lines changed: 8 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -97,22 +97,7 @@ linters:
9797
desc: errors.Wrap() is deprecated in favor of fmt.Errorf()
9898
yaml:
9999
files:
100-
- '!**/pkg/acquisition/acquisition.go'
101-
- '!**/pkg/acquisition/acquisition_test.go'
102-
- '!**/pkg/acquisition/modules/appsec/appsec.go'
103-
- '!**/pkg/acquisition/modules/cloudwatch/cloudwatch.go'
104-
- '!**/pkg/acquisition/modules/docker/docker.go'
105-
- '!**/pkg/acquisition/modules/file/file.go'
106-
- '!**/pkg/acquisition/modules/journalctl/journalctl.go'
107-
- '!**/pkg/acquisition/modules/kafka/kafka.go'
108-
- '!**/pkg/acquisition/modules/kinesis/kinesis.go'
109-
- '!**/pkg/acquisition/modules/kubernetesaudit/k8s_audit.go'
110-
- '!**/pkg/acquisition/modules/loki/loki.go'
111100
- '!**/pkg/acquisition/modules/loki/timestamp_test.go'
112-
- '!**/pkg/acquisition/modules/victorialogs/victorialogs.go'
113-
- '!**/pkg/acquisition/modules/s3/s3.go'
114-
- '!**/pkg/acquisition/modules/syslog/syslog.go'
115-
- '!**/pkg/acquisition/modules/wineventlog/wineventlog_windows.go'
116101
- '!**/pkg/appsec/appsec.go'
117102
- '!**/pkg/appsec/loader.go'
118103
- '!**/pkg/csplugin/broker.go'
@@ -382,19 +367,19 @@ linters:
382367
# tolerate long functions in tests
383368
- linters:
384369
- revive
385-
path: pkg/(.+)_test.go
370+
path: (.+)_test.go
386371
text: 'function-length: .*'
387372

388373
# tolerate long lines in tests
389374
- linters:
390375
- revive
391-
path: pkg/(.+)_test.go
376+
path: (.+)_test.go
392377
text: 'line-length-limit: .*'
393378

394379
# we use t,ctx instead of ctx,t in tests
395380
- linters:
396381
- revive
397-
path: pkg/(.+)_test.go
382+
path: (.+)_test.go
398383
text: 'context-as-argument: context.Context should be the first parameter of a function'
399384

400385
# tolerate deep exit in cobra's OnInitialize, for now
@@ -496,9 +481,12 @@ linters:
496481
- linters:
497482
- noctx
498483
text: "net.LookupHost must not be called"
484+
485+
- linters:
486+
- staticcheck
487+
text: "SA1019: yamlpatch.NewPatcher is deprecated: use csyaml.NewPatcher instead"
488+
499489
paths:
500-
- pkg/yamlpatch/merge.go
501-
- pkg/yamlpatch/merge_test.go
502490
- pkg/time/rate
503491
- pkg/metabase
504492
- third_party$

go.mod

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ require (
2121
github.com/corazawaf/coraza/v3 v3.3.3
2222
github.com/corazawaf/libinjection-go v0.2.2
2323
github.com/crowdsecurity/dlog v0.0.2
24-
github.com/crowdsecurity/go-cs-lib v0.0.20
24+
github.com/crowdsecurity/go-cs-lib v0.0.21-0.20250723121452-e1c07273af36
2525
github.com/crowdsecurity/grokky v0.2.2
2626
github.com/crowdsecurity/machineid v1.0.2
2727
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc
@@ -38,7 +38,8 @@ require (
3838
github.com/go-openapi/swag v0.22.3
3939
github.com/go-openapi/validate v0.20.0
4040
github.com/go-sql-driver/mysql v1.6.0
41-
github.com/goccy/go-yaml v1.11.0
41+
github.com/goccy/go-yaml v1.18.0
42+
github.com/gogo/protobuf v1.3.2 // indirect
4243
github.com/golang-jwt/jwt/v4 v4.5.2
4344
github.com/google/go-querystring v1.1.0
4445
github.com/google/uuid v1.6.0
@@ -129,7 +130,6 @@ require (
129130
github.com/go-playground/validator/v10 v10.23.0 // indirect
130131
github.com/go-stack/stack v1.8.0 // indirect
131132
github.com/goccy/go-json v0.10.4 // indirect
132-
github.com/gogo/protobuf v1.3.2 // indirect
133133
github.com/golang/glog v1.2.4 // indirect
134134
github.com/golang/protobuf v1.5.4 // indirect
135135
github.com/google/go-cmp v0.6.0 // indirect
@@ -214,7 +214,6 @@ require (
214214
golang.org/x/arch v0.12.0 // indirect
215215
golang.org/x/term v0.32.0 // indirect
216216
golang.org/x/time v0.6.0 // indirect
217-
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect
218217
google.golang.org/appengine v1.6.8 // indirect
219218
google.golang.org/genproto/googleapis/rpc v0.0.0-20240814211410-ddb44dafa142 // indirect
220219
gopkg.in/inf.v0 v0.9.1 // indirect

go.sum

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -111,8 +111,8 @@ github.com/crowdsecurity/coraza/v3 v3.3.3-crowdsec.20250609 h1:t0fgUIJ7FnDKQSigu
111111
github.com/crowdsecurity/coraza/v3 v3.3.3-crowdsec.20250609/go.mod h1:q/LGNBRelJdzJZK08U1Rm5cNHv9DKp98p0esMDhJ5tE=
112112
github.com/crowdsecurity/dlog v0.0.2 h1:nj/7jLKO0o8tYn79O+g51ASeGLr5oOVahSoJ6Umq51g=
113113
github.com/crowdsecurity/dlog v0.0.2/go.mod h1:zpv7r+7KXwgVUZnUNjyP22zc/D7LKjyoY02weH2RBbk=
114-
github.com/crowdsecurity/go-cs-lib v0.0.20 h1:9rbuXqBaHBleXW67mytJQKF5KXKreJiI8zBTpAj8S00=
115-
github.com/crowdsecurity/go-cs-lib v0.0.20/go.mod h1:hz2FOHFXc0vWzH78uxo2VebtPQ9Snkbdzy3TMA20tVQ=
114+
github.com/crowdsecurity/go-cs-lib v0.0.21-0.20250723121452-e1c07273af36 h1:06kHMukxynCWhflcySfKmh6qYfDY/NXrOD6mLI8gkss=
115+
github.com/crowdsecurity/go-cs-lib v0.0.21-0.20250723121452-e1c07273af36/go.mod h1:qSsIKFEmRZgRREf3BTWznXDR+e+uLTXo7xSSx5DR6pk=
116116
github.com/crowdsecurity/grokky v0.2.2 h1:yALsI9zqpDArYzmSSxfBq2dhYuGUTKMJq8KOEIAsuo4=
117117
github.com/crowdsecurity/grokky v0.2.2/go.mod h1:33usDIYzGDsgX1kHAThCbseso6JuWNJXOzRQDGXHtWM=
118118
github.com/crowdsecurity/machineid v1.0.2 h1:wpkpsUghJF8Khtmn/tg6GxgdhLA1Xflerh5lirI+bdc=
@@ -300,8 +300,8 @@ github.com/gobuffalo/packr/v2 v2.2.0/go.mod h1:CaAwI0GPIAv+5wKLtv8Afwl+Cm78K/I/V
300300
github.com/gobuffalo/syncx v0.0.0-20190224160051-33c29581e754/go.mod h1:HhnNqWY95UYwwW3uSASeV7vtgYkT2t16hJgV3AEPUpw=
301301
github.com/goccy/go-json v0.10.4 h1:JSwxQzIqKfmFX1swYPpUThQZp/Ka4wzJdK0LWVytLPM=
302302
github.com/goccy/go-json v0.10.4/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
303-
github.com/goccy/go-yaml v1.11.0 h1:n7Z+zx8S9f9KgzG6KtQKf+kwqXZlLNR2F6018Dgau54=
304-
github.com/goccy/go-yaml v1.11.0/go.mod h1:H+mJrWtjPTJAHvRbV09MCK9xYwODM+wRTVFFTWckfng=
303+
github.com/goccy/go-yaml v1.18.0 h1:8W7wMFS12Pcas7KU+VVkaiCng+kG8QiFeFwzFb+rwuw=
304+
github.com/goccy/go-yaml v1.18.0/go.mod h1:XBurs7gK8ATbW4ZPGKgcbrY1Br56PdM69F7LkFRi1kA=
305305
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
306306
github.com/gofrs/uuid v4.0.0+incompatible h1:1SD/1F5pU8p29ybwgQSwpQk+mwdRrXCYuPhW6m+TnJw=
307307
github.com/gofrs/uuid v4.0.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM=
@@ -941,8 +941,6 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T
941941
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
942942
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
943943
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
944-
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk=
945-
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8=
946944
google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
947945
google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM=
948946
google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds=

pkg/acquisition/acquisition.go

Lines changed: 70 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,26 @@
11
package acquisition
22

33
import (
4+
"bytes"
45
"context"
56
"errors"
67
"fmt"
78
"io"
89
"maps"
910
"os"
11+
"slices"
1012
"strings"
1113

1214
"github.com/expr-lang/expr"
1315
"github.com/expr-lang/expr/vm"
16+
"github.com/goccy/go-yaml"
1417
"github.com/google/uuid"
1518
"github.com/prometheus/client_golang/prometheus"
1619
log "github.com/sirupsen/logrus"
1720
tomb "gopkg.in/tomb.v2"
18-
"gopkg.in/yaml.v2"
1921

2022
"github.com/crowdsecurity/go-cs-lib/csstring"
23+
"github.com/crowdsecurity/go-cs-lib/csyaml"
2124
"github.com/crowdsecurity/go-cs-lib/trace"
2225

2326
"github.com/crowdsecurity/crowdsec/pkg/acquisition/configuration"
@@ -70,6 +73,10 @@ func GetDataSourceIface(dataSourceType string) (DataSource, error) {
7073

7174
built, known := component.Built["datasource_"+dataSourceType]
7275

76+
if dataSourceType == "" {
77+
return nil, errors.New("data source type is empty")
78+
}
79+
7380
if !known {
7481
return nil, fmt.Errorf("unknown data source %s", dataSourceType)
7582
}
@@ -114,14 +121,7 @@ func setupLogger(source, name string, level *log.Level) (*log.Entry, error) {
114121
// if the configuration is not valid it returns an error.
115122
// If the datasource can't be run (eg. journalctl not available), it still returns an error which
116123
// can be checked for the appropriate action.
117-
func DataSourceConfigure(commonConfig configuration.DataSourceCommonCfg, metricsLevel int) (DataSource, error) {
118-
// we dump it back to []byte, because we want to decode the yaml blob twice:
119-
// once to DataSourceCommonCfg, and then later to the dedicated type of the datasource
120-
yamlConfig, err := yaml.Marshal(commonConfig)
121-
if err != nil {
122-
return nil, fmt.Errorf("unable to serialize back interface: %w", err)
123-
}
124-
124+
func DataSourceConfigure(commonConfig configuration.DataSourceCommonCfg, yamlConfig []byte, metricsLevel int) (DataSource, error) {
125125
dataSrc, err := GetDataSourceIface(commonConfig.Source)
126126
if err != nil {
127127
return nil, err
@@ -144,23 +144,6 @@ func DataSourceConfigure(commonConfig configuration.DataSourceCommonCfg, metrics
144144
return dataSrc, nil
145145
}
146146

147-
// detectBackwardCompatAcquis: try to magically detect the type for backward compat (type was not mandatory then)
148-
func detectBackwardCompatAcquis(sub configuration.DataSourceCommonCfg) string {
149-
if _, ok := sub.Config["filename"]; ok {
150-
return "file"
151-
}
152-
153-
if _, ok := sub.Config["filenames"]; ok {
154-
return "file"
155-
}
156-
157-
if _, ok := sub.Config["journalctl_filter"]; ok {
158-
return "journalctl"
159-
}
160-
161-
return ""
162-
}
163-
164147
func LoadAcquisitionFromDSN(dsn string, labels map[string]string, transformExpr string) ([]DataSource, error) {
165148
frags := strings.Split(dsn, ":")
166149
if len(frags) == 1 {
@@ -216,6 +199,36 @@ func GetMetricsLevelFromPromCfg(prom *csconfig.PrometheusCfg) int {
216199
return configuration.METRICS_FULL
217200
}
218201

202+
func detectTypes(r io.Reader) ([]string, error) {
203+
collectedKeys, err := csyaml.GetDocumentKeys(r)
204+
if err != nil {
205+
return nil, err
206+
}
207+
208+
ret := make([]string, len(collectedKeys))
209+
210+
for idx, keys := range collectedKeys {
211+
var detected string
212+
213+
switch {
214+
case slices.Contains(keys, "source"):
215+
detected = ""
216+
case slices.Contains(keys, "filename"):
217+
detected = "file"
218+
case slices.Contains(keys, "filenames"):
219+
detected = "file"
220+
case slices.Contains(keys, "journalctl_filter"):
221+
detected = "journalctl"
222+
default:
223+
detected = ""
224+
}
225+
226+
ret[idx] = detected
227+
}
228+
229+
return ret, nil
230+
}
231+
219232
// sourcesFromFile reads and parses one acquisition file into DataSources.
220233
func sourcesFromFile(acquisFile string, metrics_level int) ([]DataSource, error) {
221234
var sources []DataSource
@@ -236,29 +249,31 @@ func sourcesFromFile(acquisFile string, metrics_level int) ([]DataSource, error)
236249

237250
expandedAcquis := csstring.StrictExpand(string(acquisContent), os.LookupEnv)
238251

239-
dec := yaml.NewDecoder(strings.NewReader(expandedAcquis))
240-
dec.SetStrict(true)
252+
detectedTypes, err := detectTypes(strings.NewReader(expandedAcquis))
253+
if err != nil {
254+
return nil, fmt.Errorf("failed to parse %s: %w", yamlFile.Name(), err)
255+
}
256+
257+
documents, err := csyaml.SplitDocuments(strings.NewReader(expandedAcquis))
258+
if err != nil {
259+
return nil, err
260+
}
241261

242262
idx := -1
243263

244-
for {
245-
var sub configuration.DataSourceCommonCfg
246-
264+
for _, yamlDoc := range documents {
247265
idx += 1
248266

249-
err = dec.Decode(&sub)
250-
if err != nil {
251-
if !errors.Is(err, io.EOF) {
252-
return nil, fmt.Errorf("failed to parse %s: %w", acquisFile, err)
253-
}
254-
255-
log.Tracef("End of yaml file")
267+
var sub configuration.DataSourceCommonCfg
256268

257-
break
269+
// can't be strict here, the doc contains specific datasource config too but we won't collect them now.
270+
err := yaml.UnmarshalWithOptions(yamlDoc, &sub)
271+
if err != nil {
272+
return nil, fmt.Errorf("failed to parse %s: %w", yamlFile.Name(), errors.New(yaml.FormatError(err, false, false)))
258273
}
259274

260275
// for backward compat ('type' was not mandatory, detect it)
261-
if guessType := detectBackwardCompatAcquis(sub); guessType != "" {
276+
if guessType := detectedTypes[idx]; guessType != "" {
262277
log.Debugf("datasource type missing in %s (position %d): detected 'source=%s'", acquisFile, idx, guessType)
263278

264279
if sub.Source != "" && sub.Source != guessType {
@@ -267,33 +282,40 @@ func sourcesFromFile(acquisFile string, metrics_level int) ([]DataSource, error)
267282

268283
sub.Source = guessType
269284
}
285+
270286
// it's an empty item, skip it
271-
if len(sub.Labels) == 0 {
272-
if sub.Source == "" {
273-
log.Debugf("skipping empty item in %s", acquisFile)
274-
continue
275-
}
276287

288+
empty, err := csyaml.IsEmptyYAML(bytes.NewReader(yamlDoc))
289+
if err != nil {
290+
return nil, fmt.Errorf("failed to parse %s (position %d): %w", acquisFile, idx, err)
291+
}
292+
293+
if empty {
294+
// there are no keys or only comments, skip the document
295+
continue
296+
}
297+
298+
if len(sub.Labels) == 0 {
277299
if sub.Source != "docker" {
278300
// docker is the only source that can be empty
279301
return nil, fmt.Errorf("missing labels in %s (position %d)", acquisFile, idx)
280302
}
281303
}
282304

283305
if sub.Source == "" {
284-
return nil, fmt.Errorf("data source type is empty ('source') in %s (position %d)", acquisFile, idx)
306+
return nil, fmt.Errorf("missing 'source' field in %s (position %d)", acquisFile, idx)
285307
}
286308

287309
// pre-check that the source is valid
288-
_, err := GetDataSourceIface(sub.Source)
310+
_, err = GetDataSourceIface(sub.Source)
289311
if err != nil {
290312
return nil, fmt.Errorf("in file %s (position %d) - %w", acquisFile, idx, err)
291313
}
292314

293315
uniqueId := uuid.NewString()
294316
sub.UniqueId = uniqueId
295317

296-
src, err := DataSourceConfigure(sub, metrics_level)
318+
src, err := DataSourceConfigure(sub, yamlDoc, metrics_level)
297319
if err != nil {
298320
var dserr *DataSourceUnavailableError
299321
if errors.As(err, &dserr) {
@@ -376,7 +398,6 @@ func copyEvent(evt types.Event, line string) types.Event {
376398

377399
func transform(transformChan chan types.Event, output chan types.Event, acquisTomb *tomb.Tomb, transformRuntime *vm.Program, logger *log.Entry) {
378400
defer trace.CatchPanic("crowdsec/acquis")
379-
380401
logger.Infof("transformer started")
381402

382403
for {

0 commit comments

Comments
 (0)