Skip to content

Commit a962aa5

Browse files
committed
schema validation
1 parent f39cba0 commit a962aa5

30 files changed

+256
-4
lines changed

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ require (
7272
github.com/prometheus/common v0.66.1
7373
github.com/r3labs/diff/v2 v2.15.1
7474
github.com/sanity-io/litter v1.5.8
75+
github.com/santhosh-tekuri/jsonschema/v6 v6.0.2
7576
github.com/segmentio/kafka-go v0.4.48
7677
github.com/shirou/gopsutil/v4 v4.25.8
7778
github.com/sirupsen/logrus v1.9.3

go.sum

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,8 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1
148148
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
149149
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
150150
github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
151+
github.com/dlclark/regexp2 v1.11.0 h1:G/nrcoOa7ZXlpoa/91N3X7mM3r8eIlMBBJZvsz/mxKI=
152+
github.com/dlclark/regexp2 v1.11.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
151153
github.com/docker/go-connections v0.6.0 h1:LlMG9azAe1TqfR7sO+NJttz1gy6KO7VJBh+pMmjSD94=
152154
github.com/docker/go-connections v0.6.0/go.mod h1:AahvXYshr6JgfUJGdDCs2b5EZG/vmaMAntpSFH5BFKE=
153155
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
@@ -508,6 +510,8 @@ github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf
508510
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
509511
github.com/sanity-io/litter v1.5.8 h1:uM/2lKrWdGbRXDrIq08Lh9XtVYoeGtcQxk9rtQ7+rYg=
510512
github.com/sanity-io/litter v1.5.8/go.mod h1:9gzJgR2i4ZpjZHsKvUXIRQVk7P+yM3e+jAF7bU2UI5U=
513+
github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 h1:KRzFb2m7YtdldCEkzs6KqmJw4nqEVZGK7IN2kJkjTuQ=
514+
github.com/santhosh-tekuri/jsonschema/v6 v6.0.2/go.mod h1:JXeL+ps8p7/KNMjDQk3TCwPpBy0wYklyWTfbkIzdIFU=
511515
github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
512516
github.com/segmentio/kafka-go v0.4.48 h1:9jyu9CWK4W5W+SroCe8EffbrRZVqAOkuaLd/ApID4Vs=
513517
github.com/segmentio/kafka-go v0.4.48/go.mod h1:HjF6XbOKh0Pjlkr5GVZxt6CsjjwnmhVOfURM5KMd8qg=

pkg/acquisition/config_test.go

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package acquisition
22

33
import (
4+
"errors"
5+
"io/fs"
46
"os"
57
"path/filepath"
68
"regexp"
@@ -11,13 +13,17 @@ import (
1113

1214
"github.com/stretchr/testify/assert"
1315
"github.com/stretchr/testify/require"
16+
"github.com/goccy/go-yaml"
1417

1518
"github.com/crowdsecurity/crowdsec/pkg/csconfig"
1619
"github.com/crowdsecurity/crowdsec/pkg/cwhub"
1720
"github.com/crowdsecurity/crowdsec/pkg/metrics"
1821
)
1922

20-
var wantErrLineRE = regexp.MustCompile(`(?m)^\s*#\s*wantErr:\s*(.*?)\s*$`)
23+
// Patterns extracting the expectation directives embedded as comments in the
// YAML test fixtures.
var (
	// wantErrLineRE captures the text that follows a "# wantErr:" comment.
	wantErrLineRE = regexp.MustCompile(`(?m)^\s*#\s*wantErr:\s*(.*?)\s*$`)
	// wantSchemaErrLineRE captures the text that follows a "# schemaErr:" comment.
	// Only horizontal whitespace is skipped around the value, so an empty
	// directive captures "" instead of spilling into the next line. The
	// optional \r before $ lets the directive match in fixtures that use CRLF
	// line endings (multi-line $ only matches right before \n).
	wantSchemaErrLineRE = regexp.MustCompile(`(?m)^[ \t]*#[ \t]*schemaErr:[ \t]*([^\r\n]*)[ \t]*\r?$`)
)
2127

2228
func findYAMLFiles(t *testing.T, root string) []string {
2329
t.Helper()
@@ -54,6 +60,17 @@ func wantErrFromYAML(t *testing.T, fileContent []byte) (want string, found bool)
5460
return strings.TrimSpace(string(m[1])), true
5561
}
5662

63+
func wantSchemaErrFromYAML(t *testing.T, fileContent []byte) (want string, found bool) {
64+
t.Helper()
65+
66+
m := wantSchemaErrLineRE.FindSubmatch(fileContent)
67+
if len(m) == 0 {
68+
return "", false
69+
}
70+
71+
return strings.TrimSpace(string(m[1])), true
72+
}
73+
5774
func TestParseSourceConfig(t *testing.T) {
5875
ctx := t.Context()
5976

@@ -63,6 +80,10 @@ func TestParseSourceConfig(t *testing.T) {
6380
expectValid bool
6481
}
6582

83+
type source struct {
84+
Source string
85+
}
86+
6687
// load a configuration, appsec needs it
6788
_, _, err := csconfig.NewConfig("./testdata/config.yaml", false, false, true)
6889
require.NoError(t, err)
@@ -87,6 +108,15 @@ func TestParseSourceConfig(t *testing.T) {
87108
require.NoError(t, err, "read %q", path)
88109

89110
t.Run(filepath.ToSlash(rel), func(t *testing.T) {
111+
var (
112+
so source
113+
schema string
114+
)
115+
116+
if err = yaml.Unmarshal(fileContent, &so); err == nil {
117+
schema = filepath.Join("schemas", so.Source + ".yaml")
118+
}
119+
90120
if runtime.GOOS == "windows" && strings.Contains(path, "journalctl") {
91121
return
92122
}
@@ -97,11 +127,20 @@ func TestParseSourceConfig(t *testing.T) {
97127

98128
wantErr, hasWant := wantErrFromYAML(t, fileContent)
99129

130+
wantSchemaErr, hasWantSchemaErr := wantSchemaErrFromYAML(t, fileContent)
131+
100132
if s.expectValid {
101133
require.False(t, hasWant, "valid config must not include # wantErr: directive")
102134
parsed, err := ParseSourceConfig(ctx, fileContent, metrics.AcquisitionMetricsLevelNone, &hub)
103135
require.NoError(t, err)
104136
require.NotNil(t, parsed)
137+
if schema != "" {
138+
err = ValidateYAML(fileContent, schema)
139+
if !errors.Is(err, fs.ErrNotExist) {
140+
// XXX: ignore missing schema
141+
require.NoError(t, err)
142+
}
143+
}
105144
return
106145
}
107146

@@ -114,6 +153,31 @@ func TestParseSourceConfig(t *testing.T) {
114153
require.Error(t, err, "got no error, expected %q", wantErr)
115154
require.Nil(t, parsed)
116155
assert.Equal(t, wantErr, err.Error())
156+
if schema == "" {
157+
return
158+
}
159+
160+
// schema validation
161+
162+
err = ValidateYAML(fileContent, schema)
163+
if errors.Is(err, fs.ErrNotExist) {
164+
// XXX: ignore missing schema, for now
165+
return
166+
}
167+
168+
// a "schemaErr" comment must be present, even if empty
169+
require.True(t, hasWantSchemaErr, "invalid configurations require an exlicit schemaErr comment. it can be empty string if the schema cannot detect the issue")
170+
switch {
171+
case err == nil && wantSchemaErr != "":
172+
require.Error(t, err, "got no schema error, expected %q", wantSchemaErr)
173+
case err != nil && wantSchemaErr == "":
174+
require.Error(t, err, "got schema error %q, expected nil", err)
175+
case err != nil:
176+
assert.Contains(t, err.Error(), wantSchemaErr)
177+
default:
178+
require.NoError(t, err)
179+
assert.Empty(t, wantSchemaErr)
180+
}
117181
})
118182
}
119183
})

pkg/acquisition/configuration/configuration.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,5 @@ const (
1919
TAIL_MODE = "tail"
2020
CAT_MODE = "cat"
2121
SERVER_MODE = "server" // No difference with tail, just a bit more verbose
22+
// XXX:
2223
)

pkg/acquisition/modules/kafka/config.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ func (s *Source) UnmarshalConfig(yamlConfig []byte) error {
4949

5050
err := yaml.UnmarshalWithOptions(yamlConfig, &s.Config, yaml.Strict())
5151
if err != nil {
52-
return fmt.Errorf("cannot parse %s datasource configuration: %s", s.GetName(), yaml.FormatError(err, false, false))
52+
return fmt.Errorf("cannot parse: %s", yaml.FormatError(err, false, false))
5353
}
5454

5555
if len(s.Config.Brokers) == 0 {

pkg/acquisition/schemaval_test.go

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
package acquisition
2+
3+
import (
4+
"bytes"
5+
"errors"
6+
"fmt"
7+
"os"
8+
"path/filepath"
9+
"sort"
10+
"strings"
11+
12+
"github.com/santhosh-tekuri/jsonschema/v6"
13+
"github.com/goccy/go-yaml"
14+
)
15+
16+
// format a compact error without schema location for testing purposes.
17+
func compactSchemaErr(err error) error {
18+
var ve *jsonschema.ValidationError
19+
if !errors.As(err, &ve) {
20+
return err
21+
}
22+
23+
out := ve.BasicOutput()
24+
if out == nil || len(out.Errors) == 0 {
25+
// Fallback; this may include schema URL, but it's better than losing the error.
26+
return err
27+
}
28+
29+
msgs := make([]string, 0, len(out.Errors))
30+
for _, u := range out.Errors {
31+
if u.Error == nil {
32+
continue
33+
}
34+
loc := u.InstanceLocation
35+
if loc == "" {
36+
loc = "/"
37+
}
38+
msgs = append(msgs, fmt.Sprintf("%s: %s", loc, u.Error.String()))
39+
}
40+
41+
sort.Strings(msgs)
42+
return fmt.Errorf("%s", strings.Join(msgs, "; "))
43+
}
44+
45+
// ValidateYAML validates configYAML against schemaPath.
46+
func ValidateYAML(configYAML []byte, schemaPath string) error {
47+
if schemaPath == "" {
48+
return errors.New("no schema provided")
49+
}
50+
51+
configJSON, err := yaml.YAMLToJSON(configYAML)
52+
if err != nil {
53+
return fmt.Errorf("config: YAML->JSON: %w", err)
54+
}
55+
56+
configDoc, err := jsonschema.UnmarshalJSON(bytes.NewReader(configJSON))
57+
if err != nil {
58+
return fmt.Errorf("config: decode JSON: %w", err)
59+
}
60+
61+
c := jsonschema.NewCompiler()
62+
c.DefaultDraft(jsonschema.Draft2020)
63+
64+
schemaYAML, err := os.ReadFile(schemaPath)
65+
if err != nil {
66+
return fmt.Errorf("read schema %q: %w", schemaPath, err)
67+
}
68+
69+
schemaJSON, err := yaml.YAMLToJSON(schemaYAML)
70+
if err != nil {
71+
return fmt.Errorf("schema %q: YAML->JSON: %w", schemaPath, err)
72+
}
73+
74+
schemaDoc, err := jsonschema.UnmarshalJSON(bytes.NewReader(schemaJSON))
75+
if err != nil {
76+
return fmt.Errorf("schema %q: decode JSON: %w", schemaPath, err)
77+
}
78+
79+
abs, err := filepath.Abs(schemaPath)
80+
if err != nil {
81+
return fmt.Errorf("abs %q: %w", schemaPath, err)
82+
}
83+
84+
if err := c.AddResource(abs, schemaDoc); err != nil {
85+
return fmt.Errorf("add schema resource %q: %w", abs, err)
86+
}
87+
88+
sch, err := c.Compile(abs)
89+
if err != nil {
90+
return fmt.Errorf("compile schema %q: %w", abs, err)
91+
}
92+
93+
if err := sch.Validate(configDoc); err != nil {
94+
var ve *jsonschema.ValidationError
95+
if errors.As(err, &ve) {
96+
return compactSchemaErr(ve)
97+
}
98+
return err
99+
}
100+
101+
return nil
102+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# wantErr: failed to parse: not a valid logrus Level: "toto"
2+
# schemaErr: /log_level: value must be one of 'panic', 'fatal', 'error', 'warn', 'warning', 'info', 'debug', 'trace'
3+
source: appsec
4+
use_container_labels: true
5+
log_level: toto
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# wantErr: failed to parse: not a valid logrus Level: "toto"
2+
# schemaErr: /log_level: value must be one of 'panic', 'fatal', 'error', 'warn', 'warning', 'info', 'debug', 'trace'
3+
source: cloudwatch
4+
use_container_labels: true
5+
log_level: toto
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# wantErr: failed to parse: not a valid logrus Level: "toto"
2+
# schemaErr: /log_level: value must be one of 'panic', 'fatal', 'error', 'warn', 'warning', 'info', 'debug', 'trace'
3+
source: docker
4+
use_container_labels: true
5+
log_level: toto

pkg/acquisition/testdata/invalid/docker/unsupported_mode.yaml renamed to pkg/acquisition/testdata/invalid/docker/common_mode_invalid.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# wantErr: datasource of type docker: unsupported mode server for docker datasource
2+
# schemaErr: /mode: value must be one of 'tail', 'cat'
23
mode: server
34
source: docker
45
container_name:

0 commit comments

Comments
 (0)