Skip to content

Commit 2e4f856

Browse files
authored
Validate required fields (#291)
1 parent c936714 commit 2e4f856

File tree

54 files changed

+1613
-125
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

54 files changed

+1613
-125
lines changed

code/go/internal/validator/semantic/types.go

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,11 @@
55
package semantic
66

77
import (
8+
"fmt"
89
"io/fs"
910
"os"
1011
"path/filepath"
12+
"strings"
1113

1214
"github.com/pkg/errors"
1315
"gopkg.in/yaml.v3"
@@ -16,6 +18,8 @@ import (
1618
"github.com/elastic/package-spec/code/go/internal/fspath"
1719
)
1820

21+
// dataStreamDir is the name of the directory that listDataStreams reads to
// enumerate data streams, and under which the "fields" directory of each data
// stream is looked up by listFieldsFiles.
const dataStreamDir = "data_stream"
22+
1923
type fields []field
2024

2125
type field struct {
@@ -74,18 +78,13 @@ func validateNestedFields(parent string, fieldsFile string, fields fields, valid
7478

7579
func listFieldsFiles(fsys fspath.FS) ([]string, error) {
7680
var fieldsFiles []string
77-
78-
dataStreamDir := "data_stream"
79-
dataStreams, err := fs.ReadDir(fsys, dataStreamDir)
80-
if errors.Is(err, os.ErrNotExist) {
81-
return fieldsFiles, nil
82-
}
81+
dataStreams, err := listDataStreams(fsys)
8382
if err != nil {
84-
return nil, errors.Wrap(err, "can't list data streams directory")
83+
return nil, err
8584
}
8685

8786
for _, dataStream := range dataStreams {
88-
fieldsDir := filepath.Join(dataStreamDir, dataStream.Name(), "fields")
87+
fieldsDir := filepath.Join(dataStreamDir, dataStream, "fields")
8988
fs, err := fs.ReadDir(fsys, fieldsDir)
9089
if errors.Is(err, os.ErrNotExist) {
9190
continue
@@ -115,3 +114,34 @@ func unmarshalFields(fsys fspath.FS, fieldsPath string) (fields, error) {
115114
}
116115
return f, nil
117116
}
117+
118+
// dataStreamFromFieldsPath returns the name of the data stream that owns
// fieldsFile, that is, the first component of the path of fieldsFile relative
// to the "data_stream" directory under pkgRoot.
//
// An error is returned when fieldsFile cannot be made relative to that
// directory, when it is the data streams directory itself or a direct child of
// it (no data stream component can be separated from the rest of the path), or
// when it is located outside of the data streams directory.
func dataStreamFromFieldsPath(pkgRoot, fieldsFile string) (string, error) {
	dataStreamPath := filepath.Clean(filepath.Join(pkgRoot, "data_stream"))
	relPath, err := filepath.Rel(dataStreamPath, filepath.Clean(fieldsFile))
	if err != nil {
		return "", fmt.Errorf("looking for fields file (%s) in data streams path (%s): %w", fieldsFile, dataStreamPath, err)
	}

	// Split only once: the first component is the data stream name, the
	// remainder is the path inside the data stream.
	parts := strings.SplitN(relPath, string(filepath.Separator), 2)

	// filepath.Rel expresses a target outside of the base path with leading
	// ".." components. Such a file does not belong to any data stream, so ".."
	// must not be reported as a data stream name.
	if len(parts) != 2 || parts[0] == ".." {
		return "", fmt.Errorf("could not find data stream for fields file %s", fieldsFile)
	}
	return parts[0], nil
}
132+
133+
func listDataStreams(fsys fspath.FS) ([]string, error) {
134+
dataStreams, err := fs.ReadDir(fsys, dataStreamDir)
135+
if errors.Is(err, os.ErrNotExist) {
136+
return nil, nil
137+
}
138+
if err != nil {
139+
return nil, errors.Wrap(err, "can't list data streams directory")
140+
}
141+
142+
list := make([]string, len(dataStreams))
143+
for i, dataStream := range dataStreams {
144+
list[i] = dataStream.Name()
145+
}
146+
return list, nil
147+
}

code/go/internal/validator/semantic/validate_fields_limits.go

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,6 @@
55
package semantic
66

77
import (
8-
"fmt"
9-
"path/filepath"
10-
"strings"
11-
128
"github.com/pkg/errors"
139

1410
ve "github.com/elastic/package-spec/code/go/internal/errors"
@@ -52,18 +48,3 @@ func validateFieldsLimits(fsys fspath.FS, limit int) ve.ValidationErrors {
5248
}
5349
return errs
5450
}
55-
56-
func dataStreamFromFieldsPath(pkgRoot, fieldsFile string) (string, error) {
57-
dataStreamPath := filepath.Clean(filepath.Join(pkgRoot, "data_stream"))
58-
relPath, err := filepath.Rel(dataStreamPath, filepath.Clean(fieldsFile))
59-
if err != nil {
60-
return "", fmt.Errorf("looking for fields file (%s) in data streams path (%s): %w", fieldsFile, dataStreamPath, err)
61-
}
62-
63-
parts := strings.SplitN(relPath, string(filepath.Separator), 2)
64-
if len(parts) != 2 {
65-
return "", errors.Errorf("could not find data stream for fields file %s", fieldsFile)
66-
}
67-
dataStream := parts[0]
68-
return dataStream, nil
69-
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
2+
// or more contributor license agreements. Licensed under the Elastic License;
3+
// you may not use this file except in compliance with the Elastic License.
4+
5+
package semantic
6+
7+
import (
	"sort"

	ve "github.com/elastic/package-spec/code/go/internal/errors"
	"github.com/elastic/package-spec/code/go/internal/fspath"
	"github.com/pkg/errors"
)
12+
13+
// ValidateRequiredFields validates that required fields are present and have the expected
14+
// types.
15+
func ValidateRequiredFields(fsys fspath.FS) ve.ValidationErrors {
16+
requiredFields := map[string]string{
17+
"data_stream.type": "constant_keyword",
18+
"data_stream.dataset": "constant_keyword",
19+
"data_stream.namespace": "constant_keyword",
20+
"@timestamp": "date",
21+
}
22+
23+
return validateRequiredFields(fsys, requiredFields)
24+
}
25+
26+
func validateRequiredFields(fsys fspath.FS, requiredFields map[string]string) ve.ValidationErrors {
27+
// map datastream -> field name -> found
28+
foundFields := make(map[string]map[string]struct{})
29+
30+
dataStreams, err := listDataStreams(fsys)
31+
if err != nil {
32+
return ve.ValidationErrors{err}
33+
}
34+
35+
checkField := func(fieldsFile string, f field) ve.ValidationErrors {
36+
expectedType, found := requiredFields[f.Name]
37+
if !found {
38+
return nil
39+
}
40+
41+
datastream, err := dataStreamFromFieldsPath(fsys.Path(), fieldsFile)
42+
if err != nil {
43+
return ve.ValidationErrors{err}
44+
}
45+
46+
if _, ok := foundFields[datastream]; !ok {
47+
foundFields[datastream] = make(map[string]struct{})
48+
}
49+
50+
foundFields[datastream][f.Name] = struct{}{}
51+
if f.Type != expectedType {
52+
return ve.ValidationErrors{errors.Errorf("expected type %q for required field %q, found %q in %q", expectedType, f.Name, f.Type, fieldsFile)}
53+
}
54+
55+
return nil
56+
}
57+
errs := validateFields(fsys, checkField)
58+
59+
for _, dataStream := range dataStreams {
60+
dataStreamFields := foundFields[dataStream]
61+
for requiredName, requiredType := range requiredFields {
62+
if _, found := dataStreamFields[requiredName]; !found {
63+
errs = append(errs, errors.Errorf("expected field %q with type %q not found in datastream %q", requiredName, requiredType, dataStream))
64+
}
65+
}
66+
}
67+
68+
return errs
69+
}

code/go/internal/validator/spec.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ func (s Spec) ValidatePackage(pkg Package) ve.ValidationErrors {
7272
semantic.ValidateFieldGroups,
7373
semantic.ValidateFieldsLimits(rootSpec.Limits.FieldsPerDataStreamLimit),
7474
semantic.ValidateDimensionFields,
75+
semantic.ValidateRequiredFields,
7576
}
7677

7778
return rules.validate(&pkg)
Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,12 @@
1-
- name: source
2-
title: Source
3-
group: 2
4-
type: group
5-
fields:
6-
- name: geo.city_name
7-
level: core
8-
type: keyword
9-
description: City name.
10-
ignore_above: 1024
11-
- name: geo.location
12-
level: core
13-
type: geo_point
14-
description: Longitude and latitude.
15-
- name: geo.region_iso_code
16-
level: core
17-
type: keyword
18-
description: Region ISO code.
19-
ignore_above: 1024
20-
- name: geo.region_name
21-
level: core
22-
type: keyword
23-
description: Region name.
24-
ignore_above: 1024
1+
- name: data_stream.type
2+
type: constant_keyword
3+
description: Data stream type.
4+
- name: data_stream.dataset
5+
type: constant_keyword
6+
description: Data stream dataset.
7+
- name: data_stream.namespace
8+
type: constant_keyword
9+
description: Data stream namespace.
10+
- name: '@timestamp'
11+
type: date
12+
description: Event timestamp.

code/go/pkg/validator/validator_test.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212

1313
"github.com/elastic/package-spec/code/go/internal/errors"
1414

15+
"github.com/stretchr/testify/assert"
1516
"github.com/stretchr/testify/require"
1617
)
1718

@@ -201,6 +202,46 @@ func TestValidateBadKibanaIDs(t *testing.T) {
201202
}
202203
}
203204

205+
func TestValidateMissingReqiredFields(t *testing.T) {
206+
tests := map[string][]string{
207+
"good": {},
208+
"missing_required_fields": {
209+
`expected type "constant_keyword" for required field "data_stream.dataset", found "keyword" in "../../../../test/packages/missing_required_fields/data_stream/foo/fields/base-fields.yml"`,
210+
`expected field "data_stream.type" with type "constant_keyword" not found in datastream "foo"`,
211+
},
212+
}
213+
214+
for pkgName, expectedErrors := range tests {
215+
t.Run(pkgName, func(t *testing.T) {
216+
pkgRootPath := filepath.Join("..", "..", "..", "..", "test", "packages", pkgName)
217+
218+
err := ValidateFromPath(pkgRootPath)
219+
if len(expectedErrors) == 0 {
220+
assert.NoError(t, err)
221+
return
222+
}
223+
assert.Error(t, err)
224+
225+
errs, ok := err.(errors.ValidationErrors)
226+
require.True(t, ok)
227+
assert.Len(t, errs, len(expectedErrors))
228+
229+
for _, expectedError := range expectedErrors {
230+
found := false
231+
for _, foundError := range errs {
232+
if foundError.Error() == expectedError {
233+
found = true
234+
break
235+
}
236+
}
237+
if !found {
238+
t.Errorf("expected error: %q", expectedError)
239+
}
240+
}
241+
})
242+
}
243+
}
244+
204245
func TestValidateVersionIntegrity(t *testing.T) {
205246
tests := map[string]string{
206247
"inconsistent_version": "current manifest version doesn't have changelog entry",
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
- name: data_stream.type
2+
type: constant_keyword
3+
description: Data stream type.
4+
- name: data_stream.dataset
5+
type: constant_keyword
6+
description: Data stream dataset.
7+
- name: data_stream.namespace
8+
type: constant_keyword
9+
description: Data stream namespace.
10+
- name: '@timestamp'
11+
type: date
12+
description: Event timestamp.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
- name: data_stream.type
2+
type: constant_keyword
3+
description: Data stream type.
4+
- name: data_stream.dataset
5+
type: constant_keyword
6+
description: Data stream dataset.
7+
- name: data_stream.namespace
8+
type: constant_keyword
9+
description: Data stream namespace.
10+
- name: '@timestamp'
11+
type: date
12+
description: Event timestamp.
Lines changed: 12 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,12 @@
1-
- name: source
2-
title: Source
3-
group: 2
4-
type: group
5-
fields:
6-
- name: geo.city_name
7-
level: core
8-
type: keyword
9-
description: City name.
10-
ignore_above: 1024
11-
- name: geo.location
12-
level: core
13-
type: geo_point
14-
description: Longitude and latitude.
15-
- name: geo.region_iso_code
16-
level: core
17-
type: keyword
18-
description: Region ISO code.
19-
ignore_above: 1024
20-
- name: geo.region_name
21-
level: core
22-
type: keyword
23-
description: Region name.
24-
ignore_above: 1024
1+
- name: data_stream.type
2+
type: constant_keyword
3+
description: Data stream type.
4+
- name: data_stream.dataset
5+
type: constant_keyword
6+
description: Data stream dataset.
7+
- name: data_stream.namespace
8+
type: constant_keyword
9+
description: Data stream namespace.
10+
- name: '@timestamp'
11+
type: date
12+
description: Event timestamp.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
- name: data_stream.type
2+
type: constant_keyword
3+
description: Data stream type.
4+
- name: data_stream.dataset
5+
type: constant_keyword
6+
description: Data stream dataset.
7+
- name: data_stream.namespace
8+
type: constant_keyword
9+
description: Data stream namespace.
10+
- name: '@timestamp'
11+
type: date
12+
description: Event timestamp.

0 commit comments

Comments
 (0)