
Commit d017330

Validate that fields are defined only once per data stream (#309)
1 parent e866bd5 commit d017330

18 files changed: +217 -2 lines changed

Lines changed: 59 additions & 0 deletions
@@ -0,0 +1,59 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package semantic

import (
	"sort"
	"strings"

	ve "github.com/elastic/package-spec/code/go/internal/errors"
	"github.com/elastic/package-spec/code/go/internal/fspath"
	"github.com/pkg/errors"
)

// ValidateUniqueFields verifies that any field is defined only once on each data stream.
func ValidateUniqueFields(fsys fspath.FS) ve.ValidationErrors {
	// data_stream -> field -> files
	fields := make(map[string]map[string][]string)

	countField := func(fieldsFile string, f field) ve.ValidationErrors {
		if len(f.Fields) > 0 {
			// Don't count groups
			return nil
		}

		dataStream, err := dataStreamFromFieldsPath(fsys.Path(), fieldsFile)
		if err != nil {
			return ve.ValidationErrors{err}
		}

		dsMap, found := fields[dataStream]
		if !found {
			dsMap = make(map[string][]string)
			fields[dataStream] = dsMap
		}
		dsMap[f.Name] = append(dsMap[f.Name], fieldsFile)
		return nil
	}

	err := validateFields(fsys, countField)
	if err != nil {
		return err
	}

	var errs ve.ValidationErrors
	for dataStream, dataStreamFields := range fields {
		for field, files := range dataStreamFields {
			if len(files) > 1 {
				sort.Strings(files)
				errs = append(errs,
					errors.Errorf("field %q is defined multiple times for data stream %q, found in: %s",
						field, dataStream, strings.Join(files, ", ")))
			}
		}
	}
	return errs
}
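
The heart of the new check is the nested map above: data stream name, then field name, then the list of fields files that define it; any field recorded from more than one file is reported. The standalone program below reproduces just that bookkeeping outside the package-spec codebase, using hypothetical file paths modeled on the bad_duplicated_fields test package, to show what the duplicate detection reports.

package main

import (
	"fmt"
	"sort"
	"strings"
)

func main() {
	// data_stream -> field -> files, the same bookkeeping ValidateUniqueFields uses.
	fields := map[string]map[string][]string{}

	record := func(dataStream, name, file string) {
		dsMap, found := fields[dataStream]
		if !found {
			dsMap = map[string][]string{}
			fields[dataStream] = dsMap
		}
		dsMap[name] = append(dsMap[name], file)
	}

	// Hypothetical input modeled on the bad_duplicated_fields test package:
	// "event.dataset" is defined in two fields files of the same data stream.
	record("wrong", "event.dataset", "data_stream/wrong/fields/base-fields.yml")
	record("wrong", "event.dataset", "data_stream/wrong/fields/ecs.yml")
	record("wrong", "@timestamp", "data_stream/wrong/fields/base-fields.yml")

	for dataStream, dataStreamFields := range fields {
		for name, files := range dataStreamFields {
			if len(files) > 1 {
				sort.Strings(files)
				fmt.Printf("field %q is defined multiple times for data stream %q, found in: %s\n",
					name, dataStream, strings.Join(files, ", "))
			}
		}
	}
}

Running it prints a single line for event.dataset, mirroring the error message asserted in the test further down.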

code/go/internal/validator/spec.go

Lines changed: 1 addition & 0 deletions
@@ -71,6 +71,7 @@ func (s Spec) ValidatePackage(pkg Package) ve.ValidationErrors {
		semantic.ValidatePrerelease,
		semantic.ValidateFieldGroups,
		semantic.ValidateFieldsLimits(rootSpec.Limits.FieldsPerDataStreamLimit),
+		semantic.ValidateUniqueFields,
		semantic.ValidateDimensionFields,
		semantic.ValidateRequiredFields,
	}
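
Each entry in this slice, including the new semantic.ValidateUniqueFields, has the same shape: a function that takes the package filesystem and returns validation errors. The snippet below only illustrates that pattern under assumed names (validationRule, runRules); it is not the actual spec.go code, and because it relies on the module's internal packages it only compiles inside package-spec.

package validator

import (
	ve "github.com/elastic/package-spec/code/go/internal/errors"
	"github.com/elastic/package-spec/code/go/internal/fspath"
)

// validationRule is the common shape of the semantic validations listed above.
type validationRule func(fsys fspath.FS) ve.ValidationErrors

// runRules applies every rule in order and accumulates all errors, so one
// failing rule does not hide the findings of the others.
func runRules(fsys fspath.FS, rules []validationRule) ve.ValidationErrors {
	var errs ve.ValidationErrors
	for _, rule := range rules {
		errs = append(errs, rule(fsys)...)
	}
	return errs
}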

code/go/pkg/validator/validator_test.go

Lines changed: 24 additions & 0 deletions
@@ -264,6 +264,30 @@ func TestValidateVersionIntegrity(t *testing.T) {
	}
}

+func TestValidateDuplicatedFields(t *testing.T) {
+	tests := map[string]string{
+		"bad_duplicated_fields": "field \"event.dataset\" is defined multiple times for data stream \"wrong\", found in: ../../../../test/packages/bad_duplicated_fields/data_stream/wrong/fields/base-fields.yml, ../../../../test/packages/bad_duplicated_fields/data_stream/wrong/fields/ecs.yml",
+	}
+
+	for pkgName, expectedErrorMessage := range tests {
+		t.Run(pkgName, func(t *testing.T) {
+			errs := ValidateFromPath(filepath.Join("..", "..", "..", "..", "test", "packages", pkgName))
+			require.Error(t, errs)
+			vErrs, ok := errs.(errors.ValidationErrors)
+			require.True(t, ok)
+
+			assert.Len(t, vErrs, 1)
+
+			var errMessages []string
+			for _, vErr := range vErrs {
+				errMessages = append(errMessages, vErr.Error())
+			}
+			require.Contains(t, errMessages, expectedErrorMessage)
+		})
+	}
+}
+
func requireErrorMessage(t *testing.T, pkgName string, invalidItemsPerFolder map[string][]string, expectedErrorMessage string) {
	pkgRootPath := filepath.Join("..", "..", "..", "..", "test", "packages", pkgName)
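
Outside the test suite, the same check surfaces through the public validator package. A minimal sketch, assuming the public import path below and using a placeholder package path:

package main

import (
	"fmt"

	"github.com/elastic/package-spec/code/go/pkg/validator"
)

func main() {
	// "./my-package" is a placeholder; point it at a package root containing
	// data_stream/*/fields/*.yml files.
	if err := validator.ValidateFromPath("./my-package"); err != nil {
		// A duplicated definition produces an error such as:
		//   field "event.dataset" is defined multiple times for data stream "wrong", found in: ...
		fmt.Println(err)
	}
}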

Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
# newer versions go on top
- version: "0.0.1"
  changes:
    - description: Initial draft of the package
      type: enhancement
      link: https://github.com/elastic/integrations/pull/309
Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
paths:
{{#each paths as |path i|}}
  - {{path}}
{{/each}}
exclude_files: [".gz$"]
processors:
  - add_locale: ~
Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
---
description: Pipeline for processing sample logs
processors:
  - set:
      field: sample_field
      value: "1"
on_failure:
  - set:
      field: error.message
      value: '{{ _ingest.on_failure_message }}'
Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
- name: data_stream.type
  type: constant_keyword
  description: Data stream type.
- name: data_stream.dataset
  type: constant_keyword
  description: Data stream dataset.
- name: data_stream.namespace
  type: constant_keyword
  description: Data stream namespace.
- name: '@timestamp'
  type: date
  description: Event timestamp.
Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
title: "Valid data stream."
type: logs
streams:
  - input: logfile
    title: Sample logs
    description: Collect sample logs
    vars:
      - name: paths
        type: text
        title: Paths
        multi: true
        default:
          - /var/log/*.log
Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
paths:
{{#each paths as |path i|}}
  - {{path}}
{{/each}}
exclude_files: [".gz$"]
processors:
  - add_locale: ~
Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
---
description: Pipeline for processing sample logs
processors:
  - set:
      field: sample_field
      value: "1"
on_failure:
  - set:
      field: error.message
      value: '{{ _ingest.on_failure_message }}'
