Skip to content

Commit dfa206a

Browse files
pkg/logs/processor: use custom marshaler for json payload message
Change jsonPayload Message field type to use encoding.TextMarshaler/TextUnmarshaler to avoid allocation due to conversion of valid UTF-8 bytes to string in the common case. ``` goos: darwin goarch: arm64 pkg: github.com/DataDog/datadog-agent/pkg/logs/processor cpu: Apple M4 Max │ HEAD~1 │ HEAD │ │ sec/op │ sec/op vs base │ JSONEncoder_Encode/valid-16 563.1n ± 17% 543.7n ± 12% -3.45% (p=0.009 n=10) JSONEncoder_Encode/invalid-16 567.0n ± 1% 550.0n ± 1% -3.00% (p=0.000 n=10) geomean 565.0n 546.8n -3.22% │ HEAD~1 │ HEAD │ │ B/op │ B/op vs base │ JSONEncoder_Encode/valid-16 1.127Ki ± 0% 1.017Ki ± 0% -9.79% (p=0.000 n=10) JSONEncoder_Encode/invalid-16 1.127Ki ± 0% 1.017Ki ± 0% -9.79% (p=0.000 n=10) geomean 1.127Ki 1.017Ki -9.79% │ HEAD~1 │ HEAD │ │ allocs/op │ allocs/op vs base │ JSONEncoder_Encode/valid-16 10.000 ± 0% 9.000 ± 0% -10.00% (p=0.000 n=10) JSONEncoder_Encode/invalid-16 10.000 ± 0% 9.000 ± 0% -10.00% (p=0.000 n=10) geomean 10.00 9.000 -10.00% ```
1 parent 615fc90 commit dfa206a

File tree

5 files changed

+50
-18
lines changed

5 files changed

+50
-18
lines changed

cmd/agent/subcommands/analyzelogs/command_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ Auto-discovery IDs:
154154
err := json.Unmarshal(msg.GetContent(), &parsedMessage)
155155
assert.NoError(t, err)
156156

157-
assert.Equal(t, parsedMessage.Message, expectedOutput[i])
157+
assert.Equal(t, parsedMessage.Message.String(), expectedOutput[i])
158158
}
159159

160160
launcher.Stop()
@@ -178,7 +178,7 @@ func TestRunAnalyzeLogsInvalidConfig(t *testing.T) {
178178
- type: exclude_at_match
179179
name: exclude_random
180180
pattern: "datadog-agent"
181-
181+
182182
`, tempLogFile.Name())
183183
tempConfigFile := CreateTestFile(tempDir, "config.yaml", invalidConfig)
184184
assert.NotNil(t, tempConfigFile)

pkg/logs/processor/encoder.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
package processor
88

99
import (
10+
"bytes"
1011
"strings"
1112
"unicode/utf8"
1213

@@ -18,6 +19,36 @@ type Encoder interface {
1819
Encode(msg *message.Message, hostname string) error
1920
}
2021

22+
type ValidUtf8Bytes []byte
23+
24+
func (msg ValidUtf8Bytes) MarshalText() (text []byte, err error) {
25+
if utf8.Valid(msg) {
26+
return msg, nil
27+
}
28+
29+
var buf bytes.Buffer
30+
buf.Grow(len(msg))
31+
32+
for len(msg) > 0 {
33+
r, size := utf8.DecodeRune(msg)
34+
// in case of invalid utf-8, DecodeRune returns (utf8.RuneError, 1)
35+
// and since RuneError is the same as unicode.ReplacementChar
36+
// no need to handle the error explicitly
37+
buf.WriteRune(r)
38+
msg = msg[size:]
39+
}
40+
return buf.Bytes(), nil
41+
}
42+
43+
func (msg *ValidUtf8Bytes) UnmarshalText(text []byte) error {
44+
*msg = bytes.Clone(text)
45+
return nil
46+
}
47+
48+
func (msg ValidUtf8Bytes) String() string {
49+
return string(msg)
50+
}
51+
2152
// toValidUtf8 ensures all characters are UTF-8.
2253
func toValidUtf8(msg []byte) string {
2354
if utf8.Valid(msg) {

pkg/logs/processor/encoder_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"github.com/stretchr/testify/assert"
1717

1818
"github.com/DataDog/agent-payload/v5/pb"
19+
1920
"github.com/DataDog/datadog-agent/comp/logs/agent/config"
2021
"github.com/DataDog/datadog-agent/pkg/logs/message"
2122
"github.com/DataDog/datadog-agent/pkg/logs/sources"

pkg/logs/processor/json.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,13 @@ type jsonEncoder struct{}
2727

2828
// JSON representation of a message.
2929
type jsonPayload struct {
30-
Message string `json:"message"`
31-
Status string `json:"status"`
32-
Timestamp int64 `json:"timestamp"`
33-
Hostname string `json:"hostname"`
34-
Service string `json:"service"`
35-
Source string `json:"ddsource"`
36-
Tags string `json:"ddtags"`
30+
Message ValidUtf8Bytes `json:"message"`
31+
Status string `json:"status"`
32+
Timestamp int64 `json:"timestamp"`
33+
Hostname string `json:"hostname"`
34+
Service string `json:"service"`
35+
Source string `json:"ddsource"`
36+
Tags string `json:"ddtags"`
3737
}
3838

3939
// Encode encodes a message into a JSON byte array.
@@ -48,7 +48,7 @@ func (j *jsonEncoder) Encode(msg *message.Message, hostname string) error {
4848
}
4949

5050
encoded, err := json.Marshal(jsonPayload{
51-
Message: toValidUtf8(msg.GetContent()),
51+
Message: ValidUtf8Bytes(msg.GetContent()),
5252
Status: msg.GetStatus(),
5353
Timestamp: ts.UnixNano() / nanoToMillis,
5454
Hostname: hostname,

pkg/logs/processor/json_serverless_init.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,13 @@ type jsonServerlessInitEncoder struct {
2626

2727
// JSON representation of a message for serverless-init.
2828
type jsonServerlessInitPayload struct {
29-
Message string `json:"message"`
30-
Status string `json:"status"`
31-
Timestamp int64 `json:"timestamp"`
32-
Hostname string `json:"hostname"`
33-
Service string `json:"service,omitempty"`
34-
Source string `json:"ddsource"`
35-
Tags string `json:"ddtags"`
29+
Message ValidUtf8Bytes `json:"message"`
30+
Status string `json:"status"`
31+
Timestamp int64 `json:"timestamp"`
32+
Hostname string `json:"hostname"`
33+
Service string `json:"service,omitempty"`
34+
Source string `json:"ddsource"`
35+
Tags string `json:"ddtags"`
3636
}
3737

3838
// Encode encodes a message into a JSON byte array.
@@ -52,7 +52,7 @@ func (j *jsonServerlessInitEncoder) Encode(msg *message.Message, hostname string
5252
}
5353

5454
encoded, err := json.Marshal(jsonServerlessInitPayload{
55-
Message: toValidUtf8(msg.GetContent()),
55+
Message: ValidUtf8Bytes(msg.GetContent()),
5656
Status: msg.GetStatus(),
5757
Timestamp: ts.UnixNano() / nanoToMillis,
5858
Hostname: hostname,

0 commit comments

Comments
 (0)