Skip to content

Commit e17be33

Browse files
authored
Validate that custom labels are utf-8. (#3048)
Also, when the tracer may have truncated a value at a byte boundary (possibly in the middle of a rune), re-truncate it at the preceding rune boundary instead.
1 parent 5781ed6 commit e17be33

File tree

4 files changed

+82
-3
lines changed

4 files changed

+82
-3
lines changed

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,4 +154,4 @@ require (
154154
sigs.k8s.io/yaml v1.3.0 // indirect
155155
)
156156

157-
replace go.opentelemetry.io/ebpf-profiler => github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20250410153125-5a32c1ee04e2
157+
replace go.opentelemetry.io/ebpf-profiler => github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20250410225804-edfa6253a8b2

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,8 +258,8 @@ github.com/opencontainers/runtime-spec v1.1.0 h1:HHUyrt9mwHUjtasSbXSMvs4cyFxh+Bl
258258
github.com/opencontainers/runtime-spec v1.1.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
259259
github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU=
260260
github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec=
261-
github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20250410153125-5a32c1ee04e2 h1:LwYUA746rqSxP2K5zI+6lneLQVjOfwGDi5j6rCVeqJM=
262-
github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20250410153125-5a32c1ee04e2/go.mod h1:G6CwyVZkF/90mTgEu6AVG6Qqn0gfgvpeoFiNniUBaHM=
261+
github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20250410225804-edfa6253a8b2 h1:EZw+hZlMjVJQbRKNr6ZK7PfIzxLr3VMrPdhQagFuz5M=
262+
github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20250410225804-edfa6253a8b2/go.mod h1:G6CwyVZkF/90mTgEu6AVG6Qqn0gfgvpeoFiNniUBaHM=
263263
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0=
264264
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y=
265265
github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ=

reporter/parca_reporter.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"context"
1212
"debug/elf"
1313
"encoding/binary"
14+
"encoding/hex"
1415
"errors"
1516
"fmt"
1617
"io"
@@ -19,6 +20,7 @@ import (
1920
"strings"
2021
"sync"
2122
"time"
23+
"unicode/utf8"
2224

2325
debuginfogrpc "buf.build/gen/go/parca-dev/parca/grpc/go/parca/debuginfo/v1alpha1/debuginfov1alpha1grpc"
2426
profilestoregrpc "buf.build/gen/go/parca-dev/parca/grpc/go/parca/profilestore/v1alpha1/profilestorev1alpha1grpc"
@@ -167,6 +169,39 @@ func hashString(s string) uint32 {
167169

168170
func (r *ParcaReporter) SupportsReportTraceEvent() bool { return true }
169171

172+
// maybeFixTruncation repairs a string that was truncated at a byte
// boundary (at exactly maxLen bytes) so that it ends on a rune
// boundary instead.
//
// Parameters:
//   - s: the possibly-truncated string.
//   - maxLen: the byte length at which truncation would have happened.
//
// Returns:
//   - s unchanged and true if s is already valid UTF-8, whatever its
//     length (this function never shortens a valid string);
//   - s with the trailing partial rune removed and true if s is exactly
//     maxLen bytes long and dropping its last (incomplete) sequence
//     leaves valid UTF-8;
//   - "" and false otherwise.
func maybeFixTruncation(s string, maxLen int) (string, bool) {
	if utf8.ValidString(s) {
		return s, true
	}
	// Only a string that is exactly maxLen bytes long can have been
	// damaged by truncation; anything else is simply invalid.
	if len(s) != maxLen {
		return "", false
	}
	// An encoded rune is at most utf8.UTFMax bytes, so an incomplete one
	// consists of a leading byte followed by at most UTFMax-2 continuation
	// bytes. Walk back over up to UTFMax-1 trailing bytes looking for the
	// first byte that is not a continuation byte (10xxxxxx); that is where
	// the cut-off rune would start. The i <= len(s) guard keeps the index
	// in range for very short inputs.
	for i := 1; i < utf8.UTFMax && i <= len(s); i++ {
		idx := len(s) - i
		if s[idx]&0xC0 == 0x80 {
			continue
		}
		// Drop the partial rune; the remainder must be valid on its own,
		// otherwise the damage is not (only) due to truncation.
		head := s[:idx]
		if !utf8.ValidString(head) {
			return "", false
		}
		return head, true
	}
	return "", false
}
204+
170205
// ReportTraceEvent enqueues reported trace events for the OTLP reporter.
171206
func (r *ParcaReporter) ReportTraceEvent(trace *libpf.Trace,
172207
meta *samples.TraceEventMeta) error {
@@ -196,6 +231,15 @@ func (r *ParcaReporter) ReportTraceEvent(trace *libpf.Trace,
196231
}
197232

198233
for k, v := range trace.CustomLabels {
234+
if !utf8.ValidString(k) {
235+
log.Warnf("ignoring non-UTF8 label: %s", hex.EncodeToString([]byte(k)))
236+
continue
237+
}
238+
v, ok := maybeFixTruncation(v, support.CustomLabelMaxValLen - 1)
239+
if !ok {
240+
log.Warnf("ignoring non-UTF8 value for label %s: %s", k, hex.EncodeToString([]byte(v)))
241+
continue
242+
}
199243
r.sampleWriter.Label(k).AppendString(v)
200244
}
201245

reporter/parca_reporter_test.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
package reporter
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/require"
7+
)
8+
9+
// Multi-byte UTF-8 fixtures for the truncation tests.
const (
	// Chinese uses fullwidth parentheses (3 bytes each), which puts a rune
	// boundary at byte offset 47 and makes a cut at byte 48 fall inside a
	// rune. With ASCII parentheses the nearest boundaries are 46 and 49.
	Chinese string = "Go（又稱Golang[4]）是Google開發的一种静态强类型、編譯型、并发型,并具有垃圾回收功能的编程语言。"
	// Chinese2 has a rune boundary exactly at byte offset 48.
	Chinese2 string = "Linux是一种自由和开放源码的类Unix操作系统。"
)
11+
12+
func TestMaybeFixTruncation(t *testing.T) {
13+
for _, test := range []struct {
14+
s string
15+
result string
16+
ok bool
17+
}{
18+
{"ASCII string", "ASCII string", true},
19+
// truncated, but too early -- can't be valid utf8
20+
{Chinese[0:4], "", false},
21+
// truncated at the limit, in the middle of a rune
22+
{Chinese[0:48], Chinese[0:47], true},
23+
// Too long string that happened to be
24+
// truncated on a rune boundary
25+
{Chinese2[0:48], Chinese2[0:48], true},
26+
// Too long string but valid UTF-8 --
27+
// the function should pass it through unscathed
28+
// (it is not responsible for doing its own truncation)
29+
{Chinese2, Chinese2, true},
30+
} {
31+
result, ok := maybeFixTruncation(test.s, 48)
32+
require.Equal(t, test.result, result)
33+
require.Equal(t, test.ok, ok)
34+
}
35+
}

0 commit comments

Comments
 (0)