Skip to content

Commit 564407b

Browse files
try faster appends
1 parent 6a16a1f commit 564407b

File tree

1 file changed

+77
-58
lines changed

1 file changed

+77
-58
lines changed

datadog/serializer.go

Lines changed: 77 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -207,86 +207,105 @@ const (
207207
maxLen = 250 // guard for the StatsD UDP packet size
208208
)
209209

210-
// isTrim returns true if the byte is to be trimmed at the ends.
211-
func isTrim(b byte) bool { return b == '.' || b == '_' || b == '-' }
210+
var shouldTrim [256]bool = [256]bool{
211+
'.': true,
212+
'_': true,
213+
'-': true,
214+
}
212215

213216
// appendSanitizedMetricName converts *any* string into something that StatsD / Graphite
214217
// accepts without complaints.
215218
func appendSanitizedMetricName(dst []byte, raw string) []byte {
216-
nameLen := 0
217-
orig := len(dst)
218219
if raw == "" {
219220
if len(dst) == 0 {
220221
return append(dst, "_unnamed_"...)
221222
}
222223
return dst
223224
}
224-
// ── 1. accent folding (creates one temporary ↴)
225-
// tmp := stripUnicodeAccents([]byte(raw))
226-
227-
// ── 2. run the same ASCII sanitizer, but write into dst
228-
lastWasRepl := false
229-
for i := 0; i < len(raw); i++ {
230-
c := byte(raw[i])
231-
232-
if c < 128 && valid[c] {
233-
// ASCII valid chars
234-
dst = append(dst, c)
235-
nameLen++
236-
lastWasRepl = false
237-
} else if c >= 0xC2 && c <= 0xC3 && i+1 < len(raw) {
238-
// Check for 2-byte UTF-8 sequences that are common accented letters
239-
c2 := byte(raw[i+1])
240-
if c2 >= 0x80 && c2 <= 0xBF { // Valid second byte
241-
// Decode the 2-byte sequence
242-
codepoint := uint16(c&0x1F)<<6 | uint16(c2&0x3F)
243-
244-
// Map common accented characters (U+00C0-U+00FF range)
245-
if codepoint >= 0xC0 && codepoint <= 0xFF {
246-
mapped := accentMap[codepoint]
247-
if valid[mapped] {
225+
orig := len(dst)
226+
227+
// Pre-grow
228+
need := len(raw)
229+
if need > maxLen {
230+
need = maxLen
231+
}
232+
if cap(dst)-len(dst) < need {
233+
nd := make([]byte, len(dst), len(dst)+need)
234+
copy(nd, dst)
235+
dst = nd
236+
}
237+
238+
n := len(raw)
239+
i := 0
240+
lastWasReplacement := false
241+
242+
// Skip leading trim characters ('.', '_', '-') before copying anything into dst
243+
for i < n {
244+
c := raw[i]
245+
if !shouldTrim[c] {
246+
break
247+
}
248+
i++
249+
}
250+
251+
for i < n && (len(dst)-orig) < maxLen {
252+
// Batch ASCII-valid run
253+
remaining := maxLen - (len(dst) - orig)
254+
j := i
255+
limit := i + remaining
256+
if limit > n {
257+
limit = n
258+
}
259+
for j < limit {
260+
c := raw[j]
261+
if c >= 128 || !valid[c] {
262+
break
263+
}
264+
j++
265+
}
266+
if j > i {
267+
dst = append(dst, raw[i:j]...)
268+
lastWasReplacement = false
269+
i = j
270+
continue
271+
}
272+
273+
// Fold common accented letters encoded as 2-byte UTF-8 (U+00C0..U+00FF) via accentMap
274+
c0 := raw[i]
275+
if c0 >= 0xC2 && c0 <= 0xC3 && i+1 < n {
276+
c1 := raw[i+1]
277+
if c1 >= 0x80 && c1 <= 0xBF {
278+
code := uint16(c0&0x1F)<<6 | uint16(c1&0x3F)
279+
if code >= 0xC0 && code <= 0xFF {
280+
mapped := accentMap[code]
281+
if valid[mapped] && (len(dst)-orig) < maxLen {
248282
dst = append(dst, mapped)
249-
nameLen++
250-
lastWasRepl = false
251-
i++ // Skip the second byte
283+
lastWasReplacement = false
284+
i += 2
252285
continue
253286
}
254287
}
255288
}
256-
// If we get here, treat as invalid
257-
if !lastWasRepl {
258-
dst = append(dst, replacement)
259-
nameLen++
260-
lastWasRepl = true
261-
}
262-
} else if !lastWasRepl {
263-
// Everything else (3-byte, 4-byte sequences, invalid chars)
264-
dst = append(dst, replacement)
265-
nameLen++
266-
lastWasRepl = true
267289
}
268290

269-
if nameLen >= maxLen {
270-
break
291+
// Everything else becomes one replacement byte; consecutive ones are collapsed and none is written while the output is still empty
292+
if !lastWasReplacement && len(dst) > orig && (len(dst)-orig) < maxLen {
293+
dst = append(dst, replacement)
294+
lastWasReplacement = true
271295
}
296+
i++
272297
}
273298

274-
// 3. trim leading / trailing '.', '_' or '-'
275-
start, end := orig, len(dst)
276-
for start < end && isTrim(dst[start]) {
277-
start++
278-
}
279-
for end > start && isTrim(dst[end-1]) {
280-
end--
281-
}
282-
283-
// 4. compact if we trimmed something
284-
if start > orig || end < len(dst) {
285-
copy(dst[orig:], dst[start:end])
286-
dst = dst[:orig+(end-start)]
299+
// Trim trailing '.' '_' '-'
300+
for l := len(dst); l > orig; {
301+
c := dst[l-1]
302+
if c != '.' && c != '_' && c != '-' {
303+
break
304+
}
305+
l--
306+
dst = dst[:l]
287307
}
288308

289-
// 5. fallback if everything vanished
290309
if len(dst) == orig {
291310
return append(dst, "_truncated_"...)
292311
}

0 commit comments

Comments
 (0)