@@ -207,86 +207,105 @@ const (
207
207
maxLen = 250 // guard for the StatsD UDP packet size
208
208
)
209
209
210
- // isTrim returns true if the byte is to be trimmed at the ends.
211
- func isTrim (b byte ) bool { return b == '.' || b == '_' || b == '-' }
210
// shouldTrim is a lookup table indexed by byte value. The entries for '.',
// '_' and '-' are true; every other entry keeps the zero value false.
// The array has 256 entries, so indexing it with any byte is always in range.
// appendSanitizedMetricName consults it to skip leading bytes of the raw
// input. The trailing trim in that function compares against the same three
// characters inline instead of using this table, so the two lists must be
// kept in sync.
// NOTE(review): the explicit [256]bool type on the left-hand side repeats the
// type of the composite literal and could be omitted.
+ var shouldTrim [256 ]bool = [256 ]bool {
211
+ '.' : true ,
212
+ '_' : true ,
213
+ '-' : true ,
214
+ }
212
215
213
216
// appendSanitizedMetricName converts *any* string into something that StatsD / Graphite
214
217
// accepts without complaints.
215
218
func appendSanitizedMetricName (dst []byte , raw string ) []byte {
216
- nameLen := 0
217
- orig := len (dst )
218
219
if raw == "" {
219
220
if len (dst ) == 0 {
220
221
return append (dst , "_unnamed_" ... )
221
222
}
222
223
return dst
223
224
}
224
- // ── 1. accent folding (creates one temporary ↴)
225
- // tmp := stripUnicodeAccents([]byte(raw))
226
-
227
- // ── 2. run the same ASCII sanitizer, but write into dst
228
- lastWasRepl := false
229
- for i := 0 ; i < len (raw ); i ++ {
230
- c := byte (raw [i ])
231
-
232
- if c < 128 && valid [c ] {
233
- // ASCII valid chars
234
- dst = append (dst , c )
235
- nameLen ++
236
- lastWasRepl = false
237
- } else if c >= 0xC2 && c <= 0xC3 && i + 1 < len (raw ) {
238
- // Check for 2-byte UTF-8 sequences that are common accented letters
239
- c2 := byte (raw [i + 1 ])
240
- if c2 >= 0x80 && c2 <= 0xBF { // Valid second byte
241
- // Decode the 2-byte sequence
242
- codepoint := uint16 (c & 0x1F )<< 6 | uint16 (c2 & 0x3F )
243
-
244
- // Map common accented characters (U+00C0-U+00FF range)
245
- if codepoint >= 0xC0 && codepoint <= 0xFF {
246
- mapped := accentMap [codepoint ]
247
- if valid [mapped ] {
225
+ orig := len (dst )
226
+
227
+ // Pre-grow
228
+ need := len (raw )
229
+ if need > maxLen {
230
+ need = maxLen
231
+ }
232
+ if cap (dst )- len (dst ) < need {
233
+ nd := make ([]byte , len (dst ), len (dst )+ need )
234
+ copy (nd , dst )
235
+ dst = nd
236
+ }
237
+
238
+ n := len (raw )
239
+ i := 0
240
+ lastWasReplacement := false
241
+
242
+ // Skip leading trim while building
243
+ for i < n {
244
+ c := raw [i ]
245
+ if ! shouldTrim [c ] {
246
+ break
247
+ }
248
+ i ++
249
+ }
250
+
251
+ for i < n && (len (dst )- orig ) < maxLen {
252
+ // Batch ASCII-valid run
253
+ remaining := maxLen - (len (dst ) - orig )
254
+ j := i
255
+ limit := i + remaining
256
+ if limit > n {
257
+ limit = n
258
+ }
259
+ for j < limit {
260
+ c := raw [j ]
261
+ if c >= 128 || ! valid [c ] {
262
+ break
263
+ }
264
+ j ++
265
+ }
266
+ if j > i {
267
+ dst = append (dst , raw [i :j ]... )
268
+ lastWasReplacement = false
269
+ i = j
270
+ continue
271
+ }
272
+
273
+ // 2-byte common accent folding
274
+ c0 := raw [i ]
275
+ if c0 >= 0xC2 && c0 <= 0xC3 && i + 1 < n {
276
+ c1 := raw [i + 1 ]
277
+ if c1 >= 0x80 && c1 <= 0xBF {
278
+ code := uint16 (c0 & 0x1F )<< 6 | uint16 (c1 & 0x3F )
279
+ if code >= 0xC0 && code <= 0xFF {
280
+ mapped := accentMap [code ]
281
+ if valid [mapped ] && (len (dst )- orig ) < maxLen {
248
282
dst = append (dst , mapped )
249
- nameLen ++
250
- lastWasRepl = false
251
- i ++ // Skip the second byte
283
+ lastWasReplacement = false
284
+ i += 2
252
285
continue
253
286
}
254
287
}
255
288
}
256
- // If we get here, treat as invalid
257
- if ! lastWasRepl {
258
- dst = append (dst , replacement )
259
- nameLen ++
260
- lastWasRepl = true
261
- }
262
- } else if ! lastWasRepl {
263
- // Everything else (3-byte, 4-byte sequences, invalid chars)
264
- dst = append (dst , replacement )
265
- nameLen ++
266
- lastWasRepl = true
267
289
}
268
290
269
- if nameLen >= maxLen {
270
- break
291
+ // Replacement for everything else
292
+ if ! lastWasReplacement && len (dst ) > orig && (len (dst )- orig ) < maxLen {
293
+ dst = append (dst , replacement )
294
+ lastWasReplacement = true
271
295
}
296
+ i ++
272
297
}
273
298
274
- // 3. trim leading / trailing '.', '_' or '-'
275
- start , end := orig , len (dst )
276
- for start < end && isTrim (dst [start ]) {
277
- start ++
278
- }
279
- for end > start && isTrim (dst [end - 1 ]) {
280
- end --
281
- }
282
-
283
- // 4. compact if we trimmed something
284
- if start > orig || end < len (dst ) {
285
- copy (dst [orig :], dst [start :end ])
286
- dst = dst [:orig + (end - start )]
299
+ // Trim trailing '.' '_' '-'
300
+ for l := len (dst ); l > orig ; {
301
+ c := dst [l - 1 ]
302
+ if c != '.' && c != '_' && c != '-' {
303
+ break
304
+ }
305
+ l --
306
+ dst = dst [:l ]
287
307
}
288
308
289
- // 5. fallback if everything vanished
290
309
if len (dst ) == orig {
291
310
return append (dst , "_truncated_" ... )
292
311
}
0 commit comments