Skip to content

Commit cee8240

Browse files
feat(dd-trace-propagation): get rid of regex for replacing and validating invalid headers (#92)
# What does this PR do?

Replaces regex usage in dd-trace-propagation with manual validation of character ranges and trace context headers.

# Motivation

The regex matching shows up in profiles, and hard-coded byte-range checks are very likely to be much more performant for simple character range matching.

# Additional Notes

Anything else we should know when reviewing?
1 parent 3f9048c commit cee8240

File tree

6 files changed

+283
-204
lines changed

6 files changed

+283
-204
lines changed

Cargo.lock

Lines changed: 0 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dd-trace-propagation/Cargo.toml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,6 @@ crate-type = ["lib"]
1313

1414
[dependencies]
1515
dd-trace = { path = "../dd-trace" }
16-
lazy_static = { version = "1.5", default-features = false }
17-
regex = { version = "1.10", default-features = false, features = [
18-
"unicode-case",
19-
] }
2016
serde = { workspace = true, optional = true, features = [
2117
"derive",
2218
] }

dd-trace-propagation/src/context.rs

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/
22
// SPDX-License-Identifier: Apache-2.0
33

4-
use lazy_static::lazy_static;
5-
use regex::Regex;
64
use std::{borrow::Cow, collections::HashMap, str::FromStr, vec};
75

86
use dd_trace::{
@@ -13,11 +11,6 @@ use dd_trace::{
1311

1412
use crate::tracecontext::TRACESTATE_KEY;
1513

16-
lazy_static! {
17-
static ref INVALID_ASCII_CHARACTERS_REGEX: Regex =
18-
Regex::new(r"[^\x20-\x7E]+").expect("failed creating regex");
19-
}
20-
2114
pub const DATADOG_PROPAGATION_TAG_PREFIX: &str = "_dd.p.";
2215

2316
#[derive(Copy, Clone, Default, Debug, PartialEq)]
@@ -166,7 +159,7 @@ impl FromStr for Tracestate {
166159
.trim()
167160
.split(';')
168161
.filter_map(|item| {
169-
if INVALID_ASCII_CHARACTERS_REGEX.is_match(item) {
162+
if !item.as_bytes().iter().all(|c| matches!(c, b' '..=b'~')) {
170163
None
171164
} else {
172165
let mut parts = item.splitn(2, ':');

dd-trace-propagation/src/datadog.rs

Lines changed: 26 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/
22
// SPDX-License-Identifier: Apache-2.0
33

4-
use std::{collections::HashMap, str::FromStr};
5-
6-
use lazy_static::lazy_static;
7-
use regex::Regex;
4+
use std::{collections::HashMap, str::FromStr, sync::LazyLock};
85

96
use crate::{
107
carrier::{Extractor, Injector},
@@ -31,22 +28,15 @@ const DATADOG_TAGS_KEY: &str = "x-datadog-tags";
3128
const DATADOG_PROPAGATION_ERROR_KEY: &str = "_dd.propagation_error";
3229
pub const DATADOG_LAST_PARENT_ID_KEY: &str = "_dd.parent_id";
3330

34-
lazy_static! {
35-
pub static ref INVALID_SEGMENT_REGEX: Regex =
36-
Regex::new(r"^0+$").expect("failed creating regex");
37-
static ref VALID_SAMPLING_DECISION_REGEX: Regex =
38-
Regex::new(r"^-([0-9])$").expect("failed creating regex");
39-
static ref TAG_KEY_REGEX: Regex = Regex::new(r"^_dd\.p\.[\x21-\x2b\x2d-\x7e]+$").expect("failed creating regex"); // ASCII minus spaces and commas
40-
static ref TAG_VALUE_REGEX: Regex = Regex::new(r"^[\x20-\x2b\x2d-\x7e]*$").expect("failed creating regex"); // ASCII minus commas
41-
42-
static ref DATADOG_HEADER_KEYS: [String; 5] = [
31+
static DATADOG_HEADER_KEYS: LazyLock<[String; 5]> = LazyLock::new(|| {
32+
[
4333
DATADOG_TRACE_ID_KEY.to_owned(),
4434
DATADOG_ORIGIN_KEY.to_owned(),
4535
DATADOG_PARENT_ID_KEY.to_owned(),
4636
DATADOG_SAMPLING_PRIORITY_KEY.to_owned(),
47-
DATADOG_TAGS_KEY.to_owned()
48-
];
49-
}
37+
DATADOG_TAGS_KEY.to_owned(),
38+
]
39+
});
5040

5141
pub fn inject(context: &mut SpanContext, carrier: &mut dyn Injector, config: &Config) {
5242
let tags = &mut context.tags;
@@ -168,11 +158,19 @@ fn get_propagation_tags(
168158
}
169159

170160
fn validate_tag_key(key: &str) -> bool {
171-
TAG_KEY_REGEX.is_match(key)
161+
let Some(tail) = key.strip_prefix("_dd.p.") else {
162+
return false;
163+
};
164+
tail.as_bytes()
165+
.iter()
166+
.all(|c| matches!(c, b'!'..=b'+' | b'-'..=b'~'))
172167
}
173168

174169
fn validate_tag_value(value: &str) -> bool {
175-
TAG_VALUE_REGEX.is_match(value)
170+
value
171+
.as_bytes()
172+
.iter()
173+
.all(|c| matches!(c, b' '..=b'+' | b'-'..=b'~'))
176174
}
177175

178176
pub fn extract(carrier: &dyn Extractor, config: &Config) -> Option<SpanContext> {
@@ -238,14 +236,13 @@ fn extract_trace_id(carrier: &dyn Extractor) -> Result<Option<u64>, Error> {
238236
None => return Ok(None),
239237
};
240238

241-
if INVALID_SEGMENT_REGEX.is_match(trace_id) {
239+
let trace_id = trace_id
240+
.parse::<u64>()
241+
.map_err(|_| Error::extract("Failed to decode `trace_id`", "datadog"))?;
242+
if trace_id == 0 {
242243
return Err(Error::extract("Invalid `trace_id` found", "datadog"));
243244
}
244-
245-
trace_id
246-
.parse::<u64>()
247-
.map(Some)
248-
.map_err(|_| Error::extract("Failed to decode `trace_id`", "datadog"))
245+
Ok(Some(trace_id))
249246
}
250247

251248
fn extract_parent_id(carrier: &dyn Extractor) -> Result<Option<u64>, Error> {
@@ -324,7 +321,11 @@ fn validate_sampling_decision(tags: &mut HashMap<String, String>) {
324321
let should_remove =
325322
tags.get(SAMPLING_DECISION_MAKER_TAG_KEY)
326323
.is_some_and(|sampling_decision| {
327-
let is_invalid = !VALID_SAMPLING_DECISION_REGEX.is_match(sampling_decision);
324+
let is_invalid = sampling_decision
325+
.parse::<i8>()
326+
.ok()
327+
.map(|m| m > 0)
328+
.unwrap_or(true);
328329
if is_invalid {
329330
dd_warn!("Failed to decode `_dd.p.dm`: {}", sampling_decision);
330331
}

0 commit comments

Comments
 (0)