Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions opentelemetry/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
- Align `Baggage.remove()` signature with `.get()` to take the key as a reference
- `Baggage` can't be retrieved from the `Context` directly anymore and needs to be accessed via `context.baggage()`
- `with_baggage()` and `current_with_baggage()` override any existing `Baggage` in the `Context`
- `Baggage` keys can't be empty and don't allow `"(),/:;<=>?@[\]{}` chars (see [RFC7230, Section 3.2.6](https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.6))
- Changed `Context` to use a stack to properly handle out of order dropping of `ContextGuard`. This imposes a limit of `65535` nested contexts on a single thread. See #[2378](https://github.com/open-telemetry/opentelemetry-rust/pull/2284) and #[1887](https://github.com/open-telemetry/opentelemetry-rust/issues/1887).
- Added additional `name: Option<&str>` parameter to the `event_enabled` method
on the `Logger` trait. This allows implementations (SDK, processor, exporters)
Expand Down
8 changes: 4 additions & 4 deletions opentelemetry/benches/baggage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ const MAX_KEY_VALUE_PAIRS: usize = 64;
// Apple M4 Pro
// Total Number of Cores: 14 (10 performance and 4 efficiency)
// Results:
// set_baggage_static_key_value 12 ns
// set_baggage_static_key 28 ns
// set_baggage_dynamic 60 ns
// set_baggage_dynamic_with_metadata 112 ns
// set_baggage_static_key_value 14 ns
// set_baggage_static_key 26 ns
// set_baggage_dynamic 54 ns
// set_baggage_dynamic_with_metadata 75 ns

fn criterion_benchmark(c: &mut Criterion) {
set_baggage_static_key_value(c);
Expand Down
48 changes: 41 additions & 7 deletions opentelemetry/src/baggage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ static DEFAULT_BAGGAGE: OnceLock<Baggage> = OnceLock::new();
const MAX_KEY_VALUE_PAIRS: usize = 64;
const MAX_LEN_OF_ALL_PAIRS: usize = 8192;

// https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.6
const INVALID_ASCII_KEY_CHARS: [u8; 17] = [
b'(', b')', b',', b'/', b':', b';', b'<', b'=', b'>', b'?', b'@', b'[', b'\\', b']', b'{',
b'}', b'"',
];

/// Returns the default baggage, ensuring it is initialized only once.
#[inline]
fn get_default_baggage() -> &'static Baggage {
Expand Down Expand Up @@ -160,16 +166,14 @@ impl Baggage {
S: Into<BaggageMetadata>,
{
let (key, value, metadata) = (key.into(), value.into(), metadata.into());
if !key.as_str().is_ascii() {
return None;
}
let entry_content_len =
key_value_metadata_bytes_size(key.as_str(), value.as_str(), metadata.as_str());
let entries_count = self.inner.len();
match self.inner.entry(key) {
Entry::Occupied(mut occupied_entry) => {
let key_str = occupied_entry.key().as_str();
let entry_content_len =
key_value_metadata_bytes_size(key_str, value.as_str(), metadata.as_str());
let prev_content_len = key_value_metadata_bytes_size(
occupied_entry.key().as_str(),
key_str,
occupied_entry.get().0.as_str(),
occupied_entry.get().1.as_str(),
);
Expand All @@ -181,9 +185,15 @@ impl Baggage {
Some(occupied_entry.insert((value, metadata)))
}
Entry::Vacant(vacant_entry) => {
let key_str = vacant_entry.key().as_str();
if !Self::is_key_valid(key_str.as_bytes()) {
return None;
}
if entries_count == MAX_KEY_VALUE_PAIRS {
return None;
}
let entry_content_len =
key_value_metadata_bytes_size(key_str, value.as_str(), metadata.as_str());
let new_content_len = self.kv_content_len + entry_content_len;
if new_content_len > MAX_LEN_OF_ALL_PAIRS {
return None;
Expand Down Expand Up @@ -215,11 +225,18 @@ impl Baggage {
pub fn iter(&self) -> Iter<'_> {
self.into_iter()
}

fn is_key_valid(key: &[u8]) -> bool {
!key.is_empty()
&& key
.iter()
.all(|b| b.is_ascii() && !INVALID_ASCII_KEY_CHARS.contains(b))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Each character in the key is sequentially validated against the INVALID_ASCII_KEY_CHARS array. Using a bitmask for INVALID_ASCII_KEY_CHARS can optimize this check by enabling constant-time lookups instead of an iterative scan.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ooh nice idea

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@fraillt Please see if you have some perf suggestions.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure how to do it using bitmasks (as these characters are quite scattered and there's no easy binary pattern to match/mask), but usually when there's not so many values to check, it's very efficient to do it with switch/match, like this:

fn is_key_valid2(key: &[u8]) -> bool {
    !key.is_empty()
        && key.iter().all(|b| match b {
            b'!' => true,
            b'"' => false,
            b'#' => true,
            b'$' => true,
            b'%' => true,
            b'&' => true,
            b'\'' => true,
            b'(' => false,
            b')' => false,
            b'*' => true,
            b'+' => true,
            b',' => false,
            b'-' => true,
            b'.' => true,
            b'/' => false,
            b'0' => true,
            b'1' => true,
            b'2' => true,
            b'3' => true,
            b'4' => true,
            b'5' => true,
            b'6' => true,
            b'7' => true,
            b'8' => true,
            b'9' => true,
            b':' => false,
            b';' => false,
            b'<' => false,
            b'=' => false,
            b'>' => false,
            b'?' => false,
            b'@' => false,
            b'A' => true,
            b'B' => true,
            b'C' => true,
            b'D' => true,
            b'E' => true,
            b'F' => true,
            b'G' => true,
            b'H' => true,
            b'I' => true,
            b'J' => true,
            b'K' => true,
            b'L' => true,
            b'M' => true,
            b'N' => true,
            b'O' => true,
            b'P' => true,
            b'Q' => true,
            b'R' => true,
            b'S' => true,
            b'T' => true,
            b'U' => true,
            b'V' => true,
            b'W' => true,
            b'X' => true,
            b'Y' => true,
            b'Z' => true,
            b'[' => false,
            b'\\' => false,
            b']' => false,
            b'^' => true,
            b'_' => true,
            b'`' => true,
            b'a' => true,
            b'b' => true,
            b'c' => true,
            b'd' => true,
            b'e' => true,
            b'f' => true,
            b'g' => true,
            b'h' => true,
            b'i' => true,
            b'j' => true,
            b'k' => true,
            b'l' => true,
            b'm' => true,
            b'n' => true,
            b'o' => true,
            b'p' => true,
            b'q' => true,
            b'r' => true,
            b's' => true,
            b't' => true,
            b'u' => true,
            b'v' => true,
            b'w' => true,
            b'x' => true,
            b'y' => true,
            b'z' => true,
            b'{' => false,
            b'|' => true,
            b'}' => false,
            b'~' => true,
            _ => false,
        })
}

This is equivalent to b.is_ascii_graphic() && !INVALID_ASCII_KEY_CHARS.contains(b), notice that i also ignore control characters (is_ascii_graphic() not is_ascii).

I have tested this is_key_valid2 function, locally on my machine, and I get +3x performance improvement.

}
}

/// Get the number of bytes for one key-value pair
fn key_value_metadata_bytes_size(key: &str, value: &str, metadata: &str) -> usize {
key.bytes().len() + value.bytes().len() + metadata.bytes().len()
key.len() + value.len() + metadata.len()
}

/// An iterator over the entries of a [`Baggage`].
Expand Down Expand Up @@ -619,4 +636,21 @@ mod tests {
baggage.remove("foo");
assert!(baggage.is_empty());
}

#[test]
fn test_insert_invalid_key() {
let mut baggage = Baggage::default();

// empty
baggage.insert("", "1");
assert!(baggage.is_empty());

// non-ascii
baggage.insert("Grüße", "1");
assert!(baggage.is_empty());

// invalid ascii chars
baggage.insert("(example)", "1");
assert!(baggage.is_empty());
}
}
Loading