Skip to content

Commit 38917cc

Browse files
loewenheimjjbayer
andauthored
feat(eap): Normalize deprecated attributes (#5257)
Co-authored-by: Joris Bayer <[email protected]>
1 parent cf80d55 commit 38917cc

File tree

9 files changed

+222
-29
lines changed

9 files changed

+222
-29
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
- Infer the client ip when set to `{{auto}}` for EAP items. ([#5304](https://github.com/getsentry/relay/pull/5304))
99
- Increase the default size limit for attachments to 200MiB. ([#5310](https://github.com/getsentry/relay/pull/5310))
1010
- Add `sentry.origin` attribute to OTLP spans. ([#5294](https://github.com/getsentry/relay/pull/5294))
11+
- Normalize deprecated attribute names according to `sentry-conventions` for logs and V2 spans. ([#5257](https://github.com/getsentry/relay/pull/5257))
1112

1213
**Breaking Changes**:
1314

relay-event-normalization/src/eap/mod.rs

Lines changed: 173 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,12 @@ use std::net::IpAddr;
77
use chrono::{DateTime, Utc};
88
use relay_common::time::UnixTimestamp;
99
use relay_conventions::{
10-
BROWSER_NAME, BROWSER_VERSION, CLIENT_ADDRESS, OBSERVED_TIMESTAMP_NANOS, USER_AGENT_ORIGINAL,
11-
USER_GEO_CITY, USER_GEO_COUNTRY_CODE, USER_GEO_REGION, USER_GEO_SUBDIVISION,
10+
AttributeInfo, BROWSER_NAME, BROWSER_VERSION, CLIENT_ADDRESS, OBSERVED_TIMESTAMP_NANOS,
11+
USER_AGENT_ORIGINAL, USER_GEO_CITY, USER_GEO_COUNTRY_CODE, USER_GEO_REGION,
12+
USER_GEO_SUBDIVISION, WriteBehavior,
1213
};
1314
use relay_event_schema::protocol::{AttributeType, Attributes, BrowserContext, Geo};
14-
use relay_protocol::{Annotated, ErrorKind, Value};
15+
use relay_protocol::{Annotated, ErrorKind, Meta, Remark, RemarkType, Value};
1516

1617
use crate::{ClientHints, FromUserAgentInfo as _, RawUserAgentInfo};
1718

@@ -164,6 +165,60 @@ pub fn normalize_user_geo(
164165
attributes.insert_if_missing(USER_GEO_REGION, || geo.region);
165166
}
166167

168+
/// Normalizes deprecated attributes according to `sentry-conventions`.
169+
///
170+
/// Attributes with a status of `"normalize"` will be moved to their replacement name.
171+
/// If there is already a value present under the replacement name, it will be left alone,
172+
/// but the deprecated attribute is removed anyway.
173+
///
174+
/// Attributes with a status of `"backfill"` will be copied to their replacement name if the
175+
/// replacement name is not present. In any case, the original name is left alone.
176+
pub fn normalize_attribute_names(attributes: &mut Annotated<Attributes>) {
177+
normalize_attribute_names_inner(attributes, relay_conventions::attribute_info)
178+
}
179+
180+
fn normalize_attribute_names_inner(
181+
attributes: &mut Annotated<Attributes>,
182+
attribute_info: fn(&str) -> Option<&'static AttributeInfo>,
183+
) {
184+
let Some(attributes) = attributes.value_mut() else {
185+
return;
186+
};
187+
188+
let attribute_names: Vec<_> = attributes.keys().cloned().collect();
189+
190+
for name in attribute_names {
191+
let Some(attribute_info) = attribute_info(&name) else {
192+
continue;
193+
};
194+
195+
match attribute_info.write_behavior {
196+
WriteBehavior::CurrentName => continue,
197+
WriteBehavior::NewName(new_name) => {
198+
let Some(old_attribute) = attributes.get_raw_mut(&name) else {
199+
continue;
200+
};
201+
202+
let mut meta = Meta::default();
203+
// TODO: Possibly add a new RemarkType for "renamed/moved"
204+
meta.add_remark(Remark::new(RemarkType::Removed, "attribute.deprecated"));
205+
let new_attribute = std::mem::replace(old_attribute, Annotated(None, meta));
206+
207+
if !attributes.contains_key(new_name) {
208+
attributes.insert_raw(new_name.to_owned(), new_attribute);
209+
}
210+
}
211+
WriteBehavior::BothNames(new_name) => {
212+
if !attributes.contains_key(new_name)
213+
&& let Some(current_attribute) = attributes.get_raw(&name).cloned()
214+
{
215+
attributes.insert_raw(new_name.to_owned(), current_attribute);
216+
}
217+
}
218+
}
219+
}
220+
}
221+
167222
#[cfg(test)]
168223
mod tests {
169224
use relay_protocol::SerializableAnnotated;
@@ -206,14 +261,14 @@ mod tests {
206261
DateTime::from_timestamp_nanos(1_234_201_337),
207262
);
208263

209-
insta::assert_json_snapshot!(SerializableAnnotated(&attributes), @r#"
264+
insta::assert_json_snapshot!(SerializableAnnotated(&attributes), @r###"
210265
{
211266
"sentry.observed_timestamp_nanos": {
212267
"type": "string",
213268
"value": "111222333"
214269
}
215270
}
216-
"#);
271+
"###);
217272
}
218273

219274
#[test]
@@ -496,4 +551,117 @@ mod tests {
496551
"#,
497552
);
498553
}
554+
555+
#[test]
556+
fn test_normalize_attributes() {
557+
fn mock_attribute_info(name: &str) -> Option<&'static AttributeInfo> {
558+
use relay_conventions::Pii;
559+
560+
match name {
561+
"replace.empty" => Some(&AttributeInfo {
562+
write_behavior: WriteBehavior::NewName("replaced"),
563+
pii: Pii::Maybe,
564+
aliases: &["replaced"],
565+
}),
566+
"replace.existing" => Some(&AttributeInfo {
567+
write_behavior: WriteBehavior::NewName("not.replaced"),
568+
pii: Pii::Maybe,
569+
aliases: &["not.replaced"],
570+
}),
571+
"backfill.empty" => Some(&AttributeInfo {
572+
write_behavior: WriteBehavior::BothNames("backfilled"),
573+
pii: Pii::Maybe,
574+
aliases: &["backfilled"],
575+
}),
576+
"backfill.existing" => Some(&AttributeInfo {
577+
write_behavior: WriteBehavior::BothNames("not.backfilled"),
578+
pii: Pii::Maybe,
579+
aliases: &["not.backfilled"],
580+
}),
581+
_ => None,
582+
}
583+
}
584+
585+
let mut attributes = Annotated::new(Attributes::from([
586+
(
587+
"replace.empty".to_owned(),
588+
Annotated::new("Should be moved".to_owned().into()),
589+
),
590+
(
591+
"replace.existing".to_owned(),
592+
Annotated::new("Should be removed".to_owned().into()),
593+
),
594+
(
595+
"not.replaced".to_owned(),
596+
Annotated::new("Should be left alone".to_owned().into()),
597+
),
598+
(
599+
"backfill.empty".to_owned(),
600+
Annotated::new("Should be copied".to_owned().into()),
601+
),
602+
(
603+
"backfill.existing".to_owned(),
604+
Annotated::new("Should be left alone".to_owned().into()),
605+
),
606+
(
607+
"not.backfilled".to_owned(),
608+
Annotated::new("Should be left alone".to_owned().into()),
609+
),
610+
]));
611+
612+
normalize_attribute_names_inner(&mut attributes, mock_attribute_info);
613+
614+
insta::assert_json_snapshot!(SerializableAnnotated(&attributes), @r###"
615+
{
616+
"backfill.empty": {
617+
"type": "string",
618+
"value": "Should be copied"
619+
},
620+
"backfill.existing": {
621+
"type": "string",
622+
"value": "Should be left alone"
623+
},
624+
"backfilled": {
625+
"type": "string",
626+
"value": "Should be copied"
627+
},
628+
"not.backfilled": {
629+
"type": "string",
630+
"value": "Should be left alone"
631+
},
632+
"not.replaced": {
633+
"type": "string",
634+
"value": "Should be left alone"
635+
},
636+
"replace.empty": null,
637+
"replace.existing": null,
638+
"replaced": {
639+
"type": "string",
640+
"value": "Should be moved"
641+
},
642+
"_meta": {
643+
"replace.empty": {
644+
"": {
645+
"rem": [
646+
[
647+
"attribute.deprecated",
648+
"x"
649+
]
650+
]
651+
}
652+
},
653+
"replace.existing": {
654+
"": {
655+
"rem": [
656+
[
657+
"attribute.deprecated",
658+
"x"
659+
]
660+
]
661+
}
662+
}
663+
}
664+
}
665+
"###);
666+
}
499667
}

relay-event-schema/src/protocol/attributes.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,24 @@ impl Attributes {
236236
Some(&self.0.get(key)?.value()?.value.value)
237237
}
238238

239+
/// Returns the annotated attribute with the given key.
240+
pub fn get_raw<Q>(&self, key: &Q) -> Option<&Annotated<Attribute>>
241+
where
242+
String: Borrow<Q>,
243+
Q: Ord + ?Sized,
244+
{
245+
self.0.get(key)
246+
}
247+
248+
/// Mutably returns the annotated attribute with the given key.
249+
pub fn get_raw_mut<Q>(&mut self, key: &Q) -> Option<&mut Annotated<Attribute>>
250+
where
251+
String: Borrow<Q>,
252+
Q: Ord + ?Sized,
253+
{
254+
self.0.get_mut(key)
255+
}
256+
239257
/// Inserts an attribute with the given value into this collection.
240258
pub fn insert<K: Into<String>, V: Into<AttributeValue>>(&mut self, key: K, value: V) {
241259
fn inner(slf: &mut Attributes, key: String, value: AttributeValue) {
@@ -296,6 +314,19 @@ impl Attributes {
296314
) -> std::collections::btree_map::IterMut<'_, String, Annotated<Attribute>> {
297315
self.0.iter_mut()
298316
}
317+
318+
/// Returns an iterator over the keys in this collection.
319+
pub fn keys(&self) -> std::collections::btree_map::Keys<'_, String, Annotated<Attribute>> {
320+
self.0.keys()
321+
}
322+
323+
/// Provides mutable access to an entry in this collection.
324+
pub fn entry(
325+
&mut self,
326+
key: String,
327+
) -> std::collections::btree_map::Entry<'_, String, Annotated<Attribute>> {
328+
self.0.entry(key)
329+
}
299330
}
300331

301332
impl IntoIterator for Attributes {

relay-server/src/processing/logs/process.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,10 +109,11 @@ fn scrub_log(log: &mut Annotated<OurLog>, ctx: Context<'_>) -> Result<()> {
109109

110110
fn normalize_log(log: &mut Annotated<OurLog>, meta: &RequestMeta) -> Result<()> {
111111
if let Some(log) = log.value_mut() {
112+
eap::normalize_attribute_types(&mut log.attributes);
113+
eap::normalize_attribute_names(&mut log.attributes);
112114
eap::normalize_received(&mut log.attributes, meta.received_at());
113115
eap::normalize_client_address(&mut log.attributes, meta.client_addr());
114116
eap::normalize_user_agent(&mut log.attributes, meta.user_agent(), meta.client_hints());
115-
eap::normalize_attribute_types(&mut log.attributes);
116117
}
117118

118119
process_value(

relay-server/src/processing/logs/utils.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use crate::envelope::WithHeader;
44

55
/// Returns the calculated size of a [`OurLog`].
66
///
7-
/// Unlike [`relay_ourlogs::calculate_size`], this access the already manifested byte size
7+
/// Unlike [`relay_ourlogs::calculate_size`], this accesses the already manifested byte size
88
/// of the log, instead of calculating it.
99
///
1010
/// When compiled with debug assertion the function asserts the presence of a manifested byte size.

relay-server/src/processing/spans/process.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,11 @@ fn normalize_span(
8888
// TODO: `validate_span()` (start/end timestamps)
8989

9090
if let Some(span) = span.value_mut() {
91+
eap::normalize_attribute_types(&mut span.attributes);
92+
eap::normalize_attribute_names(&mut span.attributes);
9193
eap::normalize_received(&mut span.attributes, meta.received_at());
9294
eap::normalize_client_address(&mut span.attributes, meta.client_addr());
9395
eap::normalize_user_agent(&mut span.attributes, meta.user_agent(), meta.client_hints());
94-
eap::normalize_attribute_types(&mut span.attributes);
9596
eap::normalize_user_geo(&mut span.attributes, || {
9697
meta.client_addr().and_then(|ip| geo_lookup.lookup(ip))
9798
});

tests/integration/test_ourlogs.py

Lines changed: 8 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -125,16 +125,16 @@ def test_ourlog_multiple_containers_not_allowed(
125125
@pytest.mark.parametrize(
126126
"external_mode,expected_byte_size",
127127
[
128-
# 260 here is a billing relevant metric, do not arbitrarily change it,
128+
# 296 here is a billing relevant metric, do not arbitrarily change it,
129129
# this value is supposed to be static and purely based on data received,
130130
# independent of any normalization.
131-
(None, 260),
131+
(None, 296),
132132
# Same applies as above, a proxy Relay does not need to run normalization.
133-
("proxy", 260),
133+
("proxy", 296),
134134
# If an external Relay/Client makes modifications, sizes can change,
135135
# this is fuzzy due to slight changes in sizes due to added timestamps
136136
# and may need to be adjusted when changing normalization.
137-
("managed", 454),
137+
("managed", 521),
138138
],
139139
)
140140
def test_ourlog_extraction_with_sentry_logs(
@@ -195,6 +195,7 @@ def test_ourlog_extraction_with_sentry_logs(
195195
"string.attribute": {"value": "some string", "type": "string"},
196196
"pii": {"value": "4242 4242 4242 4242", "type": "string"},
197197
"sentry.severity_text": {"value": "info", "type": "string"},
198+
"http.response_content_length": {"value": 17, "type": "integer"},
198199
"unknown_type": {"value": "info", "type": "unknown"},
199200
"broken_type": {"value": "info", "type": "not_a_real_type"},
200201
"mismatched_type": {"value": "some string", "type": "boolean"},
@@ -258,6 +259,8 @@ def test_ourlog_extraction_with_sentry_logs(
258259
"sentry.browser.version": {"stringValue": "2.32"},
259260
"sentry.severity_text": {"stringValue": "info"},
260261
"sentry.payload_size_bytes": {"intValue": mock.ANY},
262+
"http.response_content_length": {"intValue": "17"},
263+
"http.response.body.size": {"intValue": "17"},
261264
"sentry.span_id": {"stringValue": "eee19b7ec3c1b174"},
262265
"string.attribute": {"stringValue": "some string"},
263266
"valid_string_with_other": {"stringValue": "test"},
@@ -540,25 +543,10 @@ def test_ourlog_extraction_default_pii_scrubbing_does_not_scrub_default_attribut
540543
"custom_field": {"stringValue": "[REDACTED]"},
541544
"sentry.body": {"stringValue": "Test log"},
542545
"sentry.severity_text": {"stringValue": "info"},
543-
"sentry.observed_timestamp_nanos": {
544-
"stringValue": time_within_delta(
545-
ts,
546-
delta=timedelta(seconds=1),
547-
expect_resolution="ns",
548-
precision="us",
549-
)
550-
},
551546
"sentry.span_id": {"stringValue": "eee19b7ec3c1b174"},
552547
"sentry.payload_size_bytes": mock.ANY,
553548
"sentry.browser.name": {"stringValue": "Python Requests"},
554-
"sentry.timestamp_precise": {
555-
"intValue": time_within_delta(
556-
ts,
557-
delta=timedelta(seconds=0),
558-
expect_resolution="ns",
559-
precision="us",
560-
)
561-
},
549+
**timestamps(ts),
562550
},
563551
"clientSampleRate": 1.0,
564552
"itemId": mock.ANY,

tests/integration/test_spansv2.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ def test_spansv2_basic(
7272
"attributes": {
7373
"foo": {"value": "bar", "type": "string"},
7474
"invalid": {"value": True, "type": "string"},
75+
"http.response_content_length": {"value": 17, "type": "integer"},
7576
},
7677
},
7778
trace_info={
@@ -87,6 +88,8 @@ def test_spansv2_basic(
8788
"span_id": "eee19b7ec3c1b175",
8889
"attributes": {
8990
"foo": {"type": "string", "value": "bar"},
91+
"http.response_content_length": {"value": 17, "type": "integer"},
92+
"http.response.body.size": {"value": 17, "type": "integer"},
9093
"invalid": None,
9194
"sentry.browser.name": {"type": "string", "value": "Python Requests"},
9295
"sentry.browser.version": {"type": "string", "value": "2.32"},

0 commit comments

Comments
 (0)