diff --git a/CHANGELOG.md b/CHANGELOG.md index 4cb1bc21..c1375b30 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ## Unreleased: mitmproxy_rs next +- Various fixes for Protobuf number encoding. ## 29 April 2025: mitmproxy_rs 0.12.2 diff --git a/mitmproxy-contentviews/src/protobuf/proto_to_yaml.rs b/mitmproxy-contentviews/src/protobuf/proto_to_yaml.rs index 58911d4d..c6482c46 100644 --- a/mitmproxy-contentviews/src/protobuf/proto_to_yaml.rs +++ b/mitmproxy-contentviews/src/protobuf/proto_to_yaml.rs @@ -14,7 +14,8 @@ pub(super) fn message_to_yaml(message: &dyn MessageDyn) -> Value { let mut ret = Mapping::new(); for field in message.descriptor_dyn().fields() { - let key = if field.name().starts_with("unknown_field_") { + let is_unknown_field = field.name().starts_with("unknown_field_"); + let key = if is_unknown_field { Value::from(field.number()) } else { Value::from(field.name()) @@ -28,7 +29,7 @@ pub(super) fn message_to_yaml(message: &dyn MessageDyn) -> Value { let value = match field.get_reflect(message) { ReflectFieldRef::Optional(x) => { if let Some(x) = x.value() { - value_to_yaml(x, field_type) + value_to_yaml(x, field_type, is_unknown_field) } else { continue; } @@ -39,7 +40,7 @@ pub(super) fn message_to_yaml(message: &dyn MessageDyn) -> Value { } Value::Sequence( x.into_iter() - .map(|x| value_to_yaml(x, field_type)) + .map(|x| value_to_yaml(x, field_type, is_unknown_field)) .collect(), ) } @@ -49,7 +50,12 @@ pub(super) fn message_to_yaml(message: &dyn MessageDyn) -> Value { } Value::Mapping( x.into_iter() - .map(|(k, v)| (value_to_yaml(k, field_type), value_to_yaml(v, field_type))) + .map(|(k, v)| { + ( + value_to_yaml(k, field_type, is_unknown_field), + value_to_yaml(v, field_type, is_unknown_field), + ) + }) .collect(), ) } @@ -59,10 +65,10 @@ pub(super) fn message_to_yaml(message: &dyn MessageDyn) -> Value { Value::Mapping(ret) } -fn value_to_yaml(x: ReflectValueRef, field_type: Type) -> Value { +fn value_to_yaml(x: ReflectValueRef, field_type: Type, is_unknown: bool) -> Value { match x { - ReflectValueRef::U32(x) => tag_number(Value::Number(Number::from(x)), field_type), - ReflectValueRef::U64(x) => tag_number(Value::Number(Number::from(x)), field_type), + ReflectValueRef::U32(x) => tag_number(Number::from(x), field_type, is_unknown), + ReflectValueRef::U64(x) => tag_number(Number::from(x), field_type, is_unknown), ReflectValueRef::I32(x) => Value::Number(Number::from(x)), ReflectValueRef::I64(x) => Value::Number(Number::from(x)), ReflectValueRef::F32(x) => Value::Number(Number::from(x)), @@ -81,20 +87,23 @@ fn value_to_yaml(x: ReflectValueRef, field_type: Type) -> Value { } } -fn tag_number(value: Value, field_type: Type) -> Value { +fn tag_number(number: Number, field_type: Type, is_unknown: bool) -> Value { + if !is_unknown { + return Value::Number(number); + } match field_type { TYPE_UINT64 => Value::Tagged(Box::new(TaggedValue { tag: tags::VARINT.clone(), - value, + value: Value::Number(number), })), TYPE_FIXED64 => Value::Tagged(Box::new(TaggedValue { tag: tags::FIXED64.clone(), - value, + value: Value::Number(number), })), TYPE_FIXED32 => Value::Tagged(Box::new(TaggedValue { tag: tags::FIXED32.clone(), - value, + value: Value::Number(number), })), - _ => value, + _ => Value::Number(number), } } diff --git a/mitmproxy-contentviews/src/protobuf/reencode.rs b/mitmproxy-contentviews/src/protobuf/reencode.rs index d2daa8c5..1d00443a 100644 --- a/mitmproxy-contentviews/src/protobuf/reencode.rs +++ b/mitmproxy-contentviews/src/protobuf/reencode.rs @@ -78,17 +78,34 @@ fn add_field(message: &mut dyn MessageDyn, field_num: u32, value: Value) -> anyh .context("Invalid hex string")?; UnknownValue::LengthDelimited(value) } else if t.tag == *tags::FIXED32 { - let value = match t.value { - Value::Number(s) if s.as_u64().is_some() => s.as_u64().unwrap(), - _ => bail!("Fixed32 data is not a u32"), + let Value::Number(n) = t.value else { + bail!("!fixed32 is not a number"); }; - UnknownValue::Fixed32(value as u32) + let value = n + .as_u64() + .map(|n| n as u32) + .or_else(|| n.as_i64().map(|s| s as u32)) + .or_else(|| n.as_f64().map(|f| (f as f32).to_bits())) + .context("Failed to convert !fixed32 value to a valid number")?; + UnknownValue::Fixed32(value) } else if t.tag == *tags::FIXED64 { - let value = match t.value { - Value::Number(s) if s.as_u64().is_some() => s.as_u64().unwrap(), - _ => bail!("Fixed64 data is not a u64"), + let Value::Number(n) = t.value else { + bail!("!fixed64 is not a number"); }; + let value = n + .as_u64() + .or_else(|| n.as_i64().map(|s| s as u64)) + .or_else(|| n.as_f64().map(|f| f.to_bits())) + .context("Failed to convert !fixed64 value to a valid number")?; UnknownValue::Fixed64(value) + } else if t.tag == *tags::ZIGZAG { + let Value::Number(n) = t.value else { + bail!("!sint is not a number"); + }; + let Some(n) = n.as_i64() else { + bail!("!sint is not an integer"); + }; + UnknownValue::Varint(encode_zigzag64(n)) } else { log::info!("Unexpected YAML tag {}, discarding.", t.tag); return add_field(message, field_num, t.value); @@ -154,3 +171,8 @@ fn int_value(n: Number, field: Option<&FieldDescriptor>) -> UnknownValue { UnknownValue::double(n.as_f64().expect("as_f64 never fails")) } } + +// Zigzag-encode a 64-bit integer +fn encode_zigzag64(n: i64) -> u64 { + ((n << 1) ^ (n >> 63)) as u64 +} diff --git a/mitmproxy-contentviews/src/protobuf/view_grpc.rs b/mitmproxy-contentviews/src/protobuf/view_grpc.rs index bcc88941..2021f91b 100644 --- a/mitmproxy-contentviews/src/protobuf/view_grpc.rs +++ b/mitmproxy-contentviews/src/protobuf/view_grpc.rs @@ -116,7 +116,7 @@ mod tests { use super::*; use crate::test::TestMetadata; - const TEST_YAML: &str = "1: 150\n\n---\n\n1: 150\n"; + const TEST_YAML: &str = "1: 150 # !sint: 75\n\n---\n\n1: 150 # !sint: 75\n"; const TEST_GRPC: &[u8] = &[ 0, 0, 0, 0, 3, 8, 150, 1, // first message 0, 0, 0, 0, 3, 8, 150, 1, // second message @@ -149,14 +149,14 @@ mod tests { fn test_prettify_gzip() { let metadata = TestMetadata::default().with_header("grpc-encoding", "gzip"); let res = GRPC.prettify(TEST_GZIP, &metadata).unwrap(); - assert_eq!(res, "1: 150\n"); + assert_eq!(res, "1: 150 # !sint: 75\n"); } #[test] fn test_prettify_deflate() { let metadata = TestMetadata::default().with_header("grpc-encoding", "deflate"); let res = GRPC.prettify(TEST_DEFLATE, &metadata).unwrap(); - assert_eq!(res, "1: 150\n"); + assert_eq!(res, "1: 150 # !sint: 75\n"); } #[test] diff --git a/mitmproxy-contentviews/src/protobuf/view_protobuf.rs b/mitmproxy-contentviews/src/protobuf/view_protobuf.rs index 3667e61a..b96f3388 100644 --- a/mitmproxy-contentviews/src/protobuf/view_protobuf.rs +++ b/mitmproxy-contentviews/src/protobuf/view_protobuf.rs @@ -15,6 +15,7 @@ pub(super) mod tags { pub static BINARY: LazyLock = LazyLock::new(|| Tag::new("binary")); pub static VARINT: LazyLock = LazyLock::new(|| Tag::new("varint")); + pub static ZIGZAG: LazyLock = LazyLock::new(|| Tag::new("sint")); pub static FIXED32: LazyLock = LazyLock::new(|| Tag::new("fixed32")); pub static FIXED64: LazyLock = LazyLock::new(|| Tag::new("fixed64")); @@ -122,30 +123,35 @@ mod tests { }; } - test_roundtrip!(varint, b"\x08\x96\x01", "1: 150\n"); - test_roundtrip!(varint_negative, b"\x08\x0B", "1: 11 # signed: -6\n"); + test_roundtrip!(varint, b"\x08\x96\x01", "1: 150 # !sint: 75\n"); + test_roundtrip!(varint_zigzag_negative, b"\x08\x0B", "1: 11 # !sint: -6\n"); + test_roundtrip!( + varint_int64_negative, + b"\x08\xfe\xff\xff\xff\xff\xff\xff\xff\xff\x01", + "1: -2 # u64: 18446744073709551614\n" + ); test_roundtrip!(binary, b"\x32\x03\x01\x02\x03", "6: !binary '010203'\n"); test_roundtrip!(string, b"\x0A\x05\x68\x65\x6C\x6C\x6F", "1: hello\n"); - test_roundtrip!(nested, b"\x2A\x02\x08\x2A", "5:\n 1: 42\n"); + test_roundtrip!(nested, b"\x2A\x02\x08\x2A", "5:\n 1: 42 # !sint: 21\n"); test_roundtrip!( nested_twice, b"\x2A\x04\x2A\x02\x08\x2A", - "5:\n 5:\n 1: 42\n" + "5:\n 5:\n 1: 42 # !sint: 21\n" ); test_roundtrip!( fixed64, b"\x19\x00\x00\x00\x00\x00\x00\xF0\xBF", - "3: !fixed64 13830554455654793216 # double: -1, i64: -4616189618054758400\n" + "3: !fixed64 -1.0 # u64: 13830554455654793216, i64: -4616189618054758400\n" ); test_roundtrip!( fixed64_positive, b"\x19\x6E\x86\x1B\xF0\xF9\x21\x09\x40", - "3: !fixed64 4614256650576692846 # double: 3.14159\n" + "3: !fixed64 3.14159 # u64: 4614256650576692846\n" ); test_roundtrip!( fixed64_no_float, b"\x19\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", - "3: !fixed64 18446744073709551615 # i64: -1\n" + "3: !fixed64 -1 # u64: 18446744073709551615\n" ); test_roundtrip!( fixed64_positive_no_float, @@ -155,17 +161,17 @@ mod tests { test_roundtrip!( fixed32, b"\x15\x00\x00\x80\xBF", - "2: !fixed32 3212836864 # float: -1, i32: -1082130432\n" + "2: !fixed32 -1.0 # u32: 3212836864, i32: -1082130432\n" ); test_roundtrip!( fixed32_positive, b"\x15\xD0\x0F\x49\x40", - "2: !fixed32 1078530000 # float: 3.14159\n" + "2: !fixed32 3.14159 # u32: 1078530000\n" ); test_roundtrip!( fixed32_no_float, b"\x15\xFF\xFF\xFF\xFF", - "2: !fixed32 4294967295 # i32: -1\n" + "2: !fixed32 -1 # u32: 4294967295\n" ); test_roundtrip!( fixed32_positive_no_float, @@ -182,7 +188,7 @@ mod tests { test_roundtrip!( repeated_varint, b"\x08\x01\x08\x02\x08\x03", - "1:\n- 1 # signed: -1\n- 2\n- 3 # signed: -2\n" + "1:\n- 1 # !sint: -1\n- 2 # !sint: 1\n- 3 # !sint: -2\n" ); #[test] diff --git a/mitmproxy-contentviews/src/protobuf/yaml_to_pretty.rs b/mitmproxy-contentviews/src/protobuf/yaml_to_pretty.rs index e17748c4..943f2f79 100644 --- a/mitmproxy-contentviews/src/protobuf/yaml_to_pretty.rs +++ b/mitmproxy-contentviews/src/protobuf/yaml_to_pretty.rs @@ -1,66 +1,101 @@ /// YAML value => prettified text use crate::protobuf::view_protobuf::tags; use regex::Captures; +use std::fmt::{Display, Formatter}; + +/// Collect all representations of a number and output the "best" one as the YAML value +/// and the rest as comments. +struct NumReprs(Vec<(&'static str, String)>); + +impl NumReprs { + fn new(k: &'static str, v: impl ToString) -> Self { + let mut inst = Self(Vec::with_capacity(3)); + inst.push(k, v); + inst + } + fn push(&mut self, k: &'static str, v: impl ToString) { + self.0.push((k, v.to_string())); + } +} + +impl Display for NumReprs { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + // We first sort by t.len(), which is a hack to make sure that sint is not used + // as the main representation. + let (min_typ, min_val) = self + .0 + .iter() + .min_by_key(|(t, v)| (t.len(), v.len())) + .unwrap(); + let mut i = self.0.iter().filter(|(t, _)| t != min_typ); + + write!(f, "{}", min_val)?; + if let Some((t, v)) = i.next() { + write!(f, " # {}: {}", t, v)?; + } + for (t, v) in i { + write!(f, ", {}: {}", t, v)?; + } + Ok(()) + } +} // Helper method to apply regex replacements to the YAML output pub(super) fn apply_replacements(yaml_str: &str) -> anyhow::Result { // Replace !fixed32 tags with comments showing float and i32 interpretations let with_fixed32 = tags::FIXED32_RE.replace_all(yaml_str, |caps: &Captures| { let value = caps[1].parse::().unwrap_or_default(); + let mut repr = NumReprs::new("u32", value); + let float_value = f32::from_bits(value); - let i32_value = value as i32; + if !float_value.is_nan() { + let mut float = format!("{}", float_value); + if !float.contains(".") { + float.push_str(".0"); + } + repr.push("f32", float); + } - if !float_value.is_nan() && float_value < 0.0 { - format!( - "{} {} # float: {}, i32: {}", - *tags::FIXED32, - value, - float_value, - i32_value - ) - } else if !float_value.is_nan() { - format!("{} {} # float: {}", *tags::FIXED32, value, float_value) - } else if i32_value < 0 { - format!("{} {} # i32: {}", *tags::FIXED32, value, i32_value) - } else { - format!("{} {}", *tags::FIXED32, value) + if value.leading_zeros() == 0 { + repr.push("i32", value as i32); } + format!("{} {}", *tags::FIXED32, repr) }); // Replace !fixed64 tags with comments showing double and i64 interpretations let with_fixed64 = tags::FIXED64_RE.replace_all(&with_fixed32, |caps: &Captures| { let value = caps[1].parse::().unwrap_or_default(); + let mut repr = NumReprs::new("u64", value); + let double_value = f64::from_bits(value); - let i64_value = value as i64; + if !double_value.is_nan() { + let mut double = format!("{}", double_value); + if !double.contains(".") { + double.push_str(".0"); + } + repr.push("f64", double); + } - if !double_value.is_nan() && double_value < 0.0 { - format!( - "{} {} # double: {}, i64: {}", - *tags::FIXED64, - value, - double_value, - i64_value - ) - } else if !double_value.is_nan() { - format!("{} {} # double: {}", *tags::FIXED64, value, double_value) - } else if i64_value < 0 { - format!("{} {} # i64: {}", *tags::FIXED64, value, i64_value) - } else { - format!("{} {}", *tags::FIXED64, value) + if value.leading_zeros() == 0 { + repr.push("i64", value as i64); } + format!("{} {}", *tags::FIXED64, repr) }); // Replace !varint tags with comments showing signed interpretation if different let with_varint = tags::VARINT_RE.replace_all(&with_fixed64, |caps: &Captures| { - let unsigned_value = caps[1].parse::().unwrap_or_default(); - let i64_zigzag = decode_zigzag64(unsigned_value); + let value = caps[1].parse::().unwrap_or_default(); + let mut repr = NumReprs::new("u64", value); - // Only show signed value if it's different from unsigned - if i64_zigzag < 0 { - format!("{} # signed: {}", unsigned_value, i64_zigzag) + if value.leading_zeros() == 0 { + repr.push("i64", value as i64); + // We only show u64 and i64 reprs if the leading bit is a 1. + // It could technically be zigzag, but the odds are quite low. } else { - unsigned_value.to_string() + repr.push("!sint", decode_zigzag64(value)); } + + repr.to_string() }); Ok(with_varint.to_string())