From e3882ef1eabfaa76f3ff8eb7e41593624bbed9eb Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Wed, 12 Mar 2025 23:26:27 +0100 Subject: [PATCH 01/26] add hex contentview --- mitmproxy-rs/src/contentview.rs | 31 +++++++++++++ mitmproxy-rs/src/lib.rs | 13 ++++++ src/contentviews/mod.rs | 81 +++++++++++++++++++++++++++++++++ src/lib.rs | 1 + 4 files changed, 126 insertions(+) create mode 100644 mitmproxy-rs/src/contentview.rs create mode 100644 src/contentviews/mod.rs diff --git a/mitmproxy-rs/src/contentview.rs b/mitmproxy-rs/src/contentview.rs new file mode 100644 index 00000000..dac40b24 --- /dev/null +++ b/mitmproxy-rs/src/contentview.rs @@ -0,0 +1,31 @@ +use anyhow::Result; +use mitmproxy::contentviews::Contentview; +use pyo3::prelude::*; + +#[pyclass] +pub struct PyContentview(&'static dyn Contentview); + +impl PyContentview { + pub fn new<'py>( + py: Python<'py>, + contentview: &'static dyn Contentview, + ) -> PyResult> { + PyContentview(contentview).into_pyobject(py) + } +} + +#[pymethods] +impl PyContentview { + #[getter] + pub fn name(&self) -> &str { + self.0.name() + } + + pub fn deserialize<'py>(&self, data: Vec) -> Result { + self.0.deserialize(data) + } + + fn __repr__(&self) -> PyResult { + Ok(format!("<{} Contentview>", self.0.name())) + } +} diff --git a/mitmproxy-rs/src/lib.rs b/mitmproxy-rs/src/lib.rs index bde795bd..0b85a9c3 100644 --- a/mitmproxy-rs/src/lib.rs +++ b/mitmproxy-rs/src/lib.rs @@ -5,6 +5,7 @@ use std::sync::RwLock; use once_cell::sync::Lazy; use pyo3::{exceptions::PyException, prelude::*}; +mod contentview; mod dns_resolver; mod process_info; mod server; @@ -81,6 +82,18 @@ mod mitmproxy_rs { use crate::util::{genkey, pubkey}; } + #[pymodule] + mod contentviews { + use super::*; + use crate::contentview::PyContentview; + use mitmproxy::contentviews::*; + + #[pymodule_init] + fn init(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add("hex", PyContentview::new(m.py(), &HexStream())?) 
+ } + } + #[pymodule_export] use crate::stream::Stream; diff --git a/src/contentviews/mod.rs b/src/contentviews/mod.rs new file mode 100644 index 00000000..c8055091 --- /dev/null +++ b/src/contentviews/mod.rs @@ -0,0 +1,81 @@ +use anyhow::Result; +use pretty_hex::{HexConfig, PrettyHex}; +use std::num::ParseIntError; + +#[derive(Debug)] +pub enum SerializeError { + InvalidFormat(String), +} + +pub trait Contentview: Send + Sync { + fn name(&self) -> &str; + fn deserialize(&self, data: Vec) -> Result; +} + +pub trait SerializableContentview: Contentview { + fn serialize(&self, data: String) -> Result, SerializeError>; +} + +#[derive(Default)] +pub struct HexStream(); + +impl Contentview for HexStream { + fn name(&self) -> &str { + "HexStream" + } + + fn deserialize(&self, data: Vec) -> Result { + Ok(data + .hex_conf(HexConfig { + title: false, + ascii: false, + width: 0, + group: 0, + chunk: 0, + max_bytes: usize::MAX, + display_offset: 0, + }) + .to_string()) + } +} + +impl SerializableContentview for HexStream { + fn serialize(&self, data: String) -> Result, SerializeError> { + (0..data.len()) + .step_by(2) + .map(|i| u8::from_str_radix(&data[i..i + 2], 16)) + .collect::, ParseIntError>>() + .map_err(|e| { + SerializeError::InvalidFormat(format!("Failed to parse hex string: {}", e)) + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_hexstream_deserialize() { + let hex_stream = HexStream::default(); + let data = b"foo".to_vec(); + let result = hex_stream.deserialize(data).unwrap(); + assert_eq!(result, "666f6f"); + } + + #[test] + fn test_hexstream_deserialize_empty() { + let hex_stream = HexStream::default(); + let data = vec![]; + let result = hex_stream.deserialize(data).unwrap(); + assert_eq!(result, ""); + } + + #[test] + fn test_hexstream_serialize() { + let hex_stream = HexStream::default(); + let data = "666f6f".to_string(); + let result = hex_stream.serialize(data).unwrap(); + assert_eq!(result, b"foo"); + } +} diff --git 
a/src/lib.rs b/src/lib.rs index ae3a8748..437e1cf7 100755 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ pub use network::MAX_PACKET_SIZE; pub mod certificates; +pub mod contentviews; pub mod dns; pub mod intercept_conf; pub mod ipc; From 36166b1cfbd091e08fdcd1b9a3bb0c5d2d41e4dc Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Fri, 14 Mar 2025 00:47:07 +0100 Subject: [PATCH 02/26] make contentviews interactive --- mitmproxy-rs/mitmproxy_rs/__init__.pyi | 3 +- mitmproxy-rs/mitmproxy_rs/contentviews.pyi | 22 +++++ mitmproxy-rs/src/contentview.rs | 54 +++++++++--- mitmproxy-rs/src/lib.rs | 29 ++++++- src/contentviews/hex_dump.rs | 47 +++++++++++ src/contentviews/hex_stream.rs | 61 ++++++++++++++ src/contentviews/mod.rs | 97 ++++++++-------------- 7 files changed, 236 insertions(+), 77 deletions(-) create mode 100644 mitmproxy-rs/mitmproxy_rs/contentviews.pyi create mode 100644 src/contentviews/hex_dump.rs create mode 100644 src/contentviews/hex_stream.rs diff --git a/mitmproxy-rs/mitmproxy_rs/__init__.pyi b/mitmproxy-rs/mitmproxy_rs/__init__.pyi index 75071a38..56504dfd 100644 --- a/mitmproxy-rs/mitmproxy_rs/__init__.pyi +++ b/mitmproxy-rs/mitmproxy_rs/__init__.pyi @@ -2,7 +2,7 @@ from __future__ import annotations from typing import Any, Literal from typing import final, overload, TypeVar -from . import certs, dns, local, process_info, tun, udp, wireguard +from . 
import certs, contentviews, dns, local, process_info, tun, udp, wireguard T = TypeVar("T") @@ -57,6 +57,7 @@ class Stream: __all__ = [ "certs", + "contentviews", "dns", "local", "process_info", diff --git a/mitmproxy-rs/mitmproxy_rs/contentviews.pyi b/mitmproxy-rs/mitmproxy_rs/contentviews.pyi new file mode 100644 index 00000000..ab56aa67 --- /dev/null +++ b/mitmproxy-rs/mitmproxy_rs/contentviews.pyi @@ -0,0 +1,22 @@ +from typing import ClassVar, final + +class Contentview: + name: ClassVar[str] + + def deserialize(self, data: bytes) -> str: + pass + +@final +class InteractiveContentview(Contentview): + def serialize(self, data: str) -> bytes: + pass + +hex_dump: Contentview +hex_stream: InteractiveContentview + +__all__ = [ + "Contentview", + "InteractiveContentview", + "hex_dump", + "hex_stream", +] diff --git a/mitmproxy-rs/src/contentview.rs b/mitmproxy-rs/src/contentview.rs index dac40b24..666a05dc 100644 --- a/mitmproxy-rs/src/contentview.rs +++ b/mitmproxy-rs/src/contentview.rs @@ -1,31 +1,65 @@ -use anyhow::Result; -use mitmproxy::contentviews::Contentview; +use anyhow::{anyhow, Result}; +use mitmproxy::contentviews::{Prettify, Reencode}; use pyo3::prelude::*; -#[pyclass] -pub struct PyContentview(&'static dyn Contentview); +#[pyclass(frozen, module = "mitmproxy_rs.contentviews", subclass)] +pub struct Contentview(&'static dyn Prettify); -impl PyContentview { +impl Contentview { pub fn new<'py>( py: Python<'py>, - contentview: &'static dyn Contentview, + contentview: &'static dyn Prettify, ) -> PyResult> { - PyContentview(contentview).into_pyobject(py) + Contentview(contentview).into_pyobject(py) } } #[pymethods] -impl PyContentview { +impl Contentview { + /// The name of this contentview. #[getter] pub fn name(&self) -> &str { self.0.name() } + /// Pretty-print an (encoded) message. 
pub fn deserialize<'py>(&self, data: Vec) -> Result { - self.0.deserialize(data) + self.0.deserialize(data).map_err(|e| anyhow!("{e}")) } fn __repr__(&self) -> PyResult { - Ok(format!("<{} Contentview>", self.0.name())) + Ok(format!( + "", + self.0.name() + )) + } +} + +#[pyclass(frozen, module = "mitmproxy_rs.contentviews", extends=Contentview)] +pub struct InteractiveContentview(&'static dyn Reencode); + +impl InteractiveContentview { + /// Argument passed twice because of https://github.com/rust-lang/rust/issues/65991 + pub fn new<'py, T: Prettify + Reencode>( + py: Python<'py>, + cv: &'static T, + ) -> PyResult> { + let cls = + PyClassInitializer::from(Contentview(cv)).add_subclass(InteractiveContentview(cv)); + Bound::new(py, cls) + } +} + +#[pymethods] +impl InteractiveContentview { + pub fn serialize<'py>(&self, data: String) -> Result> { + self.0.serialize(data).map_err(|e| anyhow!("{e}")) + } + + fn __repr__(self_: PyRef<'_, Self>) -> PyResult { + Ok(format!( + "", + self_.as_super().name() + )) } } diff --git a/mitmproxy-rs/src/lib.rs b/mitmproxy-rs/src/lib.rs index 0b85a9c3..fb371ef1 100644 --- a/mitmproxy-rs/src/lib.rs +++ b/mitmproxy-rs/src/lib.rs @@ -2,6 +2,8 @@ extern crate core; use std::sync::RwLock; +use crate::contentview::{Contentview, InteractiveContentview}; +use mitmproxy::contentviews::{Prettify, Reencode}; use once_cell::sync::Lazy; use pyo3::{exceptions::PyException, prelude::*}; @@ -85,12 +87,17 @@ mod mitmproxy_rs { #[pymodule] mod contentviews { use super::*; - use crate::contentview::PyContentview; - use mitmproxy::contentviews::*; + #[pymodule_export] + use crate::contentview::Contentview; + #[pymodule_export] + use crate::contentview::InteractiveContentview; + use mitmproxy::contentviews::{HexDump, HexStream}; #[pymodule_init] fn init(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add("hex", PyContentview::new(m.py(), &HexStream())?) 
+ m.add_contentview(&HexDump)?; + m.add_interactive_contentview(&HexStream)?; + Ok(()) } } @@ -118,3 +125,19 @@ mod mitmproxy_rs { Ok(()) } } + +trait AddContentview { + fn add_contentview(&self, cv: &'static T) -> PyResult<()>; + fn add_interactive_contentview(&self, i: &'static T) -> PyResult<()>; +} + +impl AddContentview for Bound<'_, PyModule> { + fn add_contentview(&self, cv: &'static T) -> PyResult<()> { + let view = Contentview::new(self.py(), cv)?; + self.add(cv.instance_name(), view) + } + fn add_interactive_contentview(&self, cv: &'static T) -> PyResult<()> { + let view = InteractiveContentview::new(self.py(), cv)?; + self.add(cv.instance_name(), view) + } +} diff --git a/src/contentviews/hex_dump.rs b/src/contentviews/hex_dump.rs new file mode 100644 index 00000000..0ab463ad --- /dev/null +++ b/src/contentviews/hex_dump.rs @@ -0,0 +1,47 @@ +use crate::contentviews::{Prettify, PrettifyError}; +use pretty_hex::{HexConfig, PrettyHex}; + +pub struct HexDump; + +impl Prettify for HexDump { + fn name(&self) -> &'static str { + "Hex Dump" + } + + fn deserialize(&self, data: Vec) -> Result { + Ok(format!( + "{:?}", + data.hex_conf(HexConfig { + title: false, + ascii: true, + width: 16, + group: 4, + chunk: 1, + max_bytes: usize::MAX, + display_offset: 0, + }) + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_hexdump_deserialize() { + let data = b"abcd".to_vec(); + let result = HexDump.deserialize(data).unwrap(); + assert_eq!( + result, + "0000: 61 62 63 64 abcd" + ); + } + + #[test] + fn test_hexdump_deserialize_empty() { + let data = vec![]; + let result = HexDump.deserialize(data).unwrap(); + assert_eq!(result, ""); + } +} diff --git a/src/contentviews/hex_stream.rs b/src/contentviews/hex_stream.rs new file mode 100644 index 00000000..69a88b1c --- /dev/null +++ b/src/contentviews/hex_stream.rs @@ -0,0 +1,61 @@ +use crate::contentviews::{Prettify, PrettifyError, Reencode, ReencodeError}; +use pretty_hex::{HexConfig, PrettyHex}; 
+use std::num::ParseIntError; + +pub struct HexStream; + +impl Prettify for HexStream { + fn name(&self) -> &'static str { + "Hex Stream" + } + + fn deserialize(&self, data: Vec) -> Result { + Ok(data + .hex_conf(HexConfig { + title: false, + ascii: false, + width: 0, + group: 0, + chunk: 0, + max_bytes: usize::MAX, + display_offset: 0, + }) + .to_string()) + } +} + +impl Reencode for HexStream { + fn serialize(&self, data: String) -> anyhow::Result, ReencodeError> { + (0..data.len()) + .step_by(2) + .map(|i| u8::from_str_radix(&data[i..i + 2], 16)) + .collect::, ParseIntError>>() + .map_err(|e| ReencodeError::InvalidFormat(e.to_string())) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_hexstream_deserialize() { + let data = b"foo".to_vec(); + let result = HexStream.deserialize(data).unwrap(); + assert_eq!(result, "666f6f"); + } + + #[test] + fn test_hexstream_deserialize_empty() { + let data = vec![]; + let result = HexStream.deserialize(data).unwrap(); + assert_eq!(result, ""); + } + + #[test] + fn test_hexstream_serialize() { + let data = "666f6f".to_string(); + let result = HexStream.serialize(data).unwrap(); + assert_eq!(result, b"foo"); + } +} diff --git a/src/contentviews/mod.rs b/src/contentviews/mod.rs index c8055091..d6dfd346 100644 --- a/src/contentviews/mod.rs +++ b/src/contentviews/mod.rs @@ -1,81 +1,52 @@ +mod hex_dump; +mod hex_stream; + use anyhow::Result; -use pretty_hex::{HexConfig, PrettyHex}; -use std::num::ParseIntError; +use std::fmt::{Display, Formatter}; + +pub use hex_dump::HexDump; +pub use hex_stream::HexStream; #[derive(Debug)] -pub enum SerializeError { +pub enum ReencodeError { InvalidFormat(String), } -pub trait Contentview: Send + Sync { - fn name(&self) -> &str; - fn deserialize(&self, data: Vec) -> Result; -} - -pub trait SerializableContentview: Contentview { - fn serialize(&self, data: String) -> Result, SerializeError>; -} - -#[derive(Default)] -pub struct HexStream(); - -impl Contentview for HexStream 
{ - fn name(&self) -> &str { - "HexStream" +impl Display for ReencodeError { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + ReencodeError::InvalidFormat(e) => { + write!(f, "invalid format: {}", e) + } + } } +} - fn deserialize(&self, data: Vec) -> Result { - Ok(data - .hex_conf(HexConfig { - title: false, - ascii: false, - width: 0, - group: 0, - chunk: 0, - max_bytes: usize::MAX, - display_offset: 0, - }) - .to_string()) - } +#[derive(Debug)] +pub enum PrettifyError { + Generic(String), } -impl SerializableContentview for HexStream { - fn serialize(&self, data: String) -> Result, SerializeError> { - (0..data.len()) - .step_by(2) - .map(|i| u8::from_str_radix(&data[i..i + 2], 16)) - .collect::, ParseIntError>>() - .map_err(|e| { - SerializeError::InvalidFormat(format!("Failed to parse hex string: {}", e)) - }) +impl Display for PrettifyError { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + PrettifyError::Generic(e) => { + write!(f, "deserialize error: {}", e) + } + } } } -#[cfg(test)] -mod tests { - use super::*; +pub trait Prettify: Send + Sync { + fn name(&self) -> &str; - #[test] - fn test_hexstream_deserialize() { - let hex_stream = HexStream::default(); - let data = b"foo".to_vec(); - let result = hex_stream.deserialize(data).unwrap(); - assert_eq!(result, "666f6f"); + fn instance_name(&self) -> String { + self.name().to_lowercase().replace(" ", "_") } - #[test] - fn test_hexstream_deserialize_empty() { - let hex_stream = HexStream::default(); - let data = vec![]; - let result = hex_stream.deserialize(data).unwrap(); - assert_eq!(result, ""); - } + fn deserialize(&self, data: Vec) -> Result; +} - #[test] - fn test_hexstream_serialize() { - let hex_stream = HexStream::default(); - let data = "666f6f".to_string(); - let result = hex_stream.serialize(data).unwrap(); - assert_eq!(result, b"foo"); - } +pub trait Reencode: Send + Sync { + fn serialize(&self, data: String) -> Result, ReencodeError>; } 
From 53263715f1be9ad29683391617082f5ced7e6df0 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Fri, 14 Mar 2025 20:57:58 +0100 Subject: [PATCH 03/26] add msgpack contentview --- Cargo.lock | 50 ++++++++ Cargo.toml | 3 + mitmproxy-rs/mitmproxy_rs/contentviews.pyi | 6 +- mitmproxy-rs/src/contentview.rs | 8 +- mitmproxy-rs/src/lib.rs | 3 +- src/contentviews/hex_dump.rs | 6 +- src/contentviews/hex_stream.rs | 10 +- src/contentviews/mod.rs | 6 +- src/contentviews/msgpack.rs | 127 +++++++++++++++++++++ 9 files changed, 202 insertions(+), 17 deletions(-) create mode 100644 src/contentviews/msgpack.rs diff --git a/Cargo.lock b/Cargo.lock index 8ff377a3..5730e532 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2203,7 +2203,10 @@ dependencies = [ "pretty-hex", "prost", "rand 0.9.0", + "rmp-serde", "security-framework", + "serde", + "serde_yaml", "smoltcp", "socket2", "sysinfo", @@ -2468,6 +2471,12 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "percent-encoding" version = "2.3.1" @@ -2974,6 +2983,28 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rmp" +version = "0.8.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "228ed7c16fa39782c3b3468e974aec2795e9089153cd08ee2e9aefb3613334c4" +dependencies = [ + "byteorder", + "num-traits", + "paste", +] + +[[package]] +name = "rmp-serde" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52e599a477cf9840e92f2cde9a7189e67b42c57532749bf90aea6ec10facd4db" +dependencies = [ + "byteorder", + "rmp", + "serde", +] + [[package]] name = "rustc-demangle" version = "0.1.24" @@ -3112,6 +3143,19 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap 2.8.0", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -3658,6 +3702,12 @@ dependencies = [ "subtle", ] +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + [[package]] name = "untrusted" version = "0.9.0" diff --git a/Cargo.toml b/Cargo.toml index 2d0c436b..db7e155d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -68,6 +68,9 @@ internet-packet = { version = "0.2.3", features = ["smoltcp"] } data-encoding = "2.8.0" hickory-resolver = "0.25.1" socket2 = "0.5.9" +serde = { version = "1.0", features = ["derive"] } +serde_yaml = "0.9" +rmp-serde = "1.1" [patch.crates-io] # tokio = { path = "../tokio/tokio" } diff --git a/mitmproxy-rs/mitmproxy_rs/contentviews.pyi b/mitmproxy-rs/mitmproxy_rs/contentviews.pyi index ab56aa67..91d5108c 100644 --- a/mitmproxy-rs/mitmproxy_rs/contentviews.pyi +++ b/mitmproxy-rs/mitmproxy_rs/contentviews.pyi @@ -3,20 +3,22 @@ from typing import ClassVar, final class Contentview: name: ClassVar[str] - def deserialize(self, data: bytes) -> str: + def prettify(self, data: bytes) -> str: pass @final class InteractiveContentview(Contentview): - def serialize(self, data: str) -> bytes: + def reencode(self, data: str) -> bytes: pass hex_dump: Contentview hex_stream: InteractiveContentview +msgpack: InteractiveContentview __all__ = [ "Contentview", "InteractiveContentview", "hex_dump", "hex_stream", + "msgpack", ] diff --git a/mitmproxy-rs/src/contentview.rs b/mitmproxy-rs/src/contentview.rs index 666a05dc..a3d67dbe 100644 --- a/mitmproxy-rs/src/contentview.rs +++ b/mitmproxy-rs/src/contentview.rs @@ -23,8 +23,8 @@ impl Contentview { } /// Pretty-print an (encoded) message. 
- pub fn deserialize<'py>(&self, data: Vec) -> Result { - self.0.deserialize(data).map_err(|e| anyhow!("{e}")) + pub fn prettify<'py>(&self, data: Vec) -> Result { + self.0.prettify(data).map_err(|e| anyhow!("{e}")) } fn __repr__(&self) -> PyResult { @@ -52,8 +52,8 @@ impl InteractiveContentview { #[pymethods] impl InteractiveContentview { - pub fn serialize<'py>(&self, data: String) -> Result> { - self.0.serialize(data).map_err(|e| anyhow!("{e}")) + pub fn reencode<'py>(&self, data: String) -> Result> { + self.0.reencode(data).map_err(|e| anyhow!("{e}")) } fn __repr__(self_: PyRef<'_, Self>) -> PyResult { diff --git a/mitmproxy-rs/src/lib.rs b/mitmproxy-rs/src/lib.rs index fb371ef1..9b93ab7c 100644 --- a/mitmproxy-rs/src/lib.rs +++ b/mitmproxy-rs/src/lib.rs @@ -91,12 +91,13 @@ mod mitmproxy_rs { use crate::contentview::Contentview; #[pymodule_export] use crate::contentview::InteractiveContentview; - use mitmproxy::contentviews::{HexDump, HexStream}; + use mitmproxy::contentviews::{HexDump, HexStream, MsgPack}; #[pymodule_init] fn init(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_contentview(&HexDump)?; m.add_interactive_contentview(&HexStream)?; + m.add_interactive_contentview(&MsgPack)?; Ok(()) } } diff --git a/src/contentviews/hex_dump.rs b/src/contentviews/hex_dump.rs index 0ab463ad..b5aa9c8c 100644 --- a/src/contentviews/hex_dump.rs +++ b/src/contentviews/hex_dump.rs @@ -8,7 +8,7 @@ impl Prettify for HexDump { "Hex Dump" } - fn deserialize(&self, data: Vec) -> Result { + fn prettify(&self, data: Vec) -> Result { Ok(format!( "{:?}", data.hex_conf(HexConfig { @@ -31,7 +31,7 @@ mod tests { #[test] fn test_hexdump_deserialize() { let data = b"abcd".to_vec(); - let result = HexDump.deserialize(data).unwrap(); + let result = HexDump.prettify(data).unwrap(); assert_eq!( result, "0000: 61 62 63 64 abcd" @@ -41,7 +41,7 @@ mod tests { #[test] fn test_hexdump_deserialize_empty() { let data = vec![]; - let result = HexDump.deserialize(data).unwrap(); + let result = 
HexDump.prettify(data).unwrap(); assert_eq!(result, ""); } } diff --git a/src/contentviews/hex_stream.rs b/src/contentviews/hex_stream.rs index 69a88b1c..bb154aee 100644 --- a/src/contentviews/hex_stream.rs +++ b/src/contentviews/hex_stream.rs @@ -9,7 +9,7 @@ impl Prettify for HexStream { "Hex Stream" } - fn deserialize(&self, data: Vec) -> Result { + fn prettify(&self, data: Vec) -> Result { Ok(data .hex_conf(HexConfig { title: false, @@ -25,7 +25,7 @@ impl Prettify for HexStream { } impl Reencode for HexStream { - fn serialize(&self, data: String) -> anyhow::Result, ReencodeError> { + fn reencode(&self, data: String) -> anyhow::Result, ReencodeError> { (0..data.len()) .step_by(2) .map(|i| u8::from_str_radix(&data[i..i + 2], 16)) @@ -41,21 +41,21 @@ mod tests { #[test] fn test_hexstream_deserialize() { let data = b"foo".to_vec(); - let result = HexStream.deserialize(data).unwrap(); + let result = HexStream.prettify(data).unwrap(); assert_eq!(result, "666f6f"); } #[test] fn test_hexstream_deserialize_empty() { let data = vec![]; - let result = HexStream.deserialize(data).unwrap(); + let result = HexStream.prettify(data).unwrap(); assert_eq!(result, ""); } #[test] fn test_hexstream_serialize() { let data = "666f6f".to_string(); - let result = HexStream.serialize(data).unwrap(); + let result = HexStream.reencode(data).unwrap(); assert_eq!(result, b"foo"); } } diff --git a/src/contentviews/mod.rs b/src/contentviews/mod.rs index d6dfd346..eee26251 100644 --- a/src/contentviews/mod.rs +++ b/src/contentviews/mod.rs @@ -1,11 +1,13 @@ mod hex_dump; mod hex_stream; +mod msgpack; use anyhow::Result; use std::fmt::{Display, Formatter}; pub use hex_dump::HexDump; pub use hex_stream::HexStream; +pub use msgpack::MsgPack; #[derive(Debug)] pub enum ReencodeError { @@ -44,9 +46,9 @@ pub trait Prettify: Send + Sync { self.name().to_lowercase().replace(" ", "_") } - fn deserialize(&self, data: Vec) -> Result; + fn prettify(&self, data: Vec) -> Result; } pub trait Reencode: Send + 
Sync { - fn serialize(&self, data: String) -> Result, ReencodeError>; + fn reencode(&self, data: String) -> Result, ReencodeError>; } diff --git a/src/contentviews/msgpack.rs b/src/contentviews/msgpack.rs new file mode 100644 index 00000000..fe001906 --- /dev/null +++ b/src/contentviews/msgpack.rs @@ -0,0 +1,127 @@ +use crate::contentviews::{Prettify, PrettifyError, Reencode, ReencodeError}; +use rmp_serde::{decode, encode}; +use serde_yaml; + +pub struct MsgPack; + +impl Prettify for MsgPack { + fn name(&self) -> &'static str { + "MsgPack" + } + + fn prettify(&self, data: Vec) -> Result { + // Deserialize MsgPack to a serde_yaml::Value + let value: serde_yaml::Value = decode::from_slice(&data) + .map_err(|e| PrettifyError::Generic(format!("Failed to deserialize MsgPack: {}", e)))?; + + // Convert the Value to prettified YAML + serde_yaml::to_string(&value) + .map_err(|e| PrettifyError::Generic(format!("Failed to convert to YAML: {}", e))) + } +} + +impl Reencode for MsgPack { + fn reencode(&self, data: String) -> anyhow::Result, ReencodeError> { + // Parse the YAML string to a serde_yaml::Value + let value: serde_yaml::Value = serde_yaml::from_str(&data) + .map_err(|e| ReencodeError::InvalidFormat(format!("Invalid YAML: {}", e)))?; + + // Serialize the Value to MsgPack + let mut buf = Vec::new(); + encode::write_named(&mut buf, &value).map_err(|e| { + ReencodeError::InvalidFormat(format!("Failed to encode to MsgPack: {}", e)) + })?; + + Ok(buf) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // Hardcoded MsgPack data for a simple object: + // { + // "name": "John Doe", + // "age": 30, + // "tags": ["developer", "rust"] + // } + const TEST_MSGPACK: &[u8] = &[ + 0x83, // map with 3 elements + 0xa4, 0x6e, 0x61, 0x6d, 0x65, // "name" + 0xa8, 0x4a, 0x6f, 0x68, 0x6e, 0x20, 0x44, 0x6f, 0x65, // "John Doe" + 0xa3, 0x61, 0x67, 0x65, // "age" + 0x1e, // 30 + 0xa4, 0x74, 0x61, 0x67, 0x73, // "tags" + 0x92, // array with 2 elements + 0xa9, 0x64, 0x65, 0x76, 0x65, 
0x6c, 0x6f, 0x70, 0x65, 0x72, // "developer" + 0xa4, 0x72, 0x75, 0x73, 0x74, // "rust" + ]; + + // Expected YAML representation + const TEST_YAML: &str = r#"name: John Doe +age: 30 +tags: +- developer +- rust +"#; + + #[test] + fn test_msgpack_deserialize() { + let result = MsgPack.prettify(TEST_MSGPACK.to_vec()).unwrap(); + assert_eq!(result, TEST_YAML); + } + + #[test] + fn test_msgpack_serialize() { + let yaml_data = TEST_YAML.to_string(); + + let result = MsgPack.reencode(yaml_data).unwrap(); + + // Verify the MsgPack data contains the expected values + let value: serde_yaml::Value = decode::from_slice(&result).unwrap(); + + if let serde_yaml::Value::Mapping(map) = value { + assert_eq!( + map.get(serde_yaml::Value::String("name".to_string())), + Some(&serde_yaml::Value::String("John Doe".to_string())) + ); + + assert_eq!( + map.get(serde_yaml::Value::String("age".to_string())), + Some(&serde_yaml::Value::Number(serde_yaml::Number::from(30))) + ); + + if let Some(serde_yaml::Value::Sequence(tags)) = + map.get(serde_yaml::Value::String("tags".to_string())) + { + assert_eq!(tags.len(), 2); + assert_eq!(tags[0], serde_yaml::Value::String("developer".to_string())); + assert_eq!(tags[1], serde_yaml::Value::String("rust".to_string())); + } else { + panic!("tags is not a sequence"); + } + } else { + panic!("value is not a mapping"); + } + } + + #[test] + fn test_msgpack_roundtrip() { + // Start with the hardcoded MsgPack data + let msgpack_data = TEST_MSGPACK.to_vec(); + + // Deserialize to YAML + let yaml_result = MsgPack.prettify(msgpack_data).unwrap(); + + // Serialize back to MsgPack + let result = MsgPack.reencode(yaml_result).unwrap(); + + // Deserialize both the original and the result to Values for comparison + let original_value: serde_yaml::Value = decode::from_slice(TEST_MSGPACK).unwrap(); + let result_value: serde_yaml::Value = decode::from_slice(&result).unwrap(); + + // Compare the values + assert_eq!(original_value, result_value); + } +} From 
0f0b55b242169f47bda45440006ceb4ad775da9d Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Wed, 19 Mar 2025 01:49:42 +0100 Subject: [PATCH 04/26] add protobuf contentview --- Cargo.lock | 22 ++ Cargo.toml | 2 + mitmproxy-rs/mitmproxy_rs/contentviews.pyi | 2 + mitmproxy-rs/src/contentview.rs | 2 +- mitmproxy-rs/src/lib.rs | 3 +- src/contentviews/mod.rs | 2 + src/contentviews/protobuf.rs | 354 +++++++++++++++++++++ 7 files changed, 385 insertions(+), 2 deletions(-) create mode 100644 src/contentviews/protobuf.rs diff --git a/Cargo.lock b/Cargo.lock index 5730e532..24f86d39 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2202,7 +2202,9 @@ dependencies = [ "once_cell", "pretty-hex", "prost", + "protobuf", "rand 0.9.0", + "regex", "rmp-serde", "security-framework", "serde", @@ -2710,6 +2712,26 @@ dependencies = [ "prost", ] +[[package]] +name = "protobuf" +version = "3.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d65a1d4ddae7d8b5de68153b48f6aa3bba8cb002b243dbdbc55a5afbc98f99f4" +dependencies = [ + "once_cell", + "protobuf-support", + "thiserror 1.0.69", +] + +[[package]] +name = "protobuf-support" +version = "3.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e36c2f31e0a47f9280fb347ef5e461ffcd2c52dd520d8e216b52f93b0b0d7d6" +dependencies = [ + "thiserror 1.0.69", +] + [[package]] name = "pyo3" version = "0.24.1" diff --git a/Cargo.toml b/Cargo.toml index db7e155d..d0d92e6d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -71,6 +71,8 @@ socket2 = "0.5.9" serde = { version = "1.0", features = ["derive"] } serde_yaml = "0.9" rmp-serde = "1.1" +protobuf = "3.7.2" +regex = "1.10.3" [patch.crates-io] # tokio = { path = "../tokio/tokio" } diff --git a/mitmproxy-rs/mitmproxy_rs/contentviews.pyi b/mitmproxy-rs/mitmproxy_rs/contentviews.pyi index 91d5108c..f05c711b 100644 --- a/mitmproxy-rs/mitmproxy_rs/contentviews.pyi +++ b/mitmproxy-rs/mitmproxy_rs/contentviews.pyi @@ -14,6 +14,7 @@ class 
InteractiveContentview(Contentview): hex_dump: Contentview hex_stream: InteractiveContentview msgpack: InteractiveContentview +protobuf: Contentview __all__ = [ "Contentview", @@ -21,4 +22,5 @@ __all__ = [ "hex_dump", "hex_stream", "msgpack", + "protobuf", ] diff --git a/mitmproxy-rs/src/contentview.rs b/mitmproxy-rs/src/contentview.rs index a3d67dbe..d686b2d1 100644 --- a/mitmproxy-rs/src/contentview.rs +++ b/mitmproxy-rs/src/contentview.rs @@ -52,7 +52,7 @@ impl InteractiveContentview { #[pymethods] impl InteractiveContentview { - pub fn reencode<'py>(&self, data: String) -> Result> { + pub fn reencode(&self, data: String) -> Result> { self.0.reencode(data).map_err(|e| anyhow!("{e}")) } diff --git a/mitmproxy-rs/src/lib.rs b/mitmproxy-rs/src/lib.rs index 9b93ab7c..20d0b09a 100644 --- a/mitmproxy-rs/src/lib.rs +++ b/mitmproxy-rs/src/lib.rs @@ -91,13 +91,14 @@ mod mitmproxy_rs { use crate::contentview::Contentview; #[pymodule_export] use crate::contentview::InteractiveContentview; - use mitmproxy::contentviews::{HexDump, HexStream, MsgPack}; + use mitmproxy::contentviews::{HexDump, HexStream, MsgPack, Protobuf}; #[pymodule_init] fn init(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_contentview(&HexDump)?; m.add_interactive_contentview(&HexStream)?; m.add_interactive_contentview(&MsgPack)?; + m.add_contentview(&Protobuf)?; Ok(()) } } diff --git a/src/contentviews/mod.rs b/src/contentviews/mod.rs index eee26251..cc6621f0 100644 --- a/src/contentviews/mod.rs +++ b/src/contentviews/mod.rs @@ -1,6 +1,7 @@ mod hex_dump; mod hex_stream; mod msgpack; +mod protobuf; use anyhow::Result; use std::fmt::{Display, Formatter}; @@ -8,6 +9,7 @@ use std::fmt::{Display, Formatter}; pub use hex_dump::HexDump; pub use hex_stream::HexStream; pub use msgpack::MsgPack; +pub use protobuf::Protobuf; #[derive(Debug)] pub enum ReencodeError { diff --git a/src/contentviews/protobuf.rs b/src/contentviews/protobuf.rs new file mode 100644 index 00000000..a5c1bede --- /dev/null +++ 
b/src/contentviews/protobuf.rs @@ -0,0 +1,354 @@ +use crate::contentviews::{Prettify, PrettifyError}; +use once_cell::sync::Lazy; +use protobuf::descriptor::{DescriptorProto, FileDescriptorProto}; +use protobuf::reflect::FileDescriptor; +use protobuf::MessageDyn; +use protobuf::UnknownValueRef; +use regex::Captures; +use regex::Regex; +use serde_yaml::value::{Tag, TaggedValue}; +use serde_yaml::{Mapping, Number, Sequence, Value}; +use std::collections::HashMap; +use std::fmt::Write; + +// Define static regular expressions for better performance +static FIXED32_RE: Lazy = Lazy::new(|| Regex::new(r"!fixed32 (\d+)").unwrap()); +static FIXED64_RE: Lazy = Lazy::new(|| Regex::new(r"!fixed64 (\d+)").unwrap()); +static VARINT_RE: Lazy = Lazy::new(|| Regex::new(r"!varint (\d+)").unwrap()); + +pub struct Protobuf; + +impl Prettify for Protobuf { + fn name(&self) -> &str { + "Protocol Buffer" + } + + fn prettify(&self, data: Vec) -> Result { + // Check if data is empty first + if data.is_empty() { + return Err(PrettifyError::Generic("Empty protobuf data".to_string())); + } + + // Create a dynamic message + // TODO: Accept existing .proto files. 
+ let mut dynamic_message = Self::create_dynamic_message()?; + + // Parse protobuf and convert to YAML + let yaml_value = self.parse_protobuf(data, &mut dynamic_message)?; + + // Convert the Value to prettified YAML + let yaml_str = serde_yaml::to_string(&yaml_value) + .map_err(|e| PrettifyError::Generic(format!("Failed to convert to YAML: {}", e)))?; + + // Apply regex replacements to transform the YAML output + Self::apply_replacements(&yaml_str) + } +} + +impl Protobuf { + // Helper method to create and return a dynamic message instance with no fields + fn create_dynamic_message() -> Result, PrettifyError> { + // Create a dynamic message with no fields to parse unknown fields + let mut file_proto = FileDescriptorProto::new(); + file_proto.message_type.push({ + let mut message_type = DescriptorProto::new(); + message_type.set_name("Unknown".to_string()); + message_type + }); + + let file_descriptor = FileDescriptor::new_dynamic(file_proto, &[]).map_err(|e| { + PrettifyError::Generic(format!("Failed to create dynamic message: {}", e)) + })?; + + let descriptor = file_descriptor + .message_by_package_relative_name("Unknown") + .ok_or_else(|| PrettifyError::Generic("Failed to get message by name".to_string()))?; + + Ok(descriptor.new_instance()) + } + + // Helper method to apply regex replacements to the YAML output + fn apply_replacements(yaml_str: &str) -> Result { + // Replace !fixed32 tags with comments showing float and i32 interpretations + let with_fixed32 = FIXED32_RE.replace_all(yaml_str, |caps: &Captures| { + let value = caps[1].parse::().unwrap_or_default(); + let float_value = f32::from_bits(value); + let i32_value = value as i32; + + if !float_value.is_nan() && float_value < 0.0 { + format!("{} # float: {}, i32: {}", value, float_value, i32_value) + } else if !float_value.is_nan() { + format!("{} # float: {}", value, float_value) + } else if i32_value < 0 { + format!("{} # i32: {}", value, i32_value) + } else { + value.to_string() + } + }); + + // 
Replace !fixed64 tags with comments showing double and i64 interpretations + let with_fixed64 = FIXED64_RE.replace_all(&with_fixed32, |caps: &Captures| { + let value = caps[1].parse::().unwrap_or_default(); + let double_value = f64::from_bits(value); + let i64_value = value as i64; + + if !double_value.is_nan() && double_value < 0.0 { + format!("{} # double: {}, i64: {}", value, double_value, i64_value) + } else if !double_value.is_nan() { + format!("{} # double: {}", value, double_value) + } else if i64_value < 0 { + format!("{} # i64: {}", value, i64_value) + } else { + value.to_string() + } + }); + + // Replace !varint tags with comments showing signed interpretation if different + let with_varint = VARINT_RE.replace_all(&with_fixed64, |caps: &Captures| { + let unsigned_value = caps[1].parse::().unwrap_or_default(); + let i64_zigzag = Self::decode_zigzag64(unsigned_value); + + // Only show signed value if it's different from unsigned + if i64_zigzag < 0 { + format!("{} # signed: {}", unsigned_value, i64_zigzag) + } else { + unsigned_value.to_string() + } + }); + + Ok(with_varint.to_string()) + } + + // Decode a zigzag-encoded 64-bit integer + fn decode_zigzag64(n: u64) -> i64 { + ((n >> 1) as i64) ^ (-((n & 1) as i64)) + } + + // Convert length-delimited protobuf data to a hex string + fn bytes_to_hex_string(bytes: &[u8]) -> String { + let mut result = String::with_capacity(bytes.len() * 2); + for b in bytes { + let _ = write!(result, "{:02x}", b); + } + result + } + + // Convert a single protobuf value to YAML + fn convert_value_to_yaml(&self, value: &UnknownValueRef) -> Result { + match value { + UnknownValueRef::Fixed32(v) => Ok(Value::Tagged(Box::new(TaggedValue { + tag: Tag::new("fixed32"), + value: Value::Number(Number::from(*v)), + }))), + UnknownValueRef::Fixed64(v) => Ok(Value::Tagged(Box::new(TaggedValue { + tag: Tag::new("fixed64"), + value: Value::Number(Number::from(*v)), + }))), + UnknownValueRef::Varint(v) => Ok(Value::Tagged(Box::new(TaggedValue 
{ + tag: Tag::new("varint"), + value: Value::Number(Number::from(*v)), + }))), + UnknownValueRef::LengthDelimited(v) => self.process_length_delimited(v), + } + } + + // Process a length-delimited value (string, nested message, or binary) + fn process_length_delimited(&self, data: &[u8]) -> Result { + // Try to parse as a string first + if let Ok(s) = std::str::from_utf8(data) { + if s.chars() + .all(|c| c.is_ascii_graphic() || c.is_ascii_whitespace()) + { + return Ok(Value::String(s.to_string())); + } + } + + // Try to parse as a nested message + let mut nested_message = Self::create_dynamic_message()?; + if let Ok(nested_yaml) = self.parse_protobuf(data.to_vec(), &mut nested_message) { + return Ok(nested_yaml); + } + + // If not a string or nested message, format as binary data with YAML tag + let hex_string = Self::bytes_to_hex_string(data); + Ok(Value::Tagged(Box::new(TaggedValue { + tag: Tag::new("Binary"), + value: Value::String(hex_string), + }))) + } + + // Helper method to parse protobuf data and process unknown fields to convert to YAML + fn parse_protobuf( + &self, + data: Vec, + dynamic_message: &mut Box, + ) -> Result { + // Parse the protobuf data using the provided dynamic message + dynamic_message + .merge_from_bytes_dyn(&data) + .map_err(|e| PrettifyError::Generic(format!("Failed to parse protobuf: {}", e)))?; + + // Get unknown fields & group by field id. + let unknown_fields = dynamic_message.unknown_fields_dyn(); + let mut field_groups: HashMap> = HashMap::new(); + for (field_number, value) in unknown_fields.iter() { + field_groups.entry(field_number).or_default().push(value); + } + + // Convert unknown fields to a YAML value + let mut root = Mapping::new(); + for (field_number, values) in field_groups { + let key = Value::Number(Number::from(field_number)); + let value = if values.len() == 1 { + self.convert_value_to_yaml(&values[0])? 
+ } else { + Value::Sequence( + values + .into_iter() + .map(|x| self.convert_value_to_yaml(&x)) + .collect::>()?, + ) + }; + root.insert(key, value); + } + Ok(Value::Mapping(root)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + // Varint tests + #[test] + fn test_varint() { + // From docs: field 1: varint 150 + const PROTO: &[u8] = &[0x08, 0x96, 0x01]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "1: 150\n"); + } + + #[test] + fn test_varint_signed() { + // field 1: varint 11 (zigzag encoded: -6) + const PROTO: &[u8] = &[0x08, 0x0B]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "1: 11 # signed: -6\n"); + } + + #[test] + fn test_repeated_numeric() { + // Example based on docs: repeated field 1 with values 1, 2, 3 + const PROTO: &[u8] = &[0x08, 0x01, 0x08, 0x02, 0x08, 0x03]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "1:\n- 1 # signed: -1\n- 2\n- 3 # signed: -2\n"); + } + + #[test] + fn test_packed_repeated() { + // From docs: "message Test5 { repeated int32 f = 6 [packed=true]; }" + // With values 3, 270, and 86942 + const PROTO: &[u8] = &[0x32, 0x06, 0x03, 0x8E, 0x02, 0x9E, 0xA7, 0x05]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + // Our implementation shows this as binary data as we don't have schema info + assert_eq!(result, "6: !Binary 038e029ea705\n"); + } + + // Fixed32 tests + #[test] + fn test_fixed32() { + const PROTO: &[u8] = &[0x15, 0x00, 0x00, 0x80, 0xBF]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "2: 3212836864 # float: -1, i32: -1082130432\n"); + } + + #[test] + fn test_fixed32_positive() { + const PROTO: &[u8] = &[0x15, 0xD0, 0x0F, 0x49, 0x40]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "2: 1078530000 # float: 3.14159\n"); + } + + #[test] + fn test_fixed32_no_float() { + const PROTO: &[u8] = &[0x15, 0xFF, 0xFF, 0xFF, 0xFF]; + let result = 
Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "2: 4294967295 # i32: -1\n"); + } + + #[test] + fn test_fixed32_positive_no_float() { + const PROTO: &[u8] = &[0x15, 0x01, 0x00, 0xC0, 0x7F]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "2: 2143289345\n"); + } + + // Fixed64 tests + #[test] + fn test_fixed64() { + const PROTO: &[u8] = &[0x19, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0xBF]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!( + result, + "3: 13830554455654793216 # double: -1, i64: -4616189618054758400\n" + ); + } + + #[test] + fn test_fixed64_positive() { + const PROTO: &[u8] = &[0x19, 0x6E, 0x86, 0x1B, 0xF0, 0xF9, 0x21, 0x09, 0x40]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "3: 4614256650576692846 # double: 3.14159\n"); + } + + #[test] + fn test_fixed64_no_float() { + const PROTO: &[u8] = &[0x19, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "3: 18446744073709551615 # i64: -1\n"); + } + + #[test] + fn test_fixed64_positive_no_float() { + const PROTO: &[u8] = &[0x19, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF8, 0x7F]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "3: 9221120237041090561\n"); + } + + // String field test + #[test] + fn test_string_field() { + // field 4: string "hello" (LEN type field from docs) + const PROTO: &[u8] = &[0x22, 0x05, 0x68, 0x65, 0x6C, 0x6C, 0x6F]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "4: hello\n"); + } + + #[test] + fn test_nested_message() { + // From docs about nested messages: field 5 with a nested message containing field 1: varint 42 + const PROTO: &[u8] = &[0x2A, 0x02, 0x08, 0x2A]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "5:\n 1: 42\n"); + } + + #[test] + fn test_binary_data() { + // Binary data example: field 6: 
binary data [0x01, 0x02, 0x03] + const PROTO: &[u8] = &[0x32, 0x03, 0x01, 0x02, 0x03]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "6: !Binary '010203'\n"); + } + + #[test] + fn test_invalid_protobuf() { + let result = Protobuf.prettify(vec![0xFF, 0xFF]); + assert!(result.is_err()); + } + + #[test] + fn test_empty_protobuf() { + let result = Protobuf.prettify(vec![]); + assert!(result.is_err()); + } +} From 44f8551a29a634afebb03d2f4541cd487922b12d Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Fri, 28 Mar 2025 21:02:33 +0100 Subject: [PATCH 05/26] make protobuf contentview bidirectional --- mitmproxy-rs/src/contentview.rs | 6 +- src/contentviews/hex_stream.rs | 6 +- src/contentviews/mod.rs | 2 +- src/contentviews/msgpack.rs | 10 +- src/contentviews/protobuf.rs | 799 +++++++++++++++++++++++--------- 5 files changed, 596 insertions(+), 227 deletions(-) diff --git a/mitmproxy-rs/src/contentview.rs b/mitmproxy-rs/src/contentview.rs index d686b2d1..415f4baa 100644 --- a/mitmproxy-rs/src/contentview.rs +++ b/mitmproxy-rs/src/contentview.rs @@ -23,7 +23,7 @@ impl Contentview { } /// Pretty-print an (encoded) message. 
- pub fn prettify<'py>(&self, data: Vec) -> Result { + pub fn prettify(&self, data: Vec) -> Result { self.0.prettify(data).map_err(|e| anyhow!("{e}")) } @@ -52,8 +52,8 @@ impl InteractiveContentview { #[pymethods] impl InteractiveContentview { - pub fn reencode(&self, data: String) -> Result> { - self.0.reencode(data).map_err(|e| anyhow!("{e}")) + pub fn reencode(&self, data: &str) -> Result> { + self.0.reencode(data, &[]).map_err(|e| anyhow!("{e}")) } fn __repr__(self_: PyRef<'_, Self>) -> PyResult { diff --git a/src/contentviews/hex_stream.rs b/src/contentviews/hex_stream.rs index bb154aee..a242cfda 100644 --- a/src/contentviews/hex_stream.rs +++ b/src/contentviews/hex_stream.rs @@ -25,7 +25,7 @@ impl Prettify for HexStream { } impl Reencode for HexStream { - fn reencode(&self, data: String) -> anyhow::Result, ReencodeError> { + fn reencode(&self, data: &str, original: &[u8]) -> anyhow::Result, ReencodeError> { (0..data.len()) .step_by(2) .map(|i| u8::from_str_radix(&data[i..i + 2], 16)) @@ -54,8 +54,8 @@ mod tests { #[test] fn test_hexstream_serialize() { - let data = "666f6f".to_string(); - let result = HexStream.reencode(data).unwrap(); + let data = "666f6f"; + let result = HexStream.reencode(data, &[]).unwrap(); assert_eq!(result, b"foo"); } } diff --git a/src/contentviews/mod.rs b/src/contentviews/mod.rs index cc6621f0..ce260373 100644 --- a/src/contentviews/mod.rs +++ b/src/contentviews/mod.rs @@ -52,5 +52,5 @@ pub trait Prettify: Send + Sync { } pub trait Reencode: Send + Sync { - fn reencode(&self, data: String) -> Result, ReencodeError>; + fn reencode(&self, data: &str, original: &[u8]) -> Result, ReencodeError>; } diff --git a/src/contentviews/msgpack.rs b/src/contentviews/msgpack.rs index fe001906..2b057b77 100644 --- a/src/contentviews/msgpack.rs +++ b/src/contentviews/msgpack.rs @@ -21,9 +21,9 @@ impl Prettify for MsgPack { } impl Reencode for MsgPack { - fn reencode(&self, data: String) -> anyhow::Result, ReencodeError> { + fn reencode(&self, data: 
&str, original: &[u8]) -> anyhow::Result, ReencodeError> { // Parse the YAML string to a serde_yaml::Value - let value: serde_yaml::Value = serde_yaml::from_str(&data) + let value: serde_yaml::Value = serde_yaml::from_str(data) .map_err(|e| ReencodeError::InvalidFormat(format!("Invalid YAML: {}", e)))?; // Serialize the Value to MsgPack @@ -74,9 +74,7 @@ tags: #[test] fn test_msgpack_serialize() { - let yaml_data = TEST_YAML.to_string(); - - let result = MsgPack.reencode(yaml_data).unwrap(); + let result = MsgPack.reencode(TEST_YAML, &[]).unwrap(); // Verify the MsgPack data contains the expected values let value: serde_yaml::Value = decode::from_slice(&result).unwrap(); @@ -115,7 +113,7 @@ tags: let yaml_result = MsgPack.prettify(msgpack_data).unwrap(); // Serialize back to MsgPack - let result = MsgPack.reencode(yaml_result).unwrap(); + let result = MsgPack.reencode(&yaml_result, &[]).unwrap(); // Deserialize both the original and the result to Values for comparison let original_value: serde_yaml::Value = decode::from_slice(TEST_MSGPACK).unwrap(); diff --git a/src/contentviews/protobuf.rs b/src/contentviews/protobuf.rs index a5c1bede..4bc162a7 100644 --- a/src/contentviews/protobuf.rs +++ b/src/contentviews/protobuf.rs @@ -1,15 +1,28 @@ -use crate::contentviews::{Prettify, PrettifyError}; +use crate::contentviews::{Prettify, PrettifyError, Reencode, ReencodeError}; use once_cell::sync::Lazy; -use protobuf::descriptor::{DescriptorProto, FileDescriptorProto}; -use protobuf::reflect::FileDescriptor; -use protobuf::MessageDyn; +use protobuf::descriptor::field_descriptor_proto::Label::LABEL_REPEATED; +use protobuf::descriptor::field_descriptor_proto::Type; +use protobuf::descriptor::field_descriptor_proto::Type::{ + TYPE_BYTES, TYPE_FIXED32, TYPE_FIXED64, TYPE_STRING, TYPE_UINT64, +}; +use protobuf::descriptor::{DescriptorProto, FieldDescriptorProto, FileDescriptorProto}; +use protobuf::reflect::{ + FieldDescriptor, FileDescriptor, MessageDescriptor, ReflectFieldRef, 
ReflectValueRef, + RuntimeFieldType, RuntimeType, +}; +use protobuf::well_known_types::empty::Empty; use protobuf::UnknownValueRef; +use protobuf::{EnumOrUnknown, Message, MessageDyn, MessageFull, UnknownValue}; use regex::Captures; use regex::Regex; use serde_yaml::value::{Tag, TaggedValue}; -use serde_yaml::{Mapping, Number, Sequence, Value}; -use std::collections::HashMap; +use serde_yaml::Value::Tagged; +use serde_yaml::{Mapping, Number, Value}; +use std::collections::BTreeMap; use std::fmt::Write; +use std::num::ParseIntError; +use std::ops::Deref; +use std::str::FromStr; // Define static regular expressions for better performance static FIXED32_RE: Lazy = Lazy::new(|| Regex::new(r"!fixed32 (\d+)").unwrap()); @@ -18,6 +31,12 @@ static VARINT_RE: Lazy = Lazy::new(|| Regex::new(r"!varint (\d+)").unwrap pub struct Protobuf; +enum GuessedFieldType { + String, + Message(Box), + Unknown, +} + impl Prettify for Protobuf { fn name(&self) -> &str { "Protocol Buffer" @@ -29,12 +48,15 @@ impl Prettify for Protobuf { return Err(PrettifyError::Generic("Empty protobuf data".to_string())); } - // Create a dynamic message - // TODO: Accept existing .proto files. 
- let mut dynamic_message = Self::create_dynamic_message()?; + let existing = Empty::descriptor(); + let descriptor = Self::create_descriptor(&data, existing)?; + + let message = descriptor + .parse_from_bytes(&data) + .map_err(|e| PrettifyError::Generic(format!("Error parsing protobuf: {e}")))?; // Parse protobuf and convert to YAML - let yaml_value = self.parse_protobuf(data, &mut dynamic_message)?; + let yaml_value = Self::message_to_yaml(message.as_ref()); // Convert the Value to prettified YAML let yaml_str = serde_yaml::to_string(&yaml_value) @@ -45,26 +67,360 @@ impl Prettify for Protobuf { } } +impl Reencode for Protobuf { + fn reencode(&self, data: &str, original: &[u8]) -> Result, ReencodeError> { + let existing = Empty::descriptor(); + let descriptor = Self::create_descriptor(original, existing) + .map_err(|e| ReencodeError::InvalidFormat(format!("{e}")))?; + let message = descriptor.new_instance(); + + let value: Value = serde_yaml::from_str(data) + .map_err(|e| ReencodeError::InvalidFormat(format!("invalid yaml: {e}")))?; + + Self::merge_yaml_into_message(value, message) + } +} + +fn tag_number(value: Value, field_type: Type) -> Value { + match field_type { + TYPE_UINT64 => Tagged(Box::new(TaggedValue { + tag: Tag::new("varint"), + value, + })), + TYPE_FIXED64 => Tagged(Box::new(TaggedValue { + tag: Tag::new("fixed64"), + value, + })), + TYPE_FIXED32 => Tagged(Box::new(TaggedValue { + tag: Tag::new("fixed32"), + value, + })), + _ => value, + } +} + +fn int_value(n: Number, field: Option<&FieldDescriptor>) -> UnknownValue { + if let Some(field) = field { + if let Some(typ) = field.proto().type_.and_then(|t| t.enum_value().ok()) { + match typ { + TYPE_FIXED64 | Type::TYPE_SFIXED64 | Type::TYPE_DOUBLE => { + return if let Some(n) = n.as_u64() { + UnknownValue::Fixed64(n) + } else if let Some(n) = n.as_i64() { + UnknownValue::sfixed64(n) + } else { + UnknownValue::double(n.as_f64().expect("as_f64 never fails")) + } + } + TYPE_FIXED32 | Type::TYPE_SFIXED32 
| Type::TYPE_FLOAT => { + return if let Some(n) = n.as_u64() { + UnknownValue::Fixed32(n as u32) + } else if let Some(n) = n.as_i64() { + UnknownValue::sfixed32(n as i32) + } else { + UnknownValue::float(n.as_f64().expect("as_f64 never fails") as f32) + } + } + _ => (), + } + } + } + if let Some(n) = n.as_u64() { + UnknownValue::Varint(n) + } else if let Some(n) = n.as_i64() { + UnknownValue::int64(n) + } else { + UnknownValue::double(n.as_f64().expect("as_f64 never fails")) + } +} + impl Protobuf { - // Helper method to create and return a dynamic message instance with no fields - fn create_dynamic_message() -> Result, PrettifyError> { - // Create a dynamic message with no fields to parse unknown fields - let mut file_proto = FileDescriptorProto::new(); - file_proto.message_type.push({ - let mut message_type = DescriptorProto::new(); - message_type.set_name("Unknown".to_string()); - message_type - }); + fn merge_yaml_into_message( + value: Value, + mut message: Box, + ) -> Result, ReencodeError> { + let Value::Mapping(mapping) = value else { + return Err(ReencodeError::InvalidFormat( + "yaml is not a mapping".to_string(), + )); + }; + + for (key, value) in mapping.into_iter() { + let field_num = match key { + Value::String(key) => { + if let Some(field) = message.descriptor_dyn().field_by_name(&key) { + field.number() + } else if let Ok(field_num) = i32::from_str(&key) { + field_num + } else { + return Err(ReencodeError::InvalidFormat(format!( + "unknown protobuf field key: {key}" + ))); + } + } + Value::Number(key) => { + let Some(field_num) = key.as_i64() else { + return Err(ReencodeError::InvalidFormat(format!( + "invalid protobuf field number: {key}" + ))); + }; + field_num as i32 + } + other => { + return Err(ReencodeError::InvalidFormat(format!( + "unexpected key: {other:?}" + ))) + } + } as u32; + + Self::add_field(message.as_mut(), field_num, value)?; + } - let file_descriptor = FileDescriptor::new_dynamic(file_proto, &[]).map_err(|e| { - 
PrettifyError::Generic(format!("Failed to create dynamic message: {}", e)) - })?; + message + .write_to_bytes_dyn() + .map_err(|e| ReencodeError::InvalidFormat(format!("failed to serialize protobuf: {e}"))) + } - let descriptor = file_descriptor - .message_by_package_relative_name("Unknown") - .ok_or_else(|| PrettifyError::Generic("Failed to get message by name".to_string()))?; + fn add_field( + message: &mut dyn MessageDyn, + field_num: u32, + value: Value, + ) -> Result<(), ReencodeError> { + let value = match value { + Value::Null => return Ok(()), + Value::Sequence(seq) => { + for s in seq.into_iter() { + Self::add_field(message, field_num, s)?; + } + return Ok(()); + } + Tagged(t) => { + if t.tag == "!Binary" { + let value = match t.value { + Value::String(s) => s, + _ => { + return Err(ReencodeError::InvalidFormat( + "binary data is not a string".to_string(), + )) + } + }; + let value = (0..value.len()) + .step_by(2) + .map(|i| u8::from_str_radix(&value[i..i + 2], 16)) + .collect::, ParseIntError>>() + .map_err(|e| ReencodeError::InvalidFormat(e.to_string()))?; + UnknownValue::LengthDelimited(value) + } else { + log::info!("Unexpected YAML tag {}, discarding.", t.tag); + return Self::add_field(message, field_num, t.value); + } + } + Value::Bool(b) => UnknownValue::Varint(b as u64), + Value::Number(n) => { + let field = message.descriptor_dyn().field_by_number(field_num); + int_value(n, field.as_ref()) + } + Value::String(s) => UnknownValue::LengthDelimited(s.into_bytes()), + Value::Mapping(m) => { + let mut descriptor = Empty::descriptor(); + if let Some(field) = message.descriptor_dyn().field_by_number(field_num) { + if let RuntimeFieldType::Singular(RuntimeType::Message(md)) = + field.runtime_field_type() + { + descriptor = md; + } else if let RuntimeFieldType::Map(k, v) = field.runtime_field_type() { + // TODO: handle maps. 
+ } + } + let child_message = descriptor.new_instance(); + let val = Self::merge_yaml_into_message(Value::Mapping(m), child_message)?; + UnknownValue::LengthDelimited(val) + } + }; + message.mut_unknown_fields_dyn().add_value(field_num, value); + Ok(()) + } - Ok(descriptor.new_instance()) + fn primitive_type_to_yaml(x: ReflectValueRef, field_type: Type) -> Value { + match x { + ReflectValueRef::U32(x) => tag_number(Value::Number(Number::from(x)), field_type), + ReflectValueRef::U64(x) => tag_number(Value::Number(Number::from(x)), field_type), + ReflectValueRef::I32(x) => Value::Number(Number::from(x)), + ReflectValueRef::I64(x) => Value::Number(Number::from(x)), + ReflectValueRef::F32(x) => Value::Number(Number::from(x)), + ReflectValueRef::F64(x) => Value::Number(Number::from(x)), + ReflectValueRef::Bool(x) => Value::from(x), + ReflectValueRef::String(x) => Value::from(x), + ReflectValueRef::Bytes(x) => Value::Tagged(Box::new(TaggedValue { + tag: Tag::new("Binary"), + value: Value::String(Self::bytes_to_hex_string(x)), + })), + ReflectValueRef::Enum(descriptor, i) => descriptor + .value_by_number(i) + .map(|v| Value::String(v.name().to_string())) + .unwrap_or_else(|| Value::Number(Number::from(i))), + ReflectValueRef::Message(m) => Self::message_to_yaml(m.deref()), + } + } + pub(crate) fn message_to_yaml(message: &dyn MessageDyn) -> Value { + let mut ret = Mapping::new(); + + for field in message.descriptor_dyn().fields() { + let key = if field.name().is_empty() { + Value::from(field.number()) + } else { + Value::from(field.name()) + }; + let field_type = field + .proto() + .type_ + .map(|t| t.enum_value_or(TYPE_BYTES)) + .unwrap_or(TYPE_BYTES); + + let value = match field.get_reflect(message) { + ReflectFieldRef::Optional(x) => { + if let Some(x) = x.value() { + Self::primitive_type_to_yaml(x, field_type) + } else { + Value::Null + } + } + ReflectFieldRef::Repeated(x) => Value::Sequence( + x.into_iter() + .map(|x| Self::primitive_type_to_yaml(x, field_type)) + 
.collect(), + ), + ReflectFieldRef::Map(x) => Value::Mapping( + x.into_iter() + .map(|(k, v)| { + ( + Self::primitive_type_to_yaml(k, field_type), + Self::primitive_type_to_yaml(v, field_type), + ) + }) + .collect(), + ), + }; + ret.insert(key, value); + } + Value::Mapping(ret) + } + + fn create_descriptor( + data: &[u8], + existing: MessageDescriptor, + ) -> Result { + let proto = Self::create_descriptor_proto(data, existing, "Unknown".to_string())?; + + let descriptor = { + let mut proto_file = FileDescriptorProto::new(); + proto_file.message_type.push(proto); + + FileDescriptor::new_dynamic(proto_file, &[]) + // FIXME + .unwrap() + .messages() + .next() + .unwrap() + }; + + Ok(descriptor) + } + + fn create_descriptor_proto( + data: &[u8], + existing: MessageDescriptor, + name: String, + ) -> Result { + let message = existing + .parse_from_bytes(data) + .map_err(|e| PrettifyError::Generic(format!("failed to parse protobuf: {e}")))?; + + let mut descriptor = existing.proto().clone(); + + let mut field_groups: BTreeMap> = BTreeMap::new(); + for (field_number, value) in message.unknown_fields_dyn().iter() { + field_groups.entry(field_number).or_default().push(value); + } + + for (field_index, field_values) in field_groups.into_iter() { + let mut add_int = |typ: Type| { + descriptor.field.push(FieldDescriptorProto { + number: Some(field_index as i32), + type_: Some(EnumOrUnknown::from(typ)), + ..Default::default() + }); + }; + match field_values[0] { + // We can't use float/double here because of NaN handling. 
+ UnknownValueRef::Fixed32(_) => add_int(TYPE_FIXED32), + UnknownValueRef::Fixed64(_) => add_int(TYPE_FIXED64), + UnknownValueRef::Varint(_) => add_int(TYPE_UINT64), + UnknownValueRef::LengthDelimited(data) => { + let field_values = field_values + .iter() + .map(|x| match x { + UnknownValueRef::LengthDelimited(data) => Ok(*data), + _ => Err(PrettifyError::Generic( + "varying types in protobuf".to_string(), + )), + }) + .collect::, PrettifyError>>()?; + + match Self::guess_field_type(&field_values, &name, field_index) { + GuessedFieldType::String => add_int(TYPE_STRING), + GuessedFieldType::Unknown => add_int(TYPE_BYTES), + GuessedFieldType::Message(m) => { + descriptor.field.push(FieldDescriptorProto { + number: Some(field_index as i32), + type_name: Some(format!(".{}.{}", name, m.name())), + type_: Some(EnumOrUnknown::from(Type::TYPE_MESSAGE)), + ..Default::default() + }); + descriptor.nested_type.push(*m); + } + } + } + } + if field_values.len() > 1 { + descriptor + .field + .last_mut() + .expect("we just added this field") + .set_label(LABEL_REPEATED); + } + } + + descriptor.set_name(name); + Ok(descriptor) + } + + fn guess_field_type(values: &[&[u8]], name: &str, field_index: u32) -> GuessedFieldType { + if values.iter().all(|data| { + std::str::from_utf8(data).is_ok_and(|s| { + s.chars() + .all(|c| c.is_ascii_graphic() || c.is_ascii_whitespace()) + }) + }) { + return GuessedFieldType::String; + } + + // Try to parse as a nested message + let name = format!("{name}.unknown_field_{field_index}"); + if let Ok(mut descriptor) = + { Self::create_descriptor_proto(values[0], Empty::descriptor(), name) } + { + if values + .iter() + .skip(1) + .all(|data| descriptor.descriptor_dyn().parse_from_bytes(data).is_ok()) + { + descriptor.set_name(format!("unknown_field_{field_index}")); + return GuessedFieldType::Message(Box::new(descriptor)); + } + } + + GuessedFieldType::Unknown } // Helper method to apply regex replacements to the YAML output @@ -132,223 +488,238 @@ impl 
Protobuf { } result } +} + +#[cfg(test)] +mod tests { + use super::*; - // Convert a single protobuf value to YAML - fn convert_value_to_yaml(&self, value: &UnknownValueRef) -> Result { - match value { - UnknownValueRef::Fixed32(v) => Ok(Value::Tagged(Box::new(TaggedValue { - tag: Tag::new("fixed32"), - value: Value::Number(Number::from(*v)), - }))), - UnknownValueRef::Fixed64(v) => Ok(Value::Tagged(Box::new(TaggedValue { - tag: Tag::new("fixed64"), - value: Value::Number(Number::from(*v)), - }))), - UnknownValueRef::Varint(v) => Ok(Value::Tagged(Box::new(TaggedValue { - tag: Tag::new("varint"), - value: Value::Number(Number::from(*v)), - }))), - UnknownValueRef::LengthDelimited(v) => self.process_length_delimited(v), + const VARINT_PROTO: &[u8] = &[0x08, 0x96, 0x01]; + const VARINT_YAML: &str = "1: 150"; + const VARINT_NEG_PROTO: &[u8] = &[0x08, 0x0B]; + const VARINT_NEG_YAML: &str = "1: 11 # signed: -6\n"; + const REPEATED_NUMERIC_PROTO: &[u8] = &[0x08, 0x01, 0x08, 0x02, 0x08, 0x03]; + const REPEATED_NUMERIC_YAML: &str = "1:\n- 1 # signed: -1\n- 2\n- 3 # signed: -2\n"; + const REPEATED_PACKED_PROTO: &[u8] = &[0x32, 0x06, 0x03, 0x8E, 0x02, 0x9E, 0xA7, 0x05]; + const REPEATED_PACKED_YAML: &str = "6: !Binary 038e029ea705\n"; + const FIXED32_PROTO: &[u8] = &[0x15, 0x00, 0x00, 0x80, 0xBF]; + const FIXED32_YAML: &str = "2: 3212836864 # float: -1, i32: -1082130432\n"; + const STRING_PROTO: &[u8] = &[0x22, 0x05, 0x68, 0x65, 0x6C, 0x6C, 0x6F]; + const STRING_YAML: &str = "4: hello\n"; + const NESTED_MESSAGE_PROTO: &[u8] = &[0x2A, 0x02, 0x08, 0x2A]; + const NESTED_MESSAGE_YAML: &str = "5:\n 1: 42\n"; + + mod reencode { + use super::*; + + #[test] + fn test_varint() { + let result = Protobuf.reencode(VARINT_YAML, VARINT_PROTO).unwrap(); + assert_eq!(result, VARINT_PROTO); } - } - // Process a length-delimited value (string, nested message, or binary) - fn process_length_delimited(&self, data: &[u8]) -> Result { - // Try to parse as a string first - if let Ok(s) = 
std::str::from_utf8(data) { - if s.chars() - .all(|c| c.is_ascii_graphic() || c.is_ascii_whitespace()) - { - return Ok(Value::String(s.to_string())); - } + #[test] + fn test_varint_signed() { + let result = Protobuf + .reencode(VARINT_NEG_YAML, VARINT_NEG_PROTO) + .unwrap(); + assert_eq!(result, VARINT_NEG_PROTO); } - // Try to parse as a nested message - let mut nested_message = Self::create_dynamic_message()?; - if let Ok(nested_yaml) = self.parse_protobuf(data.to_vec(), &mut nested_message) { - return Ok(nested_yaml); + #[test] + fn test_repeated_numeric() { + let result = Protobuf + .reencode(REPEATED_NUMERIC_YAML, REPEATED_NUMERIC_PROTO) + .unwrap(); + assert_eq!(result, REPEATED_NUMERIC_PROTO); } - // If not a string or nested message, format as binary data with YAML tag - let hex_string = Self::bytes_to_hex_string(data); - Ok(Value::Tagged(Box::new(TaggedValue { - tag: Tag::new("Binary"), - value: Value::String(hex_string), - }))) - } + #[test] + fn test_packed_repeated() { + let result = Protobuf + .reencode(REPEATED_PACKED_YAML, REPEATED_PACKED_PROTO) + .unwrap(); + assert_eq!(result, REPEATED_PACKED_PROTO); + } - // Helper method to parse protobuf data and process unknown fields to convert to YAML - fn parse_protobuf( - &self, - data: Vec, - dynamic_message: &mut Box, - ) -> Result { - // Parse the protobuf data using the provided dynamic message - dynamic_message - .merge_from_bytes_dyn(&data) - .map_err(|e| PrettifyError::Generic(format!("Failed to parse protobuf: {}", e)))?; - - // Get unknown fields & group by field id. 
- let unknown_fields = dynamic_message.unknown_fields_dyn(); - let mut field_groups: HashMap> = HashMap::new(); - for (field_number, value) in unknown_fields.iter() { - field_groups.entry(field_number).or_default().push(value); + // Fixed32 tests + #[test] + fn test_fixed32() { + let result = Protobuf.reencode(FIXED32_YAML, FIXED32_PROTO).unwrap(); + assert_eq!(result, FIXED32_PROTO); } - // Convert unknown fields to a YAML value - let mut root = Mapping::new(); - for (field_number, values) in field_groups { - let key = Value::Number(Number::from(field_number)); - let value = if values.len() == 1 { - self.convert_value_to_yaml(&values[0])? - } else { - Value::Sequence( - values - .into_iter() - .map(|x| self.convert_value_to_yaml(&x)) - .collect::>()?, - ) - }; - root.insert(key, value); + // String field test + #[test] + fn test_string_field() { + let result = Protobuf.reencode(STRING_YAML, STRING_PROTO).unwrap(); + assert_eq!(result, STRING_PROTO); } - Ok(Value::Mapping(root)) - } -} -#[cfg(test)] -mod tests { - use super::*; + #[test] + fn test_nested_message() { + let result = Protobuf + .reencode(NESTED_MESSAGE_YAML, NESTED_MESSAGE_PROTO) + .unwrap(); + assert_eq!(result, NESTED_MESSAGE_PROTO); + } - // Varint tests - #[test] - fn test_varint() { - // From docs: field 1: varint 150 - const PROTO: &[u8] = &[0x08, 0x96, 0x01]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "1: 150\n"); - } + #[test] + fn test_new_nested_message() { + let result = Protobuf + .reencode(NESTED_MESSAGE_YAML, FIXED32_PROTO) + .unwrap(); + assert_eq!(result, NESTED_MESSAGE_PROTO); + } - #[test] - fn test_varint_signed() { - // field 1: varint 11 (zigzag encoded: -6) - const PROTO: &[u8] = &[0x08, 0x0B]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "1: 11 # signed: -6\n"); + #[test] + fn test_new_string() { + let result = Protobuf.reencode(STRING_YAML, FIXED32_PROTO).unwrap(); + assert_eq!(result, STRING_PROTO); + } } 
- #[test] - fn test_repeated_numeric() { - // Example based on docs: repeated field 1 with values 1, 2, 3 - const PROTO: &[u8] = &[0x08, 0x01, 0x08, 0x02, 0x08, 0x03]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "1:\n- 1 # signed: -1\n- 2\n- 3 # signed: -2\n"); - } + mod prettify { + use super::*; - #[test] - fn test_packed_repeated() { - // From docs: "message Test5 { repeated int32 f = 6 [packed=true]; }" - // With values 3, 270, and 86942 - const PROTO: &[u8] = &[0x32, 0x06, 0x03, 0x8E, 0x02, 0x9E, 0xA7, 0x05]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - // Our implementation shows this as binary data as we don't have schema info - assert_eq!(result, "6: !Binary 038e029ea705\n"); - } + // Varint tests + #[test] + fn test_varint() { + // From docs: field 1: varint 150 + const PROTO: &[u8] = &[0x08, 0x96, 0x01]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "1: 150\n"); + } - // Fixed32 tests - #[test] - fn test_fixed32() { - const PROTO: &[u8] = &[0x15, 0x00, 0x00, 0x80, 0xBF]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "2: 3212836864 # float: -1, i32: -1082130432\n"); - } + #[test] + fn test_varint_signed() { + // field 1: varint 11 (zigzag encoded: -6) + const PROTO: &[u8] = &[0x08, 0x0B]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "1: 11 # signed: -6\n"); + } - #[test] - fn test_fixed32_positive() { - const PROTO: &[u8] = &[0x15, 0xD0, 0x0F, 0x49, 0x40]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "2: 1078530000 # float: 3.14159\n"); - } + #[test] + fn test_repeated_numeric() { + // Example based on docs: repeated field 1 with values 1, 2, 3 + const PROTO: &[u8] = &[0x08, 0x01, 0x08, 0x02, 0x08, 0x03]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "1:\n- 1 # signed: -1\n- 2\n- 3 # signed: -2\n"); + } - #[test] - fn 
test_fixed32_no_float() { - const PROTO: &[u8] = &[0x15, 0xFF, 0xFF, 0xFF, 0xFF]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "2: 4294967295 # i32: -1\n"); - } + #[test] + fn test_packed_repeated() { + // From docs: "message Test5 { repeated int32 f = 6 [packed=true]; }" + // With values 3, 270, and 86942 + const PROTO: &[u8] = &[0x32, 0x06, 0x03, 0x8E, 0x02, 0x9E, 0xA7, 0x05]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + // Our implementation shows this as binary data as we don't have schema info + assert_eq!(result, "6: !Binary 038e029ea705\n"); + } - #[test] - fn test_fixed32_positive_no_float() { - const PROTO: &[u8] = &[0x15, 0x01, 0x00, 0xC0, 0x7F]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "2: 2143289345\n"); - } + // Fixed32 tests + #[test] + fn test_fixed32() { + const PROTO: &[u8] = &[0x15, 0x00, 0x00, 0x80, 0xBF]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "2: 3212836864 # float: -1, i32: -1082130432\n"); + } - // Fixed64 tests - #[test] - fn test_fixed64() { - const PROTO: &[u8] = &[0x19, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0xBF]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!( - result, - "3: 13830554455654793216 # double: -1, i64: -4616189618054758400\n" - ); - } + #[test] + fn test_fixed32_positive() { + const PROTO: &[u8] = &[0x15, 0xD0, 0x0F, 0x49, 0x40]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "2: 1078530000 # float: 3.14159\n"); + } - #[test] - fn test_fixed64_positive() { - const PROTO: &[u8] = &[0x19, 0x6E, 0x86, 0x1B, 0xF0, 0xF9, 0x21, 0x09, 0x40]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "3: 4614256650576692846 # double: 3.14159\n"); - } + #[test] + fn test_fixed32_no_float() { + const PROTO: &[u8] = &[0x15, 0xFF, 0xFF, 0xFF, 0xFF]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + 
assert_eq!(result, "2: 4294967295 # i32: -1\n"); + } - #[test] - fn test_fixed64_no_float() { - const PROTO: &[u8] = &[0x19, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "3: 18446744073709551615 # i64: -1\n"); - } + #[test] + fn test_fixed32_positive_no_float() { + const PROTO: &[u8] = &[0x15, 0x01, 0x00, 0xC0, 0x7F]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "2: 2143289345\n"); + } - #[test] - fn test_fixed64_positive_no_float() { - const PROTO: &[u8] = &[0x19, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF8, 0x7F]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "3: 9221120237041090561\n"); - } + // Fixed64 tests + #[test] + fn test_fixed64() { + const PROTO: &[u8] = &[0x19, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0xBF]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!( + result, + "3: 13830554455654793216 # double: -1, i64: -4616189618054758400\n" + ); + } - // String field test - #[test] - fn test_string_field() { - // field 4: string "hello" (LEN type field from docs) - const PROTO: &[u8] = &[0x22, 0x05, 0x68, 0x65, 0x6C, 0x6C, 0x6F]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "4: hello\n"); - } + #[test] + fn test_fixed64_positive() { + const PROTO: &[u8] = &[0x19, 0x6E, 0x86, 0x1B, 0xF0, 0xF9, 0x21, 0x09, 0x40]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "3: 4614256650576692846 # double: 3.14159\n"); + } - #[test] - fn test_nested_message() { - // From docs about nested messages: field 5 with a nested message containing field 1: varint 42 - const PROTO: &[u8] = &[0x2A, 0x02, 0x08, 0x2A]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "5:\n 1: 42\n"); - } + #[test] + fn test_fixed64_no_float() { + const PROTO: &[u8] = &[0x19, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF]; + let 
result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "3: 18446744073709551615 # i64: -1\n"); + } - #[test] - fn test_binary_data() { - // Binary data example: field 6: binary data [0x01, 0x02, 0x03] - const PROTO: &[u8] = &[0x32, 0x03, 0x01, 0x02, 0x03]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "6: !Binary '010203'\n"); - } + #[test] + fn test_fixed64_positive_no_float() { + const PROTO: &[u8] = &[0x19, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF8, 0x7F]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "3: 9221120237041090561\n"); + } - #[test] - fn test_invalid_protobuf() { - let result = Protobuf.prettify(vec![0xFF, 0xFF]); - assert!(result.is_err()); - } + // String field test + #[test] + fn test_string_field() { + // field 4: string "hello" (LEN type field from docs) + const PROTO: &[u8] = &[0x22, 0x05, 0x68, 0x65, 0x6C, 0x6C, 0x6F]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "4: hello\n"); + } + + #[test] + fn test_nested_message() { + // From docs about nested messages: field 5 with a nested message containing field 1: varint 42 + const PROTO: &[u8] = &[0x2A, 0x02, 0x08, 0x2A]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "5:\n 1: 42\n"); + } - #[test] - fn test_empty_protobuf() { - let result = Protobuf.prettify(vec![]); - assert!(result.is_err()); + #[test] + fn test_nested_twice() { + const PROTO: &[u8] = &[0x2A, 0x04, 0x2A, 0x02, 0x08, 0x2A]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "5:\n 5:\n 1: 42\n"); + } + + #[test] + fn test_binary_data() { + // Binary data example: field 6: binary data [0x01, 0x02, 0x03] + const PROTO: &[u8] = &[0x32, 0x03, 0x01, 0x02, 0x03]; + let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); + assert_eq!(result, "6: !Binary '010203'\n"); + } + + #[test] + fn test_invalid_protobuf() { + let result = Protobuf.prettify(vec![0xFF, 
0xFF]); + assert!(result.is_err()); + } + + #[test] + fn test_empty_protobuf() { + let result = Protobuf.prettify(vec![]); + assert!(result.is_err()); + } } } From 013fd9a105aa97a5330270b1508802916625bb37 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sun, 30 Mar 2025 14:16:22 +0200 Subject: [PATCH 06/26] tests++ --- src/contentviews/hex_dump.rs | 8 +- src/contentviews/hex_stream.rs | 8 +- src/contentviews/mod.rs | 2 +- src/contentviews/msgpack.rs | 9 +- src/contentviews/protobuf.rs | 339 ++++++++++----------------------- 5 files changed, 113 insertions(+), 253 deletions(-) diff --git a/src/contentviews/hex_dump.rs b/src/contentviews/hex_dump.rs index b5aa9c8c..cb79f842 100644 --- a/src/contentviews/hex_dump.rs +++ b/src/contentviews/hex_dump.rs @@ -8,7 +8,7 @@ impl Prettify for HexDump { "Hex Dump" } - fn prettify(&self, data: Vec) -> Result { + fn prettify(&self, data: &[u8]) -> Result { Ok(format!( "{:?}", data.hex_conf(HexConfig { @@ -30,8 +30,7 @@ mod tests { #[test] fn test_hexdump_deserialize() { - let data = b"abcd".to_vec(); - let result = HexDump.prettify(data).unwrap(); + let result = HexDump.prettify(b"abcd").unwrap(); assert_eq!( result, "0000: 61 62 63 64 abcd" @@ -40,8 +39,7 @@ mod tests { #[test] fn test_hexdump_deserialize_empty() { - let data = vec![]; - let result = HexDump.prettify(data).unwrap(); + let result = HexDump.prettify(b"").unwrap(); assert_eq!(result, ""); } } diff --git a/src/contentviews/hex_stream.rs b/src/contentviews/hex_stream.rs index a242cfda..fdedd278 100644 --- a/src/contentviews/hex_stream.rs +++ b/src/contentviews/hex_stream.rs @@ -9,7 +9,7 @@ impl Prettify for HexStream { "Hex Stream" } - fn prettify(&self, data: Vec) -> Result { + fn prettify(&self, data: &[u8]) -> Result { Ok(data .hex_conf(HexConfig { title: false, @@ -40,15 +40,13 @@ mod tests { #[test] fn test_hexstream_deserialize() { - let data = b"foo".to_vec(); - let result = HexStream.prettify(data).unwrap(); + let result = 
HexStream.prettify(b"foo").unwrap(); assert_eq!(result, "666f6f"); } #[test] fn test_hexstream_deserialize_empty() { - let data = vec![]; - let result = HexStream.prettify(data).unwrap(); + let result = HexStream.prettify(b"").unwrap(); assert_eq!(result, ""); } diff --git a/src/contentviews/mod.rs b/src/contentviews/mod.rs index ce260373..c4b5d220 100644 --- a/src/contentviews/mod.rs +++ b/src/contentviews/mod.rs @@ -48,7 +48,7 @@ pub trait Prettify: Send + Sync { self.name().to_lowercase().replace(" ", "_") } - fn prettify(&self, data: Vec) -> Result; + fn prettify(&self, data: &[u8]) -> Result; } pub trait Reencode: Send + Sync { diff --git a/src/contentviews/msgpack.rs b/src/contentviews/msgpack.rs index 2b057b77..5b0b3e9d 100644 --- a/src/contentviews/msgpack.rs +++ b/src/contentviews/msgpack.rs @@ -9,7 +9,7 @@ impl Prettify for MsgPack { "MsgPack" } - fn prettify(&self, data: Vec) -> Result { + fn prettify(&self, data: &[u8]) -> Result { // Deserialize MsgPack to a serde_yaml::Value let value: serde_yaml::Value = decode::from_slice(&data) .map_err(|e| PrettifyError::Generic(format!("Failed to deserialize MsgPack: {}", e)))?; @@ -68,7 +68,7 @@ tags: #[test] fn test_msgpack_deserialize() { - let result = MsgPack.prettify(TEST_MSGPACK.to_vec()).unwrap(); + let result = MsgPack.prettify(TEST_MSGPACK).unwrap(); assert_eq!(result, TEST_YAML); } @@ -106,11 +106,8 @@ tags: #[test] fn test_msgpack_roundtrip() { - // Start with the hardcoded MsgPack data - let msgpack_data = TEST_MSGPACK.to_vec(); - // Deserialize to YAML - let yaml_result = MsgPack.prettify(msgpack_data).unwrap(); + let yaml_result = MsgPack.prettify(TEST_MSGPACK).unwrap(); // Serialize back to MsgPack let result = MsgPack.reencode(&yaml_result, &[]).unwrap(); diff --git a/src/contentviews/protobuf.rs b/src/contentviews/protobuf.rs index 4bc162a7..0392305d 100644 --- a/src/contentviews/protobuf.rs +++ b/src/contentviews/protobuf.rs @@ -24,10 +24,21 @@ use std::num::ParseIntError; use std::ops::Deref; 
use std::str::FromStr; -// Define static regular expressions for better performance -static FIXED32_RE: Lazy = Lazy::new(|| Regex::new(r"!fixed32 (\d+)").unwrap()); -static FIXED64_RE: Lazy = Lazy::new(|| Regex::new(r"!fixed64 (\d+)").unwrap()); -static VARINT_RE: Lazy = Lazy::new(|| Regex::new(r"!varint (\d+)").unwrap()); +mod tags { + use std::cell::LazyCell; + use once_cell::sync::Lazy; + use regex::Regex; + use serde_yaml::value::Tag; + + pub(super) const BINARY: LazyCell = LazyCell::new(|| Tag::new("binary")); + pub(super) const VARINT: LazyCell = LazyCell::new(|| Tag::new("varint")); + pub(super) const FIXED32: LazyCell = LazyCell::new(|| Tag::new("fixed32")); + pub(super) const FIXED64: LazyCell = LazyCell::new(|| Tag::new("fixed64")); + + pub(super) const VARINT_RE: Lazy = Lazy::new(|| Regex::new(&format!(r"{} (\d+)", *VARINT)).unwrap()); + pub(super) const FIXED32_RE: Lazy = Lazy::new(|| Regex::new(&format!(r"{} (\d+)", *FIXED32)).unwrap()); + pub(super) const FIXED64_RE: Lazy = Lazy::new(|| Regex::new(&format!(r"{} (\d+)", *FIXED64)).unwrap()); +} pub struct Protobuf; @@ -42,7 +53,7 @@ impl Prettify for Protobuf { "Protocol Buffer" } - fn prettify(&self, data: Vec) -> Result { + fn prettify(&self, data: &[u8]) -> Result { // Check if data is empty first if data.is_empty() { return Err(PrettifyError::Generic("Empty protobuf data".to_string())); @@ -69,9 +80,9 @@ impl Prettify for Protobuf { impl Reencode for Protobuf { fn reencode(&self, data: &str, original: &[u8]) -> Result, ReencodeError> { - let existing = Empty::descriptor(); - let descriptor = Self::create_descriptor(original, existing) - .map_err(|e| ReencodeError::InvalidFormat(format!("{e}")))?; + let descriptor = Empty::descriptor(); + //let descriptor = Self::create_descriptor(original, existing) + // .map_err(|e| ReencodeError::InvalidFormat(format!("{e}")))?; let message = descriptor.new_instance(); let value: Value = serde_yaml::from_str(data) @@ -84,15 +95,15 @@ impl Reencode for Protobuf { 
fn tag_number(value: Value, field_type: Type) -> Value { match field_type { TYPE_UINT64 => Tagged(Box::new(TaggedValue { - tag: Tag::new("varint"), + tag: tags::VARINT.clone(), value, })), TYPE_FIXED64 => Tagged(Box::new(TaggedValue { - tag: Tag::new("fixed64"), + tag: tags::FIXED64.clone(), value, })), TYPE_FIXED32 => Tagged(Box::new(TaggedValue { - tag: Tag::new("fixed32"), + tag: tags::FIXED32.clone(), value, })), _ => value, @@ -195,7 +206,8 @@ impl Protobuf { return Ok(()); } Tagged(t) => { - if t.tag == "!Binary" { + // t.tag doesn't work for Match statements + if t.tag == *tags::BINARY { let value = match t.value { Value::String(s) => s, _ => { @@ -210,7 +222,19 @@ impl Protobuf { .collect::, ParseIntError>>() .map_err(|e| ReencodeError::InvalidFormat(e.to_string()))?; UnknownValue::LengthDelimited(value) - } else { + } else if t.tag == *tags::FIXED32 { + let value = match t.value { + Value::Number(s) if s.as_u64().is_some() => s.as_u64().unwrap(), + _ => return Err(ReencodeError::InvalidFormat("fixed32 data is not a u32".to_string())) + }; + UnknownValue::Fixed32(value as u32) + } else if t.tag == *tags::FIXED64 { + let value = match t.value { + Value::Number(s) if s.as_u64().is_some() => s.as_u64().unwrap(), + _ => return Err(ReencodeError::InvalidFormat("fixed64 data is not a u64".to_string())) + }; + UnknownValue::Fixed64(value) + } else{ log::info!("Unexpected YAML tag {}, discarding.", t.tag); return Self::add_field(message, field_num, t.value); } @@ -252,7 +276,7 @@ impl Protobuf { ReflectValueRef::Bool(x) => Value::from(x), ReflectValueRef::String(x) => Value::from(x), ReflectValueRef::Bytes(x) => Value::Tagged(Box::new(TaggedValue { - tag: Tag::new("Binary"), + tag: tags::BINARY.clone(), value: Value::String(Self::bytes_to_hex_string(x)), })), ReflectValueRef::Enum(descriptor, i) => descriptor @@ -426,41 +450,41 @@ impl Protobuf { // Helper method to apply regex replacements to the YAML output fn apply_replacements(yaml_str: &str) -> Result { // 
Replace !fixed32 tags with comments showing float and i32 interpretations - let with_fixed32 = FIXED32_RE.replace_all(yaml_str, |caps: &Captures| { + let with_fixed32 = tags::FIXED32_RE.replace_all(yaml_str, |caps: &Captures| { let value = caps[1].parse::().unwrap_or_default(); let float_value = f32::from_bits(value); let i32_value = value as i32; if !float_value.is_nan() && float_value < 0.0 { - format!("{} # float: {}, i32: {}", value, float_value, i32_value) + format!("{} {} # float: {}, i32: {}", *tags::FIXED32, value, float_value, i32_value) } else if !float_value.is_nan() { - format!("{} # float: {}", value, float_value) + format!("{} {} # float: {}", *tags::FIXED32, value, float_value) } else if i32_value < 0 { - format!("{} # i32: {}", value, i32_value) + format!("{} {} # i32: {}", *tags::FIXED32, value, i32_value) } else { - value.to_string() + format!("{} {}", *tags::FIXED32, value) } }); // Replace !fixed64 tags with comments showing double and i64 interpretations - let with_fixed64 = FIXED64_RE.replace_all(&with_fixed32, |caps: &Captures| { + let with_fixed64 = tags::FIXED64_RE.replace_all(&with_fixed32, |caps: &Captures| { let value = caps[1].parse::().unwrap_or_default(); let double_value = f64::from_bits(value); let i64_value = value as i64; if !double_value.is_nan() && double_value < 0.0 { - format!("{} # double: {}, i64: {}", value, double_value, i64_value) + format!("{} {} # double: {}, i64: {}", *tags::FIXED64, value, double_value, i64_value) } else if !double_value.is_nan() { - format!("{} # double: {}", value, double_value) + format!("{} {} # double: {}", *tags::FIXED64, value, double_value) } else if i64_value < 0 { - format!("{} # i64: {}", value, i64_value) + format!("{} {} # i64: {}", *tags::FIXED64, value, i64_value) } else { - value.to_string() + format!("{} {}", *tags::FIXED64, value) } }); // Replace !varint tags with comments showing signed interpretation if different - let with_varint = VARINT_RE.replace_all(&with_fixed64, |caps: 
&Captures| { + let with_varint = tags::VARINT_RE.replace_all(&with_fixed64, |caps: &Captures| { let unsigned_value = caps[1].parse::().unwrap_or_default(); let i64_zigzag = Self::decode_zigzag64(unsigned_value); @@ -494,232 +518,75 @@ impl Protobuf { mod tests { use super::*; - const VARINT_PROTO: &[u8] = &[0x08, 0x96, 0x01]; - const VARINT_YAML: &str = "1: 150"; - const VARINT_NEG_PROTO: &[u8] = &[0x08, 0x0B]; - const VARINT_NEG_YAML: &str = "1: 11 # signed: -6\n"; - const REPEATED_NUMERIC_PROTO: &[u8] = &[0x08, 0x01, 0x08, 0x02, 0x08, 0x03]; - const REPEATED_NUMERIC_YAML: &str = "1:\n- 1 # signed: -1\n- 2\n- 3 # signed: -2\n"; - const REPEATED_PACKED_PROTO: &[u8] = &[0x32, 0x06, 0x03, 0x8E, 0x02, 0x9E, 0xA7, 0x05]; - const REPEATED_PACKED_YAML: &str = "6: !Binary 038e029ea705\n"; - const FIXED32_PROTO: &[u8] = &[0x15, 0x00, 0x00, 0x80, 0xBF]; - const FIXED32_YAML: &str = "2: 3212836864 # float: -1, i32: -1082130432\n"; - const STRING_PROTO: &[u8] = &[0x22, 0x05, 0x68, 0x65, 0x6C, 0x6C, 0x6F]; - const STRING_YAML: &str = "4: hello\n"; - const NESTED_MESSAGE_PROTO: &[u8] = &[0x2A, 0x02, 0x08, 0x2A]; - const NESTED_MESSAGE_YAML: &str = "5:\n 1: 42\n"; - - mod reencode { - use super::*; - - #[test] - fn test_varint() { - let result = Protobuf.reencode(VARINT_YAML, VARINT_PROTO).unwrap(); - assert_eq!(result, VARINT_PROTO); - } - - #[test] - fn test_varint_signed() { - let result = Protobuf - .reencode(VARINT_NEG_YAML, VARINT_NEG_PROTO) - .unwrap(); - assert_eq!(result, VARINT_NEG_PROTO); - } + macro_rules! 
test_roundtrip { + ($name:ident,$proto:literal,$yaml:literal) => { + mod $name { + use super::*; - #[test] - fn test_repeated_numeric() { - let result = Protobuf - .reencode(REPEATED_NUMERIC_YAML, REPEATED_NUMERIC_PROTO) - .unwrap(); - assert_eq!(result, REPEATED_NUMERIC_PROTO); - } + pub(super) const PROTO: &[u8] = $proto; + pub(super) const YAML: &str = $yaml; - #[test] - fn test_packed_repeated() { - let result = Protobuf - .reencode(REPEATED_PACKED_YAML, REPEATED_PACKED_PROTO) - .unwrap(); - assert_eq!(result, REPEATED_PACKED_PROTO); - } + #[test] + fn prettify() { + let result = Protobuf.prettify(PROTO).unwrap(); + assert_eq!(result, YAML); + } - // Fixed32 tests - #[test] - fn test_fixed32() { - let result = Protobuf.reencode(FIXED32_YAML, FIXED32_PROTO).unwrap(); - assert_eq!(result, FIXED32_PROTO); - } + #[test] + fn reencode() { + let result = Protobuf.reencode(YAML, PROTO).unwrap(); + assert_eq!(result, PROTO); + } + } + }; + } - // String field test - #[test] - fn test_string_field() { - let result = Protobuf.reencode(STRING_YAML, STRING_PROTO).unwrap(); - assert_eq!(result, STRING_PROTO); - } + test_roundtrip!(varint, b"\x08\x96\x01","1: 150\n"); + test_roundtrip!(varint_negative, b"\x08\x0B","1: 11 # signed: -6\n"); + test_roundtrip!(binary, b"\x32\x03\x01\x02\x03","6: !binary '010203'\n"); + test_roundtrip!(string, b"\x0A\x05\x68\x65\x6C\x6C\x6F","1: hello\n"); + test_roundtrip!(nested, b"\x2A\x02\x08\x2A","5:\n 1: 42\n"); + test_roundtrip!(nested_twice, b"\x2A\x04\x2A\x02\x08\x2A","5:\n 5:\n 1: 42\n"); + test_roundtrip!(fixed64, b"\x19\x00\x00\x00\x00\x00\x00\xF0\xBF","3: !fixed64 13830554455654793216 # double: -1, i64: -4616189618054758400\n"); + test_roundtrip!(fixed64_positive, b"\x19\x6E\x86\x1B\xF0\xF9\x21\x09\x40","3: !fixed64 4614256650576692846 # double: 3.14159\n"); + test_roundtrip!(fixed64_no_float, b"\x19\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF","3: !fixed64 18446744073709551615 # i64: -1\n"); + test_roundtrip!(fixed64_positive_no_float, 
b"\x19\x01\x00\x00\x00\x00\x00\xF8\x7F","3: !fixed64 9221120237041090561\n"); + test_roundtrip!(fixed32, b"\x15\x00\x00\x80\xBF","2: !fixed32 3212836864 # float: -1, i32: -1082130432\n"); + test_roundtrip!(fixed32_positive, b"\x15\xD0\x0F\x49\x40","2: !fixed32 1078530000 # float: 3.14159\n"); + test_roundtrip!(fixed32_no_float, b"\x15\xFF\xFF\xFF\xFF","2: !fixed32 4294967295 # i32: -1\n"); + test_roundtrip!(fixed32_positive_no_float, b"\x15\x01\x00\xC0\x7F","2: !fixed32 2143289345\n"); + // From docs: "message Test5 { repeated int32 f = 6 [packed=true]; }" + // With values 3, 270, and 86942 + test_roundtrip!(repeated_packed, b"\x32\x06\x03\x8E\x02\x9E\xA7\x05","6: !binary 038e029ea705\n"); + test_roundtrip!(repeated_varint, b"\x08\x01\x08\x02\x08\x03","1:\n- 1 # signed: -1\n- 2\n- 3 # signed: -2\n"); - #[test] - fn test_nested_message() { - let result = Protobuf - .reencode(NESTED_MESSAGE_YAML, NESTED_MESSAGE_PROTO) - .unwrap(); - assert_eq!(result, NESTED_MESSAGE_PROTO); - } + mod reencode { + use super::*; #[test] - fn test_new_nested_message() { + fn reencode_new_nested_message() { let result = Protobuf - .reencode(NESTED_MESSAGE_YAML, FIXED32_PROTO) + .reencode(nested::YAML, fixed32::PROTO) .unwrap(); - assert_eq!(result, NESTED_MESSAGE_PROTO); + assert_eq!(result, nested::PROTO); } #[test] - fn test_new_string() { - let result = Protobuf.reencode(STRING_YAML, FIXED32_PROTO).unwrap(); - assert_eq!(result, STRING_PROTO); + fn new_string_attr() { + let result = Protobuf.reencode(string::YAML, varint::PROTO).unwrap(); + assert_eq!(result, string::PROTO); } } - mod prettify { - use super::*; - - // Varint tests - #[test] - fn test_varint() { - // From docs: field 1: varint 150 - const PROTO: &[u8] = &[0x08, 0x96, 0x01]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "1: 150\n"); - } - - #[test] - fn test_varint_signed() { - // field 1: varint 11 (zigzag encoded: -6) - const PROTO: &[u8] = &[0x08, 0x0B]; - let result = 
Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "1: 11 # signed: -6\n"); - } - - #[test] - fn test_repeated_numeric() { - // Example based on docs: repeated field 1 with values 1, 2, 3 - const PROTO: &[u8] = &[0x08, 0x01, 0x08, 0x02, 0x08, 0x03]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "1:\n- 1 # signed: -1\n- 2\n- 3 # signed: -2\n"); - } - - #[test] - fn test_packed_repeated() { - // From docs: "message Test5 { repeated int32 f = 6 [packed=true]; }" - // With values 3, 270, and 86942 - const PROTO: &[u8] = &[0x32, 0x06, 0x03, 0x8E, 0x02, 0x9E, 0xA7, 0x05]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - // Our implementation shows this as binary data as we don't have schema info - assert_eq!(result, "6: !Binary 038e029ea705\n"); - } - - // Fixed32 tests - #[test] - fn test_fixed32() { - const PROTO: &[u8] = &[0x15, 0x00, 0x00, 0x80, 0xBF]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "2: 3212836864 # float: -1, i32: -1082130432\n"); - } - - #[test] - fn test_fixed32_positive() { - const PROTO: &[u8] = &[0x15, 0xD0, 0x0F, 0x49, 0x40]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "2: 1078530000 # float: 3.14159\n"); - } - - #[test] - fn test_fixed32_no_float() { - const PROTO: &[u8] = &[0x15, 0xFF, 0xFF, 0xFF, 0xFF]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "2: 4294967295 # i32: -1\n"); - } - - #[test] - fn test_fixed32_positive_no_float() { - const PROTO: &[u8] = &[0x15, 0x01, 0x00, 0xC0, 0x7F]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "2: 2143289345\n"); - } - - // Fixed64 tests - #[test] - fn test_fixed64() { - const PROTO: &[u8] = &[0x19, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0xBF]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!( - result, - "3: 13830554455654793216 # double: -1, i64: -4616189618054758400\n" - ); - } - 
- #[test] - fn test_fixed64_positive() { - const PROTO: &[u8] = &[0x19, 0x6E, 0x86, 0x1B, 0xF0, 0xF9, 0x21, 0x09, 0x40]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "3: 4614256650576692846 # double: 3.14159\n"); - } - - #[test] - fn test_fixed64_no_float() { - const PROTO: &[u8] = &[0x19, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "3: 18446744073709551615 # i64: -1\n"); - } - - #[test] - fn test_fixed64_positive_no_float() { - const PROTO: &[u8] = &[0x19, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF8, 0x7F]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "3: 9221120237041090561\n"); - } - - // String field test - #[test] - fn test_string_field() { - // field 4: string "hello" (LEN type field from docs) - const PROTO: &[u8] = &[0x22, 0x05, 0x68, 0x65, 0x6C, 0x6C, 0x6F]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "4: hello\n"); - } - - #[test] - fn test_nested_message() { - // From docs about nested messages: field 5 with a nested message containing field 1: varint 42 - const PROTO: &[u8] = &[0x2A, 0x02, 0x08, 0x2A]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "5:\n 1: 42\n"); - } - - #[test] - fn test_nested_twice() { - const PROTO: &[u8] = &[0x2A, 0x04, 0x2A, 0x02, 0x08, 0x2A]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "5:\n 5:\n 1: 42\n"); - } - - #[test] - fn test_binary_data() { - // Binary data example: field 6: binary data [0x01, 0x02, 0x03] - const PROTO: &[u8] = &[0x32, 0x03, 0x01, 0x02, 0x03]; - let result = Protobuf.prettify(PROTO.to_vec()).unwrap(); - assert_eq!(result, "6: !Binary '010203'\n"); - } - - #[test] - fn test_invalid_protobuf() { - let result = Protobuf.prettify(vec![0xFF, 0xFF]); - assert!(result.is_err()); - } + #[test] + fn test_invalid_protobuf() { + let result = 
Protobuf.prettify(b"\xFF\xFF"); + assert!(result.is_err()); + } - #[test] - fn test_empty_protobuf() { - let result = Protobuf.prettify(vec![]); - assert!(result.is_err()); - } + #[test] + fn test_empty_protobuf() { + let result = Protobuf.prettify(b""); + assert!(result.is_err()); } } From 20b47938d6ccc8c73a641605366b659b78622770 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sun, 30 Mar 2025 14:38:01 +0200 Subject: [PATCH 07/26] simplify error handling --- mitmproxy-rs/src/contentview.rs | 15 ++- src/contentviews/hex_dump.rs | 4 +- src/contentviews/hex_stream.rs | 13 +- src/contentviews/mod.rs | 35 +----- src/contentviews/msgpack.rs | 25 ++-- src/contentviews/protobuf.rs | 211 ++++++++++++++++++-------------- 6 files changed, 150 insertions(+), 153 deletions(-) diff --git a/mitmproxy-rs/src/contentview.rs b/mitmproxy-rs/src/contentview.rs index 415f4baa..2d12e7af 100644 --- a/mitmproxy-rs/src/contentview.rs +++ b/mitmproxy-rs/src/contentview.rs @@ -1,6 +1,5 @@ -use anyhow::{anyhow, Result}; use mitmproxy::contentviews::{Prettify, Reencode}; -use pyo3::prelude::*; +use pyo3::{exceptions::PyValueError, prelude::*}; #[pyclass(frozen, module = "mitmproxy_rs.contentviews", subclass)] pub struct Contentview(&'static dyn Prettify); @@ -23,8 +22,10 @@ impl Contentview { } /// Pretty-print an (encoded) message. 
- pub fn prettify(&self, data: Vec) -> Result { - self.0.prettify(data).map_err(|e| anyhow!("{e}")) + pub fn prettify(&self, data: Vec) -> PyResult { + self.0 + .prettify(&data) + .map_err(|e| PyValueError::new_err(e.to_string())) } fn __repr__(&self) -> PyResult { @@ -52,8 +53,10 @@ impl InteractiveContentview { #[pymethods] impl InteractiveContentview { - pub fn reencode(&self, data: &str) -> Result> { - self.0.reencode(data, &[]).map_err(|e| anyhow!("{e}")) + pub fn reencode(&self, data: &str) -> PyResult> { + self.0 + .reencode(data) + .map_err(|e| PyValueError::new_err(e.to_string())) } fn __repr__(self_: PyRef<'_, Self>) -> PyResult { diff --git a/src/contentviews/hex_dump.rs b/src/contentviews/hex_dump.rs index cb79f842..953f1717 100644 --- a/src/contentviews/hex_dump.rs +++ b/src/contentviews/hex_dump.rs @@ -1,4 +1,4 @@ -use crate::contentviews::{Prettify, PrettifyError}; +use crate::contentviews::Prettify; use pretty_hex::{HexConfig, PrettyHex}; pub struct HexDump; @@ -8,7 +8,7 @@ impl Prettify for HexDump { "Hex Dump" } - fn prettify(&self, data: &[u8]) -> Result { + fn prettify(&self, data: &[u8]) -> anyhow::Result { Ok(format!( "{:?}", data.hex_conf(HexConfig { diff --git a/src/contentviews/hex_stream.rs b/src/contentviews/hex_stream.rs index fdedd278..fa982ad2 100644 --- a/src/contentviews/hex_stream.rs +++ b/src/contentviews/hex_stream.rs @@ -1,4 +1,5 @@ -use crate::contentviews::{Prettify, PrettifyError, Reencode, ReencodeError}; +use crate::contentviews::{Prettify, Reencode}; +use anyhow::{Context, Result}; use pretty_hex::{HexConfig, PrettyHex}; use std::num::ParseIntError; @@ -9,7 +10,7 @@ impl Prettify for HexStream { "Hex Stream" } - fn prettify(&self, data: &[u8]) -> Result { + fn prettify(&self, data: &[u8]) -> Result { Ok(data .hex_conf(HexConfig { title: false, @@ -25,12 +26,12 @@ impl Prettify for HexStream { } impl Reencode for HexStream { - fn reencode(&self, data: &str, original: &[u8]) -> anyhow::Result, ReencodeError> { + fn 
reencode(&self, data: &str) -> Result> { (0..data.len()) .step_by(2) .map(|i| u8::from_str_radix(&data[i..i + 2], 16)) - .collect::, ParseIntError>>() - .map_err(|e| ReencodeError::InvalidFormat(e.to_string())) + .collect::, ParseIntError>>() + .context("Invalid hex string") } } @@ -53,7 +54,7 @@ mod tests { #[test] fn test_hexstream_serialize() { let data = "666f6f"; - let result = HexStream.reencode(data, &[]).unwrap(); + let result = HexStream.reencode(data).unwrap(); assert_eq!(result, b"foo"); } } diff --git a/src/contentviews/mod.rs b/src/contentviews/mod.rs index c4b5d220..771ec6b6 100644 --- a/src/contentviews/mod.rs +++ b/src/contentviews/mod.rs @@ -4,43 +4,12 @@ mod msgpack; mod protobuf; use anyhow::Result; -use std::fmt::{Display, Formatter}; pub use hex_dump::HexDump; pub use hex_stream::HexStream; pub use msgpack::MsgPack; pub use protobuf::Protobuf; -#[derive(Debug)] -pub enum ReencodeError { - InvalidFormat(String), -} - -impl Display for ReencodeError { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - ReencodeError::InvalidFormat(e) => { - write!(f, "invalid format: {}", e) - } - } - } -} - -#[derive(Debug)] -pub enum PrettifyError { - Generic(String), -} - -impl Display for PrettifyError { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - PrettifyError::Generic(e) => { - write!(f, "deserialize error: {}", e) - } - } - } -} - pub trait Prettify: Send + Sync { fn name(&self) -> &str; @@ -48,9 +17,9 @@ pub trait Prettify: Send + Sync { self.name().to_lowercase().replace(" ", "_") } - fn prettify(&self, data: &[u8]) -> Result; + fn prettify(&self, data: &[u8]) -> Result; } pub trait Reencode: Send + Sync { - fn reencode(&self, data: &str, original: &[u8]) -> Result, ReencodeError>; + fn reencode(&self, data: &str) -> Result>; } diff --git a/src/contentviews/msgpack.rs b/src/contentviews/msgpack.rs index 5b0b3e9d..dbfbdbde 100644 --- a/src/contentviews/msgpack.rs +++ b/src/contentviews/msgpack.rs 
@@ -1,4 +1,5 @@ -use crate::contentviews::{Prettify, PrettifyError, Reencode, ReencodeError}; +use crate::contentviews::{Prettify, Reencode}; +use anyhow::{Context, Result}; use rmp_serde::{decode, encode}; use serde_yaml; @@ -9,28 +10,24 @@ impl Prettify for MsgPack { "MsgPack" } - fn prettify(&self, data: &[u8]) -> Result { + fn prettify(&self, data: &[u8]) -> Result { // Deserialize MsgPack to a serde_yaml::Value - let value: serde_yaml::Value = decode::from_slice(&data) - .map_err(|e| PrettifyError::Generic(format!("Failed to deserialize MsgPack: {}", e)))?; + let value: serde_yaml::Value = + decode::from_slice(data).context("Failed to deserialize MsgPack")?; // Convert the Value to prettified YAML - serde_yaml::to_string(&value) - .map_err(|e| PrettifyError::Generic(format!("Failed to convert to YAML: {}", e))) + serde_yaml::to_string(&value).context("Failed to convert to YAML") } } impl Reencode for MsgPack { - fn reencode(&self, data: &str, original: &[u8]) -> anyhow::Result, ReencodeError> { + fn reencode(&self, data: &str) -> Result> { // Parse the YAML string to a serde_yaml::Value - let value: serde_yaml::Value = serde_yaml::from_str(data) - .map_err(|e| ReencodeError::InvalidFormat(format!("Invalid YAML: {}", e)))?; + let value: serde_yaml::Value = serde_yaml::from_str(data).context("Invalid YAML")?; // Serialize the Value to MsgPack let mut buf = Vec::new(); - encode::write_named(&mut buf, &value).map_err(|e| { - ReencodeError::InvalidFormat(format!("Failed to encode to MsgPack: {}", e)) - })?; + encode::write_named(&mut buf, &value).context("Failed to encode to MsgPack")?; Ok(buf) } @@ -74,7 +71,7 @@ tags: #[test] fn test_msgpack_serialize() { - let result = MsgPack.reencode(TEST_YAML, &[]).unwrap(); + let result = MsgPack.reencode(TEST_YAML).unwrap(); // Verify the MsgPack data contains the expected values let value: serde_yaml::Value = decode::from_slice(&result).unwrap(); @@ -110,7 +107,7 @@ tags: let yaml_result = 
MsgPack.prettify(TEST_MSGPACK).unwrap(); // Serialize back to MsgPack - let result = MsgPack.reencode(&yaml_result, &[]).unwrap(); + let result = MsgPack.reencode(&yaml_result).unwrap(); // Deserialize both the original and the result to Values for comparison let original_value: serde_yaml::Value = decode::from_slice(TEST_MSGPACK).unwrap(); diff --git a/src/contentviews/protobuf.rs b/src/contentviews/protobuf.rs index 0392305d..3fdf77da 100644 --- a/src/contentviews/protobuf.rs +++ b/src/contentviews/protobuf.rs @@ -1,5 +1,5 @@ -use crate::contentviews::{Prettify, PrettifyError, Reencode, ReencodeError}; -use once_cell::sync::Lazy; +use crate::contentviews::{Prettify, Reencode}; +use anyhow::{bail, Context, Result}; use protobuf::descriptor::field_descriptor_proto::Label::LABEL_REPEATED; use protobuf::descriptor::field_descriptor_proto::Type; use protobuf::descriptor::field_descriptor_proto::Type::{ @@ -14,8 +14,7 @@ use protobuf::well_known_types::empty::Empty; use protobuf::UnknownValueRef; use protobuf::{EnumOrUnknown, Message, MessageDyn, MessageFull, UnknownValue}; use regex::Captures; -use regex::Regex; -use serde_yaml::value::{Tag, TaggedValue}; +use serde_yaml::value::TaggedValue; use serde_yaml::Value::Tagged; use serde_yaml::{Mapping, Number, Value}; use std::collections::BTreeMap; @@ -25,19 +24,21 @@ use std::ops::Deref; use std::str::FromStr; mod tags { - use std::cell::LazyCell; use once_cell::sync::Lazy; use regex::Regex; use serde_yaml::value::Tag; - pub(super) const BINARY: LazyCell = LazyCell::new(|| Tag::new("binary")); - pub(super) const VARINT: LazyCell = LazyCell::new(|| Tag::new("varint")); - pub(super) const FIXED32: LazyCell = LazyCell::new(|| Tag::new("fixed32")); - pub(super) const FIXED64: LazyCell = LazyCell::new(|| Tag::new("fixed64")); - - pub(super) const VARINT_RE: Lazy = Lazy::new(|| Regex::new(&format!(r"{} (\d+)", *VARINT)).unwrap()); - pub(super) const FIXED32_RE: Lazy = Lazy::new(|| Regex::new(&format!(r"{} (\d+)", 
*FIXED32)).unwrap()); - pub(super) const FIXED64_RE: Lazy = Lazy::new(|| Regex::new(&format!(r"{} (\d+)", *FIXED64)).unwrap()); + pub(super) static BINARY: Lazy = Lazy::new(|| Tag::new("binary")); + pub(super) static VARINT: Lazy = Lazy::new(|| Tag::new("varint")); + pub(super) static FIXED32: Lazy = Lazy::new(|| Tag::new("fixed32")); + pub(super) static FIXED64: Lazy = Lazy::new(|| Tag::new("fixed64")); + + pub(super) static VARINT_RE: Lazy = + Lazy::new(|| Regex::new(&format!(r"{} (\d+)", *VARINT)).unwrap()); + pub(super) static FIXED32_RE: Lazy = + Lazy::new(|| Regex::new(&format!(r"{} (\d+)", *FIXED32)).unwrap()); + pub(super) static FIXED64_RE: Lazy = + Lazy::new(|| Regex::new(&format!(r"{} (\d+)", *FIXED64)).unwrap()); } pub struct Protobuf; @@ -53,25 +54,24 @@ impl Prettify for Protobuf { "Protocol Buffer" } - fn prettify(&self, data: &[u8]) -> Result { + fn prettify(&self, data: &[u8]) -> Result { // Check if data is empty first if data.is_empty() { - return Err(PrettifyError::Generic("Empty protobuf data".to_string())); + bail!("Empty protobuf data"); } let existing = Empty::descriptor(); - let descriptor = Self::create_descriptor(&data, existing)?; + let descriptor = Self::create_descriptor(data, existing)?; let message = descriptor - .parse_from_bytes(&data) - .map_err(|e| PrettifyError::Generic(format!("Error parsing protobuf: {e}")))?; + .parse_from_bytes(data) + .context("Error parsing protobuf")?; // Parse protobuf and convert to YAML let yaml_value = Self::message_to_yaml(message.as_ref()); // Convert the Value to prettified YAML - let yaml_str = serde_yaml::to_string(&yaml_value) - .map_err(|e| PrettifyError::Generic(format!("Failed to convert to YAML: {}", e)))?; + let yaml_str = serde_yaml::to_string(&yaml_value).context("Failed to convert to YAML")?; // Apply regex replacements to transform the YAML output Self::apply_replacements(&yaml_str) @@ -79,14 +79,11 @@ impl Prettify for Protobuf { } impl Reencode for Protobuf { - fn reencode(&self, 
data: &str, original: &[u8]) -> Result, ReencodeError> { + fn reencode(&self, data: &str) -> Result> { let descriptor = Empty::descriptor(); - //let descriptor = Self::create_descriptor(original, existing) - // .map_err(|e| ReencodeError::InvalidFormat(format!("{e}")))?; let message = descriptor.new_instance(); - let value: Value = serde_yaml::from_str(data) - .map_err(|e| ReencodeError::InvalidFormat(format!("invalid yaml: {e}")))?; + let value: Value = serde_yaml::from_str(data).context("Invalid YAML")?; Self::merge_yaml_into_message(value, message) } @@ -146,14 +143,9 @@ fn int_value(n: Number, field: Option<&FieldDescriptor>) -> UnknownValue { } impl Protobuf { - fn merge_yaml_into_message( - value: Value, - mut message: Box, - ) -> Result, ReencodeError> { + fn merge_yaml_into_message(value: Value, mut message: Box) -> Result> { let Value::Mapping(mapping) = value else { - return Err(ReencodeError::InvalidFormat( - "yaml is not a mapping".to_string(), - )); + bail!("YAML is not a mapping"); }; for (key, value) in mapping.into_iter() { @@ -164,23 +156,17 @@ impl Protobuf { } else if let Ok(field_num) = i32::from_str(&key) { field_num } else { - return Err(ReencodeError::InvalidFormat(format!( - "unknown protobuf field key: {key}" - ))); + bail!("Unknown protobuf field key: {key}"); } } Value::Number(key) => { let Some(field_num) = key.as_i64() else { - return Err(ReencodeError::InvalidFormat(format!( - "invalid protobuf field number: {key}" - ))); + bail!("Invalid protobuf field number: {key}"); }; field_num as i32 } other => { - return Err(ReencodeError::InvalidFormat(format!( - "unexpected key: {other:?}" - ))) + bail!("Unexpected key: {other:?}"); } } as u32; @@ -189,14 +175,10 @@ impl Protobuf { message .write_to_bytes_dyn() - .map_err(|e| ReencodeError::InvalidFormat(format!("failed to serialize protobuf: {e}"))) + .context("Failed to serialize protobuf") } - fn add_field( - message: &mut dyn MessageDyn, - field_num: u32, - value: Value, - ) -> Result<(), 
ReencodeError> { + fn add_field(message: &mut dyn MessageDyn, field_num: u32, value: Value) -> Result<()> { let value = match value { Value::Null => return Ok(()), Value::Sequence(seq) => { @@ -210,31 +192,27 @@ impl Protobuf { if t.tag == *tags::BINARY { let value = match t.value { Value::String(s) => s, - _ => { - return Err(ReencodeError::InvalidFormat( - "binary data is not a string".to_string(), - )) - } + _ => bail!("Binary data is not a string"), }; let value = (0..value.len()) .step_by(2) .map(|i| u8::from_str_radix(&value[i..i + 2], 16)) .collect::, ParseIntError>>() - .map_err(|e| ReencodeError::InvalidFormat(e.to_string()))?; + .context("Invalid hex string")?; UnknownValue::LengthDelimited(value) } else if t.tag == *tags::FIXED32 { let value = match t.value { Value::Number(s) if s.as_u64().is_some() => s.as_u64().unwrap(), - _ => return Err(ReencodeError::InvalidFormat("fixed32 data is not a u32".to_string())) + _ => bail!("Fixed32 data is not a u32"), }; UnknownValue::Fixed32(value as u32) } else if t.tag == *tags::FIXED64 { let value = match t.value { Value::Number(s) if s.as_u64().is_some() => s.as_u64().unwrap(), - _ => return Err(ReencodeError::InvalidFormat("fixed64 data is not a u64".to_string())) + _ => bail!("Fixed64 data is not a u64"), }; UnknownValue::Fixed64(value) - } else{ + } else { log::info!("Unexpected YAML tag {}, discarding.", t.tag); return Self::add_field(message, field_num, t.value); } @@ -252,7 +230,7 @@ impl Protobuf { field.runtime_field_type() { descriptor = md; - } else if let RuntimeFieldType::Map(k, v) = field.runtime_field_type() { + } else if let RuntimeFieldType::Map(_, _) = field.runtime_field_type() { // TODO: handle maps. 
} } @@ -330,10 +308,7 @@ impl Protobuf { Value::Mapping(ret) } - fn create_descriptor( - data: &[u8], - existing: MessageDescriptor, - ) -> Result { + fn create_descriptor(data: &[u8], existing: MessageDescriptor) -> Result { let proto = Self::create_descriptor_proto(data, existing, "Unknown".to_string())?; let descriptor = { @@ -355,10 +330,10 @@ impl Protobuf { data: &[u8], existing: MessageDescriptor, name: String, - ) -> Result { + ) -> Result { let message = existing .parse_from_bytes(data) - .map_err(|e| PrettifyError::Generic(format!("failed to parse protobuf: {e}")))?; + .context("failed to parse protobuf")?; let mut descriptor = existing.proto().clone(); @@ -380,16 +355,14 @@ impl Protobuf { UnknownValueRef::Fixed32(_) => add_int(TYPE_FIXED32), UnknownValueRef::Fixed64(_) => add_int(TYPE_FIXED64), UnknownValueRef::Varint(_) => add_int(TYPE_UINT64), - UnknownValueRef::LengthDelimited(data) => { + UnknownValueRef::LengthDelimited(_) => { let field_values = field_values .iter() .map(|x| match x { UnknownValueRef::LengthDelimited(data) => Ok(*data), - _ => Err(PrettifyError::Generic( - "varying types in protobuf".to_string(), - )), + _ => Err(anyhow::anyhow!("varying types in protobuf")), }) - .collect::, PrettifyError>>()?; + .collect::>>()?; match Self::guess_field_type(&field_values, &name, field_index) { GuessedFieldType::String => add_int(TYPE_STRING), @@ -448,7 +421,7 @@ impl Protobuf { } // Helper method to apply regex replacements to the YAML output - fn apply_replacements(yaml_str: &str) -> Result { + fn apply_replacements(yaml_str: &str) -> Result { // Replace !fixed32 tags with comments showing float and i32 interpretations let with_fixed32 = tags::FIXED32_RE.replace_all(yaml_str, |caps: &Captures| { let value = caps[1].parse::().unwrap_or_default(); @@ -456,7 +429,13 @@ impl Protobuf { let i32_value = value as i32; if !float_value.is_nan() && float_value < 0.0 { - format!("{} {} # float: {}, i32: {}", *tags::FIXED32, value, float_value, i32_value) 
+ format!( + "{} {} # float: {}, i32: {}", + *tags::FIXED32, + value, + float_value, + i32_value + ) } else if !float_value.is_nan() { format!("{} {} # float: {}", *tags::FIXED32, value, float_value) } else if i32_value < 0 { @@ -473,7 +452,13 @@ impl Protobuf { let i64_value = value as i64; if !double_value.is_nan() && double_value < 0.0 { - format!("{} {} # double: {}, i64: {}", *tags::FIXED64, value, double_value, i64_value) + format!( + "{} {} # double: {}, i64: {}", + *tags::FIXED64, + value, + double_value, + i64_value + ) } else if !double_value.is_nan() { format!("{} {} # double: {}", *tags::FIXED64, value, double_value) } else if i64_value < 0 { @@ -534,46 +519,88 @@ mod tests { #[test] fn reencode() { - let result = Protobuf.reencode(YAML, PROTO).unwrap(); + let result = Protobuf.reencode(YAML).unwrap(); assert_eq!(result, PROTO); } } }; } - test_roundtrip!(varint, b"\x08\x96\x01","1: 150\n"); - test_roundtrip!(varint_negative, b"\x08\x0B","1: 11 # signed: -6\n"); - test_roundtrip!(binary, b"\x32\x03\x01\x02\x03","6: !binary '010203'\n"); - test_roundtrip!(string, b"\x0A\x05\x68\x65\x6C\x6C\x6F","1: hello\n"); - test_roundtrip!(nested, b"\x2A\x02\x08\x2A","5:\n 1: 42\n"); - test_roundtrip!(nested_twice, b"\x2A\x04\x2A\x02\x08\x2A","5:\n 5:\n 1: 42\n"); - test_roundtrip!(fixed64, b"\x19\x00\x00\x00\x00\x00\x00\xF0\xBF","3: !fixed64 13830554455654793216 # double: -1, i64: -4616189618054758400\n"); - test_roundtrip!(fixed64_positive, b"\x19\x6E\x86\x1B\xF0\xF9\x21\x09\x40","3: !fixed64 4614256650576692846 # double: 3.14159\n"); - test_roundtrip!(fixed64_no_float, b"\x19\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF","3: !fixed64 18446744073709551615 # i64: -1\n"); - test_roundtrip!(fixed64_positive_no_float, b"\x19\x01\x00\x00\x00\x00\x00\xF8\x7F","3: !fixed64 9221120237041090561\n"); - test_roundtrip!(fixed32, b"\x15\x00\x00\x80\xBF","2: !fixed32 3212836864 # float: -1, i32: -1082130432\n"); - test_roundtrip!(fixed32_positive, b"\x15\xD0\x0F\x49\x40","2: !fixed32 
1078530000 # float: 3.14159\n"); - test_roundtrip!(fixed32_no_float, b"\x15\xFF\xFF\xFF\xFF","2: !fixed32 4294967295 # i32: -1\n"); - test_roundtrip!(fixed32_positive_no_float, b"\x15\x01\x00\xC0\x7F","2: !fixed32 2143289345\n"); + test_roundtrip!(varint, b"\x08\x96\x01", "1: 150\n"); + test_roundtrip!(varint_negative, b"\x08\x0B", "1: 11 # signed: -6\n"); + test_roundtrip!(binary, b"\x32\x03\x01\x02\x03", "6: !binary '010203'\n"); + test_roundtrip!(string, b"\x0A\x05\x68\x65\x6C\x6C\x6F", "1: hello\n"); + test_roundtrip!(nested, b"\x2A\x02\x08\x2A", "5:\n 1: 42\n"); + test_roundtrip!( + nested_twice, + b"\x2A\x04\x2A\x02\x08\x2A", + "5:\n 5:\n 1: 42\n" + ); + test_roundtrip!( + fixed64, + b"\x19\x00\x00\x00\x00\x00\x00\xF0\xBF", + "3: !fixed64 13830554455654793216 # double: -1, i64: -4616189618054758400\n" + ); + test_roundtrip!( + fixed64_positive, + b"\x19\x6E\x86\x1B\xF0\xF9\x21\x09\x40", + "3: !fixed64 4614256650576692846 # double: 3.14159\n" + ); + test_roundtrip!( + fixed64_no_float, + b"\x19\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", + "3: !fixed64 18446744073709551615 # i64: -1\n" + ); + test_roundtrip!( + fixed64_positive_no_float, + b"\x19\x01\x00\x00\x00\x00\x00\xF8\x7F", + "3: !fixed64 9221120237041090561\n" + ); + test_roundtrip!( + fixed32, + b"\x15\x00\x00\x80\xBF", + "2: !fixed32 3212836864 # float: -1, i32: -1082130432\n" + ); + test_roundtrip!( + fixed32_positive, + b"\x15\xD0\x0F\x49\x40", + "2: !fixed32 1078530000 # float: 3.14159\n" + ); + test_roundtrip!( + fixed32_no_float, + b"\x15\xFF\xFF\xFF\xFF", + "2: !fixed32 4294967295 # i32: -1\n" + ); + test_roundtrip!( + fixed32_positive_no_float, + b"\x15\x01\x00\xC0\x7F", + "2: !fixed32 2143289345\n" + ); // From docs: "message Test5 { repeated int32 f = 6 [packed=true]; }" // With values 3, 270, and 86942 - test_roundtrip!(repeated_packed, b"\x32\x06\x03\x8E\x02\x9E\xA7\x05","6: !binary 038e029ea705\n"); - test_roundtrip!(repeated_varint, b"\x08\x01\x08\x02\x08\x03","1:\n- 1 # signed: -1\n- 2\n- 3 # 
signed: -2\n"); + test_roundtrip!( + repeated_packed, + b"\x32\x06\x03\x8E\x02\x9E\xA7\x05", + "6: !binary 038e029ea705\n" + ); + test_roundtrip!( + repeated_varint, + b"\x08\x01\x08\x02\x08\x03", + "1:\n- 1 # signed: -1\n- 2\n- 3 # signed: -2\n" + ); mod reencode { use super::*; #[test] fn reencode_new_nested_message() { - let result = Protobuf - .reencode(nested::YAML, fixed32::PROTO) - .unwrap(); + let result = Protobuf.reencode(nested::YAML).unwrap(); assert_eq!(result, nested::PROTO); } #[test] fn new_string_attr() { - let result = Protobuf.reencode(string::YAML, varint::PROTO).unwrap(); + let result = Protobuf.reencode(string::YAML).unwrap(); assert_eq!(result, string::PROTO); } } From 8d5619a9b983cd8bc58decd0d6d10576bc6d1cdc Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sun, 30 Mar 2025 16:11:51 +0200 Subject: [PATCH 08/26] fix Python API compatibility --- mitmproxy-rs/mitmproxy_rs/contentviews.pyi | 4 +- mitmproxy-rs/src/contentview.rs | 47 +++++++++++++++++++--- mitmproxy-rs/src/lib.rs | 8 ++-- src/contentviews/hex_dump.rs | 13 +++--- src/contentviews/hex_stream.rs | 38 ++++++++++++----- src/contentviews/mod.rs | 34 +++++++++++++++- src/contentviews/msgpack.rs | 23 +++++++---- src/contentviews/protobuf.rs | 25 +++++++----- 8 files changed, 147 insertions(+), 45 deletions(-) diff --git a/mitmproxy-rs/mitmproxy_rs/contentviews.pyi b/mitmproxy-rs/mitmproxy_rs/contentviews.pyi index f05c711b..2cbe6e41 100644 --- a/mitmproxy-rs/mitmproxy_rs/contentviews.pyi +++ b/mitmproxy-rs/mitmproxy_rs/contentviews.pyi @@ -3,12 +3,12 @@ from typing import ClassVar, final class Contentview: name: ClassVar[str] - def prettify(self, data: bytes) -> str: + def prettify(self, data: bytes, metadata) -> str: pass @final class InteractiveContentview(Contentview): - def reencode(self, data: str) -> bytes: + def reencode(self, data: str, metadata) -> bytes: pass hex_dump: Contentview diff --git a/mitmproxy-rs/src/contentview.rs b/mitmproxy-rs/src/contentview.rs index 
2d12e7af..8e29e6f2 100644 --- a/mitmproxy-rs/src/contentview.rs +++ b/mitmproxy-rs/src/contentview.rs @@ -1,6 +1,20 @@ -use mitmproxy::contentviews::{Prettify, Reencode}; +use mitmproxy::contentviews::{Metadata, Prettify, Reencode, SyntaxHighlight}; use pyo3::{exceptions::PyValueError, prelude::*}; +struct PythonMetadata(PyObject); + +impl Metadata for PythonMetadata { + fn content_type(&self) -> Option { + Python::with_gil(|py| { + self.0 + .getattr(py, "content_type") + .ok()? + .extract::(py) + .ok() + }) + } +} + #[pyclass(frozen, module = "mitmproxy_rs.contentviews", subclass)] pub struct Contentview(&'static dyn Prettify); @@ -22,12 +36,33 @@ impl Contentview { } /// Pretty-print an (encoded) message. - pub fn prettify(&self, data: Vec) -> PyResult { + pub fn prettify(&self, data: Vec, metadata: PyObject) -> PyResult { + let metadata = PythonMetadata(metadata); + self.0 - .prettify(&data) + .prettify(&data, &metadata) .map_err(|e| PyValueError::new_err(e.to_string())) } + /// Return the priority of this view for rendering data. + pub fn render_priority(&self, data: Vec, metadata: PyObject) -> PyResult { + let metadata = PythonMetadata(metadata); + Ok(self.0.render_priority(&data, &metadata)) + } + + /// Optional syntax highlighting that should be applied to the prettified output. 
+ #[getter] + pub fn syntax_highlight(&self) -> &str { + match self.0.syntax_highlight() { + SyntaxHighlight::None => "none", + SyntaxHighlight::Yaml => "yaml", + } + } + + fn __lt__(&self, py: Python<'_>, other: PyObject) -> PyResult { + Ok(self.name() < other.getattr(py, "name")?.extract::(py)?.as_str()) + } + fn __repr__(&self) -> PyResult { Ok(format!( "", @@ -53,9 +88,11 @@ impl InteractiveContentview { #[pymethods] impl InteractiveContentview { - pub fn reencode(&self, data: &str) -> PyResult> { + pub fn reencode(&self, data: &str, metadata: PyObject) -> PyResult> { + let metadata = PythonMetadata(metadata); + self.0 - .reencode(data) + .reencode(data, &metadata) .map_err(|e| PyValueError::new_err(e.to_string())) } diff --git a/mitmproxy-rs/src/lib.rs b/mitmproxy-rs/src/lib.rs index 20d0b09a..116aec1c 100644 --- a/mitmproxy-rs/src/lib.rs +++ b/mitmproxy-rs/src/lib.rs @@ -87,10 +87,10 @@ mod mitmproxy_rs { #[pymodule] mod contentviews { use super::*; - #[pymodule_export] - use crate::contentview::Contentview; - #[pymodule_export] - use crate::contentview::InteractiveContentview; + //#[pymodule_export] + //use crate::contentview::Contentview; + //#[pymodule_export] + //use crate::contentview::InteractiveContentview; use mitmproxy::contentviews::{HexDump, HexStream, MsgPack, Protobuf}; #[pymodule_init] diff --git a/src/contentviews/hex_dump.rs b/src/contentviews/hex_dump.rs index 953f1717..bd51efbc 100644 --- a/src/contentviews/hex_dump.rs +++ b/src/contentviews/hex_dump.rs @@ -1,4 +1,4 @@ -use crate::contentviews::Prettify; +use crate::contentviews::{Metadata, Prettify}; use pretty_hex::{HexConfig, PrettyHex}; pub struct HexDump; @@ -8,7 +8,7 @@ impl Prettify for HexDump { "Hex Dump" } - fn prettify(&self, data: &[u8]) -> anyhow::Result { + fn prettify(&self, data: &[u8], _metadata: &dyn Metadata) -> anyhow::Result { Ok(format!( "{:?}", data.hex_conf(HexConfig { @@ -27,10 +27,11 @@ impl Prettify for HexDump { #[cfg(test)] mod tests { use super::*; + use 
crate::contentviews::TestMetadata; #[test] - fn test_hexdump_deserialize() { - let result = HexDump.prettify(b"abcd").unwrap(); + fn prettify_simple() { + let result = HexDump.prettify(b"abcd", &TestMetadata::default()).unwrap(); assert_eq!( result, "0000: 61 62 63 64 abcd" @@ -38,8 +39,8 @@ mod tests { } #[test] - fn test_hexdump_deserialize_empty() { - let result = HexDump.prettify(b"").unwrap(); + fn prettify_empty() { + let result = HexDump.prettify(b"", &TestMetadata::default()).unwrap(); assert_eq!(result, ""); } } diff --git a/src/contentviews/hex_stream.rs b/src/contentviews/hex_stream.rs index fa982ad2..693dfeff 100644 --- a/src/contentviews/hex_stream.rs +++ b/src/contentviews/hex_stream.rs @@ -1,4 +1,4 @@ -use crate::contentviews::{Prettify, Reencode}; +use crate::contentviews::{Metadata, Prettify, Reencode}; use anyhow::{Context, Result}; use pretty_hex::{HexConfig, PrettyHex}; use std::num::ParseIntError; @@ -10,7 +10,7 @@ impl Prettify for HexStream { "Hex Stream" } - fn prettify(&self, data: &[u8]) -> Result { + fn prettify(&self, data: &[u8], _metadata: &dyn Metadata) -> Result { Ok(data .hex_conf(HexConfig { title: false, @@ -23,10 +23,27 @@ impl Prettify for HexStream { }) .to_string()) } + + fn render_priority(&self, data: &[u8], _metadata: &dyn Metadata) -> f64 { + if data.is_empty() { + return 0.0; + } + let ratio = data + .iter() + .take(100) + .filter(|&&b| b < 9 || (13 < b && b < 32) || b > 126) + .count() as f64 + / data.len().min(100) as f64; + if ratio > 0.3 { + 1.0 + } else { + 0.0 + } + } } impl Reencode for HexStream { - fn reencode(&self, data: &str) -> Result> { + fn reencode(&self, data: &str, _metadata: &dyn Metadata) -> Result> { (0..data.len()) .step_by(2) .map(|i| u8::from_str_radix(&data[i..i + 2], 16)) @@ -38,23 +55,26 @@ impl Reencode for HexStream { #[cfg(test)] mod tests { use super::*; + use crate::contentviews::TestMetadata; #[test] - fn test_hexstream_deserialize() { - let result = HexStream.prettify(b"foo").unwrap(); + 
fn test_hex_stream() { + let result = HexStream + .prettify(b"foo", &TestMetadata::default()) + .unwrap(); assert_eq!(result, "666f6f"); } #[test] - fn test_hexstream_deserialize_empty() { - let result = HexStream.prettify(b"").unwrap(); + fn test_hex_stream_empty() { + let result = HexStream.prettify(b"", &TestMetadata::default()).unwrap(); assert_eq!(result, ""); } #[test] - fn test_hexstream_serialize() { + fn test_hex_stream_reencode() { let data = "666f6f"; - let result = HexStream.reencode(data).unwrap(); + let result = HexStream.reencode(data, &TestMetadata::default()).unwrap(); assert_eq!(result, b"foo"); } } diff --git a/src/contentviews/mod.rs b/src/contentviews/mod.rs index 771ec6b6..6457d558 100644 --- a/src/contentviews/mod.rs +++ b/src/contentviews/mod.rs @@ -10,6 +10,15 @@ pub use hex_stream::HexStream; pub use msgpack::MsgPack; pub use protobuf::Protobuf; +pub enum SyntaxHighlight { + None, + Yaml, +} + +pub trait Metadata { + fn content_type(&self) -> Option; +} + pub trait Prettify: Send + Sync { fn name(&self) -> &str; @@ -17,9 +26,30 @@ pub trait Prettify: Send + Sync { self.name().to_lowercase().replace(" ", "_") } - fn prettify(&self, data: &[u8]) -> Result; + fn prettify(&self, data: &[u8], metadata: &dyn Metadata) -> Result; + + fn render_priority(&self, _data: &[u8], _metadata: &dyn Metadata) -> f64 { + 0.0 + } + + fn syntax_highlight(&self) -> SyntaxHighlight { + SyntaxHighlight::None + } } pub trait Reencode: Send + Sync { - fn reencode(&self, data: &str) -> Result>; + fn reencode(&self, data: &str, metadata: &dyn Metadata) -> Result>; +} + +#[cfg(test)] +#[derive(Default)] +pub struct TestMetadata { + pub content_type: Option, +} + +#[cfg(test)] +impl Metadata for TestMetadata { + fn content_type(&self) -> Option { + self.content_type.clone() + } } diff --git a/src/contentviews/msgpack.rs b/src/contentviews/msgpack.rs index dbfbdbde..b046070e 100644 --- a/src/contentviews/msgpack.rs +++ b/src/contentviews/msgpack.rs @@ -1,4 +1,4 @@ -use 
crate::contentviews::{Prettify, Reencode}; +use crate::contentviews::{Metadata, Prettify, Reencode}; use anyhow::{Context, Result}; use rmp_serde::{decode, encode}; use serde_yaml; @@ -10,7 +10,7 @@ impl Prettify for MsgPack { "MsgPack" } - fn prettify(&self, data: &[u8]) -> Result { + fn prettify(&self, data: &[u8], _metadata: &dyn Metadata) -> Result { // Deserialize MsgPack to a serde_yaml::Value let value: serde_yaml::Value = decode::from_slice(data).context("Failed to deserialize MsgPack")?; @@ -21,7 +21,7 @@ impl Prettify for MsgPack { } impl Reencode for MsgPack { - fn reencode(&self, data: &str) -> Result> { + fn reencode(&self, data: &str, _metadata: &dyn Metadata) -> Result> { // Parse the YAML string to a serde_yaml::Value let value: serde_yaml::Value = serde_yaml::from_str(data).context("Invalid YAML")?; @@ -36,6 +36,7 @@ impl Reencode for MsgPack { #[cfg(test)] mod tests { use super::*; + use crate::contentviews::TestMetadata; // Hardcoded MsgPack data for a simple object: // { @@ -65,13 +66,17 @@ tags: #[test] fn test_msgpack_deserialize() { - let result = MsgPack.prettify(TEST_MSGPACK).unwrap(); + let result = MsgPack + .prettify(TEST_MSGPACK, &TestMetadata::default()) + .unwrap(); assert_eq!(result, TEST_YAML); } #[test] fn test_msgpack_serialize() { - let result = MsgPack.reencode(TEST_YAML).unwrap(); + let result = MsgPack + .reencode(TEST_YAML, &TestMetadata::default()) + .unwrap(); // Verify the MsgPack data contains the expected values let value: serde_yaml::Value = decode::from_slice(&result).unwrap(); @@ -104,10 +109,14 @@ tags: #[test] fn test_msgpack_roundtrip() { // Deserialize to YAML - let yaml_result = MsgPack.prettify(TEST_MSGPACK).unwrap(); + let yaml_result = MsgPack + .prettify(TEST_MSGPACK, &TestMetadata::default()) + .unwrap(); // Serialize back to MsgPack - let result = MsgPack.reencode(&yaml_result).unwrap(); + let result = MsgPack + .reencode(&yaml_result, &TestMetadata::default()) + .unwrap(); // Deserialize both the original 
and the result to Values for comparison let original_value: serde_yaml::Value = decode::from_slice(TEST_MSGPACK).unwrap(); diff --git a/src/contentviews/protobuf.rs b/src/contentviews/protobuf.rs index 3fdf77da..332eebc0 100644 --- a/src/contentviews/protobuf.rs +++ b/src/contentviews/protobuf.rs @@ -1,4 +1,4 @@ -use crate::contentviews::{Prettify, Reencode}; +use crate::contentviews::{Metadata, Prettify, Reencode}; use anyhow::{bail, Context, Result}; use protobuf::descriptor::field_descriptor_proto::Label::LABEL_REPEATED; use protobuf::descriptor::field_descriptor_proto::Type; @@ -12,7 +12,7 @@ use protobuf::reflect::{ }; use protobuf::well_known_types::empty::Empty; use protobuf::UnknownValueRef; -use protobuf::{EnumOrUnknown, Message, MessageDyn, MessageFull, UnknownValue}; +use protobuf::{EnumOrUnknown, MessageDyn, MessageFull, UnknownValue}; use regex::Captures; use serde_yaml::value::TaggedValue; use serde_yaml::Value::Tagged; @@ -54,7 +54,7 @@ impl Prettify for Protobuf { "Protocol Buffer" } - fn prettify(&self, data: &[u8]) -> Result { + fn prettify(&self, data: &[u8], _metadata: &dyn Metadata) -> Result { // Check if data is empty first if data.is_empty() { bail!("Empty protobuf data"); @@ -79,7 +79,7 @@ impl Prettify for Protobuf { } impl Reencode for Protobuf { - fn reencode(&self, data: &str) -> Result> { + fn reencode(&self, data: &str, _metadata: &dyn Metadata) -> Result> { let descriptor = Empty::descriptor(); let message = descriptor.new_instance(); @@ -502,6 +502,7 @@ impl Protobuf { #[cfg(test)] mod tests { use super::*; + use crate::contentviews::TestMetadata; macro_rules! 
test_roundtrip { ($name:ident,$proto:literal,$yaml:literal) => { @@ -513,13 +514,13 @@ mod tests { #[test] fn prettify() { - let result = Protobuf.prettify(PROTO).unwrap(); + let result = Protobuf.prettify(PROTO, &TestMetadata::default()).unwrap(); assert_eq!(result, YAML); } #[test] fn reencode() { - let result = Protobuf.reencode(YAML).unwrap(); + let result = Protobuf.reencode(YAML, &TestMetadata::default()).unwrap(); assert_eq!(result, PROTO); } } @@ -594,26 +595,30 @@ mod tests { #[test] fn reencode_new_nested_message() { - let result = Protobuf.reencode(nested::YAML).unwrap(); + let result = Protobuf + .reencode(nested::YAML, &TestMetadata::default()) + .unwrap(); assert_eq!(result, nested::PROTO); } #[test] fn new_string_attr() { - let result = Protobuf.reencode(string::YAML).unwrap(); + let result = Protobuf + .reencode(string::YAML, &TestMetadata::default()) + .unwrap(); assert_eq!(result, string::PROTO); } } #[test] fn test_invalid_protobuf() { - let result = Protobuf.prettify(b"\xFF\xFF"); + let result = Protobuf.prettify(b"\xFF\xFF", &TestMetadata::default()); assert!(result.is_err()); } #[test] fn test_empty_protobuf() { - let result = Protobuf.prettify(b""); + let result = Protobuf.prettify(b"", &TestMetadata::default()); assert!(result.is_err()); } } From ef99ef5d94170fa8e50bdc0548d43e9475447255 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Mon, 31 Mar 2025 19:19:14 +0200 Subject: [PATCH 09/26] fix nits --- src/contentviews/hex_stream.rs | 18 ++++++++++++++++++ src/contentviews/protobuf.rs | 12 +++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/contentviews/hex_stream.rs b/src/contentviews/hex_stream.rs index 693dfeff..229fed7f 100644 --- a/src/contentviews/hex_stream.rs +++ b/src/contentviews/hex_stream.rs @@ -44,6 +44,10 @@ impl Prettify for HexStream { impl Reencode for HexStream { fn reencode(&self, data: &str, _metadata: &dyn Metadata) -> Result> { + let data = data.trim_end_matches(['\n', '\r']); + if 
data.len() % 2 != 0 { + anyhow::bail!("Invalid hex string: uneven number of characters"); + } (0..data.len()) .step_by(2) .map(|i| u8::from_str_radix(&data[i..i + 2], 16)) @@ -77,4 +81,18 @@ mod tests { let result = HexStream.reencode(data, &TestMetadata::default()).unwrap(); assert_eq!(result, b"foo"); } + + #[test] + fn test_hex_stream_reencode_with_newlines() { + let data = "666f6f\r\n"; + let result = HexStream.reencode(data, &TestMetadata::default()).unwrap(); + assert_eq!(result, b"foo"); + } + + #[test] + fn test_hex_stream_reencode_uneven_chars() { + let data = "666f6"; + let result = HexStream.reencode(data, &TestMetadata::default()); + assert!(result.is_err()); + } } diff --git a/src/contentviews/protobuf.rs b/src/contentviews/protobuf.rs index 332eebc0..74a65be3 100644 --- a/src/contentviews/protobuf.rs +++ b/src/contentviews/protobuf.rs @@ -268,7 +268,7 @@ impl Protobuf { let mut ret = Mapping::new(); for field in message.descriptor_dyn().fields() { - let key = if field.name().is_empty() { + let key = if field.name().is_empty() || field.name().starts_with("@unknown_field_") { Value::from(field.number()) } else { Value::from(field.name()) @@ -346,6 +346,7 @@ impl Protobuf { let mut add_int = |typ: Type| { descriptor.field.push(FieldDescriptorProto { number: Some(field_index as i32), + name: Some(format!("@unknown_field_{}", field_index)), type_: Some(EnumOrUnknown::from(typ)), ..Default::default() }); @@ -370,6 +371,7 @@ impl Protobuf { GuessedFieldType::Message(m) => { descriptor.field.push(FieldDescriptorProto { number: Some(field_index as i32), + name: Some(format!("@unknown_field_{}", field_index)), type_name: Some(format!(".{}.{}", name, m.name())), type_: Some(EnumOrUnknown::from(Type::TYPE_MESSAGE)), ..Default::default() @@ -616,6 +618,14 @@ mod tests { assert!(result.is_err()); } + #[test] + fn test_no_crash() { + let result = Protobuf.prettify( + b"\n\x13gRPC testing 
server\x12\x07\n\x05Index\x12\x07\n\x05Empty\x12\x0c\n\nDummyUnary\x12\x0f\n\rSpecificError\x12\r\n\x0bRandomError\x12\x0e\n\x0cHeadersUnary\x12\x11\n\x0fNoResponseUnary", + &TestMetadata::default()).unwrap(); + assert_eq!(result, "1: gRPC testing server\n2:\n- 1: Index\n- 1: Empty\n- 1: DummyUnary\n- 1: SpecificError\n- 1: RandomError\n- 1: HeadersUnary\n- 1: NoResponseUnary\n"); + } + #[test] fn test_empty_protobuf() { let result = Protobuf.prettify(b"", &TestMetadata::default()); From cc7c0528d97dbe798e3eee27517257f1771b10e3 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Wed, 2 Apr 2025 21:25:38 +0200 Subject: [PATCH 10/26] fix render priority for hex views --- src/contentviews/hex_dump.rs | 9 +++++++++ src/contentviews/hex_stream.rs | 27 ++++++++++++++++----------- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/src/contentviews/hex_dump.rs b/src/contentviews/hex_dump.rs index bd51efbc..b97eaf4e 100644 --- a/src/contentviews/hex_dump.rs +++ b/src/contentviews/hex_dump.rs @@ -1,5 +1,6 @@ use crate::contentviews::{Metadata, Prettify}; use pretty_hex::{HexConfig, PrettyHex}; +use crate::contentviews::hex_stream::is_binary; pub struct HexDump; @@ -22,6 +23,14 @@ impl Prettify for HexDump { }) )) } + + fn render_priority(&self, data: &[u8], _metadata: &dyn Metadata) -> f64 { + if is_binary(data) { + 0.95 + } else { + 0.0 + } + } } #[cfg(test)] diff --git a/src/contentviews/hex_stream.rs b/src/contentviews/hex_stream.rs index 229fed7f..6c19bb73 100644 --- a/src/contentviews/hex_stream.rs +++ b/src/contentviews/hex_stream.rs @@ -5,6 +5,20 @@ use std::num::ParseIntError; pub struct HexStream; +pub(crate) fn is_binary(data: &[u8]) -> bool { + if data.is_empty() { + return false; + } + let ratio = data + .iter() + .take(100) + .filter(|&&b| b < 9 || (13 < b && b < 32) || b > 126) + .count() as f64 + / data.len().min(100) as f64; + + ratio > 0.3 +} + impl Prettify for HexStream { fn name(&self) -> &'static str { "Hex Stream" @@ -25,17 +39,8 @@ impl 
Prettify for HexStream { } fn render_priority(&self, data: &[u8], _metadata: &dyn Metadata) -> f64 { - if data.is_empty() { - return 0.0; - } - let ratio = data - .iter() - .take(100) - .filter(|&&b| b < 9 || (13 < b && b < 32) || b > 126) - .count() as f64 - / data.len().min(100) as f64; - if ratio > 0.3 { - 1.0 + if is_binary(data) { + 0.95 } else { 0.0 } From f6d3885db86cbc7924ee8f442892db75f33b45da Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Fri, 4 Apr 2025 18:11:33 +0200 Subject: [PATCH 11/26] add syntax highlight module --- Cargo.lock | 63 ++++++++++++++++++ Cargo.toml | 4 ++ mitmproxy-rs/mitmproxy_rs/__init__.pyi | 3 +- .../mitmproxy_rs/syntax_highlight.pyi | 9 +++ mitmproxy-rs/src/lib.rs | 9 +++ mitmproxy-rs/src/stream.rs | 2 +- mitmproxy-rs/src/syntax_highlight.rs | 29 +++++++++ src/contentviews/hex_dump.rs | 2 +- src/lib.rs | 1 + src/syntax_highlight/common.rs | 65 +++++++++++++++++++ src/syntax_highlight/mod.rs | 26 ++++++++ src/syntax_highlight/xml.rs | 60 +++++++++++++++++ src/syntax_highlight/yaml.rs | 64 ++++++++++++++++++ 13 files changed, 334 insertions(+), 3 deletions(-) create mode 100644 mitmproxy-rs/mitmproxy_rs/syntax_highlight.pyi create mode 100644 mitmproxy-rs/src/syntax_highlight.rs create mode 100644 src/syntax_highlight/common.rs create mode 100644 src/syntax_highlight/mod.rs create mode 100644 src/syntax_highlight/xml.rs create mode 100644 src/syntax_highlight/yaml.rs diff --git a/Cargo.lock b/Cargo.lock index 24f86d39..f2dd44f0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2215,6 +2215,10 @@ dependencies = [ "tempfile", "tokio", "tokio-util", + "tree-sitter", + "tree-sitter-highlight", + "tree-sitter-xml", + "tree-sitter-yaml", "tun", "windows 0.61.1", ] @@ -3159,6 +3163,7 @@ version = "1.0.140" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" dependencies = [ + "indexmap 2.7.0", "itoa", "memchr", "ryu", @@ -3255,6 +3260,12 @@ version 
= "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + [[package]] name = "strsim" version = "0.11.1" @@ -3669,6 +3680,58 @@ dependencies = [ "tracing-log", ] +[[package]] +name = "tree-sitter" +version = "0.25.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9ac5ea5e7f2f1700842ec071401010b9c59bf735295f6e9fa079c3dc035b167" +dependencies = [ + "cc", + "regex", + "regex-syntax 0.8.5", + "serde_json", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-highlight" +version = "0.25.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "076673d82b859652de3e7abe73a4592c173e51dfc9b83eb49f0479fd9fe4631c" +dependencies = [ + "regex", + "streaming-iterator", + "thiserror 2.0.12", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8" + +[[package]] +name = "tree-sitter-xml" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e670041f591d994f54d597ddcd8f4ebc930e282c4c76a42268743b71f0c8b6b3" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-yaml" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0c99f2b92b677f1a18b6b232fa9329afb5758118238a7d0b29cae324ef50d5e" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "try-lock" version = "0.2.5" diff --git a/Cargo.toml b/Cargo.toml index d0d92e6d..9cfc0215 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,6 
+73,10 @@ serde_yaml = "0.9" rmp-serde = "1.1" protobuf = "3.7.2" regex = "1.10.3" +tree-sitter-highlight = "0.25.3" +tree-sitter-yaml = "0.7.0" +tree-sitter-xml = "0.7.0" +tree-sitter = "0.25.3" [patch.crates-io] # tokio = { path = "../tokio/tokio" } diff --git a/mitmproxy-rs/mitmproxy_rs/__init__.pyi b/mitmproxy-rs/mitmproxy_rs/__init__.pyi index 56504dfd..1afa64af 100644 --- a/mitmproxy-rs/mitmproxy_rs/__init__.pyi +++ b/mitmproxy-rs/mitmproxy_rs/__init__.pyi @@ -2,7 +2,7 @@ from __future__ import annotations from typing import Any, Literal from typing import final, overload, TypeVar -from . import certs, contentviews, dns, local, process_info, tun, udp, wireguard +from . import certs, contentviews, dns, local, process_info, tun, udp, wireguard, syntax_highlight T = TypeVar("T") @@ -61,6 +61,7 @@ __all__ = [ "dns", "local", "process_info", + "syntax_highlight", "tun", "udp", "wireguard", diff --git a/mitmproxy-rs/mitmproxy_rs/syntax_highlight.pyi b/mitmproxy-rs/mitmproxy_rs/syntax_highlight.pyi new file mode 100644 index 00000000..54f3652c --- /dev/null +++ b/mitmproxy-rs/mitmproxy_rs/syntax_highlight.pyi @@ -0,0 +1,9 @@ +from __future__ import annotations + +def syntax_highlight(s: str, language: str) -> list[tuple[str, str]]: + pass + +def all_tags(language: str) -> list[str]: + pass + +__all__ = ["syntax_highlight", "all_tags"] \ No newline at end of file diff --git a/mitmproxy-rs/src/lib.rs b/mitmproxy-rs/src/lib.rs index 116aec1c..bb034bb2 100644 --- a/mitmproxy-rs/src/lib.rs +++ b/mitmproxy-rs/src/lib.rs @@ -12,6 +12,7 @@ mod dns_resolver; mod process_info; mod server; mod stream; +mod syntax_highlight; pub mod task; mod udp_client; mod util; @@ -126,6 +127,14 @@ mod mitmproxy_rs { Ok(()) } + + #[pymodule] + mod syntax_highlight { + #[pymodule_export] + use crate::syntax_highlight::highlight; + #[pymodule_export] + use crate::syntax_highlight::all_tags; + } } trait AddContentview { diff --git a/mitmproxy-rs/src/stream.rs b/mitmproxy-rs/src/stream.rs index 
f55585ba..50084afb 100644 --- a/mitmproxy-rs/src/stream.rs +++ b/mitmproxy-rs/src/stream.rs @@ -186,7 +186,7 @@ impl Stream { } _ => (), }, - TunnelInfo::None {} => (), + TunnelInfo::None => (), } match default { Some(x) => Ok(x), diff --git a/mitmproxy-rs/src/syntax_highlight.rs b/mitmproxy-rs/src/syntax_highlight.rs new file mode 100644 index 00000000..66701817 --- /dev/null +++ b/mitmproxy-rs/src/syntax_highlight.rs @@ -0,0 +1,29 @@ +#[allow(unused_imports)] +use anyhow::{anyhow, Result}; + +use pyo3::{exceptions::PyValueError, prelude::*}; + +fn str_to_language(s: &str) -> PyResult { + match s { + "xml" => Ok(mitmproxy::syntax_highlight::Language::Xml), + "yaml" => Ok(mitmproxy::syntax_highlight::Language::Yaml), + other => Err(PyErr::new::(format!( + "Unsupported language: {other}" + ))), + } +} + +/// Transform a text into tagged chunks for text. +#[pyfunction] +pub fn highlight(s: String, language: &str) -> PyResult> { + let language = str_to_language(language)?; + language.highlight(s.as_bytes()) + .map_err(|e| PyValueError::new_err(e.to_string())) +} + +/// Return the list of all possible tags for a given language. 
+#[pyfunction] +pub fn all_tags(language: &str) -> PyResult<&[&str]> { + let language = str_to_language(language)?; + Ok(language.all_tags()) +} diff --git a/src/contentviews/hex_dump.rs b/src/contentviews/hex_dump.rs index b97eaf4e..45939961 100644 --- a/src/contentviews/hex_dump.rs +++ b/src/contentviews/hex_dump.rs @@ -1,6 +1,6 @@ +use crate::contentviews::hex_stream::is_binary; use crate::contentviews::{Metadata, Prettify}; use pretty_hex::{HexConfig, PrettyHex}; -use crate::contentviews::hex_stream::is_binary; pub struct HexDump; diff --git a/src/lib.rs b/src/lib.rs index 437e1cf7..2bae2b03 100755 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,5 +10,6 @@ pub mod network; pub mod packet_sources; pub mod processes; pub mod shutdown; +pub mod syntax_highlight; #[cfg(windows)] pub mod windows; diff --git a/src/syntax_highlight/common.rs b/src/syntax_highlight/common.rs new file mode 100644 index 00000000..fc41c57a --- /dev/null +++ b/src/syntax_highlight/common.rs @@ -0,0 +1,65 @@ +use super::Chunk; +use anyhow::{Context, Result}; +use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter}; + +pub fn highlight( + language: tree_sitter::Language, + highlights_query: &str, + tags: &[&'static str], + input: &[u8], +) -> Result> { + let mut highlighter = Highlighter::new(); + let mut config = HighlightConfiguration::new(language, "", highlights_query, "", "") + .context("failed to create highlight configuration")?; + config.configure(tags); + + let highlights = highlighter + .highlight(&config, input, None, |_| None) + .context("failed to highlight")?; + + let mut chunks: Vec = Vec::new(); + let mut tag: Option<&'static str> = None; + + for event in highlights { + let event = event.context("highlighter failure")?; + match event { + HighlightEvent::Source { start, end } => { + let contents = &input[start..end]; + let tag_str = tag.unwrap_or(""); + + match chunks.last_mut() { + Some(x) if x.0 == tag_str => { + 
x.1.push_str(&String::from_utf8_lossy(contents)); + } + _ => chunks.push( + (tag_str, String::from_utf8_lossy(contents).to_string()) + ), + } + } + HighlightEvent::HighlightStart(s) => { + tag = Some(tags[s.0]); + } + HighlightEvent::HighlightEnd => { + tag = None; + } + } + } + Ok(chunks) +} + +#[cfg(test)] +pub(super) fn test_tags_ok( + language: tree_sitter::Language, + highlights_query: &str, + tags: &[&'static str], +) { + let config = HighlightConfiguration::new(language, "", highlights_query, "", "").unwrap(); + for &tag in tags { + assert!( + config.names().iter().any(|name| name.contains(tag)), + "Invalid tag: {},\nAllowed tags: {:?}", + tag, + config.names() + ); + } +} diff --git a/src/syntax_highlight/mod.rs b/src/syntax_highlight/mod.rs new file mode 100644 index 00000000..c06edb92 --- /dev/null +++ b/src/syntax_highlight/mod.rs @@ -0,0 +1,26 @@ +mod common; +mod xml; +mod yaml; + +pub type Chunk = (&'static str, String); + +pub enum Language { + Xml, + Yaml, +} + +impl Language { + pub fn highlight(&self, input: &[u8]) -> anyhow::Result> { + match self { + Language::Yaml => yaml::highlight_yaml(input), + Language::Xml => xml::highlight_xml(input), + } + } + + pub fn all_tags(&self) -> &'static [&'static str] { + match self { + Language::Xml => xml::XML_TAGS, + Language::Yaml => yaml::YAML_TAGS, + } + } +} diff --git a/src/syntax_highlight/xml.rs b/src/syntax_highlight/xml.rs new file mode 100644 index 00000000..6fa349d0 --- /dev/null +++ b/src/syntax_highlight/xml.rs @@ -0,0 +1,60 @@ +use super::common::highlight; +use super::Chunk; +use anyhow::Result; + +pub(crate) const XML_TAGS: &[&str] = &[ + "tag", //
+ "property", // class or style + "comment", // + "punctuation", + "markup", +]; + +pub fn highlight_xml(input: &[u8]) -> Result> { + // There also is tree_sitter_xml, but tree_sitter_html produces slightly nicer output for us. + highlight( + tree_sitter_xml::LANGUAGE_XML.into(), + tree_sitter_xml::XML_HIGHLIGHT_QUERY, + XML_TAGS, + input, + ) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::syntax_highlight::common; + + #[test] + fn test_tags_ok() { + common::test_tags_ok( + tree_sitter_xml::LANGUAGE_XML.into(), + tree_sitter_xml::XML_HIGHLIGHT_QUERY, + XML_TAGS, + ); + } + + #[test] + fn test_highlight_xml() { + let input = b"
Hello
"; + let chunks = highlight_xml(input).unwrap(); + assert_eq!( + chunks, + vec![ + ("punctuation", "<".to_string()), + ("tag", "div".to_string()), + ("", " ".to_string()), + ("property", "class".to_string()), + ("", "=".to_string()), + ("punctuation", "\"".to_string()), + ("", "test".to_string()), + ("punctuation", "\">".to_string()), + ("markup", "Hello".to_string()), + ("punctuation", "".to_string()), + ("comment", "".to_string()) + ] + ); + } +} diff --git a/src/syntax_highlight/yaml.rs b/src/syntax_highlight/yaml.rs new file mode 100644 index 00000000..12c5ee9c --- /dev/null +++ b/src/syntax_highlight/yaml.rs @@ -0,0 +1,64 @@ +use super::common::highlight; +use super::Chunk; +use anyhow::Result; + +pub(crate) const YAML_TAGS: &[&str] = &[ + "boolean", "string", "number", "comment", // # comment + "type", // !fixed32 type annotations + "property", // key: +]; + +pub fn highlight_yaml(input: &[u8]) -> Result> { + highlight( + tree_sitter_yaml::LANGUAGE.into(), + tree_sitter_yaml::HIGHLIGHTS_QUERY, + YAML_TAGS, + input, + ) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::syntax_highlight::common; + + #[test] + fn test_tags_ok() { + common::test_tags_ok( + tree_sitter_yaml::LANGUAGE.into(), + tree_sitter_yaml::HIGHLIGHTS_QUERY, + YAML_TAGS, + ); + } + + #[test] + fn test_highlight_yaml() { + let input = b"\ + string: \"value\"\n\ + bool: true\n\ + number: !fixed32 42 # comment\n\ + "; + let chunks = highlight_yaml(input).unwrap(); + assert_eq!( + chunks, + vec![ + ("property", "string".to_string()), + ("", ": ".to_string()), + ("string", "\"value\"".to_string()), + ("", "\n".to_string()), + ("property", "bool".to_string()), + ("", ": ".to_string()), + ("boolean", "true".to_string()), + ("", "\n".to_string()), + ("property", "number".to_string()), + ("", ": ".to_string()), + ("type", "!fixed32".to_string()), + ("", " ".to_string()), + ("number", "42".to_string()), + ("", " ".to_string()), + ("comment", "# comment".to_string()), + ("", "\n".to_string()) 
+ ] + ); + } +} From a67bebe0e5e70a7f81095b6d890e59aec0d7725a Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sat, 5 Apr 2025 00:19:28 +0200 Subject: [PATCH 12/26] integrate new syntax highlighting --- .../mitmproxy_rs/syntax_highlight.pyi | 12 ++- mitmproxy-rs/src/contentview.rs | 9 +- mitmproxy-rs/src/lib.rs | 2 +- mitmproxy-rs/src/syntax_highlight.rs | 46 +++++++---- src/contentviews/mod.rs | 10 +-- src/syntax_highlight/common.rs | 69 ++++++++++++---- src/syntax_highlight/mod.rs | 82 +++++++++++++++++-- src/syntax_highlight/xml.rs | 50 +++++++---- src/syntax_highlight/yaml.rs | 52 +++++++----- 9 files changed, 235 insertions(+), 97 deletions(-) diff --git a/mitmproxy-rs/mitmproxy_rs/syntax_highlight.pyi b/mitmproxy-rs/mitmproxy_rs/syntax_highlight.pyi index 54f3652c..61ef248b 100644 --- a/mitmproxy-rs/mitmproxy_rs/syntax_highlight.pyi +++ b/mitmproxy-rs/mitmproxy_rs/syntax_highlight.pyi @@ -1,9 +1,15 @@ from __future__ import annotations -def syntax_highlight(s: str, language: str) -> list[tuple[str, str]]: +from typing import Literal + + +def highlight(text: str, language: Literal["xml", "yaml", "error", "none"]) -> list[tuple[str, str]]: pass -def all_tags(language: str) -> list[str]: +def tags() -> list[str]: pass -__all__ = ["syntax_highlight", "all_tags"] \ No newline at end of file +__all__ = [ + "highlight", + "tags", +] diff --git a/mitmproxy-rs/src/contentview.rs b/mitmproxy-rs/src/contentview.rs index 8e29e6f2..d38bc44e 100644 --- a/mitmproxy-rs/src/contentview.rs +++ b/mitmproxy-rs/src/contentview.rs @@ -1,4 +1,4 @@ -use mitmproxy::contentviews::{Metadata, Prettify, Reencode, SyntaxHighlight}; +use mitmproxy::contentviews::{Metadata, Prettify, Reencode}; use pyo3::{exceptions::PyValueError, prelude::*}; struct PythonMetadata(PyObject); @@ -52,11 +52,8 @@ impl Contentview { /// Optional syntax highlighting that should be applied to the prettified output. 
#[getter] - pub fn syntax_highlight(&self) -> &str { - match self.0.syntax_highlight() { - SyntaxHighlight::None => "none", - SyntaxHighlight::Yaml => "yaml", - } + pub fn syntax_highlight(&self) -> String { + self.0.syntax_highlight().to_string() } fn __lt__(&self, py: Python<'_>, other: PyObject) -> PyResult { diff --git a/mitmproxy-rs/src/lib.rs b/mitmproxy-rs/src/lib.rs index bb034bb2..14e98b75 100644 --- a/mitmproxy-rs/src/lib.rs +++ b/mitmproxy-rs/src/lib.rs @@ -133,7 +133,7 @@ mod mitmproxy_rs { #[pymodule_export] use crate::syntax_highlight::highlight; #[pymodule_export] - use crate::syntax_highlight::all_tags; + use crate::syntax_highlight::tags; } } diff --git a/mitmproxy-rs/src/syntax_highlight.rs b/mitmproxy-rs/src/syntax_highlight.rs index 66701817..6f143633 100644 --- a/mitmproxy-rs/src/syntax_highlight.rs +++ b/mitmproxy-rs/src/syntax_highlight.rs @@ -1,29 +1,39 @@ #[allow(unused_imports)] use anyhow::{anyhow, Result}; +use std::str::FromStr; +use mitmproxy::syntax_highlight::{Language, Tag}; use pyo3::{exceptions::PyValueError, prelude::*}; -fn str_to_language(s: &str) -> PyResult { - match s { - "xml" => Ok(mitmproxy::syntax_highlight::Language::Xml), - "yaml" => Ok(mitmproxy::syntax_highlight::Language::Yaml), - other => Err(PyErr::new::(format!( - "Unsupported language: {other}" - ))), - } -} - -/// Transform a text into tagged chunks for text. +/// Transform text into a list of tagged chunks. 
+/// +/// Example: +/// +/// ```python +/// from mitmproxy_rs.syntax_highlight import highlight +/// highlighted = highlight("key: 42", "yaml") +/// print(highlighted) # [('property', 'key'), ('', ': '), ('number', '42')] +/// ``` #[pyfunction] -pub fn highlight(s: String, language: &str) -> PyResult> { - let language = str_to_language(language)?; - language.highlight(s.as_bytes()) +pub fn highlight(text: String, language: &str) -> PyResult> { + let language = Language::from_str(language)?; + language + .highlight(text.as_bytes()) + .map(|chunks| { + chunks + .into_iter() + .map(|(tag, text)| (tag.to_str(), text)) + .collect() + }) .map_err(|e| PyValueError::new_err(e.to_string())) } -/// Return the list of all possible tags for a given language. +/// Return the list of all possible tag names for a given language. #[pyfunction] -pub fn all_tags(language: &str) -> PyResult<&[&str]> { - let language = str_to_language(language)?; - Ok(language.all_tags()) +pub fn tags() -> PyResult> { + Ok(Tag::VALUES + .iter() + .map(|tag| tag.to_str()) + .filter(|&x| !x.is_empty()) + .collect()) } diff --git a/src/contentviews/mod.rs b/src/contentviews/mod.rs index 6457d558..609cba0c 100644 --- a/src/contentviews/mod.rs +++ b/src/contentviews/mod.rs @@ -5,16 +5,12 @@ mod protobuf; use anyhow::Result; +use crate::syntax_highlight; pub use hex_dump::HexDump; pub use hex_stream::HexStream; pub use msgpack::MsgPack; pub use protobuf::Protobuf; -pub enum SyntaxHighlight { - None, - Yaml, -} - pub trait Metadata { fn content_type(&self) -> Option; } @@ -32,8 +28,8 @@ pub trait Prettify: Send + Sync { 0.0 } - fn syntax_highlight(&self) -> SyntaxHighlight { - SyntaxHighlight::None + fn syntax_highlight(&self) -> syntax_highlight::Language { + syntax_highlight::Language::None } } diff --git a/src/syntax_highlight/common.rs b/src/syntax_highlight/common.rs index fc41c57a..09cbd665 100644 --- a/src/syntax_highlight/common.rs +++ b/src/syntax_highlight/common.rs @@ -1,46 +1,43 @@ -use 
super::Chunk; +use super::{Chunk, Tag}; use anyhow::{Context, Result}; use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter}; pub fn highlight( language: tree_sitter::Language, highlights_query: &str, - tags: &[&'static str], + names: &[&str], + tags: &[Tag], input: &[u8], ) -> Result> { let mut highlighter = Highlighter::new(); let mut config = HighlightConfiguration::new(language, "", highlights_query, "", "") .context("failed to create highlight configuration")?; - config.configure(tags); + config.configure(names); let highlights = highlighter .highlight(&config, input, None, |_| None) .context("failed to highlight")?; let mut chunks: Vec = Vec::new(); - let mut tag: Option<&'static str> = None; + let mut tag: Tag = Tag::Text; for event in highlights { let event = event.context("highlighter failure")?; match event { HighlightEvent::Source { start, end } => { - let contents = &input[start..end]; - let tag_str = tag.unwrap_or(""); - + let contents = String::from_utf8_lossy(&input[start..end]); match chunks.last_mut() { - Some(x) if x.0 == tag_str => { - x.1.push_str(&String::from_utf8_lossy(contents)); + Some(x) if x.0 == tag || contents.trim_ascii().is_empty() => { + x.1.push_str(&contents); } - _ => chunks.push( - (tag_str, String::from_utf8_lossy(contents).to_string()) - ), + _ => chunks.push((tag, contents.to_string())), } } HighlightEvent::HighlightStart(s) => { - tag = Some(tags[s.0]); + tag = tags[s.0]; } HighlightEvent::HighlightEnd => { - tag = None; + tag = Tag::Text; } } } @@ -48,13 +45,15 @@ pub fn highlight( } #[cfg(test)] -pub(super) fn test_tags_ok( +pub(super) fn test_names_ok( language: tree_sitter::Language, highlights_query: &str, - tags: &[&'static str], + names: &[&str], + tags: &[Tag], ) { + assert_eq!(names.len(), tags.len()); let config = HighlightConfiguration::new(language, "", highlights_query, "", "").unwrap(); - for &tag in tags { + for &tag in names { assert!( config.names().iter().any(|name| 
name.contains(tag)), "Invalid tag: {},\nAllowed tags: {:?}", @@ -63,3 +62,39 @@ pub(super) fn test_tags_ok( ); } } + +#[allow(unused)] +#[cfg(test)] +pub(super) fn debug(language: tree_sitter::Language, highlights_query: &str, input: &[u8]) { + let mut highlighter = Highlighter::new(); + let mut config = HighlightConfiguration::new(language, "", highlights_query, "", "").unwrap(); + let names = config + .names() + .iter() + .map(|name| name.to_string()) + .collect::>(); + config.configure(&names); + let highlights = highlighter + .highlight(&config, input, None, |_| None) + .unwrap(); + + let mut tag: &str = ""; + for event in highlights { + match event.unwrap() { + HighlightEvent::Source { start, end } => { + let contents = &input[start..end]; + println!( + "{}: {:?}", + tag, + String::from_utf8_lossy(contents).to_string().as_str() + ); + } + HighlightEvent::HighlightStart(s) => { + tag = &names[s.0]; + } + HighlightEvent::HighlightEnd => { + tag = ""; + } + } + } +} diff --git a/src/syntax_highlight/mod.rs b/src/syntax_highlight/mod.rs index c06edb92..c3b85c01 100644 --- a/src/syntax_highlight/mod.rs +++ b/src/syntax_highlight/mod.rs @@ -1,12 +1,19 @@ +use anyhow::bail; +use std::fmt; +use std::fmt::Formatter; +use std::str::FromStr; + mod common; mod xml; mod yaml; -pub type Chunk = (&'static str, String); +pub type Chunk = (Tag, String); pub enum Language { Xml, Yaml, + Error, + None, } impl Language { @@ -14,13 +21,78 @@ impl Language { match self { Language::Yaml => yaml::highlight_yaml(input), Language::Xml => xml::highlight_xml(input), + Language::None => Ok(vec![( + Tag::Text, + String::from_utf8_lossy(input).to_string(), + )]), + Language::Error => Ok(vec![( + Tag::Error, + String::from_utf8_lossy(input).to_string(), + )]), } } - - pub fn all_tags(&self) -> &'static [&'static str] { +} + +impl FromStr for Language { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + Ok(match s { + "xml" => Language::Xml, + "yaml" => Language::Yaml, + 
"none" => Language::None, + "error" => Language::Error, + other => bail!("Unsupported language: {other}"), + }) + } +} + +impl fmt::Display for Language { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "{}", + match self { + Language::Xml => "xml", + Language::Yaml => "yaml", + Language::Error => "error", + Language::None => "none", + } + ) + } +} + +#[derive(Debug, Eq, PartialEq, Copy, Clone)] +pub enum Tag { + Text, // Text that shouldn't be emphasized. + Name, // A tag, such as an HTML tag or a YAML key. + String, // A string value. + Number, // A number value. + Boolean, // A boolean value. + Comment, // A comment. + Error, // An error value. +} + +impl Tag { + pub const VALUES: [Self; 7] = [ + Self::Text, + Self::Name, + Self::String, + Self::Number, + Self::Boolean, + Self::Comment, + Self::Error, + ]; + + pub fn to_str(self) -> &'static str { match self { - Language::Xml => xml::XML_TAGS, - Language::Yaml => yaml::YAML_TAGS, + Tag::Text => "", + Tag::Name => "name", + Tag::String => "string", + Tag::Number => "number", + Tag::Boolean => "boolean", + Tag::Comment => "comment", + Tag::Error => "error", } } } diff --git a/src/syntax_highlight/xml.rs b/src/syntax_highlight/xml.rs index 6fa349d0..55f365a1 100644 --- a/src/syntax_highlight/xml.rs +++ b/src/syntax_highlight/xml.rs @@ -1,21 +1,31 @@ use super::common::highlight; -use super::Chunk; +use super::{Chunk, Tag}; use anyhow::Result; -pub(crate) const XML_TAGS: &[&str] = &[ +const NAMES: &[&str] = &[ "tag", //
"property", // class or style + "operator", // equal sign between class and value "comment", // "punctuation", "markup", ]; +const TAGS: &[Tag] = &[ + Tag::Name, //
+ Tag::Name, // class or style + Tag::Name, // equal sign between class and value + Tag::Comment, // + Tag::Name, // punctuation + Tag::Text, // markup +]; pub fn highlight_xml(input: &[u8]) -> Result> { // There also is tree_sitter_xml, but tree_sitter_html produces slightly nicer output for us. highlight( tree_sitter_xml::LANGUAGE_XML.into(), tree_sitter_xml::XML_HIGHLIGHT_QUERY, - XML_TAGS, + NAMES, + TAGS, input, ) } @@ -25,12 +35,23 @@ mod tests { use super::*; use crate::syntax_highlight::common; + #[ignore] + #[test] + fn debug() { + common::debug( + tree_sitter_xml::LANGUAGE_XML.into(), + tree_sitter_xml::XML_HIGHLIGHT_QUERY, + b"
Hello
", + ); + } + #[test] fn test_tags_ok() { - common::test_tags_ok( + common::test_names_ok( tree_sitter_xml::LANGUAGE_XML.into(), tree_sitter_xml::XML_HIGHLIGHT_QUERY, - XML_TAGS, + NAMES, + TAGS, ); } @@ -41,19 +62,12 @@ mod tests { assert_eq!( chunks, vec![ - ("punctuation", "<".to_string()), - ("tag", "div".to_string()), - ("", " ".to_string()), - ("property", "class".to_string()), - ("", "=".to_string()), - ("punctuation", "\"".to_string()), - ("", "test".to_string()), - ("punctuation", "\">".to_string()), - ("markup", "Hello".to_string()), - ("punctuation", "".to_string()), - ("comment", "".to_string()) + (Tag::Name, "
".to_string()), + (Tag::Text, "Hello".to_string()), + (Tag::Name, "
".to_string()), + (Tag::Comment, "".to_string()) ] ); } diff --git a/src/syntax_highlight/yaml.rs b/src/syntax_highlight/yaml.rs index 12c5ee9c..b56d58d5 100644 --- a/src/syntax_highlight/yaml.rs +++ b/src/syntax_highlight/yaml.rs @@ -1,18 +1,30 @@ use super::common::highlight; -use super::Chunk; +use super::{Chunk, Tag}; use anyhow::Result; -pub(crate) const YAML_TAGS: &[&str] = &[ - "boolean", "string", "number", "comment", // # comment +const NAMES: &[&str] = &[ + "boolean", // YAML booleans + "string", // YAML strings + "number", // YAML numbers + "comment", // # comment "type", // !fixed32 type annotations "property", // key: ]; +const TAGS: &[Tag] = &[ + Tag::Boolean, + Tag::String, + Tag::Number, + Tag::Comment, + Tag::Name, + Tag::Name, +]; pub fn highlight_yaml(input: &[u8]) -> Result> { highlight( tree_sitter_yaml::LANGUAGE.into(), tree_sitter_yaml::HIGHLIGHTS_QUERY, - YAML_TAGS, + NAMES, + TAGS, input, ) } @@ -24,10 +36,11 @@ mod tests { #[test] fn test_tags_ok() { - common::test_tags_ok( + common::test_names_ok( tree_sitter_yaml::LANGUAGE.into(), tree_sitter_yaml::HIGHLIGHTS_QUERY, - YAML_TAGS, + NAMES, + TAGS, ); } @@ -42,22 +55,17 @@ mod tests { assert_eq!( chunks, vec![ - ("property", "string".to_string()), - ("", ": ".to_string()), - ("string", "\"value\"".to_string()), - ("", "\n".to_string()), - ("property", "bool".to_string()), - ("", ": ".to_string()), - ("boolean", "true".to_string()), - ("", "\n".to_string()), - ("property", "number".to_string()), - ("", ": ".to_string()), - ("type", "!fixed32".to_string()), - ("", " ".to_string()), - ("number", "42".to_string()), - ("", " ".to_string()), - ("comment", "# comment".to_string()), - ("", "\n".to_string()) + (Tag::Name, "string".to_string()), + (Tag::Text, ": ".to_string()), + (Tag::String, "\"value\"\n".to_string()), + (Tag::Name, "bool".to_string()), + (Tag::Text, ": ".to_string()), + (Tag::Boolean, "true\n".to_string()), + (Tag::Name, "number".to_string()), + (Tag::Text, ": ".to_string()), + 
(Tag::Name, "!fixed32 ".to_string()), + (Tag::Number, "42 ".to_string()), + (Tag::Comment, "# comment\n".to_string()), ] ); } From 6ca6e92f11b5f9cf775af19e0d19201cdfa341e8 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sat, 5 Apr 2025 11:49:03 +0200 Subject: [PATCH 13/26] add gRPC contentview --- mitmproxy-rs/mitmproxy_rs/contentviews.pyi | 4 +- mitmproxy-rs/src/contentview.rs | 33 ++++----- mitmproxy-rs/src/lib.rs | 13 ++-- src/contentviews/grpc.rs | 85 ++++++++++++++++++++++ src/contentviews/mod.rs | 2 + src/contentviews/protobuf.rs | 21 ++++-- 6 files changed, 127 insertions(+), 31 deletions(-) create mode 100644 src/contentviews/grpc.rs diff --git a/mitmproxy-rs/mitmproxy_rs/contentviews.pyi b/mitmproxy-rs/mitmproxy_rs/contentviews.pyi index 2cbe6e41..d21a30a9 100644 --- a/mitmproxy-rs/mitmproxy_rs/contentviews.pyi +++ b/mitmproxy-rs/mitmproxy_rs/contentviews.pyi @@ -14,7 +14,8 @@ class InteractiveContentview(Contentview): hex_dump: Contentview hex_stream: InteractiveContentview msgpack: InteractiveContentview -protobuf: Contentview +protobuf: InteractiveContentview +grpc: InteractiveContentview __all__ = [ "Contentview", @@ -23,4 +24,5 @@ __all__ = [ "hex_stream", "msgpack", "protobuf", + "grpc", ] diff --git a/mitmproxy-rs/src/contentview.rs b/mitmproxy-rs/src/contentview.rs index d38bc44e..4138b2dc 100644 --- a/mitmproxy-rs/src/contentview.rs +++ b/mitmproxy-rs/src/contentview.rs @@ -1,17 +1,21 @@ use mitmproxy::contentviews::{Metadata, Prettify, Reencode}; use pyo3::{exceptions::PyValueError, prelude::*}; -struct PythonMetadata(PyObject); +pub struct PythonMetadata<'py>(Bound<'py, PyAny>); -impl Metadata for PythonMetadata { +impl Metadata for PythonMetadata<'_> { fn content_type(&self) -> Option { - Python::with_gil(|py| { - self.0 - .getattr(py, "content_type") - .ok()? - .extract::(py) - .ok() - }) + self.0 + .getattr("content_type") + .ok()? 
+ .extract::() + .ok() + } +} + +impl<'py> FromPyObject<'py> for PythonMetadata<'py> { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { + Ok(PythonMetadata(ob.clone())) } } @@ -36,17 +40,14 @@ impl Contentview { } /// Pretty-print an (encoded) message. - pub fn prettify(&self, data: Vec, metadata: PyObject) -> PyResult { - let metadata = PythonMetadata(metadata); - + pub fn prettify(&self, data: Vec, metadata: PythonMetadata) -> PyResult { self.0 .prettify(&data, &metadata) .map_err(|e| PyValueError::new_err(e.to_string())) } /// Return the priority of this view for rendering data. - pub fn render_priority(&self, data: Vec, metadata: PyObject) -> PyResult { - let metadata = PythonMetadata(metadata); + pub fn render_priority(&self, data: Vec, metadata: PythonMetadata) -> PyResult { Ok(self.0.render_priority(&data, &metadata)) } @@ -85,9 +86,7 @@ impl InteractiveContentview { #[pymethods] impl InteractiveContentview { - pub fn reencode(&self, data: &str, metadata: PyObject) -> PyResult> { - let metadata = PythonMetadata(metadata); - + pub fn reencode(&self, data: &str, metadata: PythonMetadata) -> PyResult> { self.0 .reencode(data, &metadata) .map_err(|e| PyValueError::new_err(e.to_string())) diff --git a/mitmproxy-rs/src/lib.rs b/mitmproxy-rs/src/lib.rs index 14e98b75..c7e85ee4 100644 --- a/mitmproxy-rs/src/lib.rs +++ b/mitmproxy-rs/src/lib.rs @@ -88,18 +88,19 @@ mod mitmproxy_rs { #[pymodule] mod contentviews { use super::*; - //#[pymodule_export] - //use crate::contentview::Contentview; - //#[pymodule_export] - //use crate::contentview::InteractiveContentview; - use mitmproxy::contentviews::{HexDump, HexStream, MsgPack, Protobuf}; + #[pymodule_export] + use crate::contentview::Contentview; + #[pymodule_export] + use crate::contentview::InteractiveContentview; + use mitmproxy::contentviews::{HexDump, HexStream, MsgPack, Protobuf, GRPC}; #[pymodule_init] fn init(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_contentview(&HexDump)?; 
m.add_interactive_contentview(&HexStream)?; m.add_interactive_contentview(&MsgPack)?; - m.add_contentview(&Protobuf)?; + m.add_interactive_contentview(&Protobuf)?; + m.add_interactive_contentview(&GRPC)?; Ok(()) } } diff --git a/src/contentviews/grpc.rs b/src/contentviews/grpc.rs new file mode 100644 index 00000000..f75a1955 --- /dev/null +++ b/src/contentviews/grpc.rs @@ -0,0 +1,85 @@ +use crate::contentviews::{Metadata, Prettify, Protobuf, Reencode}; +use crate::syntax_highlight::Language; +use anyhow::{bail, Context, Result}; +use serde::Deserialize; +use serde_yaml::Value; + +pub struct GRPC; + +impl Prettify for GRPC { + fn name(&self) -> &'static str { + "gRPC" + } + + fn syntax_highlight(&self) -> Language { + Language::Yaml + } + + fn prettify(&self, mut data: &[u8], metadata: &dyn Metadata) -> Result { + let mut protos = vec![]; + + while !data.is_empty() { + let compressed = match data[0] { + 0 => false, + 1 => true, + _ => bail!("invalid gRPC: first byte is not a boolean"), + }; + let len = match data.get(1..5) { + Some(x) => u32::from_be_bytes(x.try_into()?) 
as usize, + _ => bail!("invalid gRPC: first byte is not a boolean"), + }; + let Some(proto) = data.get(5..5 + len) else { + bail!("Invald gRPC: not enough data") + }; + if compressed { + todo!(); + } + protos.push(proto); + data = &data[5 + len..]; + } + + let prettified = protos + .into_iter() + .map(|proto| Protobuf.prettify(proto, metadata)) + .collect::>>()?; + Ok(prettified.join("\n---\n\n")) + } + + fn render_priority(&self, _data: &[u8], metadata: &dyn Metadata) -> f64 { + let Some(ct) = metadata.content_type() else { + return 0.0; + }; + match ct.as_str() { + "application/grpc" => 2.0, + "application/grpc+proto" => 2.0, + "application/prpc" => 2.0, + _ => 0.0, + } + } +} + +impl Reencode for GRPC { + fn reencode(&self, data: &str, metadata: &dyn Metadata) -> Result> { + let mut ret = vec![]; + for document in serde_yaml::Deserializer::from_str(data) { + let value = Value::deserialize(document).context("Invalid YAML")?; + let proto = Protobuf::reencode_yaml(value, metadata)?; + ret.push(0); // compressed + ret.extend(u32::to_be_bytes(proto.len() as u32)); + ret.extend(proto); + } + Ok(ret) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::contentviews::TestMetadata; + + #[test] + fn test_grpc() { + let result = GRPC.prettify(b"foo", &TestMetadata::default()).unwrap(); + assert_eq!(result, "666f6f"); + } +} diff --git a/src/contentviews/mod.rs b/src/contentviews/mod.rs index 609cba0c..967cbb65 100644 --- a/src/contentviews/mod.rs +++ b/src/contentviews/mod.rs @@ -1,3 +1,4 @@ +mod grpc; mod hex_dump; mod hex_stream; mod msgpack; @@ -6,6 +7,7 @@ mod protobuf; use anyhow::Result; use crate::syntax_highlight; +pub use grpc::GRPC; pub use hex_dump::HexDump; pub use hex_stream::HexStream; pub use msgpack::MsgPack; diff --git a/src/contentviews/protobuf.rs b/src/contentviews/protobuf.rs index 74a65be3..5f991b6d 100644 --- a/src/contentviews/protobuf.rs +++ b/src/contentviews/protobuf.rs @@ -1,4 +1,5 @@ use crate::contentviews::{Metadata, Prettify, 
Reencode}; +use crate::syntax_highlight::Language; use anyhow::{bail, Context, Result}; use protobuf::descriptor::field_descriptor_proto::Label::LABEL_REPEATED; use protobuf::descriptor::field_descriptor_proto::Type; @@ -54,10 +55,14 @@ impl Prettify for Protobuf { "Protocol Buffer" } + fn syntax_highlight(&self) -> Language { + Language::Yaml + } + fn prettify(&self, data: &[u8], _metadata: &dyn Metadata) -> Result { // Check if data is empty first if data.is_empty() { - bail!("Empty protobuf data"); + return Ok("{} # empty protobuf message".to_string()); } let existing = Empty::descriptor(); @@ -79,13 +84,9 @@ impl Prettify for Protobuf { } impl Reencode for Protobuf { - fn reencode(&self, data: &str, _metadata: &dyn Metadata) -> Result> { - let descriptor = Empty::descriptor(); - let message = descriptor.new_instance(); - + fn reencode(&self, data: &str, metadata: &dyn Metadata) -> Result> { let value: Value = serde_yaml::from_str(data).context("Invalid YAML")?; - - Self::merge_yaml_into_message(value, message) + Self::reencode_yaml(value, metadata) } } @@ -143,6 +144,12 @@ fn int_value(n: Number, field: Option<&FieldDescriptor>) -> UnknownValue { } impl Protobuf { + pub(super) fn reencode_yaml(value: Value, _metadata: &dyn Metadata) -> Result> { + let descriptor = Empty::descriptor(); + let message = descriptor.new_instance(); + Self::merge_yaml_into_message(value, message) + } + fn merge_yaml_into_message(value: Value, mut message: Box) -> Result> { let Value::Mapping(mapping) = value else { bail!("YAML is not a mapping"); From 06e5abf4441c0fd0e9f8db63110bff04090fe262 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sat, 5 Apr 2025 13:46:36 +0200 Subject: [PATCH 14/26] bench++ --- Cargo.toml | 8 +++++ benches/contentviews.rs | 61 ++++++++++++++++++++++++++++++++++ benches/syntax_highlight.rs | 60 +++++++++++++++++++++++++++++++++ src/contentviews/mod.rs | 2 -- src/contentviews/msgpack.rs | 5 +++ src/syntax_highlight/common.rs | 10 ++---- 
src/syntax_highlight/mod.rs | 2 +- src/syntax_highlight/xml.rs | 20 +++++++---- src/syntax_highlight/yaml.rs | 19 ++++++++--- 9 files changed, 165 insertions(+), 22 deletions(-) create mode 100644 benches/contentviews.rs create mode 100644 benches/syntax_highlight.rs diff --git a/Cargo.toml b/Cargo.toml index 9cfc0215..13a6b6b8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -123,6 +123,14 @@ hickory-server = "0.25.1" name = "process" harness = false +[[bench]] +name = "contentviews" +harness = false + +[[bench]] +name = "syntax_highlight" +harness = false + [profile.release] codegen-units = 1 lto = true diff --git a/benches/contentviews.rs b/benches/contentviews.rs new file mode 100644 index 00000000..cf05cd37 --- /dev/null +++ b/benches/contentviews.rs @@ -0,0 +1,61 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use mitmproxy::contentviews; +use mitmproxy::contentviews::{Prettify, Reencode, TestMetadata}; + +fn criterion_benchmark(c: &mut Criterion) { + c.bench_function("protobuf-prettify", |b| { + b.iter(|| { + contentviews::Protobuf.prettify(black_box(b"\n\x13gRPC testing server\x12\x07\n\x05Index\x12\x07\n\x05Empty\x12\x0c\n\nDummyUnary\x12\x0f\n\rSpecificError\x12\r\n\x0bRandomError\x12\x0e\n\x0cHeadersUnary\x12\x11\n\x0fNoResponseUnary"), &TestMetadata::default()).unwrap() + }) + }); + + c.bench_function("protobuf-reencode", |b| { + b.iter(|| { + contentviews::Protobuf.reencode( + black_box("1: gRPC testing server\n2:\n- 1: Index\n- 1: Empty\n- 1: DummyUnary\n- 1: SpecificError\n- 1: RandomError\n- 1: HeadersUnary\n- 1: NoResponseUnary\n"), + &TestMetadata::default() + ).unwrap() + }) + }); + + const TEST_MSGPACK: &[u8] = &[ + 0x83, // map with 3 elements + 0xa4, 0x6e, 0x61, 0x6d, 0x65, // "name" + 0xa8, 0x4a, 0x6f, 0x68, 0x6e, 0x20, 0x44, 0x6f, 0x65, // "John Doe" + 0xa3, 0x61, 0x67, 0x65, // "age" + 0x1e, // 30 + 0xa4, 0x74, 0x61, 0x67, 0x73, // "tags" + 0x92, // array with 2 elements + 0xa9, 0x64, 0x65, 0x76, 0x65, 0x6c, 0x6f, 
0x70, 0x65, 0x72, // "developer" + 0xa4, 0x72, 0x75, 0x73, 0x74, // "rust" + ]; + c.bench_function("msgpack-prettify", |b| { + b.iter(|| { + contentviews::MsgPack + .prettify(black_box(TEST_MSGPACK), &TestMetadata::default()) + .unwrap() + }) + }); + + c.bench_function("msgpack-reencode", |b| { + b.iter(|| { + contentviews::MsgPack + .reencode( + black_box( + "\ + name: John Doe\n\ + age: 30\n\ + tags:\n\ + - developer\n\ + - rust\n\ + ", + ), + &TestMetadata::default(), + ) + .unwrap() + }) + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/benches/syntax_highlight.rs b/benches/syntax_highlight.rs new file mode 100644 index 00000000..f78b34f7 --- /dev/null +++ b/benches/syntax_highlight.rs @@ -0,0 +1,60 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use mitmproxy::syntax_highlight; +fn criterion_benchmark(c: &mut Criterion) { + c.bench_function("syntax_highlight small", |b| { + b.iter(|| { + syntax_highlight::Language::Xml + .highlight(black_box( + br#" + + + + + + Bootstrap demo + + +

Hello, world!

+ + "#, + )) + .unwrap() + }) + }); + + let data = "x".repeat(8096); + c.bench_function("syntax_highlight xml", |b| { + b.iter(|| { + syntax_highlight::Language::Xml + .highlight(black_box(data.as_bytes())) + .unwrap() + }) + }); + + // tree_sitter_html is faster, but not by orders of magnitude. + /* + let mut config = HighlightConfiguration::new( + tree_sitter_html::LANGUAGE.into(), + "", + tree_sitter_html::HIGHLIGHTS_QUERY, + "", + "" + ).unwrap(); + let names = config.names().iter().map(|x| x.to_string()).collect::>(); + let tags = names.iter().map(|_| Tag::Text).collect::>(); + config.configure(&names); + + c.bench_function("syntax_highlight html", |b| { + b.iter(|| { + common::highlight( + &config, + &tags, + data.as_bytes(), + ) + }) + }); + */ +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/src/contentviews/mod.rs b/src/contentviews/mod.rs index 967cbb65..aee90834 100644 --- a/src/contentviews/mod.rs +++ b/src/contentviews/mod.rs @@ -39,13 +39,11 @@ pub trait Reencode: Send + Sync { fn reencode(&self, data: &str, metadata: &dyn Metadata) -> Result>; } -#[cfg(test)] #[derive(Default)] pub struct TestMetadata { pub content_type: Option, } -#[cfg(test)] impl Metadata for TestMetadata { fn content_type(&self) -> Option { self.content_type.clone() diff --git a/src/contentviews/msgpack.rs b/src/contentviews/msgpack.rs index b046070e..ff2466b9 100644 --- a/src/contentviews/msgpack.rs +++ b/src/contentviews/msgpack.rs @@ -1,4 +1,5 @@ use crate::contentviews::{Metadata, Prettify, Reencode}; +use crate::syntax_highlight::Language; use anyhow::{Context, Result}; use rmp_serde::{decode, encode}; use serde_yaml; @@ -10,6 +11,10 @@ impl Prettify for MsgPack { "MsgPack" } + fn syntax_highlight(&self) -> Language { + Language::Yaml + } + fn prettify(&self, data: &[u8], _metadata: &dyn Metadata) -> Result { // Deserialize MsgPack to a serde_yaml::Value let value: serde_yaml::Value = diff --git a/src/syntax_highlight/common.rs 
b/src/syntax_highlight/common.rs index 09cbd665..d1225639 100644 --- a/src/syntax_highlight/common.rs +++ b/src/syntax_highlight/common.rs @@ -3,19 +3,13 @@ use anyhow::{Context, Result}; use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter}; pub fn highlight( - language: tree_sitter::Language, - highlights_query: &str, - names: &[&str], + config: &HighlightConfiguration, tags: &[Tag], input: &[u8], ) -> Result> { let mut highlighter = Highlighter::new(); - let mut config = HighlightConfiguration::new(language, "", highlights_query, "", "") - .context("failed to create highlight configuration")?; - config.configure(names); - let highlights = highlighter - .highlight(&config, input, None, |_| None) + .highlight(config, input, None, |_| None) .context("failed to highlight")?; let mut chunks: Vec = Vec::new(); diff --git a/src/syntax_highlight/mod.rs b/src/syntax_highlight/mod.rs index c3b85c01..70ec6815 100644 --- a/src/syntax_highlight/mod.rs +++ b/src/syntax_highlight/mod.rs @@ -3,7 +3,7 @@ use std::fmt; use std::fmt::Formatter; use std::str::FromStr; -mod common; +pub mod common; mod xml; mod yaml; diff --git a/src/syntax_highlight/xml.rs b/src/syntax_highlight/xml.rs index 55f365a1..a9c939bc 100644 --- a/src/syntax_highlight/xml.rs +++ b/src/syntax_highlight/xml.rs @@ -1,6 +1,8 @@ use super::common::highlight; use super::{Chunk, Tag}; use anyhow::Result; +use std::sync::LazyLock; +use tree_sitter_highlight::HighlightConfiguration; const NAMES: &[&str] = &[ "tag", //
@@ -19,15 +21,21 @@ const TAGS: &[Tag] = &[ Tag::Text, // markup ]; -pub fn highlight_xml(input: &[u8]) -> Result> { - // There also is tree_sitter_xml, but tree_sitter_html produces slightly nicer output for us. - highlight( +static XML_CONFIG: LazyLock = LazyLock::new(|| { + let mut config = HighlightConfiguration::new( tree_sitter_xml::LANGUAGE_XML.into(), + "", tree_sitter_xml::XML_HIGHLIGHT_QUERY, - NAMES, - TAGS, - input, + "", + "", ) + .expect("failed to build XML syntax highlighter"); + config.configure(NAMES); + config +}); + +pub fn highlight_xml(input: &[u8]) -> Result> { + highlight(&XML_CONFIG, TAGS, input) } #[cfg(test)] diff --git a/src/syntax_highlight/yaml.rs b/src/syntax_highlight/yaml.rs index b56d58d5..2f5de328 100644 --- a/src/syntax_highlight/yaml.rs +++ b/src/syntax_highlight/yaml.rs @@ -1,6 +1,8 @@ use super::common::highlight; use super::{Chunk, Tag}; use anyhow::Result; +use std::sync::LazyLock; +use tree_sitter_highlight::HighlightConfiguration; const NAMES: &[&str] = &[ "boolean", // YAML booleans @@ -19,14 +21,21 @@ const TAGS: &[Tag] = &[ Tag::Name, ]; -pub fn highlight_yaml(input: &[u8]) -> Result> { - highlight( +static YAML_CONFIG: LazyLock = LazyLock::new(|| { + let mut config = HighlightConfiguration::new( tree_sitter_yaml::LANGUAGE.into(), + "", tree_sitter_yaml::HIGHLIGHTS_QUERY, - NAMES, - TAGS, - input, + "", + "", ) + .expect("failed to build YAML syntax highlighter"); + config.configure(NAMES); + config +}); + +pub fn highlight_yaml(input: &[u8]) -> Result> { + highlight(&YAML_CONFIG, TAGS, input) } #[cfg(test)] From 8f547fb71bc1b4ce4eb9923dc272e087042bcde5 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sat, 5 Apr 2025 19:47:11 +0200 Subject: [PATCH 15/26] fix nits --- Cargo.lock | 40 +++++++++++----------- mitmproxy-rs/mitmproxy_rs/contentviews.pyi | 9 ++++- src/contentviews/protobuf.rs | 2 +- 3 files changed, 29 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f2dd44f0..ba836a6a 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -568,9 +568,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.17" +version = "1.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fcb57c740ae1daf453ae85f16e37396f672b039e00d9d866e07ddb24e328e3a" +checksum = "525046617d8376e3db1deffb079e91cef90a89fc3ca5c185bbf8c9ecdd15cd5c" dependencies = [ "shlex", ] @@ -1001,9 +1001,9 @@ dependencies = [ [[package]] name = "deranged" -version = "0.4.1" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28cfac68e08048ae1883171632c2aef3ebc555621ae56fbccce1cbf22dd7f058" +checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" dependencies = [ "powerfmt", ] @@ -1085,9 +1085,9 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" -version = "0.3.10" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" +checksum = "976dd42dc7e85965fe702eb8164f21f450704bdde31faefd6471dba214cb594e" dependencies = [ "libc", "windows-sys 0.59.0", @@ -1377,7 +1377,7 @@ dependencies = [ "futures-core", "futures-sink", "http", - "indexmap 2.8.0", + "indexmap 2.9.0", "slab", "tokio", "tokio-util", @@ -1823,9 +1823,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.8.0" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3954d50fe15b02142bf25d3b8bdadb634ec3948f103d04ffe3031bc8fe9d7058" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", "hashbrown 0.15.2", @@ -2147,9 +2147,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.8.5" +version = "0.8.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5" +checksum = "ff70ce3e48ae43fa075863cef62e8b43b71a4f2382229920e0df362592919430" dependencies = [ "adler2", "simd-adler32", @@ -2416,7 +2416,7 @@ checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" dependencies = [ "crc32fast", "hashbrown 0.15.2", - "indexmap 2.8.0", + "indexmap 2.9.0", "memchr", ] @@ -2935,9 +2935,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.10" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b8c0c260b63a8219631167be35e6a988e9554dbd323f8bd08439c8ed1302bd1" +checksum = "d2f103c6d277498fbceb16e84d317e2a400f160f46904d5f5410848c829511a3" dependencies = [ "bitflags 2.9.0", ] @@ -3163,7 +3163,7 @@ version = "1.0.140" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" dependencies = [ - "indexmap 2.7.0", + "indexmap 2.9.0", "itoa", "memchr", "ryu", @@ -3176,7 +3176,7 @@ version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.8.0", + "indexmap 2.9.0", "itoa", "ryu", "serde", @@ -3224,9 +3224,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.14.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" +checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" [[package]] name = "smoltcp" @@ -3483,9 +3483,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.44.1" +version = "1.44.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f382da615b842244d4b8738c82ed1275e6c5dd90c459a30941cd07080b06c91a" +checksum = "e6b88822cbe49de4185e3a4cbf8321dd487cf5fe0c5c65695fef6346371e9c48" dependencies = [ "backtrace", "bytes", diff --git a/mitmproxy-rs/mitmproxy_rs/contentviews.pyi b/mitmproxy-rs/mitmproxy_rs/contentviews.pyi index d21a30a9..65911af5 100644 --- a/mitmproxy-rs/mitmproxy_rs/contentviews.pyi +++ b/mitmproxy-rs/mitmproxy_rs/contentviews.pyi @@ -1,11 +1,18 @@ -from typing import ClassVar, final +from __future__ import annotations + +from typing import ClassVar, final, Literal class Contentview: name: ClassVar[str] + syntax_highlight: ClassVar[Literal["xml", "yaml", "none", "error"]] + def prettify(self, data: bytes, metadata) -> str: pass + def render_priority(self, data: bytes, metadata) -> float: + pass + @final class InteractiveContentview(Contentview): def reencode(self, data: str, metadata) -> bytes: diff --git a/src/contentviews/protobuf.rs b/src/contentviews/protobuf.rs index 5f991b6d..8eff0b30 100644 --- a/src/contentviews/protobuf.rs +++ b/src/contentviews/protobuf.rs @@ -52,7 +52,7 @@ enum GuessedFieldType { impl Prettify for Protobuf { fn name(&self) -> &str { - "Protocol Buffer" + "Protobuf" } fn syntax_highlight(&self) -> Language { From e67f83be47240b7ba10d4f3d0b17df5d2f879174 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sat, 5 Apr 2025 20:37:34 +0200 Subject: [PATCH 16/26] protobuf view: initial review --- src/contentviews/grpc.rs | 7 +- src/contentviews/mod.rs | 8 +- src/contentviews/msgpack.rs | 8 + src/contentviews/protobuf.rs | 521 ++++++++++++++++++----------------- 4 files changed, 284 insertions(+), 260 deletions(-) diff --git a/src/contentviews/grpc.rs b/src/contentviews/grpc.rs index f75a1955..699cde54 100644 --- a/src/contentviews/grpc.rs +++ b/src/contentviews/grpc.rs @@ -63,7 +63,7 @@ impl Reencode for GRPC { let mut ret = vec![]; for document in serde_yaml::Deserializer::from_str(data) { let value = Value::deserialize(document).context("Invalid YAML")?; - let 
proto = Protobuf::reencode_yaml(value, metadata)?; + let proto = super::protobuf::reencode::reencode_yaml(value, metadata)?; ret.push(0); // compressed ret.extend(u32::to_be_bytes(proto.len() as u32)); ret.extend(proto); @@ -74,12 +74,9 @@ impl Reencode for GRPC { #[cfg(test)] mod tests { - use super::*; - use crate::contentviews::TestMetadata; #[test] fn test_grpc() { - let result = GRPC.prettify(b"foo", &TestMetadata::default()).unwrap(); - assert_eq!(result, "666f6f"); + // FIXME } } diff --git a/src/contentviews/mod.rs b/src/contentviews/mod.rs index aee90834..09f2f87f 100644 --- a/src/contentviews/mod.rs +++ b/src/contentviews/mod.rs @@ -24,15 +24,15 @@ pub trait Prettify: Send + Sync { self.name().to_lowercase().replace(" ", "_") } + fn syntax_highlight(&self) -> syntax_highlight::Language { + syntax_highlight::Language::None + } + fn prettify(&self, data: &[u8], metadata: &dyn Metadata) -> Result; fn render_priority(&self, _data: &[u8], _metadata: &dyn Metadata) -> f64 { 0.0 } - - fn syntax_highlight(&self) -> syntax_highlight::Language { - syntax_highlight::Language::None - } } pub trait Reencode: Send + Sync { diff --git a/src/contentviews/msgpack.rs b/src/contentviews/msgpack.rs index ff2466b9..9c7439eb 100644 --- a/src/contentviews/msgpack.rs +++ b/src/contentviews/msgpack.rs @@ -130,4 +130,12 @@ tags: // Compare the values assert_eq!(original_value, result_value); } + + #[test] + fn test_invalid_yaml() { + let err = MsgPack + .reencode("@invalid_yaml", &TestMetadata::default()) + .unwrap_err(); + assert_eq!(format!("{err}"), "Invalid YAML"); + } } diff --git a/src/contentviews/protobuf.rs b/src/contentviews/protobuf.rs index 8eff0b30..6851f2d9 100644 --- a/src/contentviews/protobuf.rs +++ b/src/contentviews/protobuf.rs @@ -16,7 +16,6 @@ use protobuf::UnknownValueRef; use protobuf::{EnumOrUnknown, MessageDyn, MessageFull, UnknownValue}; use regex::Captures; use serde_yaml::value::TaggedValue; -use serde_yaml::Value::Tagged; use serde_yaml::{Mapping, 
Number, Value}; use std::collections::BTreeMap; use std::fmt::Write; @@ -66,257 +65,36 @@ impl Prettify for Protobuf { } let existing = Empty::descriptor(); - let descriptor = Self::create_descriptor(data, existing)?; + let descriptor = raw_to_proto::merge_proto_and_descriptor(data, existing)?; + // Parse protobuf and convert to YAML let message = descriptor .parse_from_bytes(data) .context("Error parsing protobuf")?; + let yaml_value = proto_to_yaml::message_to_yaml(message.as_ref()); - // Parse protobuf and convert to YAML - let yaml_value = Self::message_to_yaml(message.as_ref()); - - // Convert the Value to prettified YAML let yaml_str = serde_yaml::to_string(&yaml_value).context("Failed to convert to YAML")?; - - // Apply regex replacements to transform the YAML output - Self::apply_replacements(&yaml_str) + yaml_to_pretty::apply_replacements(&yaml_str) } } impl Reencode for Protobuf { fn reencode(&self, data: &str, metadata: &dyn Metadata) -> Result> { let value: Value = serde_yaml::from_str(data).context("Invalid YAML")?; - Self::reencode_yaml(value, metadata) - } -} - -fn tag_number(value: Value, field_type: Type) -> Value { - match field_type { - TYPE_UINT64 => Tagged(Box::new(TaggedValue { - tag: tags::VARINT.clone(), - value, - })), - TYPE_FIXED64 => Tagged(Box::new(TaggedValue { - tag: tags::FIXED64.clone(), - value, - })), - TYPE_FIXED32 => Tagged(Box::new(TaggedValue { - tag: tags::FIXED32.clone(), - value, - })), - _ => value, - } -} - -fn int_value(n: Number, field: Option<&FieldDescriptor>) -> UnknownValue { - if let Some(field) = field { - if let Some(typ) = field.proto().type_.and_then(|t| t.enum_value().ok()) { - match typ { - TYPE_FIXED64 | Type::TYPE_SFIXED64 | Type::TYPE_DOUBLE => { - return if let Some(n) = n.as_u64() { - UnknownValue::Fixed64(n) - } else if let Some(n) = n.as_i64() { - UnknownValue::sfixed64(n) - } else { - UnknownValue::double(n.as_f64().expect("as_f64 never fails")) - } - } - TYPE_FIXED32 | Type::TYPE_SFIXED32 | 
Type::TYPE_FLOAT => { - return if let Some(n) = n.as_u64() { - UnknownValue::Fixed32(n as u32) - } else if let Some(n) = n.as_i64() { - UnknownValue::sfixed32(n as i32) - } else { - UnknownValue::float(n.as_f64().expect("as_f64 never fails") as f32) - } - } - _ => (), - } - } - } - if let Some(n) = n.as_u64() { - UnknownValue::Varint(n) - } else if let Some(n) = n.as_i64() { - UnknownValue::int64(n) - } else { - UnknownValue::double(n.as_f64().expect("as_f64 never fails")) + reencode::reencode_yaml(value, metadata) } } -impl Protobuf { - pub(super) fn reencode_yaml(value: Value, _metadata: &dyn Metadata) -> Result> { - let descriptor = Empty::descriptor(); - let message = descriptor.new_instance(); - Self::merge_yaml_into_message(value, message) - } - - fn merge_yaml_into_message(value: Value, mut message: Box) -> Result> { - let Value::Mapping(mapping) = value else { - bail!("YAML is not a mapping"); - }; - - for (key, value) in mapping.into_iter() { - let field_num = match key { - Value::String(key) => { - if let Some(field) = message.descriptor_dyn().field_by_name(&key) { - field.number() - } else if let Ok(field_num) = i32::from_str(&key) { - field_num - } else { - bail!("Unknown protobuf field key: {key}"); - } - } - Value::Number(key) => { - let Some(field_num) = key.as_i64() else { - bail!("Invalid protobuf field number: {key}"); - }; - field_num as i32 - } - other => { - bail!("Unexpected key: {other:?}"); - } - } as u32; - - Self::add_field(message.as_mut(), field_num, value)?; - } - - message - .write_to_bytes_dyn() - .context("Failed to serialize protobuf") - } - - fn add_field(message: &mut dyn MessageDyn, field_num: u32, value: Value) -> Result<()> { - let value = match value { - Value::Null => return Ok(()), - Value::Sequence(seq) => { - for s in seq.into_iter() { - Self::add_field(message, field_num, s)?; - } - return Ok(()); - } - Tagged(t) => { - // t.tag doesn't work for Match statements - if t.tag == *tags::BINARY { - let value = match t.value { 
- Value::String(s) => s, - _ => bail!("Binary data is not a string"), - }; - let value = (0..value.len()) - .step_by(2) - .map(|i| u8::from_str_radix(&value[i..i + 2], 16)) - .collect::, ParseIntError>>() - .context("Invalid hex string")?; - UnknownValue::LengthDelimited(value) - } else if t.tag == *tags::FIXED32 { - let value = match t.value { - Value::Number(s) if s.as_u64().is_some() => s.as_u64().unwrap(), - _ => bail!("Fixed32 data is not a u32"), - }; - UnknownValue::Fixed32(value as u32) - } else if t.tag == *tags::FIXED64 { - let value = match t.value { - Value::Number(s) if s.as_u64().is_some() => s.as_u64().unwrap(), - _ => bail!("Fixed64 data is not a u64"), - }; - UnknownValue::Fixed64(value) - } else { - log::info!("Unexpected YAML tag {}, discarding.", t.tag); - return Self::add_field(message, field_num, t.value); - } - } - Value::Bool(b) => UnknownValue::Varint(b as u64), - Value::Number(n) => { - let field = message.descriptor_dyn().field_by_number(field_num); - int_value(n, field.as_ref()) - } - Value::String(s) => UnknownValue::LengthDelimited(s.into_bytes()), - Value::Mapping(m) => { - let mut descriptor = Empty::descriptor(); - if let Some(field) = message.descriptor_dyn().field_by_number(field_num) { - if let RuntimeFieldType::Singular(RuntimeType::Message(md)) = - field.runtime_field_type() - { - descriptor = md; - } else if let RuntimeFieldType::Map(_, _) = field.runtime_field_type() { - // TODO: handle maps. 
- } - } - let child_message = descriptor.new_instance(); - let val = Self::merge_yaml_into_message(Value::Mapping(m), child_message)?; - UnknownValue::LengthDelimited(val) - } - }; - message.mut_unknown_fields_dyn().add_value(field_num, value); - Ok(()) - } - - fn primitive_type_to_yaml(x: ReflectValueRef, field_type: Type) -> Value { - match x { - ReflectValueRef::U32(x) => tag_number(Value::Number(Number::from(x)), field_type), - ReflectValueRef::U64(x) => tag_number(Value::Number(Number::from(x)), field_type), - ReflectValueRef::I32(x) => Value::Number(Number::from(x)), - ReflectValueRef::I64(x) => Value::Number(Number::from(x)), - ReflectValueRef::F32(x) => Value::Number(Number::from(x)), - ReflectValueRef::F64(x) => Value::Number(Number::from(x)), - ReflectValueRef::Bool(x) => Value::from(x), - ReflectValueRef::String(x) => Value::from(x), - ReflectValueRef::Bytes(x) => Value::Tagged(Box::new(TaggedValue { - tag: tags::BINARY.clone(), - value: Value::String(Self::bytes_to_hex_string(x)), - })), - ReflectValueRef::Enum(descriptor, i) => descriptor - .value_by_number(i) - .map(|v| Value::String(v.name().to_string())) - .unwrap_or_else(|| Value::Number(Number::from(i))), - ReflectValueRef::Message(m) => Self::message_to_yaml(m.deref()), - } - } - pub(crate) fn message_to_yaml(message: &dyn MessageDyn) -> Value { - let mut ret = Mapping::new(); - - for field in message.descriptor_dyn().fields() { - let key = if field.name().is_empty() || field.name().starts_with("@unknown_field_") { - Value::from(field.number()) - } else { - Value::from(field.name()) - }; - let field_type = field - .proto() - .type_ - .map(|t| t.enum_value_or(TYPE_BYTES)) - .unwrap_or(TYPE_BYTES); - - let value = match field.get_reflect(message) { - ReflectFieldRef::Optional(x) => { - if let Some(x) = x.value() { - Self::primitive_type_to_yaml(x, field_type) - } else { - Value::Null - } - } - ReflectFieldRef::Repeated(x) => Value::Sequence( - x.into_iter() - .map(|x| 
Self::primitive_type_to_yaml(x, field_type)) - .collect(), - ), - ReflectFieldRef::Map(x) => Value::Mapping( - x.into_iter() - .map(|(k, v)| { - ( - Self::primitive_type_to_yaml(k, field_type), - Self::primitive_type_to_yaml(v, field_type), - ) - }) - .collect(), - ), - }; - ret.insert(key, value); - } - Value::Mapping(ret) - } +/// Existing protobuf definition + raw data => merged protobuf definition +mod raw_to_proto { + use super::*; - fn create_descriptor(data: &[u8], existing: MessageDescriptor) -> Result { - let proto = Self::create_descriptor_proto(data, existing, "Unknown".to_string())?; + /// Create a "merged" MessageDescriptor. Mostly a wrapper around `create_descriptor_proto`. + pub(super) fn merge_proto_and_descriptor( + data: &[u8], + existing: MessageDescriptor, + ) -> anyhow::Result { + let proto = create_descriptor_proto(data, existing, "Unknown".to_string())?; let descriptor = { let mut proto_file = FileDescriptorProto::new(); @@ -333,6 +111,8 @@ impl Protobuf { Ok(descriptor) } + /// Create a DescriptorProto that combines the `existing` MessageDescriptor with (guessed) + /// metadata for all unknown fields in the protobuf `data`. 
fn create_descriptor_proto( data: &[u8], existing: MessageDescriptor, @@ -353,7 +133,7 @@ impl Protobuf { let mut add_int = |typ: Type| { descriptor.field.push(FieldDescriptorProto { number: Some(field_index as i32), - name: Some(format!("@unknown_field_{}", field_index)), + name: Some(format!("unknown_field_{}", field_index)), type_: Some(EnumOrUnknown::from(typ)), ..Default::default() }); @@ -370,15 +150,15 @@ impl Protobuf { UnknownValueRef::LengthDelimited(data) => Ok(*data), _ => Err(anyhow::anyhow!("varying types in protobuf")), }) - .collect::>>()?; + .collect::>>()?; - match Self::guess_field_type(&field_values, &name, field_index) { + match guess_field_type(&field_values, &name, field_index) { GuessedFieldType::String => add_int(TYPE_STRING), GuessedFieldType::Unknown => add_int(TYPE_BYTES), GuessedFieldType::Message(m) => { descriptor.field.push(FieldDescriptorProto { number: Some(field_index as i32), - name: Some(format!("@unknown_field_{}", field_index)), + name: Some(format!("unknown_field_{}", field_index)), type_name: Some(format!(".{}.{}", name, m.name())), type_: Some(EnumOrUnknown::from(Type::TYPE_MESSAGE)), ..Default::default() @@ -401,6 +181,7 @@ impl Protobuf { Ok(descriptor) } + /// Given all `values` of a field, guess its type. 
fn guess_field_type(values: &[&[u8]], name: &str, field_index: u32) -> GuessedFieldType { if values.iter().all(|data| { std::str::from_utf8(data).is_ok_and(|s| { @@ -414,7 +195,7 @@ impl Protobuf { // Try to parse as a nested message let name = format!("{name}.unknown_field_{field_index}"); if let Ok(mut descriptor) = - { Self::create_descriptor_proto(values[0], Empty::descriptor(), name) } + { create_descriptor_proto(values[0], Empty::descriptor(), name) } { if values .iter() @@ -428,9 +209,111 @@ impl Protobuf { GuessedFieldType::Unknown } +} +/// Parsed protobuf message => YAML value +mod proto_to_yaml { + use super::*; + + pub(super) fn message_to_yaml(message: &dyn MessageDyn) -> Value { + let mut ret = Mapping::new(); + + for field in message.descriptor_dyn().fields() { + let key = if field.name().starts_with("unknown_field_") { + Value::from(field.number()) + } else { + Value::from(field.name()) + }; + let field_type = field + .proto() + .type_ + .map(|t| t.enum_value_or(TYPE_BYTES)) + .unwrap_or(TYPE_BYTES); + + let value = match field.get_reflect(message) { + ReflectFieldRef::Optional(x) => { + if let Some(x) = x.value() { + primitive_type_to_yaml(x, field_type) + } else { + Value::Null + } + } + ReflectFieldRef::Repeated(x) => Value::Sequence( + x.into_iter() + .map(|x| primitive_type_to_yaml(x, field_type)) + .collect(), + ), + ReflectFieldRef::Map(x) => Value::Mapping( + x.into_iter() + .map(|(k, v)| { + ( + primitive_type_to_yaml(k, field_type), + primitive_type_to_yaml(v, field_type), + ) + }) + .collect(), + ), + }; + ret.insert(key, value); + } + Value::Mapping(ret) + } + + fn primitive_type_to_yaml(x: ReflectValueRef, field_type: Type) -> Value { + match x { + ReflectValueRef::U32(x) => tag_number(Value::Number(Number::from(x)), field_type), + ReflectValueRef::U64(x) => tag_number(Value::Number(Number::from(x)), field_type), + ReflectValueRef::I32(x) => Value::Number(Number::from(x)), + ReflectValueRef::I64(x) => Value::Number(Number::from(x)), + 
ReflectValueRef::F32(x) => Value::Number(Number::from(x)), + ReflectValueRef::F64(x) => Value::Number(Number::from(x)), + ReflectValueRef::Bool(x) => Value::from(x), + ReflectValueRef::String(x) => Value::from(x), + ReflectValueRef::Bytes(x) => Value::Tagged(Box::new(TaggedValue { + tag: tags::BINARY.clone(), + value: Value::String(bytes_to_hex_string(x)), + })), + ReflectValueRef::Enum(descriptor, i) => descriptor + .value_by_number(i) + .map(|v| Value::String(v.name().to_string())) + .unwrap_or_else(|| Value::Number(Number::from(i))), + ReflectValueRef::Message(m) => message_to_yaml(m.deref()), + } + } + + fn tag_number(value: Value, field_type: Type) -> Value { + match field_type { + TYPE_UINT64 => Value::Tagged(Box::new(TaggedValue { + tag: tags::VARINT.clone(), + value, + })), + TYPE_FIXED64 => Value::Tagged(Box::new(TaggedValue { + tag: tags::FIXED64.clone(), + value, + })), + TYPE_FIXED32 => Value::Tagged(Box::new(TaggedValue { + tag: tags::FIXED32.clone(), + value, + })), + _ => value, + } + } + + // Convert length-delimited protobuf data to a hex string + fn bytes_to_hex_string(bytes: &[u8]) -> String { + let mut result = String::with_capacity(bytes.len() * 2); + for b in bytes { + let _ = write!(result, "{:02x}", b); + } + result + } +} + +/// YAML value => prettified text +mod yaml_to_pretty { + use super::*; // Helper method to apply regex replacements to the YAML output - fn apply_replacements(yaml_str: &str) -> Result { + pub(super) fn apply_replacements(yaml_str: &str) -> Result { // Replace !fixed32 tags with comments showing float and i32 interpretations let with_fixed32 = tags::FIXED32_RE.replace_all(yaml_str, |caps: &Captures| { let value = caps[1].parse::().unwrap_or_default(); @@ -480,7 +363,7 @@ impl Protobuf { // Replace !varint tags with comments showing signed interpretation if different let with_varint = tags::VARINT_RE.replace_all(&with_fixed64, |caps: &Captures| { let unsigned_value = caps[1].parse::().unwrap_or_default(); - let 
i64_zigzag = Self::decode_zigzag64(unsigned_value); + let i64_zigzag = decode_zigzag64(unsigned_value); // Only show signed value if it's different from unsigned if i64_zigzag < 0 { @@ -497,14 +380,150 @@ impl Protobuf { fn decode_zigzag64(n: u64) -> i64 { ((n >> 1) as i64) ^ (-((n & 1) as i64)) } +} + +pub(super) mod reencode { + use super::*; + + pub(crate) fn reencode_yaml(value: Value, _metadata: &dyn Metadata) -> Result> { + let descriptor = Empty::descriptor(); + let message = descriptor.new_instance(); + merge_yaml_into_message(value, message) + } - // Convert length-delimited protobuf data to a hex string - fn bytes_to_hex_string(bytes: &[u8]) -> String { - let mut result = String::with_capacity(bytes.len() * 2); - for b in bytes { - let _ = write!(result, "{:02x}", b); + fn merge_yaml_into_message(value: Value, mut message: Box) -> Result> { + let Value::Mapping(mapping) = value else { + bail!("YAML is not a mapping"); + }; + + for (key, value) in mapping.into_iter() { + let field_num = match key { + Value::String(key) => { + if let Some(field) = message.descriptor_dyn().field_by_name(&key) { + field.number() + } else if let Ok(field_num) = i32::from_str(&key) { + field_num + } else { + bail!("Unknown protobuf field key: {key}"); + } + } + Value::Number(key) => { + let Some(field_num) = key.as_i64() else { + bail!("Invalid protobuf field number: {key}"); + }; + field_num as i32 + } + other => { + bail!("Unexpected key: {other:?}"); + } + } as u32; + + add_field(message.as_mut(), field_num, value)?; + } + + message + .write_to_bytes_dyn() + .context("Failed to serialize protobuf") + } + + fn add_field(message: &mut dyn MessageDyn, field_num: u32, value: Value) -> Result<()> { + let value = match value { + Value::Null => return Ok(()), + Value::Sequence(seq) => { + for s in seq.into_iter() { + add_field(message, field_num, s)?; + } + return Ok(()); + } + Value::Tagged(t) => { + // t.tag doesn't work for Match statements + if t.tag == *tags::BINARY { + let 
value = match t.value { + Value::String(s) => s, + _ => bail!("Binary data is not a string"), + }; + let value = (0..value.len()) + .step_by(2) + .map(|i| u8::from_str_radix(&value[i..i + 2], 16)) + .collect::, ParseIntError>>() + .context("Invalid hex string")?; + UnknownValue::LengthDelimited(value) + } else if t.tag == *tags::FIXED32 { + let value = match t.value { + Value::Number(s) if s.as_u64().is_some() => s.as_u64().unwrap(), + _ => bail!("Fixed32 data is not a u32"), + }; + UnknownValue::Fixed32(value as u32) + } else if t.tag == *tags::FIXED64 { + let value = match t.value { + Value::Number(s) if s.as_u64().is_some() => s.as_u64().unwrap(), + _ => bail!("Fixed64 data is not a u64"), + }; + UnknownValue::Fixed64(value) + } else { + log::info!("Unexpected YAML tag {}, discarding.", t.tag); + return add_field(message, field_num, t.value); + } + } + Value::Bool(b) => UnknownValue::Varint(b as u64), + Value::Number(n) => { + let field = message.descriptor_dyn().field_by_number(field_num); + int_value(n, field.as_ref()) + } + Value::String(s) => UnknownValue::LengthDelimited(s.into_bytes()), + Value::Mapping(m) => { + let mut descriptor = Empty::descriptor(); + if let Some(field) = message.descriptor_dyn().field_by_number(field_num) { + if let RuntimeFieldType::Singular(RuntimeType::Message(md)) = + field.runtime_field_type() + { + descriptor = md; + } else if let RuntimeFieldType::Map(_, _) = field.runtime_field_type() { + // TODO: handle maps. 
+ } + } + let child_message = descriptor.new_instance(); + let val = merge_yaml_into_message(Value::Mapping(m), child_message)?; + UnknownValue::LengthDelimited(val) + } + }; + message.mut_unknown_fields_dyn().add_value(field_num, value); + Ok(()) + } + + fn int_value(n: Number, field: Option<&FieldDescriptor>) -> UnknownValue { + if let Some(field) = field { + if let Some(typ) = field.proto().type_.and_then(|t| t.enum_value().ok()) { + match typ { + TYPE_FIXED64 | Type::TYPE_SFIXED64 | Type::TYPE_DOUBLE => { + return if let Some(n) = n.as_u64() { + UnknownValue::Fixed64(n) + } else if let Some(n) = n.as_i64() { + UnknownValue::sfixed64(n) + } else { + UnknownValue::double(n.as_f64().expect("as_f64 never fails")) + } + } + TYPE_FIXED32 | Type::TYPE_SFIXED32 | Type::TYPE_FLOAT => { + return if let Some(n) = n.as_u64() { + UnknownValue::Fixed32(n as u32) + } else if let Some(n) = n.as_i64() { + UnknownValue::sfixed32(n as i32) + } else { + UnknownValue::float(n.as_f64().expect("as_f64 never fails") as f32) + } + } + _ => (), + } + } + } + if let Some(n) = n.as_u64() { + UnknownValue::Varint(n) + } else if let Some(n) = n.as_i64() { + UnknownValue::int64(n) + } else { + UnknownValue::double(n.as_f64().expect("as_f64 never fails")) } - result } } @@ -635,7 +654,7 @@ mod tests { #[test] fn test_empty_protobuf() { - let result = Protobuf.prettify(b"", &TestMetadata::default()); - assert!(result.is_err()); + let result = Protobuf.prettify(b"", &TestMetadata::default()).unwrap(); + assert_eq!(result, "{} # empty protobuf message"); } } From 14696de491294b15d7982403ec123b67f485563d Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Tue, 8 Apr 2025 09:02:57 +0200 Subject: [PATCH 17/26] move contentviews and syntax highlighter into separate crates --- Cargo.lock | 35 +++++++++++++++---- Cargo.toml | 22 +++--------- README.md | 4 +++ mitmproxy-contentviews/Cargo.toml | 30 ++++++++++++++++ .../benches}/contentviews.rs | 11 +++--- .../src}/grpc.rs | 4 +-- .../src}/hex_dump.rs 
| 6 ++-- .../src}/hex_stream.rs | 4 +-- .../src/lib.rs | 6 ++-- .../src}/msgpack.rs | 7 ++-- .../src}/protobuf.rs | 30 ++++++++-------- mitmproxy-highlight/Cargo.toml | 26 ++++++++++++++ .../benches}/syntax_highlight.rs | 7 ++-- .../src}/common.rs | 0 .../mod.rs => mitmproxy-highlight/src/lib.rs | 0 .../src}/xml.rs | 2 +- .../src}/yaml.rs | 2 +- mitmproxy-rs/Cargo.toml | 2 ++ mitmproxy-rs/src/contentview.rs | 2 +- mitmproxy-rs/src/lib.rs | 4 +-- mitmproxy-rs/src/syntax_highlight.rs | 2 +- src/lib.rs | 2 -- 22 files changed, 137 insertions(+), 71 deletions(-) create mode 100644 mitmproxy-contentviews/Cargo.toml rename {benches => mitmproxy-contentviews/benches}/contentviews.rs (77%) rename {src/contentviews => mitmproxy-contentviews/src}/grpc.rs (95%) rename {src/contentviews => mitmproxy-contentviews/src}/hex_dump.rs (89%) rename {src/contentviews => mitmproxy-contentviews/src}/hex_stream.rs (96%) rename src/contentviews/mod.rs => mitmproxy-contentviews/src/lib.rs (87%) rename {src/contentviews => mitmproxy-contentviews/src}/msgpack.rs (96%) rename {src/contentviews => mitmproxy-contentviews/src}/protobuf.rs (96%) create mode 100644 mitmproxy-highlight/Cargo.toml rename {benches => mitmproxy-highlight/benches}/syntax_highlight.rs (93%) rename {src/syntax_highlight => mitmproxy-highlight/src}/common.rs (100%) rename src/syntax_highlight/mod.rs => mitmproxy-highlight/src/lib.rs (100%) rename {src/syntax_highlight => mitmproxy-highlight/src}/xml.rs (98%) rename {src/syntax_highlight => mitmproxy-highlight/src}/yaml.rs (98%) diff --git a/Cargo.lock b/Cargo.lock index ba836a6a..23f2e6ea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2202,25 +2202,44 @@ dependencies = [ "once_cell", "pretty-hex", "prost", - "protobuf", "rand 0.9.0", - "regex", - "rmp-serde", "security-framework", - "serde", - "serde_yaml", "smoltcp", "socket2", "sysinfo", "tempfile", "tokio", "tokio-util", + "tun", + "windows 0.61.1", +] + +[[package]] +name = "mitmproxy-contentviews" +version = 
"0.12.0-dev" +dependencies = [ + "anyhow", + "criterion", + "log", + "mitmproxy-highlight", + "pretty-hex", + "protobuf", + "regex", + "rmp-serde", + "serde", + "serde_yaml", +] + +[[package]] +name = "mitmproxy-highlight" +version = "0.12.0-dev" +dependencies = [ + "anyhow", + "criterion", "tree-sitter", "tree-sitter-highlight", "tree-sitter-xml", "tree-sitter-yaml", - "tun", - "windows 0.61.1", ] [[package]] @@ -2273,6 +2292,8 @@ dependencies = [ "env_logger", "log", "mitmproxy", + "mitmproxy-contentviews", + "mitmproxy-highlight", "nix 0.29.0", "once_cell", "pyo3", diff --git a/Cargo.toml b/Cargo.toml index 13a6b6b8..6f2b86b1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,8 @@ [workspace] members = [ ".", + "mitmproxy-contentviews", + "mitmproxy-highlight", "mitmproxy-rs", "mitmproxy-linux", "mitmproxy-linux-ebpf", @@ -11,6 +13,8 @@ members = [ ] default-members = [ ".", + "mitmproxy-contentviews", + "mitmproxy-highlight", "mitmproxy-rs", "mitmproxy-linux", "mitmproxy-linux-ebpf-common", @@ -68,15 +72,6 @@ internet-packet = { version = "0.2.3", features = ["smoltcp"] } data-encoding = "2.8.0" hickory-resolver = "0.25.1" socket2 = "0.5.9" -serde = { version = "1.0", features = ["derive"] } -serde_yaml = "0.9" -rmp-serde = "1.1" -protobuf = "3.7.2" -regex = "1.10.3" -tree-sitter-highlight = "0.25.3" -tree-sitter-yaml = "0.7.0" -tree-sitter-xml = "0.7.0" -tree-sitter = "0.25.3" [patch.crates-io] # tokio = { path = "../tokio/tokio" } @@ -118,19 +113,10 @@ rand = "0.9" criterion = "0.5.1" hickory-server = "0.25.1" - [[bench]] name = "process" harness = false -[[bench]] -name = "contentviews" -harness = false - -[[bench]] -name = "syntax_highlight" -harness = false - [profile.release] codegen-units = 1 lto = true diff --git a/README.md b/README.md index d4cd91af..a2b118b6 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,10 @@ This repository contains mitmproxy's Rust bits, most notably: ### Structure - [`src/`](./src): The `mitmproxy` crate containing most of 
the "meat". +- [`mitmproxy-contentviews/`](./mitmproxy-contentviews): + Pretty-printers for (HTTP) message bodies. +- [`mitmproxy-highlight/`](./mitmproxy-highlight): + Syntax highlighting backend for mitmproxy and mitmdump. - [`mitmproxy-rs/`](./mitmproxy-rs): The `mitmproxy-rs` Python package, which provides Python bindings for the Rust crate using [PyO3](https://pyo3.rs/). Source and binary distributions are available [on PyPI](https://pypi.org/project/mitmproxy-rs/). diff --git a/mitmproxy-contentviews/Cargo.toml b/mitmproxy-contentviews/Cargo.toml new file mode 100644 index 00000000..b06b5549 --- /dev/null +++ b/mitmproxy-contentviews/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "mitmproxy-contentviews" +license = "MIT" +authors.workspace = true +version.workspace = true +repository.workspace = true +edition.workspace = true +rust-version.workspace = true +publish.workspace = true + +[lints] +workspace = true + +[dependencies] +anyhow = { version = "1.0.97", features = ["backtrace"] } +log = "0.4.27" +pretty-hex = "0.4.1" +mitmproxy-highlight = { path = "../mitmproxy-highlight" } +serde = { version = "1.0", features = ["derive"] } +serde_yaml = "0.9" +rmp-serde = "1.1" +protobuf = "3.7.2" +regex = "1.10.3" + +[dev-dependencies] +criterion = "0.5.1" + +[[bench]] +name = "contentviews" +harness = false \ No newline at end of file diff --git a/benches/contentviews.rs b/mitmproxy-contentviews/benches/contentviews.rs similarity index 77% rename from benches/contentviews.rs rename to mitmproxy-contentviews/benches/contentviews.rs index cf05cd37..46a6e380 100644 --- a/benches/contentviews.rs +++ b/mitmproxy-contentviews/benches/contentviews.rs @@ -1,17 +1,16 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use mitmproxy::contentviews; -use mitmproxy::contentviews::{Prettify, Reencode, TestMetadata}; +use mitmproxy_contentviews::{MsgPack, Prettify, Protobuf, Reencode, TestMetadata}; fn criterion_benchmark(c: &mut Criterion) { 
c.bench_function("protobuf-prettify", |b| { b.iter(|| { - contentviews::Protobuf.prettify(black_box(b"\n\x13gRPC testing server\x12\x07\n\x05Index\x12\x07\n\x05Empty\x12\x0c\n\nDummyUnary\x12\x0f\n\rSpecificError\x12\r\n\x0bRandomError\x12\x0e\n\x0cHeadersUnary\x12\x11\n\x0fNoResponseUnary"), &TestMetadata::default()).unwrap() + Protobuf.prettify(black_box(b"\n\x13gRPC testing server\x12\x07\n\x05Index\x12\x07\n\x05Empty\x12\x0c\n\nDummyUnary\x12\x0f\n\rSpecificError\x12\r\n\x0bRandomError\x12\x0e\n\x0cHeadersUnary\x12\x11\n\x0fNoResponseUnary"), &TestMetadata::default()).unwrap() }) }); c.bench_function("protobuf-reencode", |b| { b.iter(|| { - contentviews::Protobuf.reencode( + Protobuf.reencode( black_box("1: gRPC testing server\n2:\n- 1: Index\n- 1: Empty\n- 1: DummyUnary\n- 1: SpecificError\n- 1: RandomError\n- 1: HeadersUnary\n- 1: NoResponseUnary\n"), &TestMetadata::default() ).unwrap() @@ -31,7 +30,7 @@ fn criterion_benchmark(c: &mut Criterion) { ]; c.bench_function("msgpack-prettify", |b| { b.iter(|| { - contentviews::MsgPack + MsgPack .prettify(black_box(TEST_MSGPACK), &TestMetadata::default()) .unwrap() }) @@ -39,7 +38,7 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("msgpack-reencode", |b| { b.iter(|| { - contentviews::MsgPack + MsgPack .reencode( black_box( "\ diff --git a/src/contentviews/grpc.rs b/mitmproxy-contentviews/src/grpc.rs similarity index 95% rename from src/contentviews/grpc.rs rename to mitmproxy-contentviews/src/grpc.rs index 699cde54..9fa301ba 100644 --- a/src/contentviews/grpc.rs +++ b/mitmproxy-contentviews/src/grpc.rs @@ -1,5 +1,5 @@ -use crate::contentviews::{Metadata, Prettify, Protobuf, Reencode}; -use crate::syntax_highlight::Language; +use crate::{Metadata, Prettify, Protobuf, Reencode}; +use mitmproxy_highlight::Language; use anyhow::{bail, Context, Result}; use serde::Deserialize; use serde_yaml::Value; diff --git a/src/contentviews/hex_dump.rs b/mitmproxy-contentviews/src/hex_dump.rs similarity index 89% 
rename from src/contentviews/hex_dump.rs rename to mitmproxy-contentviews/src/hex_dump.rs index 45939961..70ac9356 100644 --- a/src/contentviews/hex_dump.rs +++ b/mitmproxy-contentviews/src/hex_dump.rs @@ -1,5 +1,5 @@ -use crate::contentviews::hex_stream::is_binary; -use crate::contentviews::{Metadata, Prettify}; +use crate::hex_stream::is_binary; +use crate::{Metadata, Prettify}; use pretty_hex::{HexConfig, PrettyHex}; pub struct HexDump; @@ -36,7 +36,7 @@ impl Prettify for HexDump { #[cfg(test)] mod tests { use super::*; - use crate::contentviews::TestMetadata; + use crate::TestMetadata; #[test] fn prettify_simple() { diff --git a/src/contentviews/hex_stream.rs b/mitmproxy-contentviews/src/hex_stream.rs similarity index 96% rename from src/contentviews/hex_stream.rs rename to mitmproxy-contentviews/src/hex_stream.rs index 6c19bb73..8f5ef33d 100644 --- a/src/contentviews/hex_stream.rs +++ b/mitmproxy-contentviews/src/hex_stream.rs @@ -1,4 +1,4 @@ -use crate::contentviews::{Metadata, Prettify, Reencode}; +use crate::{Metadata, Prettify, Reencode}; use anyhow::{Context, Result}; use pretty_hex::{HexConfig, PrettyHex}; use std::num::ParseIntError; @@ -64,7 +64,7 @@ impl Reencode for HexStream { #[cfg(test)] mod tests { use super::*; - use crate::contentviews::TestMetadata; + use crate::TestMetadata; #[test] fn test_hex_stream() { diff --git a/src/contentviews/mod.rs b/mitmproxy-contentviews/src/lib.rs similarity index 87% rename from src/contentviews/mod.rs rename to mitmproxy-contentviews/src/lib.rs index 09f2f87f..e844c6a3 100644 --- a/src/contentviews/mod.rs +++ b/mitmproxy-contentviews/src/lib.rs @@ -6,7 +6,7 @@ mod protobuf; use anyhow::Result; -use crate::syntax_highlight; +use mitmproxy_highlight::Language; pub use grpc::GRPC; pub use hex_dump::HexDump; pub use hex_stream::HexStream; @@ -24,8 +24,8 @@ pub trait Prettify: Send + Sync { self.name().to_lowercase().replace(" ", "_") } - fn syntax_highlight(&self) -> syntax_highlight::Language { - 
syntax_highlight::Language::None + fn syntax_highlight(&self) -> Language { + Language::None } fn prettify(&self, data: &[u8], metadata: &dyn Metadata) -> Result; diff --git a/src/contentviews/msgpack.rs b/mitmproxy-contentviews/src/msgpack.rs similarity index 96% rename from src/contentviews/msgpack.rs rename to mitmproxy-contentviews/src/msgpack.rs index 9c7439eb..3160e95d 100644 --- a/src/contentviews/msgpack.rs +++ b/mitmproxy-contentviews/src/msgpack.rs @@ -1,8 +1,7 @@ -use crate::contentviews::{Metadata, Prettify, Reencode}; -use crate::syntax_highlight::Language; +use super::{Metadata, Prettify, Reencode}; +use mitmproxy_highlight::Language; use anyhow::{Context, Result}; use rmp_serde::{decode, encode}; -use serde_yaml; pub struct MsgPack; @@ -41,7 +40,7 @@ impl Reencode for MsgPack { #[cfg(test)] mod tests { use super::*; - use crate::contentviews::TestMetadata; + use crate::TestMetadata; // Hardcoded MsgPack data for a simple object: // { diff --git a/src/contentviews/protobuf.rs b/mitmproxy-contentviews/src/protobuf.rs similarity index 96% rename from src/contentviews/protobuf.rs rename to mitmproxy-contentviews/src/protobuf.rs index 6851f2d9..e23a3f46 100644 --- a/src/contentviews/protobuf.rs +++ b/mitmproxy-contentviews/src/protobuf.rs @@ -1,5 +1,5 @@ -use crate::contentviews::{Metadata, Prettify, Reencode}; -use crate::syntax_highlight::Language; +use crate::{Metadata, Prettify, Reencode}; +use mitmproxy_highlight::Language; use anyhow::{bail, Context, Result}; use protobuf::descriptor::field_descriptor_proto::Label::LABEL_REPEATED; use protobuf::descriptor::field_descriptor_proto::Type; @@ -24,21 +24,21 @@ use std::ops::Deref; use std::str::FromStr; mod tags { - use once_cell::sync::Lazy; + use std::sync::LazyLock; use regex::Regex; use serde_yaml::value::Tag; - pub(super) static BINARY: Lazy = Lazy::new(|| Tag::new("binary")); - pub(super) static VARINT: Lazy = Lazy::new(|| Tag::new("varint")); - pub(super) static FIXED32: Lazy = Lazy::new(|| 
Tag::new("fixed32")); - pub(super) static FIXED64: Lazy = Lazy::new(|| Tag::new("fixed64")); - - pub(super) static VARINT_RE: Lazy = - Lazy::new(|| Regex::new(&format!(r"{} (\d+)", *VARINT)).unwrap()); - pub(super) static FIXED32_RE: Lazy = - Lazy::new(|| Regex::new(&format!(r"{} (\d+)", *FIXED32)).unwrap()); - pub(super) static FIXED64_RE: Lazy = - Lazy::new(|| Regex::new(&format!(r"{} (\d+)", *FIXED64)).unwrap()); + pub(super) static BINARY: LazyLock = LazyLock::new(|| Tag::new("binary")); + pub(super) static VARINT: LazyLock = LazyLock::new(|| Tag::new("varint")); + pub(super) static FIXED32: LazyLock = LazyLock::new(|| Tag::new("fixed32")); + pub(super) static FIXED64: LazyLock = LazyLock::new(|| Tag::new("fixed64")); + + pub(super) static VARINT_RE: LazyLock = + LazyLock::new(|| Regex::new(&format!(r"{} (\d+)", *VARINT)).unwrap()); + pub(super) static FIXED32_RE: LazyLock = + LazyLock::new(|| Regex::new(&format!(r"{} (\d+)", *FIXED32)).unwrap()); + pub(super) static FIXED64_RE: LazyLock = + LazyLock::new(|| Regex::new(&format!(r"{} (\d+)", *FIXED64)).unwrap()); } pub struct Protobuf; @@ -530,7 +530,7 @@ pub(super) mod reencode { #[cfg(test)] mod tests { use super::*; - use crate::contentviews::TestMetadata; + use crate::TestMetadata; macro_rules! 
test_roundtrip { ($name:ident,$proto:literal,$yaml:literal) => { diff --git a/mitmproxy-highlight/Cargo.toml b/mitmproxy-highlight/Cargo.toml new file mode 100644 index 00000000..bd36f616 --- /dev/null +++ b/mitmproxy-highlight/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "mitmproxy-highlight" +license = "MIT" +authors.workspace = true +version.workspace = true +repository.workspace = true +edition.workspace = true +rust-version.workspace = true +publish.workspace = true + +[lints] +workspace = true + +[dependencies] +anyhow = { version = "1.0.97", features = ["backtrace"] } +tree-sitter-highlight = "0.25.3" +tree-sitter-yaml = "0.7.0" +tree-sitter-xml = "0.7.0" +tree-sitter = "0.25.3" + +[dev-dependencies] +criterion = "0.5.1" + +[[bench]] +name = "syntax_highlight" +harness = false \ No newline at end of file diff --git a/benches/syntax_highlight.rs b/mitmproxy-highlight/benches/syntax_highlight.rs similarity index 93% rename from benches/syntax_highlight.rs rename to mitmproxy-highlight/benches/syntax_highlight.rs index f78b34f7..aebd169a 100644 --- a/benches/syntax_highlight.rs +++ b/mitmproxy-highlight/benches/syntax_highlight.rs @@ -1,9 +1,10 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use mitmproxy::syntax_highlight; +use mitmproxy_highlight::Language; + fn criterion_benchmark(c: &mut Criterion) { c.bench_function("syntax_highlight small", |b| { b.iter(|| { - syntax_highlight::Language::Xml + Language::Xml .highlight(black_box( br#" @@ -25,7 +26,7 @@ fn criterion_benchmark(c: &mut Criterion) { let data = "x".repeat(8096); c.bench_function("syntax_highlight xml", |b| { b.iter(|| { - syntax_highlight::Language::Xml + Language::Xml .highlight(black_box(data.as_bytes())) .unwrap() }) diff --git a/src/syntax_highlight/common.rs b/mitmproxy-highlight/src/common.rs similarity index 100% rename from src/syntax_highlight/common.rs rename to mitmproxy-highlight/src/common.rs diff --git a/src/syntax_highlight/mod.rs 
b/mitmproxy-highlight/src/lib.rs similarity index 100% rename from src/syntax_highlight/mod.rs rename to mitmproxy-highlight/src/lib.rs diff --git a/src/syntax_highlight/xml.rs b/mitmproxy-highlight/src/xml.rs similarity index 98% rename from src/syntax_highlight/xml.rs rename to mitmproxy-highlight/src/xml.rs index a9c939bc..eb3ab939 100644 --- a/src/syntax_highlight/xml.rs +++ b/mitmproxy-highlight/src/xml.rs @@ -41,7 +41,7 @@ pub fn highlight_xml(input: &[u8]) -> Result> { #[cfg(test)] mod tests { use super::*; - use crate::syntax_highlight::common; + use crate::common; #[ignore] #[test] diff --git a/src/syntax_highlight/yaml.rs b/mitmproxy-highlight/src/yaml.rs similarity index 98% rename from src/syntax_highlight/yaml.rs rename to mitmproxy-highlight/src/yaml.rs index 2f5de328..fe0b79d2 100644 --- a/src/syntax_highlight/yaml.rs +++ b/mitmproxy-highlight/src/yaml.rs @@ -41,7 +41,7 @@ pub fn highlight_yaml(input: &[u8]) -> Result> { #[cfg(test)] mod tests { use super::*; - use crate::syntax_highlight::common; + use crate::common; #[test] fn test_tags_ok() { diff --git a/mitmproxy-rs/Cargo.toml b/mitmproxy-rs/Cargo.toml index 076fdc08..cfffc121 100644 --- a/mitmproxy-rs/Cargo.toml +++ b/mitmproxy-rs/Cargo.toml @@ -17,6 +17,8 @@ crate-type = ["lib", "cdylib"] [dependencies] mitmproxy = { path = "../" } +mitmproxy-highlight = { path = "../mitmproxy-highlight" } +mitmproxy-contentviews = { path = "../mitmproxy-contentviews" } anyhow = { version = "1.0.97", features = ["backtrace"] } data-encoding = "2.8.0" log = "0.4.27" diff --git a/mitmproxy-rs/src/contentview.rs b/mitmproxy-rs/src/contentview.rs index 4138b2dc..1eefc52f 100644 --- a/mitmproxy-rs/src/contentview.rs +++ b/mitmproxy-rs/src/contentview.rs @@ -1,4 +1,4 @@ -use mitmproxy::contentviews::{Metadata, Prettify, Reencode}; +use mitmproxy_contentviews::{Metadata, Prettify, Reencode}; use pyo3::{exceptions::PyValueError, prelude::*}; pub struct PythonMetadata<'py>(Bound<'py, PyAny>); diff --git 
a/mitmproxy-rs/src/lib.rs b/mitmproxy-rs/src/lib.rs index c7e85ee4..c61b7238 100644 --- a/mitmproxy-rs/src/lib.rs +++ b/mitmproxy-rs/src/lib.rs @@ -3,7 +3,7 @@ extern crate core; use std::sync::RwLock; use crate::contentview::{Contentview, InteractiveContentview}; -use mitmproxy::contentviews::{Prettify, Reencode}; +use mitmproxy_contentviews::{Prettify, Reencode}; use once_cell::sync::Lazy; use pyo3::{exceptions::PyException, prelude::*}; @@ -92,7 +92,7 @@ mod mitmproxy_rs { use crate::contentview::Contentview; #[pymodule_export] use crate::contentview::InteractiveContentview; - use mitmproxy::contentviews::{HexDump, HexStream, MsgPack, Protobuf, GRPC}; + use mitmproxy_contentviews::{HexDump, HexStream, MsgPack, Protobuf, GRPC}; #[pymodule_init] fn init(m: &Bound<'_, PyModule>) -> PyResult<()> { diff --git a/mitmproxy-rs/src/syntax_highlight.rs b/mitmproxy-rs/src/syntax_highlight.rs index 6f143633..6d30083b 100644 --- a/mitmproxy-rs/src/syntax_highlight.rs +++ b/mitmproxy-rs/src/syntax_highlight.rs @@ -2,7 +2,7 @@ use anyhow::{anyhow, Result}; use std::str::FromStr; -use mitmproxy::syntax_highlight::{Language, Tag}; +use mitmproxy_highlight::{Language, Tag}; use pyo3::{exceptions::PyValueError, prelude::*}; /// Transform text into a list of tagged chunks. 
diff --git a/src/lib.rs b/src/lib.rs index 2bae2b03..ae3a8748 100755 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,6 @@ pub use network::MAX_PACKET_SIZE; pub mod certificates; -pub mod contentviews; pub mod dns; pub mod intercept_conf; pub mod ipc; @@ -10,6 +9,5 @@ pub mod network; pub mod packet_sources; pub mod processes; pub mod shutdown; -pub mod syntax_highlight; #[cfg(windows)] pub mod windows; From e34dad9e0eefa071e58c01dab0fa159c4a761aa8 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Tue, 8 Apr 2025 09:20:11 +0200 Subject: [PATCH 18/26] use data_encoding crate for hex --- Cargo.lock | 1 + mitmproxy-contentviews/Cargo.toml | 1 + mitmproxy-contentviews/src/hex_stream.rs | 20 +++----------------- 3 files changed, 5 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 23f2e6ea..6d00f7d6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2220,6 +2220,7 @@ version = "0.12.0-dev" dependencies = [ "anyhow", "criterion", + "data-encoding", "log", "mitmproxy-highlight", "pretty-hex", diff --git a/mitmproxy-contentviews/Cargo.toml b/mitmproxy-contentviews/Cargo.toml index b06b5549..130a6ac3 100644 --- a/mitmproxy-contentviews/Cargo.toml +++ b/mitmproxy-contentviews/Cargo.toml @@ -14,6 +14,7 @@ workspace = true [dependencies] anyhow = { version = "1.0.97", features = ["backtrace"] } log = "0.4.27" +data-encoding = "2.8.0" pretty-hex = "0.4.1" mitmproxy-highlight = { path = "../mitmproxy-highlight" } serde = { version = "1.0", features = ["derive"] } diff --git a/mitmproxy-contentviews/src/hex_stream.rs b/mitmproxy-contentviews/src/hex_stream.rs index 8f5ef33d..5627e3c7 100644 --- a/mitmproxy-contentviews/src/hex_stream.rs +++ b/mitmproxy-contentviews/src/hex_stream.rs @@ -1,7 +1,5 @@ use crate::{Metadata, Prettify, Reencode}; use anyhow::{Context, Result}; -use pretty_hex::{HexConfig, PrettyHex}; -use std::num::ParseIntError; pub struct HexStream; @@ -25,17 +23,7 @@ impl Prettify for HexStream { } fn prettify(&self, data: &[u8], _metadata: &dyn 
Metadata) -> Result { - Ok(data - .hex_conf(HexConfig { - title: false, - ascii: false, - width: 0, - group: 0, - chunk: 0, - max_bytes: usize::MAX, - display_offset: 0, - }) - .to_string()) + Ok(data_encoding::HEXLOWER.encode(data)) } fn render_priority(&self, data: &[u8], _metadata: &dyn Metadata) -> f64 { @@ -53,10 +41,8 @@ impl Reencode for HexStream { if data.len() % 2 != 0 { anyhow::bail!("Invalid hex string: uneven number of characters"); } - (0..data.len()) - .step_by(2) - .map(|i| u8::from_str_radix(&data[i..i + 2], 16)) - .collect::, ParseIntError>>() + data_encoding::HEXLOWER_PERMISSIVE + .decode(data.as_bytes()) .context("Invalid hex string") } } From 2d21be3a0ed24222ed1aec2e3ac44ae8d3986a39 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Tue, 8 Apr 2025 09:15:00 +0200 Subject: [PATCH 19/26] move off once_cell --- Cargo.lock | 2 -- Cargo.toml | 1 - mitmproxy-contentviews/src/grpc.rs | 2 +- mitmproxy-contentviews/src/lib.rs | 2 +- mitmproxy-contentviews/src/msgpack.rs | 2 +- mitmproxy-contentviews/src/protobuf.rs | 4 ++-- mitmproxy-highlight/benches/syntax_highlight.rs | 6 +----- mitmproxy-rs/Cargo.toml | 1 - mitmproxy-rs/src/dns_resolver.rs | 6 +++--- mitmproxy-rs/src/lib.rs | 9 ++++----- src/dns.rs | 17 ++++++++--------- src/processes/mod.rs | 5 +++-- src/processes/windows_list.rs | 7 +++---- 13 files changed, 27 insertions(+), 37 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6d00f7d6..d23aab72 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2199,7 +2199,6 @@ dependencies = [ "lru_time_cache", "nix 0.29.0", "objc", - "once_cell", "pretty-hex", "prost", "rand 0.9.0", @@ -2296,7 +2295,6 @@ dependencies = [ "mitmproxy-contentviews", "mitmproxy-highlight", "nix 0.29.0", - "once_cell", "pyo3", "pyo3-async-runtimes", "pyo3-log", diff --git a/Cargo.toml b/Cargo.toml index 6f2b86b1..b2285032 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -57,7 +57,6 @@ publish.workspace = true [dependencies] anyhow = { version = "1.0.97", features = ["backtrace"] } 
log = "0.4.27" -once_cell = "1" pretty-hex = "0.4.1" smoltcp = "0.12" tokio = { version = "1.44.1", features = ["macros", "net", "rt-multi-thread", "sync", "time", "io-util", "process"] } diff --git a/mitmproxy-contentviews/src/grpc.rs b/mitmproxy-contentviews/src/grpc.rs index 9fa301ba..8a1c28df 100644 --- a/mitmproxy-contentviews/src/grpc.rs +++ b/mitmproxy-contentviews/src/grpc.rs @@ -1,6 +1,6 @@ use crate::{Metadata, Prettify, Protobuf, Reencode}; -use mitmproxy_highlight::Language; use anyhow::{bail, Context, Result}; +use mitmproxy_highlight::Language; use serde::Deserialize; use serde_yaml::Value; diff --git a/mitmproxy-contentviews/src/lib.rs b/mitmproxy-contentviews/src/lib.rs index e844c6a3..0f550860 100644 --- a/mitmproxy-contentviews/src/lib.rs +++ b/mitmproxy-contentviews/src/lib.rs @@ -6,10 +6,10 @@ mod protobuf; use anyhow::Result; -use mitmproxy_highlight::Language; pub use grpc::GRPC; pub use hex_dump::HexDump; pub use hex_stream::HexStream; +use mitmproxy_highlight::Language; pub use msgpack::MsgPack; pub use protobuf::Protobuf; diff --git a/mitmproxy-contentviews/src/msgpack.rs b/mitmproxy-contentviews/src/msgpack.rs index 3160e95d..2eb08df3 100644 --- a/mitmproxy-contentviews/src/msgpack.rs +++ b/mitmproxy-contentviews/src/msgpack.rs @@ -1,6 +1,6 @@ use super::{Metadata, Prettify, Reencode}; -use mitmproxy_highlight::Language; use anyhow::{Context, Result}; +use mitmproxy_highlight::Language; use rmp_serde::{decode, encode}; pub struct MsgPack; diff --git a/mitmproxy-contentviews/src/protobuf.rs b/mitmproxy-contentviews/src/protobuf.rs index e23a3f46..092e2ab6 100644 --- a/mitmproxy-contentviews/src/protobuf.rs +++ b/mitmproxy-contentviews/src/protobuf.rs @@ -1,6 +1,6 @@ use crate::{Metadata, Prettify, Reencode}; -use mitmproxy_highlight::Language; use anyhow::{bail, Context, Result}; +use mitmproxy_highlight::Language; use protobuf::descriptor::field_descriptor_proto::Label::LABEL_REPEATED; use 
protobuf::descriptor::field_descriptor_proto::Type; use protobuf::descriptor::field_descriptor_proto::Type::{ @@ -24,9 +24,9 @@ use std::ops::Deref; use std::str::FromStr; mod tags { - use std::sync::LazyLock; use regex::Regex; use serde_yaml::value::Tag; + use std::sync::LazyLock; pub(super) static BINARY: LazyLock = LazyLock::new(|| Tag::new("binary")); pub(super) static VARINT: LazyLock = LazyLock::new(|| Tag::new("varint")); diff --git a/mitmproxy-highlight/benches/syntax_highlight.rs b/mitmproxy-highlight/benches/syntax_highlight.rs index aebd169a..bf7d5afe 100644 --- a/mitmproxy-highlight/benches/syntax_highlight.rs +++ b/mitmproxy-highlight/benches/syntax_highlight.rs @@ -25,11 +25,7 @@ fn criterion_benchmark(c: &mut Criterion) { let data = "x".repeat(8096); c.bench_function("syntax_highlight xml", |b| { - b.iter(|| { - Language::Xml - .highlight(black_box(data.as_bytes())) - .unwrap() - }) + b.iter(|| Language::Xml.highlight(black_box(data.as_bytes())).unwrap()) }); // tree_sitter_html is faster, but not by orders of magnitude. 
diff --git a/mitmproxy-rs/Cargo.toml b/mitmproxy-rs/Cargo.toml index cfffc121..2735d6d0 100644 --- a/mitmproxy-rs/Cargo.toml +++ b/mitmproxy-rs/Cargo.toml @@ -22,7 +22,6 @@ mitmproxy-contentviews = { path = "../mitmproxy-contentviews" } anyhow = { version = "1.0.97", features = ["backtrace"] } data-encoding = "2.8.0" log = "0.4.27" -once_cell = "1" pyo3 = { version = "0.24", features = ["abi3", "abi3-py312", "anyhow"] } pyo3-async-runtimes = { version = "0.24", features = ["tokio-runtime", "testing", "attributes"] } pyo3-log = "0.12" diff --git a/mitmproxy-rs/src/dns_resolver.rs b/mitmproxy-rs/src/dns_resolver.rs index f50115ca..6a473cf9 100644 --- a/mitmproxy-rs/src/dns_resolver.rs +++ b/mitmproxy-rs/src/dns_resolver.rs @@ -1,8 +1,8 @@ use mitmproxy::dns::{ResolveError, DNS_SERVERS}; -use once_cell::sync::OnceCell; use pyo3::exceptions::socket::gaierror; use pyo3::prelude::*; use pyo3::types::PyAny; +use std::sync::OnceLock; use std::{net::IpAddr, net::SocketAddr, sync::Arc}; /// A DNS resolver backed by [hickory-dns](https://github.com/hickory-dns/hickory-dns). 
@@ -78,10 +78,10 @@ pub fn get_system_dns_servers() -> PyResult> { }) } -struct AddrInfoErrorConst(&'static str, OnceCell); +struct AddrInfoErrorConst(&'static str, OnceLock); impl AddrInfoErrorConst { const fn new(identifier: &'static str) -> Self { - AddrInfoErrorConst(identifier, OnceCell::new()) + AddrInfoErrorConst(identifier, OnceLock::new()) } fn get(&self) -> isize { *self.1.get_or_init(|| { diff --git a/mitmproxy-rs/src/lib.rs b/mitmproxy-rs/src/lib.rs index c61b7238..44e9e195 100644 --- a/mitmproxy-rs/src/lib.rs +++ b/mitmproxy-rs/src/lib.rs @@ -1,10 +1,9 @@ extern crate core; -use std::sync::RwLock; +use std::sync::{LazyLock, Mutex}; use crate::contentview::{Contentview, InteractiveContentview}; use mitmproxy_contentviews::{Prettify, Reencode}; -use once_cell::sync::Lazy; use pyo3::{exceptions::PyException, prelude::*}; mod contentview; @@ -17,15 +16,15 @@ pub mod task; mod udp_client; mod util; -static LOGGER_INITIALIZED: Lazy> = Lazy::new(|| RwLock::new(false)); +static LOGGER_INITIALIZED: LazyLock> = LazyLock::new(|| Mutex::new(false)); fn init_logger() -> PyResult<()> { - if *LOGGER_INITIALIZED.read().unwrap() { + if *LOGGER_INITIALIZED.lock().unwrap() { // logger already initialized Ok(()) } else if pyo3_log::try_init().is_ok() { // logger successfully initialized - *LOGGER_INITIALIZED.write().unwrap() = true; + *LOGGER_INITIALIZED.lock().unwrap() = true; Ok(()) } else { // logger was not initialized and could not be initialized diff --git a/src/dns.rs b/src/dns.rs index 18d45df6..25887edc 100644 --- a/src/dns.rs +++ b/src/dns.rs @@ -1,21 +1,20 @@ -use hickory_resolver::config::{LookupIpStrategy, ResolveHosts}; -use hickory_resolver::lookup_ip::LookupIp; -use hickory_resolver::system_conf::read_system_conf; -use hickory_resolver::TokioResolver; -use once_cell::sync::Lazy; -use std::net::IpAddr; -use std::net::SocketAddr; - use hickory_resolver::config::NameServerConfig; use hickory_resolver::config::ResolverConfig; +use 
hickory_resolver::config::{LookupIpStrategy, ResolveHosts}; +use hickory_resolver::lookup_ip::LookupIp; use hickory_resolver::name_server::TokioConnectionProvider; pub use hickory_resolver::proto::op::Query; pub use hickory_resolver::proto::op::ResponseCode; use hickory_resolver::proto::xfer::Protocol; use hickory_resolver::proto::ProtoError; +use hickory_resolver::system_conf::read_system_conf; pub use hickory_resolver::ResolveError; +use hickory_resolver::TokioResolver; +use std::net::IpAddr; +use std::net::SocketAddr; +use std::sync::LazyLock; -pub static DNS_SERVERS: Lazy, ResolveError>> = Lazy::new(|| { +pub static DNS_SERVERS: LazyLock, ResolveError>> = LazyLock::new(|| { let (config, _opts) = read_system_conf()?; Ok(config .name_servers() diff --git a/src/processes/mod.rs b/src/processes/mod.rs index 4ba0794c..1b1b5e94 100644 --- a/src/processes/mod.rs +++ b/src/processes/mod.rs @@ -1,5 +1,6 @@ pub use image; use std::path::PathBuf; +use std::sync::LazyLock; #[cfg(any(target_os = "linux", target_os = "macos"))] mod nix_list; @@ -34,8 +35,8 @@ pub struct ProcessInfo { pub type ProcessList = Vec; #[cfg(any(windows, target_os = "macos"))] -pub static ICON_CACHE: once_cell::sync::Lazy> = - once_cell::sync::Lazy::new(|| std::sync::Mutex::new(IconCache::default())); +pub static ICON_CACHE: std::sync::LazyLock> = + std::sync::LazyLock::new(|| std::sync::Mutex::new(IconCache::default())); pub mod bench { #[cfg(target_os = "macos")] diff --git a/src/processes/windows_list.rs b/src/processes/windows_list.rs index 663a8be7..ba536595 100644 --- a/src/processes/windows_list.rs +++ b/src/processes/windows_list.rs @@ -5,10 +5,9 @@ use std::iter; use std::mem::size_of; use std::os::windows::prelude::{OsStrExt, OsStringExt}; use std::path::{Path, PathBuf}; -use std::sync::Mutex; +use std::sync::{LazyLock, Mutex}; use anyhow::{anyhow, Result}; -use once_cell::sync::Lazy; use windows::core::w; use windows::core::{BOOL, PCWSTR, PWSTR}; use 
windows::Win32::Foundation::{CloseHandle, HANDLE, HWND, LPARAM, MAX_PATH}; @@ -85,8 +84,8 @@ pub fn enumerate_pids() -> Result> { Ok(pids) } -pub static DISPLAY_NAME_CACHE: Lazy> = - Lazy::new(|| Mutex::new(DisplayNameCache::default())); +pub static DISPLAY_NAME_CACHE: LazyLock> = + LazyLock::new(|| Mutex::new(DisplayNameCache::default())); #[derive(Default)] pub struct DisplayNameCache(HashMap>); From 073f3bb8b32c89cb9ffa7e1ee74c93c2ac33db55 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Tue, 8 Apr 2025 10:02:44 +0200 Subject: [PATCH 20/26] fix content_type handling, add render_priority implementations --- mitmproxy-contentviews/src/grpc.rs | 11 +++----- mitmproxy-contentviews/src/hex_dump.rs | 2 +- mitmproxy-contentviews/src/hex_stream.rs | 2 +- mitmproxy-contentviews/src/lib.rs | 15 ++++++----- mitmproxy-contentviews/src/msgpack.rs | 8 ++++++ mitmproxy-contentviews/src/protobuf.rs | 8 ++++++ mitmproxy-rs/src/contentview.rs | 34 ++++++++++++++++++------ src/processes/mod.rs | 1 - 8 files changed, 56 insertions(+), 25 deletions(-) diff --git a/mitmproxy-contentviews/src/grpc.rs b/mitmproxy-contentviews/src/grpc.rs index 8a1c28df..b0496b2f 100644 --- a/mitmproxy-contentviews/src/grpc.rs +++ b/mitmproxy-contentviews/src/grpc.rs @@ -46,13 +46,10 @@ impl Prettify for GRPC { } fn render_priority(&self, _data: &[u8], metadata: &dyn Metadata) -> f64 { - let Some(ct) = metadata.content_type() else { - return 0.0; - }; - match ct.as_str() { - "application/grpc" => 2.0, - "application/grpc+proto" => 2.0, - "application/prpc" => 2.0, + match metadata.content_type() { + Some("application/grpc") => 1.0, + Some("application/grpc+proto") => 1.0, + Some("application/prpc") => 1.0, _ => 0.0, } } diff --git a/mitmproxy-contentviews/src/hex_dump.rs b/mitmproxy-contentviews/src/hex_dump.rs index 70ac9356..af3473cf 100644 --- a/mitmproxy-contentviews/src/hex_dump.rs +++ b/mitmproxy-contentviews/src/hex_dump.rs @@ -26,7 +26,7 @@ impl Prettify for HexDump { fn 
render_priority(&self, data: &[u8], _metadata: &dyn Metadata) -> f64 { if is_binary(data) { - 0.95 + 0.5 } else { 0.0 } diff --git a/mitmproxy-contentviews/src/hex_stream.rs b/mitmproxy-contentviews/src/hex_stream.rs index 5627e3c7..2035126c 100644 --- a/mitmproxy-contentviews/src/hex_stream.rs +++ b/mitmproxy-contentviews/src/hex_stream.rs @@ -28,7 +28,7 @@ impl Prettify for HexStream { fn render_priority(&self, data: &[u8], _metadata: &dyn Metadata) -> f64 { if is_binary(data) { - 0.95 + 0.4 } else { 0.0 } diff --git a/mitmproxy-contentviews/src/lib.rs b/mitmproxy-contentviews/src/lib.rs index 0f550860..77bcdb2d 100644 --- a/mitmproxy-contentviews/src/lib.rs +++ b/mitmproxy-contentviews/src/lib.rs @@ -4,17 +4,17 @@ mod hex_stream; mod msgpack; mod protobuf; -use anyhow::Result; - pub use grpc::GRPC; pub use hex_dump::HexDump; pub use hex_stream::HexStream; -use mitmproxy_highlight::Language; pub use msgpack::MsgPack; pub use protobuf::Protobuf; +use anyhow::Result; +use mitmproxy_highlight::Language; + pub trait Metadata { - fn content_type(&self) -> Option; + fn content_type(&self) -> Option<&str>; } pub trait Prettify: Send + Sync { @@ -30,7 +30,8 @@ pub trait Prettify: Send + Sync { fn prettify(&self, data: &[u8], metadata: &dyn Metadata) -> Result; - fn render_priority(&self, _data: &[u8], _metadata: &dyn Metadata) -> f64 { + #[allow(unused_variables)] + fn render_priority(&self, data: &[u8], metadata: &dyn Metadata) -> f64 { 0.0 } } @@ -45,7 +46,7 @@ pub struct TestMetadata { } impl Metadata for TestMetadata { - fn content_type(&self) -> Option { - self.content_type.clone() + fn content_type(&self) -> Option<&str> { + self.content_type.as_deref() } } diff --git a/mitmproxy-contentviews/src/msgpack.rs b/mitmproxy-contentviews/src/msgpack.rs index 2eb08df3..9e6574c7 100644 --- a/mitmproxy-contentviews/src/msgpack.rs +++ b/mitmproxy-contentviews/src/msgpack.rs @@ -22,6 +22,14 @@ impl Prettify for MsgPack { // Convert the Value to prettified YAML 
serde_yaml::to_string(&value).context("Failed to convert to YAML") } + + fn render_priority(&self, _data: &[u8], metadata: &dyn Metadata) -> f64 { + match metadata.content_type() { + Some("application/msgpack") => 1.0, + Some("application/x-msgpack") => 1.0, + _ => 0.0, + } + } } impl Reencode for MsgPack { diff --git a/mitmproxy-contentviews/src/protobuf.rs b/mitmproxy-contentviews/src/protobuf.rs index 092e2ab6..9f3c41e3 100644 --- a/mitmproxy-contentviews/src/protobuf.rs +++ b/mitmproxy-contentviews/src/protobuf.rs @@ -76,6 +76,14 @@ impl Prettify for Protobuf { let yaml_str = serde_yaml::to_string(&yaml_value).context("Failed to convert to YAML")?; yaml_to_pretty::apply_replacements(&yaml_str) } + + fn render_priority(&self, _data: &[u8], metadata: &dyn Metadata) -> f64 { + match metadata.content_type() { + Some("application/x-protobuf") => 1.0, + Some("application/x-protobuffer") => 1.0, + _ => 0.0, + } + } } impl Reencode for Protobuf { diff --git a/mitmproxy-rs/src/contentview.rs b/mitmproxy-rs/src/contentview.rs index 1eefc52f..c3956bce 100644 --- a/mitmproxy-rs/src/contentview.rs +++ b/mitmproxy-rs/src/contentview.rs @@ -1,21 +1,39 @@ use mitmproxy_contentviews::{Metadata, Prettify, Reencode}; use pyo3::{exceptions::PyValueError, prelude::*}; +use std::cell::OnceCell; -pub struct PythonMetadata<'py>(Bound<'py, PyAny>); +pub struct PythonMetadata<'py> { + inner: Bound<'py, PyAny>, + content_type: OnceCell>, +} + +impl<'py> PythonMetadata<'py> { + pub fn new(inner: Bound<'py, PyAny>) -> Self { + PythonMetadata { + inner, + content_type: OnceCell::new(), + } + } +} impl Metadata for PythonMetadata<'_> { - fn content_type(&self) -> Option { - self.0 - .getattr("content_type") - .ok()? - .extract::() - .ok() + fn content_type(&self) -> Option<&str> { + self.content_type + .get_or_init(|| { + self.inner + .getattr("content_type") + .ok()? 
+ .extract::() + .ok() + }) + .as_ref() + .map(|ct| ct.as_str()) } } impl<'py> FromPyObject<'py> for PythonMetadata<'py> { fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { - Ok(PythonMetadata(ob.clone())) + Ok(PythonMetadata::new(ob.clone())) } } diff --git a/src/processes/mod.rs b/src/processes/mod.rs index 1b1b5e94..8801fcf4 100644 --- a/src/processes/mod.rs +++ b/src/processes/mod.rs @@ -1,6 +1,5 @@ pub use image; use std::path::PathBuf; -use std::sync::LazyLock; #[cfg(any(target_os = "linux", target_os = "macos"))] mod nix_list; From 8749adb35ab68fc4102102edbac7b5721a4467bf Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Tue, 8 Apr 2025 23:18:27 +0200 Subject: [PATCH 21/26] grpc tests + cleanup --- .../benches/contentviews.rs | 2 +- mitmproxy-contentviews/src/grpc.rs | 31 +++++++++++++++- mitmproxy-contentviews/src/hex_dump.rs | 2 +- mitmproxy-contentviews/src/hex_stream.rs | 2 +- mitmproxy-contentviews/src/lib.rs | 37 +++++++++++++++---- mitmproxy-contentviews/src/msgpack.rs | 2 +- mitmproxy-contentviews/src/protobuf.rs | 2 +- mitmproxy-rs/src/contentview.rs | 3 +- 8 files changed, 65 insertions(+), 16 deletions(-) diff --git a/mitmproxy-contentviews/benches/contentviews.rs b/mitmproxy-contentviews/benches/contentviews.rs index 46a6e380..0b0305ab 100644 --- a/mitmproxy-contentviews/benches/contentviews.rs +++ b/mitmproxy-contentviews/benches/contentviews.rs @@ -1,5 +1,5 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use mitmproxy_contentviews::{MsgPack, Prettify, Protobuf, Reencode, TestMetadata}; +use mitmproxy_contentviews::{test::TestMetadata, MsgPack, Prettify, Protobuf, Reencode}; fn criterion_benchmark(c: &mut Criterion) { c.bench_function("protobuf-prettify", |b| { diff --git a/mitmproxy-contentviews/src/grpc.rs b/mitmproxy-contentviews/src/grpc.rs index b0496b2f..4bf83d53 100644 --- a/mitmproxy-contentviews/src/grpc.rs +++ b/mitmproxy-contentviews/src/grpc.rs @@ -71,9 +71,36 @@ impl Reencode for GRPC { 
#[cfg(test)] mod tests { + use super::*; + use crate::test::TestMetadata; + + const TEST_YAML: &str = "1: 150\n\n---\n\n1: 150\n"; + const TEST_GRPC: &[u8] = &[ + 0, 0, 0, 0, 3, 8, 150, 1, // first message + 0, 0, 0, 0, 3, 8, 150, 1, // second message + ]; + + #[test] + fn test_empty() { + let res = GRPC.prettify(&vec![], &TestMetadata::default()).unwrap(); + assert_eq!(res, ""); + } + + #[test] + fn test_prettify_two_messages() { + let res = GRPC.prettify(TEST_GRPC, &TestMetadata::default()).unwrap(); + assert_eq!(res, TEST_YAML); + } + + #[test] + fn test_reencode_two_messages() { + let res = GRPC.reencode(TEST_YAML, &TestMetadata::default()).unwrap(); + assert_eq!(res, TEST_GRPC); + } #[test] - fn test_grpc() { - // FIXME + fn test_render_priority() { + assert_eq!(GRPC.render_priority(b"", &TestMetadata::default().with_content_type("application/grpc")), 1.0); + assert_eq!(GRPC.render_priority(b"", &TestMetadata::default().with_content_type("text/plain")), 0.0); } } diff --git a/mitmproxy-contentviews/src/hex_dump.rs b/mitmproxy-contentviews/src/hex_dump.rs index af3473cf..c96c0443 100644 --- a/mitmproxy-contentviews/src/hex_dump.rs +++ b/mitmproxy-contentviews/src/hex_dump.rs @@ -36,7 +36,7 @@ impl Prettify for HexDump { #[cfg(test)] mod tests { use super::*; - use crate::TestMetadata; + use crate::test::TestMetadata; #[test] fn prettify_simple() { diff --git a/mitmproxy-contentviews/src/hex_stream.rs b/mitmproxy-contentviews/src/hex_stream.rs index 2035126c..f6ca0e98 100644 --- a/mitmproxy-contentviews/src/hex_stream.rs +++ b/mitmproxy-contentviews/src/hex_stream.rs @@ -50,7 +50,7 @@ impl Reencode for HexStream { #[cfg(test)] mod tests { use super::*; - use crate::TestMetadata; + use crate::test::TestMetadata; #[test] fn test_hex_stream() { diff --git a/mitmproxy-contentviews/src/lib.rs b/mitmproxy-contentviews/src/lib.rs index 77bcdb2d..3aa974ea 100644 --- a/mitmproxy-contentviews/src/lib.rs +++ b/mitmproxy-contentviews/src/lib.rs @@ -14,22 +14,31 @@ use 
anyhow::Result; use mitmproxy_highlight::Language; pub trait Metadata { + /// The HTTP `content-type` of this message. fn content_type(&self) -> Option<&str>; } +/// See https://docs.mitmproxy.org/dev/api/mitmproxy/contentviews.html +/// for API details. pub trait Prettify: Send + Sync { + /// The name for this contentview, e.g. `gRPC` or `Protobuf`. + /// Favor brevity. fn name(&self) -> &str; fn instance_name(&self) -> String { self.name().to_lowercase().replace(" ", "_") } + /// The syntax highlighting that should be applied to the prettified output. + /// This is useful for contentviews that prettify to JSON or YAML. fn syntax_highlight(&self) -> Language { Language::None } + /// Pretty-print `data`. fn prettify(&self, data: &[u8], metadata: &dyn Metadata) -> Result; + /// Render priority - typically a float between 0 and 1 for builtin views. #[allow(unused_variables)] fn render_priority(&self, data: &[u8], metadata: &dyn Metadata) -> f64 { 0.0 @@ -40,13 +49,27 @@ pub trait Reencode: Send + Sync { fn reencode(&self, data: &str, metadata: &dyn Metadata) -> Result>; } -#[derive(Default)] -pub struct TestMetadata { - pub content_type: Option, -} +// no cfg(test) gate because it's used in benchmarks as well +pub mod test { + use crate::Metadata; + + #[derive(Default)] + pub struct TestMetadata { + pub content_type: Option, + } -impl Metadata for TestMetadata { - fn content_type(&self) -> Option<&str> { - self.content_type.as_deref() + impl TestMetadata { + pub fn with_content_type(mut self, content_type: &str) -> Self { + self.content_type = Some(content_type.to_string()); + self + } } + + impl Metadata for TestMetadata { + fn content_type(&self) -> Option<&str> { + self.content_type.as_deref() + } + } + + } diff --git a/mitmproxy-contentviews/src/msgpack.rs b/mitmproxy-contentviews/src/msgpack.rs index 9e6574c7..c4295f82 100644 --- a/mitmproxy-contentviews/src/msgpack.rs +++ b/mitmproxy-contentviews/src/msgpack.rs @@ -48,7 +48,7 @@ impl Reencode for MsgPack { 
#[cfg(test)] mod tests { use super::*; - use crate::TestMetadata; + use crate::test::TestMetadata; // Hardcoded MsgPack data for a simple object: // { diff --git a/mitmproxy-contentviews/src/protobuf.rs b/mitmproxy-contentviews/src/protobuf.rs index 9f3c41e3..1d9d11ed 100644 --- a/mitmproxy-contentviews/src/protobuf.rs +++ b/mitmproxy-contentviews/src/protobuf.rs @@ -538,7 +538,7 @@ pub(super) mod reencode { #[cfg(test)] mod tests { use super::*; - use crate::TestMetadata; + use crate::test::TestMetadata; macro_rules! test_roundtrip { ($name:ident,$proto:literal,$yaml:literal) => { diff --git a/mitmproxy-rs/src/contentview.rs b/mitmproxy-rs/src/contentview.rs index c3956bce..dc7523e9 100644 --- a/mitmproxy-rs/src/contentview.rs +++ b/mitmproxy-rs/src/contentview.rs @@ -26,8 +26,7 @@ impl Metadata for PythonMetadata<'_> { .extract::() .ok() }) - .as_ref() - .map(|ct| ct.as_str()) + .as_deref() } } From 4a181fb865f845af6dec7fd662a10587e1daa1a9 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Fri, 11 Apr 2025 15:43:22 +0200 Subject: [PATCH 22/26] gRPC: add compression support --- Cargo.lock | 1 + mitmproxy-contentviews/Cargo.toml | 1 + mitmproxy-contentviews/src/grpc.rs | 85 ++++++++++++++++++++++++------ mitmproxy-contentviews/src/lib.rs | 15 +++++- mitmproxy-rs/src/contentview.rs | 6 +++ 5 files changed, 90 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d23aab72..b01d4817 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2220,6 +2220,7 @@ dependencies = [ "anyhow", "criterion", "data-encoding", + "flate2", "log", "mitmproxy-highlight", "pretty-hex", diff --git a/mitmproxy-contentviews/Cargo.toml b/mitmproxy-contentviews/Cargo.toml index 130a6ac3..dbb8658a 100644 --- a/mitmproxy-contentviews/Cargo.toml +++ b/mitmproxy-contentviews/Cargo.toml @@ -22,6 +22,7 @@ serde_yaml = "0.9" rmp-serde = "1.1" protobuf = "3.7.2" regex = "1.10.3" +flate2 = "1.0.28" [dev-dependencies] criterion = "0.5.1" diff --git a/mitmproxy-contentviews/src/grpc.rs 
b/mitmproxy-contentviews/src/grpc.rs index 4bf83d53..2d1ce615 100644 --- a/mitmproxy-contentviews/src/grpc.rs +++ b/mitmproxy-contentviews/src/grpc.rs @@ -1,8 +1,10 @@ use crate::{Metadata, Prettify, Protobuf, Reencode}; use anyhow::{bail, Context, Result}; +use flate2::read::{DeflateDecoder, GzDecoder}; use mitmproxy_highlight::Language; use serde::Deserialize; use serde_yaml::Value; +use std::io::Read; pub struct GRPC; @@ -29,20 +31,34 @@ impl Prettify for GRPC { _ => bail!("invalid gRPC: first byte is not a boolean"), }; let Some(proto) = data.get(5..5 + len) else { - bail!("Invald gRPC: not enough data") + bail!("Invalid gRPC: not enough data") }; - if compressed { - todo!(); - } - protos.push(proto); + + let mut decompressed = Vec::new(); + let proto = if compressed { + let encoding = metadata.get_header("grpc-encoding").unwrap_or_default(); + match encoding.as_str() { + "deflate" => { + let mut decoder = DeflateDecoder::new(proto); + decoder.read_to_end(&mut decompressed)?; + &decompressed + } + "gzip" => { + let mut decoder = GzDecoder::new(proto); + decoder.read_to_end(&mut decompressed)?; + &decompressed + } + "identity" => proto, + _ => bail!("unsupported compression: {}", encoding), + } + } else { + proto + }; + protos.push(Protobuf.prettify(proto, metadata)?); data = &data[5 + len..]; } - let prettified = protos - .into_iter() - .map(|proto| Protobuf.prettify(proto, metadata)) - .collect::>>()?; - Ok(prettified.join("\n---\n\n")) + Ok(protos.join("\n---\n\n")) } fn render_priority(&self, _data: &[u8], metadata: &dyn Metadata) -> f64 { @@ -61,7 +77,7 @@ impl Reencode for GRPC { for document in serde_yaml::Deserializer::from_str(data) { let value = Value::deserialize(document).context("Invalid YAML")?; let proto = super::protobuf::reencode::reencode_yaml(value, metadata)?; - ret.push(0); // compressed + ret.push(0); // uncompressed ret.extend(u32::to_be_bytes(proto.len() as u32)); ret.extend(proto); } @@ -76,13 +92,24 @@ mod tests { const TEST_YAML: &str 
= "1: 150\n\n---\n\n1: 150\n"; const TEST_GRPC: &[u8] = &[ - 0, 0, 0, 0, 3, 8, 150, 1, // first message - 0, 0, 0, 0, 3, 8, 150, 1, // second message + 0, 0, 0, 0, 3, 8, 150, 1, // first message + 0, 0, 0, 0, 3, 8, 150, 1, // second message + ]; + + const TEST_GZIP: &[u8] = &[ + 1, 0, 0, 0, 23, // compressed flag and length + 31, 139, 8, 0, 0, 0, 0, 0, 0, 255, 227, 152, 198, 8, 0, 160, 149, 78, 161, 3, 0, 0, + 0, // gzip data + ]; + + const TEST_DEFLATE: &[u8] = &[ + 1, 0, 0, 0, 5, // compressed flag and length + 227, 152, 198, 8, 0, // deflate data ]; #[test] fn test_empty() { - let res = GRPC.prettify(&vec![], &TestMetadata::default()).unwrap(); + let res = GRPC.prettify(&[], &TestMetadata::default()).unwrap(); assert_eq!(res, ""); } @@ -92,6 +119,20 @@ mod tests { assert_eq!(res, TEST_YAML); } + #[test] + fn test_prettify_gzip() { + let metadata = TestMetadata::default().with_header("grpc-encoding", "gzip"); + let res = GRPC.prettify(TEST_GZIP, &metadata).unwrap(); + assert_eq!(res, "1: 150\n"); + } + + #[test] + fn test_prettify_deflate() { + let metadata = TestMetadata::default().with_header("grpc-encoding", "deflate"); + let res = GRPC.prettify(TEST_DEFLATE, &metadata).unwrap(); + assert_eq!(res, "1: 150\n"); + } + #[test] fn test_reencode_two_messages() { let res = GRPC.reencode(TEST_YAML, &TestMetadata::default()).unwrap(); @@ -100,7 +141,19 @@ mod tests { #[test] fn test_render_priority() { - assert_eq!(GRPC.render_priority(b"", &TestMetadata::default().with_content_type("application/grpc")), 1.0); - assert_eq!(GRPC.render_priority(b"", &TestMetadata::default().with_content_type("text/plain")), 0.0); + assert_eq!( + GRPC.render_priority( + b"", + &TestMetadata::default().with_content_type("application/grpc") + ), + 1.0 + ); + assert_eq!( + GRPC.render_priority( + b"", + &TestMetadata::default().with_content_type("text/plain") + ), + 0.0 + ); } } diff --git a/mitmproxy-contentviews/src/lib.rs b/mitmproxy-contentviews/src/lib.rs index 3aa974ea..cffc5e10 
100644 --- a/mitmproxy-contentviews/src/lib.rs +++ b/mitmproxy-contentviews/src/lib.rs @@ -16,6 +16,9 @@ use mitmproxy_highlight::Language; pub trait Metadata { /// The HTTP `content-type` of this message. fn content_type(&self) -> Option<&str>; + /// Get an HTTP header value by name. + /// `name` is case-insensitive. + fn get_header(&self, name: &str) -> Option; } /// See https://docs.mitmproxy.org/dev/api/mitmproxy/contentviews.html @@ -56,6 +59,7 @@ pub mod test { #[derive(Default)] pub struct TestMetadata { pub content_type: Option, + pub headers: std::collections::HashMap, } impl TestMetadata { @@ -63,13 +67,20 @@ pub mod test { self.content_type = Some(content_type.to_string()); self } + + pub fn with_header(mut self, name: &str, value: &str) -> Self { + self.headers.insert(name.to_lowercase(), value.to_string()); + self + } } impl Metadata for TestMetadata { fn content_type(&self) -> Option<&str> { self.content_type.as_deref() } - } - + fn get_header(&self, name: &str) -> Option { + self.headers.get(&name.to_lowercase()).cloned() + } + } } diff --git a/mitmproxy-rs/src/contentview.rs b/mitmproxy-rs/src/contentview.rs index dc7523e9..f8020635 100644 --- a/mitmproxy-rs/src/contentview.rs +++ b/mitmproxy-rs/src/contentview.rs @@ -28,6 +28,12 @@ impl Metadata for PythonMetadata<'_> { }) .as_deref() } + + fn get_header(&self, name: &str) -> Option { + let http_message = self.inner.getattr("http_message").ok()?; + let headers = http_message.getattr("headers").ok()?; + headers.get_item(name).ok()?.extract::().ok() + } } impl<'py> FromPyObject<'py> for PythonMetadata<'py> { From 8d0379ee5159348cba4b66da51733668509df030 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Sun, 13 Apr 2025 17:11:00 +0200 Subject: [PATCH 23/26] gRPC: incorporate existing protobuf definitions --- Cargo.lock | 40 +- mitmproxy-contentviews/Cargo.toml | 5 +- mitmproxy-contentviews/src/lib.rs | 49 +- mitmproxy-contentviews/src/protobuf.rs | 668 ------------------ 
.../protobuf/existing_proto_definitions.rs | 142 ++++ mitmproxy-contentviews/src/protobuf/mod.rs | 10 + .../src/protobuf/proto_to_yaml.rs | 114 +++ .../src/protobuf/raw_to_proto.rs | 184 +++++ .../src/protobuf/reencode.rs | 155 ++++ .../src/protobuf/test.proto | 5 + .../src/{grpc.rs => protobuf/view_grpc.rs} | 70 +- .../src/protobuf/view_protobuf.rs | 214 ++++++ .../src/protobuf/yaml_to_pretty.rs | 72 ++ .../testdata/protobuf/grpcbin.proto | 77 ++ .../testdata/protobuf/nested.proto | 17 + .../testdata/protobuf/simple.proto | 5 + .../testdata/protobuf/simple_package.proto | 18 + .../testdata/protobuf/simple_service.proto | 16 + mitmproxy-rs/src/contentview.rs | 49 +- mitmproxy-rs/src/server/local_redirector.rs | 3 +- mitmproxy-rs/src/syntax_highlight.rs | 2 +- 21 files changed, 1235 insertions(+), 680 deletions(-) delete mode 100644 mitmproxy-contentviews/src/protobuf.rs create mode 100644 mitmproxy-contentviews/src/protobuf/existing_proto_definitions.rs create mode 100644 mitmproxy-contentviews/src/protobuf/mod.rs create mode 100644 mitmproxy-contentviews/src/protobuf/proto_to_yaml.rs create mode 100644 mitmproxy-contentviews/src/protobuf/raw_to_proto.rs create mode 100644 mitmproxy-contentviews/src/protobuf/reencode.rs create mode 100644 mitmproxy-contentviews/src/protobuf/test.proto rename mitmproxy-contentviews/src/{grpc.rs => protobuf/view_grpc.rs} (64%) create mode 100644 mitmproxy-contentviews/src/protobuf/view_protobuf.rs create mode 100644 mitmproxy-contentviews/src/protobuf/yaml_to_pretty.rs create mode 100644 mitmproxy-contentviews/testdata/protobuf/grpcbin.proto create mode 100644 mitmproxy-contentviews/testdata/protobuf/nested.proto create mode 100644 mitmproxy-contentviews/testdata/protobuf/simple.proto create mode 100644 mitmproxy-contentviews/testdata/protobuf/simple_package.proto create mode 100644 mitmproxy-contentviews/testdata/protobuf/simple_service.proto diff --git a/Cargo.lock b/Cargo.lock index b01d4817..5b22755b 100644 --- a/Cargo.lock +++ 
b/Cargo.lock @@ -1542,6 +1542,15 @@ dependencies = [ "digest", ] +[[package]] +name = "home" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" +dependencies = [ + "windows-sys 0.59.0", +] + [[package]] name = "hostname" version = "0.4.0" @@ -2225,6 +2234,7 @@ dependencies = [ "mitmproxy-highlight", "pretty-hex", "protobuf", + "protobuf-parse", "regex", "rmp-serde", "serde", @@ -2272,7 +2282,7 @@ dependencies = [ "aya-ebpf", "aya-log-ebpf", "mitmproxy-linux-ebpf-common", - "which", + "which 7.0.2", ] [[package]] @@ -2748,6 +2758,22 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "protobuf-parse" +version = "3.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4aeaa1f2460f1d348eeaeed86aea999ce98c1bded6f089ff8514c9d9dbdc973" +dependencies = [ + "anyhow", + "indexmap 2.9.0", + "log", + "protobuf", + "protobuf-support", + "tempfile", + "thiserror 1.0.69", + "which 4.4.2", +] + [[package]] name = "protobuf-support" version = "3.7.2" @@ -3979,6 +4005,18 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082" +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix 0.38.44", +] + [[package]] name = "which" version = "7.0.2" diff --git a/mitmproxy-contentviews/Cargo.toml b/mitmproxy-contentviews/Cargo.toml index dbb8658a..cdbb0b18 100644 --- a/mitmproxy-contentviews/Cargo.toml +++ b/mitmproxy-contentviews/Cargo.toml @@ -22,11 +22,12 @@ serde_yaml = "0.9" rmp-serde = "1.1" protobuf = "3.7.2" regex = "1.10.3" -flate2 = "1.0.28" +flate2 = "1.0" +protobuf-parse = "3.7" [dev-dependencies] criterion = "0.5.1" 
[[bench]] name = "contentviews" -harness = false \ No newline at end of file +harness = false diff --git a/mitmproxy-contentviews/src/lib.rs b/mitmproxy-contentviews/src/lib.rs index cffc5e10..fdaf5a95 100644 --- a/mitmproxy-contentviews/src/lib.rs +++ b/mitmproxy-contentviews/src/lib.rs @@ -1,17 +1,17 @@ -mod grpc; mod hex_dump; mod hex_stream; mod msgpack; mod protobuf; -pub use grpc::GRPC; pub use hex_dump::HexDump; pub use hex_stream::HexStream; pub use msgpack::MsgPack; pub use protobuf::Protobuf; +pub use protobuf::GRPC; use anyhow::Result; use mitmproxy_highlight::Language; +use std::path::Path; pub trait Metadata { /// The HTTP `content-type` of this message. @@ -19,6 +19,18 @@ pub trait Metadata { /// Get an HTTP header value by name. /// `name` is case-insensitive. fn get_header(&self, name: &str) -> Option; + /// Get the path from the flow's request. + fn get_path(&self) -> Option<&str> { + None + } + /// Check if this is an HTTP request. + fn is_http_request(&self) -> bool { + false + } + /// Get the protobuf definitions for this message. 
+ fn protobuf_definitions(&self) -> Option<&Path> { + None + } } /// See https://docs.mitmproxy.org/dev/api/mitmproxy/contentviews.html @@ -55,11 +67,15 @@ pub trait Reencode: Send + Sync { // no cfg(test) gate because it's used in benchmarks as well pub mod test { use crate::Metadata; + use std::path::Path; #[derive(Default)] pub struct TestMetadata { pub content_type: Option, pub headers: std::collections::HashMap, + pub protobuf_definitions: Option, + pub path: Option, + pub is_http_request: bool, } impl TestMetadata { @@ -72,6 +88,21 @@ pub mod test { self.headers.insert(name.to_lowercase(), value.to_string()); self } + + pub fn with_path(mut self, path: &str) -> Self { + self.path = Some(path.to_string()); + self + } + + pub fn with_protobuf_definitions>(mut self, definitions: P) -> Self { + self.protobuf_definitions = Some(definitions.as_ref().to_path_buf()); + self + } + + pub fn with_is_http_request(mut self, is_http_request: bool) -> Self { + self.is_http_request = is_http_request; + self + } } impl Metadata for TestMetadata { @@ -80,7 +111,19 @@ pub mod test { } fn get_header(&self, name: &str) -> Option { - self.headers.get(&name.to_lowercase()).cloned() + self.headers.get(name).cloned() + } + + fn get_path(&self) -> Option<&str> { + self.path.as_deref() + } + + fn protobuf_definitions(&self) -> Option<&Path> { + self.protobuf_definitions.as_deref() + } + + fn is_http_request(&self) -> bool { + self.is_http_request } } } diff --git a/mitmproxy-contentviews/src/protobuf.rs b/mitmproxy-contentviews/src/protobuf.rs deleted file mode 100644 index 1d9d11ed..00000000 --- a/mitmproxy-contentviews/src/protobuf.rs +++ /dev/null @@ -1,668 +0,0 @@ -use crate::{Metadata, Prettify, Reencode}; -use anyhow::{bail, Context, Result}; -use mitmproxy_highlight::Language; -use protobuf::descriptor::field_descriptor_proto::Label::LABEL_REPEATED; -use protobuf::descriptor::field_descriptor_proto::Type; -use protobuf::descriptor::field_descriptor_proto::Type::{ - TYPE_BYTES, 
TYPE_FIXED32, TYPE_FIXED64, TYPE_STRING, TYPE_UINT64, -}; -use protobuf::descriptor::{DescriptorProto, FieldDescriptorProto, FileDescriptorProto}; -use protobuf::reflect::{ - FieldDescriptor, FileDescriptor, MessageDescriptor, ReflectFieldRef, ReflectValueRef, - RuntimeFieldType, RuntimeType, -}; -use protobuf::well_known_types::empty::Empty; -use protobuf::UnknownValueRef; -use protobuf::{EnumOrUnknown, MessageDyn, MessageFull, UnknownValue}; -use regex::Captures; -use serde_yaml::value::TaggedValue; -use serde_yaml::{Mapping, Number, Value}; -use std::collections::BTreeMap; -use std::fmt::Write; -use std::num::ParseIntError; -use std::ops::Deref; -use std::str::FromStr; - -mod tags { - use regex::Regex; - use serde_yaml::value::Tag; - use std::sync::LazyLock; - - pub(super) static BINARY: LazyLock = LazyLock::new(|| Tag::new("binary")); - pub(super) static VARINT: LazyLock = LazyLock::new(|| Tag::new("varint")); - pub(super) static FIXED32: LazyLock = LazyLock::new(|| Tag::new("fixed32")); - pub(super) static FIXED64: LazyLock = LazyLock::new(|| Tag::new("fixed64")); - - pub(super) static VARINT_RE: LazyLock = - LazyLock::new(|| Regex::new(&format!(r"{} (\d+)", *VARINT)).unwrap()); - pub(super) static FIXED32_RE: LazyLock = - LazyLock::new(|| Regex::new(&format!(r"{} (\d+)", *FIXED32)).unwrap()); - pub(super) static FIXED64_RE: LazyLock = - LazyLock::new(|| Regex::new(&format!(r"{} (\d+)", *FIXED64)).unwrap()); -} - -pub struct Protobuf; - -enum GuessedFieldType { - String, - Message(Box), - Unknown, -} - -impl Prettify for Protobuf { - fn name(&self) -> &str { - "Protobuf" - } - - fn syntax_highlight(&self) -> Language { - Language::Yaml - } - - fn prettify(&self, data: &[u8], _metadata: &dyn Metadata) -> Result { - // Check if data is empty first - if data.is_empty() { - return Ok("{} # empty protobuf message".to_string()); - } - - let existing = Empty::descriptor(); - let descriptor = raw_to_proto::merge_proto_and_descriptor(data, existing)?; - - // Parse 
protobuf and convert to YAML - let message = descriptor - .parse_from_bytes(data) - .context("Error parsing protobuf")?; - let yaml_value = proto_to_yaml::message_to_yaml(message.as_ref()); - - let yaml_str = serde_yaml::to_string(&yaml_value).context("Failed to convert to YAML")?; - yaml_to_pretty::apply_replacements(&yaml_str) - } - - fn render_priority(&self, _data: &[u8], metadata: &dyn Metadata) -> f64 { - match metadata.content_type() { - Some("application/x-protobuf") => 1.0, - Some("application/x-protobuffer") => 1.0, - _ => 0.0, - } - } -} - -impl Reencode for Protobuf { - fn reencode(&self, data: &str, metadata: &dyn Metadata) -> Result> { - let value: Value = serde_yaml::from_str(data).context("Invalid YAML")?; - reencode::reencode_yaml(value, metadata) - } -} - -/// Existing protobuf definition + raw data => merged protobuf definition -mod raw_to_proto { - use super::*; - - /// Create a "merged" MessageDescriptor. Mostly a wrapper around `create_descriptor_proto`. - pub(super) fn merge_proto_and_descriptor( - data: &[u8], - existing: MessageDescriptor, - ) -> anyhow::Result { - let proto = create_descriptor_proto(data, existing, "Unknown".to_string())?; - - let descriptor = { - let mut proto_file = FileDescriptorProto::new(); - proto_file.message_type.push(proto); - - FileDescriptor::new_dynamic(proto_file, &[]) - // FIXME - .unwrap() - .messages() - .next() - .unwrap() - }; - - Ok(descriptor) - } - - /// Create a DescriptorProto that combines the `existing` MessageDescriptor with (guessed) - /// metadata for all unknown fields in the protobuf `data`. 
- fn create_descriptor_proto( - data: &[u8], - existing: MessageDescriptor, - name: String, - ) -> Result { - let message = existing - .parse_from_bytes(data) - .context("failed to parse protobuf")?; - - let mut descriptor = existing.proto().clone(); - - let mut field_groups: BTreeMap> = BTreeMap::new(); - for (field_number, value) in message.unknown_fields_dyn().iter() { - field_groups.entry(field_number).or_default().push(value); - } - - for (field_index, field_values) in field_groups.into_iter() { - let mut add_int = |typ: Type| { - descriptor.field.push(FieldDescriptorProto { - number: Some(field_index as i32), - name: Some(format!("unknown_field_{}", field_index)), - type_: Some(EnumOrUnknown::from(typ)), - ..Default::default() - }); - }; - match field_values[0] { - // We can't use float/double here because of NaN handling. - UnknownValueRef::Fixed32(_) => add_int(TYPE_FIXED32), - UnknownValueRef::Fixed64(_) => add_int(TYPE_FIXED64), - UnknownValueRef::Varint(_) => add_int(TYPE_UINT64), - UnknownValueRef::LengthDelimited(_) => { - let field_values = field_values - .iter() - .map(|x| match x { - UnknownValueRef::LengthDelimited(data) => Ok(*data), - _ => Err(anyhow::anyhow!("varying types in protobuf")), - }) - .collect::>>()?; - - match guess_field_type(&field_values, &name, field_index) { - GuessedFieldType::String => add_int(TYPE_STRING), - GuessedFieldType::Unknown => add_int(TYPE_BYTES), - GuessedFieldType::Message(m) => { - descriptor.field.push(FieldDescriptorProto { - number: Some(field_index as i32), - name: Some(format!("unknown_field_{}", field_index)), - type_name: Some(format!(".{}.{}", name, m.name())), - type_: Some(EnumOrUnknown::from(Type::TYPE_MESSAGE)), - ..Default::default() - }); - descriptor.nested_type.push(*m); - } - } - } - } - if field_values.len() > 1 { - descriptor - .field - .last_mut() - .expect("we just added this field") - .set_label(LABEL_REPEATED); - } - } - - descriptor.set_name(name); - Ok(descriptor) - } - - /// Given all 
`values` of a field, guess its type. - fn guess_field_type(values: &[&[u8]], name: &str, field_index: u32) -> GuessedFieldType { - if values.iter().all(|data| { - std::str::from_utf8(data).is_ok_and(|s| { - s.chars() - .all(|c| c.is_ascii_graphic() || c.is_ascii_whitespace()) - }) - }) { - return GuessedFieldType::String; - } - - // Try to parse as a nested message - let name = format!("{name}.unknown_field_{field_index}"); - if let Ok(mut descriptor) = - { create_descriptor_proto(values[0], Empty::descriptor(), name) } - { - if values - .iter() - .skip(1) - .all(|data| descriptor.descriptor_dyn().parse_from_bytes(data).is_ok()) - { - descriptor.set_name(format!("unknown_field_{field_index}")); - return GuessedFieldType::Message(Box::new(descriptor)); - } - } - - GuessedFieldType::Unknown - } -} - -/// Parsed protobuf message => YAML value -mod proto_to_yaml { - use super::*; - - pub(super) fn message_to_yaml(message: &dyn MessageDyn) -> Value { - let mut ret = Mapping::new(); - - for field in message.descriptor_dyn().fields() { - let key = if field.name().starts_with("unknown_field_") { - Value::from(field.number()) - } else { - Value::from(field.name()) - }; - let field_type = field - .proto() - .type_ - .map(|t| t.enum_value_or(TYPE_BYTES)) - .unwrap_or(TYPE_BYTES); - - let value = match field.get_reflect(message) { - ReflectFieldRef::Optional(x) => { - if let Some(x) = x.value() { - primitive_type_to_yaml(x, field_type) - } else { - Value::Null - } - } - ReflectFieldRef::Repeated(x) => Value::Sequence( - x.into_iter() - .map(|x| primitive_type_to_yaml(x, field_type)) - .collect(), - ), - ReflectFieldRef::Map(x) => Value::Mapping( - x.into_iter() - .map(|(k, v)| { - ( - primitive_type_to_yaml(k, field_type), - primitive_type_to_yaml(v, field_type), - ) - }) - .collect(), - ), - }; - ret.insert(key, value); - } - Value::Mapping(ret) - } - - fn primitive_type_to_yaml(x: ReflectValueRef, field_type: Type) -> Value { - match x { - ReflectValueRef::U32(x) => 
tag_number(Value::Number(Number::from(x)), field_type), - ReflectValueRef::U64(x) => tag_number(Value::Number(Number::from(x)), field_type), - ReflectValueRef::I32(x) => Value::Number(Number::from(x)), - ReflectValueRef::I64(x) => Value::Number(Number::from(x)), - ReflectValueRef::F32(x) => Value::Number(Number::from(x)), - ReflectValueRef::F64(x) => Value::Number(Number::from(x)), - ReflectValueRef::Bool(x) => Value::from(x), - ReflectValueRef::String(x) => Value::from(x), - ReflectValueRef::Bytes(x) => Value::Tagged(Box::new(TaggedValue { - tag: tags::BINARY.clone(), - value: Value::String(bytes_to_hex_string(x)), - })), - ReflectValueRef::Enum(descriptor, i) => descriptor - .value_by_number(i) - .map(|v| Value::String(v.name().to_string())) - .unwrap_or_else(|| Value::Number(Number::from(i))), - ReflectValueRef::Message(m) => message_to_yaml(m.deref()), - } - } - - fn tag_number(value: Value, field_type: Type) -> Value { - match field_type { - TYPE_UINT64 => Value::Tagged(Box::new(TaggedValue { - tag: tags::VARINT.clone(), - value, - })), - TYPE_FIXED64 => Value::Tagged(Box::new(TaggedValue { - tag: tags::FIXED64.clone(), - value, - })), - TYPE_FIXED32 => Value::Tagged(Box::new(TaggedValue { - tag: tags::FIXED32.clone(), - value, - })), - _ => value, - } - } - - // Convert length-delimited protobuf data to a hex string - fn bytes_to_hex_string(bytes: &[u8]) -> String { - let mut result = String::with_capacity(bytes.len() * 2); - for b in bytes { - let _ = write!(result, "{:02x}", b); - } - result - } -} - -/// YAML value => prettified text -mod yaml_to_pretty { - use super::*; - // Helper method to apply regex replacements to the YAML output - pub(super) fn apply_replacements(yaml_str: &str) -> Result { - // Replace !fixed32 tags with comments showing float and i32 interpretations - let with_fixed32 = tags::FIXED32_RE.replace_all(yaml_str, |caps: &Captures| { - let value = caps[1].parse::().unwrap_or_default(); - let float_value = f32::from_bits(value); - let 
i32_value = value as i32; - - if !float_value.is_nan() && float_value < 0.0 { - format!( - "{} {} # float: {}, i32: {}", - *tags::FIXED32, - value, - float_value, - i32_value - ) - } else if !float_value.is_nan() { - format!("{} {} # float: {}", *tags::FIXED32, value, float_value) - } else if i32_value < 0 { - format!("{} {} # i32: {}", *tags::FIXED32, value, i32_value) - } else { - format!("{} {}", *tags::FIXED32, value) - } - }); - - // Replace !fixed64 tags with comments showing double and i64 interpretations - let with_fixed64 = tags::FIXED64_RE.replace_all(&with_fixed32, |caps: &Captures| { - let value = caps[1].parse::().unwrap_or_default(); - let double_value = f64::from_bits(value); - let i64_value = value as i64; - - if !double_value.is_nan() && double_value < 0.0 { - format!( - "{} {} # double: {}, i64: {}", - *tags::FIXED64, - value, - double_value, - i64_value - ) - } else if !double_value.is_nan() { - format!("{} {} # double: {}", *tags::FIXED64, value, double_value) - } else if i64_value < 0 { - format!("{} {} # i64: {}", *tags::FIXED64, value, i64_value) - } else { - format!("{} {}", *tags::FIXED64, value) - } - }); - - // Replace !varint tags with comments showing signed interpretation if different - let with_varint = tags::VARINT_RE.replace_all(&with_fixed64, |caps: &Captures| { - let unsigned_value = caps[1].parse::().unwrap_or_default(); - let i64_zigzag = decode_zigzag64(unsigned_value); - - // Only show signed value if it's different from unsigned - if i64_zigzag < 0 { - format!("{} # signed: {}", unsigned_value, i64_zigzag) - } else { - unsigned_value.to_string() - } - }); - - Ok(with_varint.to_string()) - } - - // Decode a zigzag-encoded 64-bit integer - fn decode_zigzag64(n: u64) -> i64 { - ((n >> 1) as i64) ^ (-((n & 1) as i64)) - } -} - -pub(super) mod reencode { - use super::*; - - pub(crate) fn reencode_yaml(value: Value, _metadata: &dyn Metadata) -> Result> { - let descriptor = Empty::descriptor(); - let message = 
descriptor.new_instance(); - merge_yaml_into_message(value, message) - } - - fn merge_yaml_into_message(value: Value, mut message: Box) -> Result> { - let Value::Mapping(mapping) = value else { - bail!("YAML is not a mapping"); - }; - - for (key, value) in mapping.into_iter() { - let field_num = match key { - Value::String(key) => { - if let Some(field) = message.descriptor_dyn().field_by_name(&key) { - field.number() - } else if let Ok(field_num) = i32::from_str(&key) { - field_num - } else { - bail!("Unknown protobuf field key: {key}"); - } - } - Value::Number(key) => { - let Some(field_num) = key.as_i64() else { - bail!("Invalid protobuf field number: {key}"); - }; - field_num as i32 - } - other => { - bail!("Unexpected key: {other:?}"); - } - } as u32; - - add_field(message.as_mut(), field_num, value)?; - } - - message - .write_to_bytes_dyn() - .context("Failed to serialize protobuf") - } - - fn add_field(message: &mut dyn MessageDyn, field_num: u32, value: Value) -> Result<()> { - let value = match value { - Value::Null => return Ok(()), - Value::Sequence(seq) => { - for s in seq.into_iter() { - add_field(message, field_num, s)?; - } - return Ok(()); - } - Value::Tagged(t) => { - // t.tag doesn't work for Match statements - if t.tag == *tags::BINARY { - let value = match t.value { - Value::String(s) => s, - _ => bail!("Binary data is not a string"), - }; - let value = (0..value.len()) - .step_by(2) - .map(|i| u8::from_str_radix(&value[i..i + 2], 16)) - .collect::, ParseIntError>>() - .context("Invalid hex string")?; - UnknownValue::LengthDelimited(value) - } else if t.tag == *tags::FIXED32 { - let value = match t.value { - Value::Number(s) if s.as_u64().is_some() => s.as_u64().unwrap(), - _ => bail!("Fixed32 data is not a u32"), - }; - UnknownValue::Fixed32(value as u32) - } else if t.tag == *tags::FIXED64 { - let value = match t.value { - Value::Number(s) if s.as_u64().is_some() => s.as_u64().unwrap(), - _ => bail!("Fixed64 data is not a u64"), - }; - 
UnknownValue::Fixed64(value) - } else { - log::info!("Unexpected YAML tag {}, discarding.", t.tag); - return add_field(message, field_num, t.value); - } - } - Value::Bool(b) => UnknownValue::Varint(b as u64), - Value::Number(n) => { - let field = message.descriptor_dyn().field_by_number(field_num); - int_value(n, field.as_ref()) - } - Value::String(s) => UnknownValue::LengthDelimited(s.into_bytes()), - Value::Mapping(m) => { - let mut descriptor = Empty::descriptor(); - if let Some(field) = message.descriptor_dyn().field_by_number(field_num) { - if let RuntimeFieldType::Singular(RuntimeType::Message(md)) = - field.runtime_field_type() - { - descriptor = md; - } else if let RuntimeFieldType::Map(_, _) = field.runtime_field_type() { - // TODO: handle maps. - } - } - let child_message = descriptor.new_instance(); - let val = merge_yaml_into_message(Value::Mapping(m), child_message)?; - UnknownValue::LengthDelimited(val) - } - }; - message.mut_unknown_fields_dyn().add_value(field_num, value); - Ok(()) - } - - fn int_value(n: Number, field: Option<&FieldDescriptor>) -> UnknownValue { - if let Some(field) = field { - if let Some(typ) = field.proto().type_.and_then(|t| t.enum_value().ok()) { - match typ { - TYPE_FIXED64 | Type::TYPE_SFIXED64 | Type::TYPE_DOUBLE => { - return if let Some(n) = n.as_u64() { - UnknownValue::Fixed64(n) - } else if let Some(n) = n.as_i64() { - UnknownValue::sfixed64(n) - } else { - UnknownValue::double(n.as_f64().expect("as_f64 never fails")) - } - } - TYPE_FIXED32 | Type::TYPE_SFIXED32 | Type::TYPE_FLOAT => { - return if let Some(n) = n.as_u64() { - UnknownValue::Fixed32(n as u32) - } else if let Some(n) = n.as_i64() { - UnknownValue::sfixed32(n as i32) - } else { - UnknownValue::float(n.as_f64().expect("as_f64 never fails") as f32) - } - } - _ => (), - } - } - } - if let Some(n) = n.as_u64() { - UnknownValue::Varint(n) - } else if let Some(n) = n.as_i64() { - UnknownValue::int64(n) - } else { - UnknownValue::double(n.as_f64().expect("as_f64 
never fails")) - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::test::TestMetadata; - - macro_rules! test_roundtrip { - ($name:ident,$proto:literal,$yaml:literal) => { - mod $name { - use super::*; - - pub(super) const PROTO: &[u8] = $proto; - pub(super) const YAML: &str = $yaml; - - #[test] - fn prettify() { - let result = Protobuf.prettify(PROTO, &TestMetadata::default()).unwrap(); - assert_eq!(result, YAML); - } - - #[test] - fn reencode() { - let result = Protobuf.reencode(YAML, &TestMetadata::default()).unwrap(); - assert_eq!(result, PROTO); - } - } - }; - } - - test_roundtrip!(varint, b"\x08\x96\x01", "1: 150\n"); - test_roundtrip!(varint_negative, b"\x08\x0B", "1: 11 # signed: -6\n"); - test_roundtrip!(binary, b"\x32\x03\x01\x02\x03", "6: !binary '010203'\n"); - test_roundtrip!(string, b"\x0A\x05\x68\x65\x6C\x6C\x6F", "1: hello\n"); - test_roundtrip!(nested, b"\x2A\x02\x08\x2A", "5:\n 1: 42\n"); - test_roundtrip!( - nested_twice, - b"\x2A\x04\x2A\x02\x08\x2A", - "5:\n 5:\n 1: 42\n" - ); - test_roundtrip!( - fixed64, - b"\x19\x00\x00\x00\x00\x00\x00\xF0\xBF", - "3: !fixed64 13830554455654793216 # double: -1, i64: -4616189618054758400\n" - ); - test_roundtrip!( - fixed64_positive, - b"\x19\x6E\x86\x1B\xF0\xF9\x21\x09\x40", - "3: !fixed64 4614256650576692846 # double: 3.14159\n" - ); - test_roundtrip!( - fixed64_no_float, - b"\x19\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", - "3: !fixed64 18446744073709551615 # i64: -1\n" - ); - test_roundtrip!( - fixed64_positive_no_float, - b"\x19\x01\x00\x00\x00\x00\x00\xF8\x7F", - "3: !fixed64 9221120237041090561\n" - ); - test_roundtrip!( - fixed32, - b"\x15\x00\x00\x80\xBF", - "2: !fixed32 3212836864 # float: -1, i32: -1082130432\n" - ); - test_roundtrip!( - fixed32_positive, - b"\x15\xD0\x0F\x49\x40", - "2: !fixed32 1078530000 # float: 3.14159\n" - ); - test_roundtrip!( - fixed32_no_float, - b"\x15\xFF\xFF\xFF\xFF", - "2: !fixed32 4294967295 # i32: -1\n" - ); - test_roundtrip!( - fixed32_positive_no_float, - 
b"\x15\x01\x00\xC0\x7F", - "2: !fixed32 2143289345\n" - ); - // From docs: "message Test5 { repeated int32 f = 6 [packed=true]; }" - // With values 3, 270, and 86942 - test_roundtrip!( - repeated_packed, - b"\x32\x06\x03\x8E\x02\x9E\xA7\x05", - "6: !binary 038e029ea705\n" - ); - test_roundtrip!( - repeated_varint, - b"\x08\x01\x08\x02\x08\x03", - "1:\n- 1 # signed: -1\n- 2\n- 3 # signed: -2\n" - ); - - mod reencode { - use super::*; - - #[test] - fn reencode_new_nested_message() { - let result = Protobuf - .reencode(nested::YAML, &TestMetadata::default()) - .unwrap(); - assert_eq!(result, nested::PROTO); - } - - #[test] - fn new_string_attr() { - let result = Protobuf - .reencode(string::YAML, &TestMetadata::default()) - .unwrap(); - assert_eq!(result, string::PROTO); - } - } - - #[test] - fn test_invalid_protobuf() { - let result = Protobuf.prettify(b"\xFF\xFF", &TestMetadata::default()); - assert!(result.is_err()); - } - - #[test] - fn test_no_crash() { - let result = Protobuf.prettify( - b"\n\x13gRPC testing server\x12\x07\n\x05Index\x12\x07\n\x05Empty\x12\x0c\n\nDummyUnary\x12\x0f\n\rSpecificError\x12\r\n\x0bRandomError\x12\x0e\n\x0cHeadersUnary\x12\x11\n\x0fNoResponseUnary", - &TestMetadata::default()).unwrap(); - assert_eq!(result, "1: gRPC testing server\n2:\n- 1: Index\n- 1: Empty\n- 1: DummyUnary\n- 1: SpecificError\n- 1: RandomError\n- 1: HeadersUnary\n- 1: NoResponseUnary\n"); - } - - #[test] - fn test_empty_protobuf() { - let result = Protobuf.prettify(b"", &TestMetadata::default()).unwrap(); - assert_eq!(result, "{} # empty protobuf message"); - } -} diff --git a/mitmproxy-contentviews/src/protobuf/existing_proto_definitions.rs b/mitmproxy-contentviews/src/protobuf/existing_proto_definitions.rs new file mode 100644 index 00000000..ac595da2 --- /dev/null +++ b/mitmproxy-contentviews/src/protobuf/existing_proto_definitions.rs @@ -0,0 +1,142 @@ +use crate::Metadata; +use anyhow::Context; +use protobuf::reflect::{FileDescriptor, MessageDescriptor}; +use 
protobuf_parse::Parser;
+use std::path::Path;
+
+pub(super) fn find_best_match(
+    metadata: &dyn Metadata,
+) -> anyhow::Result<Option<(MessageDescriptor, Vec<FileDescriptor>)>> {
+    // Parse existing protobuf definitions if available; without them there is nothing to match.
+    let file_descriptors = metadata
+        .protobuf_definitions()
+        .map(parse_file_descriptor_set)
+        .transpose()
+        .context("failed to parse proto file(s)")?;
+    let Some(file_descriptors) = file_descriptors else {
+        return Ok(None);
+    };
+
+    // Find the MessageDescriptor for the RPC named by the request path, or a fallback message.
+    let rpc_info = RpcInfo::from_metadata(metadata);
+    let Some(message) = find_best_message(&file_descriptors, rpc_info, metadata.is_http_request())
+    else {
+        return Ok(None);
+    };
+
+    Ok(Some((message, file_descriptors)))
+}
+
+fn find_best_message(
+    fds: &[FileDescriptor],
+    rpc: Option<RpcInfo>,
+    is_request: bool,
+) -> Option<MessageDescriptor> {
+    if let Some(rpc) = rpc {
+        for file in fds {
+            if file.proto().package() != rpc.package {
+                continue;
+            }
+            for service in file.services() {
+                if service.proto().name() != rpc.service {
+                    continue;
+                }
+                for method in service.methods() {
+                    if method.proto().name() != rpc.method {
+                        continue;
+                    }
+
+                    return Some(if is_request {
+                        method.input_type()
+                    } else {
+                        method.output_type()
+                    });
+                }
+                log::info!(
+                    "Found service {} in {}, but no method '{}'.",
+                    rpc.service,
+                    file.name(),
+                    rpc.method
+                );
+            }
+        }
+        log::info!("Did not find {rpc} in protobuf definitions.");
+    }
+
+    let file = fds.first()?;
+    if let Some(service) = file.services().next() {
+        if let Some(method) = service.methods().next() {
+            log::info!(
+                "Falling back to first defined service in {}: {}",
+                file.name(),
+                service.proto().name()
+            );
+            return Some(if is_request {
+                method.input_type()
+            } else {
+                method.output_type()
+            });
+        }
+    }
+    if let Some(message) = file.messages().next() {
+        log::info!(
+            "Falling back to first defined message in {}: {}",
+            file.name(),
+            message.proto().name()
+        );
+        return Some(message);
+    }
+    None
+}
+
+#[derive(Debug)]
+struct RpcInfo {
+    package: String,
+    service: String,
+    method: String,
+}
+
+impl RpcInfo {
+    fn from_metadata(metadata: &dyn Metadata) -> Option<Self> {
+        let path = metadata.get_path()?;
+        if path.contains('?') {
+            return None;
+        }
+        let mut parts = path.trim_start_matches('/').split('/');
+        let service_and_package = parts.next()?;
+        let method = parts.next()?;
+        if parts.next().is_some() {
+            return None;
+        }
+        let (package, service) = service_and_package
+            .rsplit_once('.')
+            .unwrap_or(("", service_and_package));
+
+        Some(Self {
+            package: package.to_string(),
+            service: service.to_string(),
+            method: method.to_string(),
+        })
+    }
+}
+
+impl std::fmt::Display for RpcInfo {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if !self.package.is_empty() {
+            write!(f, "{}.", self.package)?;
+        }
+        write!(f, "{}.{}", self.service, self.method)
+    }
+}
+
+fn parse_file_descriptor_set(definitions_path: &Path) -> anyhow::Result<Vec<FileDescriptor>> {
+    let mut parser = Parser::new();
+    parser.pure();
+    if let Some(parent) = definitions_path.parent() {
+        parser.include(parent);
+    }
+    parser.input(definitions_path);
+    let fds = parser.file_descriptor_set()?;
+    FileDescriptor::new_dynamic_fds(fds.file, &[])
+        .context("failed to create dynamic file descriptors")
+}
diff --git a/mitmproxy-contentviews/src/protobuf/mod.rs b/mitmproxy-contentviews/src/protobuf/mod.rs
new file mode 100644
index 00000000..9a740744
--- /dev/null
+++ b/mitmproxy-contentviews/src/protobuf/mod.rs
@@ -0,0 +1,10 @@
+mod existing_proto_definitions;
+mod proto_to_yaml;
+mod raw_to_proto;
+mod reencode;
+mod view_grpc;
+mod view_protobuf;
+mod yaml_to_pretty;
+
+pub use view_grpc::GRPC;
+pub use view_protobuf::Protobuf;
diff --git a/mitmproxy-contentviews/src/protobuf/proto_to_yaml.rs b/mitmproxy-contentviews/src/protobuf/proto_to_yaml.rs
new file mode 100644
index 00000000..71eeb789
--- /dev/null
+++ b/mitmproxy-contentviews/src/protobuf/proto_to_yaml.rs
@@ -0,0 +1,114 @@
+//! Parsed protobuf message => YAML value
+use protobuf::descriptor::field_descriptor_proto::Type;
+use protobuf::descriptor::field_descriptor_proto::Type::{
+    TYPE_BYTES, TYPE_FIXED32, TYPE_FIXED64, TYPE_UINT64,
+};
+use protobuf::reflect::{ReflectFieldRef, ReflectValueRef};
+use protobuf::MessageDyn;
+use serde_yaml::value::TaggedValue;
+use serde_yaml::{Mapping, Number, Value};
+use std::fmt::Write;
+use std::ops::Deref;
+
+pub(crate) fn message_to_yaml(message: &dyn MessageDyn) -> Value {
+    let mut ret = Mapping::new();
+
+    for field in message.descriptor_dyn().fields() {
+        let key = if field.name().starts_with("unknown_field_") {
+            Value::from(field.number())
+        } else {
+            Value::from(field.name())
+        };
+        let field_type = field
+            .proto()
+            .type_
+            .map(|t| t.enum_value_or(TYPE_BYTES))
+            .unwrap_or(TYPE_BYTES);
+
+        let value = match field.get_reflect(message) {
+            ReflectFieldRef::Optional(x) => {
+                if let Some(x) = x.value() {
+                    primitive_type_to_yaml(x, field_type)
+                } else {
+                    continue;
+                }
+            }
+            ReflectFieldRef::Repeated(x) => {
+                if x.is_empty() {
+                    continue;
+                }
+                Value::Sequence(
+                    x.into_iter()
+                        .map(|x| primitive_type_to_yaml(x, field_type))
+                        .collect(),
+                )
+            }
+            ReflectFieldRef::Map(x) => {
+                if x.is_empty() {
+                    continue;
+                }
+                Value::Mapping(
+                    x.into_iter()
+                        .map(|(k, v)| {
+                            (
+                                primitive_type_to_yaml(k, field_type),
+                                primitive_type_to_yaml(v, field_type),
+                            )
+                        })
+                        .collect(),
+                )
+            }
+        };
+        ret.insert(key, value);
+    }
+    Value::Mapping(ret)
+}
+
+fn primitive_type_to_yaml(x: ReflectValueRef, field_type: Type) -> Value {
+    match x {
+        ReflectValueRef::U32(x) => tag_number(Value::Number(Number::from(x)), field_type),
+        ReflectValueRef::U64(x) => tag_number(Value::Number(Number::from(x)), field_type),
+        ReflectValueRef::I32(x) => Value::Number(Number::from(x)),
+        ReflectValueRef::I64(x) => Value::Number(Number::from(x)),
+        ReflectValueRef::F32(x) => Value::Number(Number::from(x)),
+        ReflectValueRef::F64(x) => Value::Number(Number::from(x)),
+        ReflectValueRef::Bool(x) => Value::from(x),
+        ReflectValueRef::String(x) => Value::from(x),
+        ReflectValueRef::Bytes(x) => Value::Tagged(Box::new(TaggedValue {
+            tag: crate::protobuf::view_protobuf::tags::BINARY.clone(),
+            value: Value::String(bytes_to_hex_string(x)),
+        })),
+        ReflectValueRef::Enum(descriptor, i) => descriptor
+            .value_by_number(i)
+            .map(|v| Value::String(v.name().to_string()))
+            .unwrap_or_else(|| Value::Number(Number::from(i))),
+        ReflectValueRef::Message(m) => message_to_yaml(m.deref()),
+    }
+}
+
+fn tag_number(value: Value, field_type: Type) -> Value {
+    match field_type {
+        TYPE_UINT64 => Value::Tagged(Box::new(TaggedValue {
+            tag: crate::protobuf::view_protobuf::tags::VARINT.clone(),
+            value,
+        })),
+        TYPE_FIXED64 => Value::Tagged(Box::new(TaggedValue {
+            tag: crate::protobuf::view_protobuf::tags::FIXED64.clone(),
+            value,
+        })),
+        TYPE_FIXED32 => Value::Tagged(Box::new(TaggedValue {
+            tag: crate::protobuf::view_protobuf::tags::FIXED32.clone(),
+            value,
+        })),
+        _ => value,
+    }
+}
+
+/// Convert length-delimited protobuf data to a lowercase hex string
+fn bytes_to_hex_string(bytes: &[u8]) -> String {
+    let mut result = String::with_capacity(bytes.len() * 2);
+    for b in bytes {
+        let _ = write!(result, "{:02x}", b);
+    }
+    result
+}
diff --git a/mitmproxy-contentviews/src/protobuf/raw_to_proto.rs b/mitmproxy-contentviews/src/protobuf/raw_to_proto.rs
new file mode 100644
index 00000000..7e8b8f3c
--- /dev/null
+++ b/mitmproxy-contentviews/src/protobuf/raw_to_proto.rs
@@ -0,0 +1,184 @@
+//! Existing protobuf definition + raw data => merged protobuf definition
+use anyhow::Context;
+use protobuf::descriptor::field_descriptor_proto::Label::LABEL_REPEATED;
+use protobuf::descriptor::field_descriptor_proto::Type;
+use protobuf::descriptor::field_descriptor_proto::Type::{
+    TYPE_BYTES, TYPE_FIXED32, TYPE_FIXED64, TYPE_STRING, TYPE_UINT64,
+};
+use protobuf::descriptor::{DescriptorProto, FieldDescriptorProto, FileDescriptorProto};
+use protobuf::reflect::{FileDescriptor, MessageDescriptor};
+use protobuf::{EnumOrUnknown, MessageDyn, UnknownValueRef};
+use std::collections::BTreeMap;
+
+enum GuessedFieldType {
+    String,
+    Message(Box<DescriptorProto>),
+    Unknown,
+}
+
+/// Create a "merged" MessageDescriptor. Mostly a wrapper around `create_descriptor_proto`.
+///
+/// Only the top-level `message_type` entries of `existing`'s file are searched for the message
+/// to replace; `dependencies` is handed to `FileDescriptor::new_dynamic` unchanged.
+pub(crate) fn merge_proto_and_descriptor(
+    data: &[u8],
+    existing: &MessageDescriptor,
+    dependencies: &[FileDescriptor],
+) -> anyhow::Result<MessageDescriptor> {
+    let new_proto = create_descriptor_proto(data, existing)?;
+
+    let descriptor = {
+        let mut file_descriptor_proto = existing.file_descriptor_proto().clone();
+
+        let message_idx = file_descriptor_proto
+            .message_type
+            .iter()
+            .position(|d| d.name() == existing.name_to_package())
+            .context("failed to find existing message descriptor index")?;
+        file_descriptor_proto.message_type[message_idx] = new_proto;
+
+        /*
+        XXX: Not sure if this would be the right thing to do here
+        let dependencies = dependencies
+            .iter()
+            .cloned()
+            .filter(|d| d != existing.file_descriptor())
+            .collect::<Vec<_>>();
+         */
+
+        FileDescriptor::new_dynamic(file_descriptor_proto, dependencies)
+            .context("failed to create new dynamic file descriptor")?
+            .message_by_package_relative_name(existing.name_to_package())
+            .with_context(|| format!("did not find {} in descriptor", existing.name_to_package()))?
+    };
+
+    Ok(descriptor)
+}
+
+/// Create a new (empty) MessageDescriptor for the given package and name.
+pub(super) fn new_empty_descriptor(package: Option<String>, name: &str) -> MessageDescriptor {
+    let mut parts = name.rsplit('.');
+    let mut head = {
+        let mut descriptor = DescriptorProto::new();
+        descriptor.set_name(parts.next().expect("rsplit yields at least one part").to_string());
+        descriptor
+    };
+    for p in parts {
+        let mut descriptor = DescriptorProto::new();
+        descriptor.set_name(p.to_string());
+        descriptor.nested_type.push(head);
+        head = descriptor;
+    }
+
+    let file_descriptor_proto = {
+        let mut fd = FileDescriptorProto::new();
+        fd.package = package;
+        fd.message_type.push(head);
+        fd
+    };
+    FileDescriptor::new_dynamic(file_descriptor_proto, &[])
+        .expect("failed to build file descriptor for empty message")
+        .message_by_package_relative_name(name)
+        .expect("message was just added to the file descriptor")
+}
+
+/// Create a DescriptorProto that combines the `existing` MessageDescriptor with (guessed)
+/// metadata for all unknown fields in the protobuf `data`.
+fn create_descriptor_proto(
+    data: &[u8],
+    existing: &MessageDescriptor,
+) -> anyhow::Result<DescriptorProto> {
+    let message = existing
+        .parse_from_bytes(data)
+        .context("failed to parse protobuf")?;
+
+    let mut descriptor = existing.proto().clone();
+
+    let mut field_groups: BTreeMap<u32, Vec<UnknownValueRef>> = BTreeMap::new();
+    for (field_number, value) in message.unknown_fields_dyn().iter() {
+        field_groups.entry(field_number).or_default().push(value);
+    }
+
+    for (field_index, field_values) in field_groups.into_iter() {
+        let name = Some(format!("unknown_field_{}", field_index));
+        let mut add_int = |name: Option<String>, typ: Type| {
+            descriptor.field.push(FieldDescriptorProto {
+                number: Some(field_index as i32),
+                name,
+                type_: Some(EnumOrUnknown::from(typ)),
+                ..Default::default()
+            });
+        };
+        match field_values[0] {
+            // We can't use float/double here because of NaN handling.
+            UnknownValueRef::Fixed32(_) => add_int(name, TYPE_FIXED32),
+            UnknownValueRef::Fixed64(_) => add_int(name, TYPE_FIXED64),
+            UnknownValueRef::Varint(_) => add_int(name, TYPE_UINT64),
+            UnknownValueRef::LengthDelimited(_) => {
+                let field_values = field_values
+                    .iter()
+                    .map(|x| match x {
+                        UnknownValueRef::LengthDelimited(data) => Ok(*data),
+                        _ => Err(anyhow::anyhow!("varying types in protobuf")),
+                    })
+                    .collect::<anyhow::Result<Vec<&[u8]>>>()?;
+
+                match guess_field_type(existing, field_index, &field_values) {
+                    GuessedFieldType::String => add_int(name, TYPE_STRING),
+                    GuessedFieldType::Unknown => add_int(name, TYPE_BYTES),
+                    GuessedFieldType::Message(m) => {
+                        descriptor.field.push(FieldDescriptorProto {
+                            name,
+                            number: Some(field_index as i32),
+                            type_name: Some(format!(".{}.{}", existing.full_name(), m.name())),
+                            type_: Some(EnumOrUnknown::from(Type::TYPE_MESSAGE)),
+                            ..Default::default()
+                        });
+                        descriptor.nested_type.push(*m);
+                    }
+                }
+            }
+        }
+        if field_values.len() > 1 {
+            descriptor
+                .field
+                .last_mut()
+                .expect("we just added this field")
+                .set_label(LABEL_REPEATED);
+        }
+    }
+
+    Ok(descriptor)
+}
+
+/// Given all `values` of a field, guess its type.
+fn guess_field_type(
+    parent: &MessageDescriptor,
+    field_index: u32,
+    values: &[&[u8]],
+) -> GuessedFieldType {
+    if values.iter().all(|data| {
+        std::str::from_utf8(data).is_ok_and(|s| {
+            s.chars()
+                .all(|c| c.is_ascii_graphic() || c.is_ascii_whitespace())
+        })
+    }) {
+        return GuessedFieldType::String;
+    }
+
+    // Try a nested message. NOTE(review): `.all` below seems to parse as DescriptorProto itself.
+    let existing = new_empty_descriptor(
+        parent.file_descriptor_proto().package.clone(),
+        &format!("{}.UnknownField{}", parent.name_to_package(), field_index),
+    );
+    if let Ok(descriptor) = create_descriptor_proto(values[0], &existing) {
+        if values
+            .iter()
+            .skip(1)
+            .all(|data| descriptor.descriptor_dyn().parse_from_bytes(data).is_ok())
+        {
+            return GuessedFieldType::Message(Box::new(descriptor));
+        }
+    }
+
+    GuessedFieldType::Unknown
+}
diff --git a/mitmproxy-contentviews/src/protobuf/reencode.rs b/mitmproxy-contentviews/src/protobuf/reencode.rs
new file mode 100644
index 00000000..37fb462b
--- /dev/null
+++ b/mitmproxy-contentviews/src/protobuf/reencode.rs
@@ -0,0 +1,180 @@
+//! YAML value => protobuf wire format
+use crate::protobuf::view_protobuf::tags;
+use crate::Metadata;
+use anyhow::{bail, Context};
+use protobuf::descriptor::field_descriptor_proto::Type;
+use protobuf::descriptor::field_descriptor_proto::Type::{TYPE_FIXED32, TYPE_FIXED64};
+use protobuf::reflect::{FieldDescriptor, RuntimeFieldType, RuntimeType};
+use protobuf::well_known_types::empty::Empty;
+use protobuf::{MessageDyn, MessageFull, UnknownValue};
+use serde_yaml::{Number, Value};
+use std::num::ParseIntError;
+use std::str::FromStr;
+
+/// Largest field number the protobuf wire format allows (2^29 - 1).
+const MAX_FIELD_NUMBER: u32 = (1 << 29) - 1;
+
+/// Re-encode a YAML document as protobuf bytes.
+///
+/// Encoding starts from an empty message, so every entry is written as an unknown field.
+pub(crate) fn reencode_yaml(value: Value, _metadata: &dyn Metadata) -> anyhow::Result<Vec<u8>> {
+    let descriptor = Empty::descriptor();
+    let message = descriptor.new_instance();
+    merge_yaml_into_message(value, message)
+}
+
+/// Add every `key: value` entry of the YAML mapping `value` to `message` and serialize it.
+///
+/// Keys are field names known to `message`'s descriptor or field numbers. Fails if `value` is
+/// not a mapping or a key is neither a known name nor a number in `1..=MAX_FIELD_NUMBER`.
+fn merge_yaml_into_message(
+    value: Value,
+    mut message: Box<dyn MessageDyn>,
+) -> anyhow::Result<Vec<u8>> {
+    let Value::Mapping(mapping) = value else {
+        bail!("YAML is not a mapping");
+    };
+
+    for (key, value) in mapping.into_iter() {
+        let field_num = match key {
+            Value::String(key) => {
+                if let Some(field) = message.descriptor_dyn().field_by_name(&key) {
+                    i64::from(field.number())
+                } else if let Ok(field_num) = i64::from_str(&key) {
+                    field_num
+                } else {
+                    bail!("Unknown protobuf field key: {key}");
+                }
+            }
+            Value::Number(key) => {
+                let Some(field_num) = key.as_i64() else {
+                    bail!("Invalid protobuf field number: {key}");
+                };
+                field_num
+            }
+            other => {
+                bail!("Unexpected key: {other:?}");
+            }
+        };
+        // Reject out-of-range numbers instead of letting an `as` cast wrap them around.
+        let field_num = match u32::try_from(field_num) {
+            Ok(n) if (1..=MAX_FIELD_NUMBER).contains(&n) => n,
+            _ => bail!("Invalid protobuf field number: {field_num}"),
+        };
+
+        add_field(message.as_mut(), field_num, value)?;
+    }
+
+    message
+        .write_to_bytes_dyn()
+        .context("Failed to serialize protobuf")
+}
+
+/// Append `value` to `message` as unknown-field data for field `field_num`.
+///
+/// `null` is skipped, sequences add one value per element, mappings become length-delimited
+/// sub-messages, and `!binary`/`!fixed32`/`!fixed64` tags select the matching wire type.
+fn add_field(message: &mut dyn MessageDyn, field_num: u32, value: Value) -> anyhow::Result<()> {
+    let value = match value {
+        Value::Null => return Ok(()),
+        Value::Sequence(seq) => {
+            for s in seq.into_iter() {
+                add_field(message, field_num, s)?;
+            }
+            return Ok(());
+        }
+        Value::Tagged(t) => {
+            // t.tag doesn't work for Match statements
+            if t.tag == *tags::BINARY {
+                let value = match t.value {
+                    Value::String(s) => s,
+                    _ => bail!("Binary data is not a string"),
+                };
+                // Checked slicing: odd-length or non-ASCII input is an error, not a panic.
+                let value = (0..value.len())
+                    .step_by(2)
+                    .map(|i| u8::from_str_radix(value.get(i..i + 2).unwrap_or_default(), 16))
+                    .collect::<Result<Vec<u8>, ParseIntError>>()
+                    .context("Invalid hex string")?;
+                UnknownValue::LengthDelimited(value)
+            } else if t.tag == *tags::FIXED32 {
+                let value = match t.value {
+                    Value::Number(s) if s.as_u64().is_some() => s.as_u64().unwrap(),
+                    _ => bail!("Fixed32 data is not a u32"),
+                };
+                UnknownValue::Fixed32(u32::try_from(value).context("Fixed32 data is not a u32")?)
+            } else if t.tag == *tags::FIXED64 {
+                let value = match t.value {
+                    Value::Number(s) if s.as_u64().is_some() => s.as_u64().unwrap(),
+                    _ => bail!("Fixed64 data is not a u64"),
+                };
+                UnknownValue::Fixed64(value)
+            } else {
+                log::info!("Unexpected YAML tag {}, discarding.", t.tag);
+                return add_field(message, field_num, t.value);
+            }
+        }
+        Value::Bool(b) => UnknownValue::Varint(b as u64),
+        Value::Number(n) => {
+            let field = message.descriptor_dyn().field_by_number(field_num);
+            int_value(n, field.as_ref())
+        }
+        Value::String(s) => UnknownValue::LengthDelimited(s.into_bytes()),
+        Value::Mapping(m) => {
+            let mut descriptor = Empty::descriptor();
+            if let Some(field) = message.descriptor_dyn().field_by_number(field_num) {
+                if let RuntimeFieldType::Singular(RuntimeType::Message(md)) =
+                    field.runtime_field_type()
+                {
+                    descriptor = md;
+                } else if let RuntimeFieldType::Map(_, _) = field.runtime_field_type() {
+                    // TODO: handle maps.
+                }
+            }
+            let child_message = descriptor.new_instance();
+            let val = merge_yaml_into_message(Value::Mapping(m), child_message)?;
+            UnknownValue::LengthDelimited(val)
+        }
+    };
+    message.mut_unknown_fields_dyn().add_value(field_num, value);
+    Ok(())
+}
+
+/// Pick the wire encoding for the YAML number `n` written to `field`.
+///
+/// Fixed-width field types keep their width (32-bit casts use `as` and may truncate); without
+/// such a type the first fit of `Varint` (u64), `int64` (i64) and `double` is used.
+fn int_value(n: Number, field: Option<&FieldDescriptor>) -> UnknownValue {
+    if let Some(field) = field {
+        if let Some(typ) = field.proto().type_.and_then(|t| t.enum_value().ok()) {
+            match typ {
+                TYPE_FIXED64 | Type::TYPE_SFIXED64 | Type::TYPE_DOUBLE => {
+                    return if let Some(n) = n.as_u64() {
+                        UnknownValue::Fixed64(n)
+                    } else if let Some(n) = n.as_i64() {
+                        UnknownValue::sfixed64(n)
+                    } else {
+                        UnknownValue::double(n.as_f64().expect("as_f64 never fails"))
+                    }
+                }
+                TYPE_FIXED32 | Type::TYPE_SFIXED32 | Type::TYPE_FLOAT => {
+                    return if let Some(n) = n.as_u64() {
+                        UnknownValue::Fixed32(n as u32)
+                    } else if let Some(n) = n.as_i64() {
+                        UnknownValue::sfixed32(n as i32)
+                    } else {
+                        UnknownValue::float(n.as_f64().expect("as_f64 never fails") as f32)
+                    }
+                }
+                _ => (),
+            }
+        }
+    }
+    if let Some(n) = n.as_u64() {
+        UnknownValue::Varint(n)
+    } else if let Some(n) = n.as_i64() {
+        UnknownValue::int64(n)
+    } else {
+        UnknownValue::double(n.as_f64().expect("as_f64 never fails"))
+    }
+}
diff --git a/mitmproxy-contentviews/src/protobuf/test.proto b/mitmproxy-contentviews/src/protobuf/test.proto
new file mode 100644
index 00000000..e3e6363c
--- /dev/null
+++ b/mitmproxy-contentviews/src/protobuf/test.proto
@@
-0,0 +1,5 @@ +syntax = "proto3"; + +message TestMessage { + int32 example = 1; +} \ No newline at end of file diff --git a/mitmproxy-contentviews/src/grpc.rs b/mitmproxy-contentviews/src/protobuf/view_grpc.rs similarity index 64% rename from mitmproxy-contentviews/src/grpc.rs rename to mitmproxy-contentviews/src/protobuf/view_grpc.rs index 2d1ce615..61bbd364 100644 --- a/mitmproxy-contentviews/src/grpc.rs +++ b/mitmproxy-contentviews/src/protobuf/view_grpc.rs @@ -1,3 +1,5 @@ +use super::{existing_proto_definitions, reencode}; +use crate::protobuf::raw_to_proto::new_empty_descriptor; use crate::{Metadata, Prettify, Protobuf, Reencode}; use anyhow::{bail, Context, Result}; use flate2::read::{DeflateDecoder, GzDecoder}; @@ -20,6 +22,9 @@ impl Prettify for GRPC { fn prettify(&self, mut data: &[u8], metadata: &dyn Metadata) -> Result { let mut protos = vec![]; + let (descriptor, dependencies) = existing_proto_definitions::find_best_match(metadata)? + .unwrap_or_else(|| (new_empty_descriptor(None, "Unknown"), vec![])); + while !data.is_empty() { let compressed = match data[0] { 0 => false, @@ -54,7 +59,7 @@ impl Prettify for GRPC { } else { proto }; - protos.push(Protobuf.prettify(proto, metadata)?); + protos.push(Protobuf.prettify_with_descriptor(proto, &descriptor, &dependencies)?); data = &data[5 + len..]; } @@ -76,7 +81,7 @@ impl Reencode for GRPC { let mut ret = vec![]; for document in serde_yaml::Deserializer::from_str(data) { let value = Value::deserialize(document).context("Invalid YAML")?; - let proto = super::protobuf::reencode::reencode_yaml(value, metadata)?; + let proto = reencode::reencode_yaml(value, metadata)?; ret.push(0); // uncompressed ret.extend(u32::to_be_bytes(proto.len() as u32)); ret.extend(proto); @@ -91,6 +96,7 @@ mod tests { use crate::test::TestMetadata; const TEST_YAML: &str = "1: 150\n\n---\n\n1: 150\n"; + const TEST_YAML_KNOWN: &str = "example: 150\n\n---\n\nexample: 150\n"; const TEST_GRPC: &[u8] = &[ 0, 0, 0, 0, 3, 8, 150, 1, // first 
message 0, 0, 0, 0, 3, 8, 150, 1, // second message @@ -156,4 +162,64 @@ mod tests { 0.0 ); } + + #[test] + fn test_existing_proto() { + let metadata = TestMetadata::default().with_protobuf_definitions(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/protobuf/simple.proto" + )); + let res = GRPC.prettify(TEST_GRPC, &metadata).unwrap(); + assert_eq!(res, TEST_YAML_KNOWN); + } + + #[test] + fn test_existing_service_request() { + let metadata = TestMetadata::default() + .with_is_http_request(true) + .with_path("/Service/Method") + .with_protobuf_definitions(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/protobuf/simple_service.proto" + )); + let req = GRPC.prettify(TEST_GRPC, &metadata).unwrap(); + assert_eq!(req, TEST_YAML); + } + + #[test] + fn test_existing_service_response() { + let metadata = TestMetadata::default() + .with_is_http_request(false) + .with_path("/Service/Method") + .with_protobuf_definitions(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/protobuf/simple_service.proto" + )); + let req = GRPC.prettify(TEST_GRPC, &metadata).unwrap(); + assert_eq!(req, TEST_YAML_KNOWN); + } + + #[test] + fn test_existing_package() { + let metadata = TestMetadata::default() + .with_path("/example.simple.Service/Method") + .with_protobuf_definitions(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/protobuf/simple_package.proto" + )); + let req = GRPC.prettify(TEST_GRPC, &metadata).unwrap(); + assert_eq!(req, TEST_YAML_KNOWN); + } + + #[test] + fn test_existing_nested() { + let metadata = TestMetadata::default() + .with_path("/example.nested.Service/Method") + .with_protobuf_definitions(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/protobuf/nested.proto" + )); + let req = GRPC.prettify(TEST_GRPC, &metadata).unwrap(); + assert_eq!(req, TEST_YAML_KNOWN); + } } diff --git a/mitmproxy-contentviews/src/protobuf/view_protobuf.rs b/mitmproxy-contentviews/src/protobuf/view_protobuf.rs new file mode 100644 index 00000000..36d39623 --- /dev/null +++ 
b/mitmproxy-contentviews/src/protobuf/view_protobuf.rs @@ -0,0 +1,214 @@ +use crate::protobuf::raw_to_proto::new_empty_descriptor; +use crate::protobuf::{proto_to_yaml, raw_to_proto, reencode, yaml_to_pretty}; +use crate::{Metadata, Prettify, Reencode}; +use anyhow::{Context, Result}; +use mitmproxy_highlight::Language; +use protobuf::reflect::{FileDescriptor, MessageDescriptor}; +use serde_yaml::Value; + +pub(super) mod tags { + use regex::Regex; + use serde_yaml::value::Tag; + use std::sync::LazyLock; + + pub static BINARY: LazyLock = LazyLock::new(|| Tag::new("binary")); + pub static VARINT: LazyLock = LazyLock::new(|| Tag::new("varint")); + pub static FIXED32: LazyLock = LazyLock::new(|| Tag::new("fixed32")); + pub static FIXED64: LazyLock = LazyLock::new(|| Tag::new("fixed64")); + + pub static VARINT_RE: LazyLock = + LazyLock::new(|| Regex::new(&format!(r"{} (\d+)", *VARINT)).unwrap()); + pub static FIXED32_RE: LazyLock = + LazyLock::new(|| Regex::new(&format!(r"{} (\d+)", *FIXED32)).unwrap()); + pub static FIXED64_RE: LazyLock = + LazyLock::new(|| Regex::new(&format!(r"{} (\d+)", *FIXED64)).unwrap()); +} + +pub struct Protobuf; + +impl Prettify for Protobuf { + fn name(&self) -> &str { + "Protobuf" + } + + fn syntax_highlight(&self) -> Language { + Language::Yaml + } + + fn prettify(&self, data: &[u8], _metadata: &dyn Metadata) -> Result { + // FIXME use new create_new + self.prettify_with_descriptor(data, &new_empty_descriptor(None, "Unknown"), &[]) + } + + fn render_priority(&self, _data: &[u8], metadata: &dyn Metadata) -> f64 { + match metadata.content_type() { + Some("application/x-protobuf") => 1.0, + Some("application/x-protobuffer") => 1.0, + _ => 0.0, + } + } +} + +impl Protobuf { + pub(super) fn prettify_with_descriptor( + &self, + data: &[u8], + descriptor: &MessageDescriptor, + dependencies: &[FileDescriptor], + ) -> Result { + // Check if data is empty first + if data.is_empty() { + return Ok("{} # empty protobuf message".to_string()); + } + + let 
descriptor = raw_to_proto::merge_proto_and_descriptor(data, descriptor, dependencies)?; + + // Parse protobuf and convert to YAML + let message = descriptor + .parse_from_bytes(data) + .context("Error parsing protobuf")?; + let yaml_value = proto_to_yaml::message_to_yaml(message.as_ref()); + + let yaml_str = serde_yaml::to_string(&yaml_value).context("Failed to convert to YAML")?; + yaml_to_pretty::apply_replacements(&yaml_str) + } +} + +impl Reencode for Protobuf { + fn reencode(&self, data: &str, metadata: &dyn Metadata) -> Result> { + let value: Value = serde_yaml::from_str(data).context("Invalid YAML")?; + reencode::reencode_yaml(value, metadata) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test::TestMetadata; + + macro_rules! test_roundtrip { + ($name:ident,$proto:literal,$yaml:literal) => { + mod $name { + use super::*; + + pub(super) const PROTO: &[u8] = $proto; + pub(super) const YAML: &str = $yaml; + + #[test] + fn prettify() { + let result = Protobuf.prettify(PROTO, &TestMetadata::default()).unwrap(); + assert_eq!(result, YAML); + } + + #[test] + fn reencode() { + let result = Protobuf.reencode(YAML, &TestMetadata::default()).unwrap(); + assert_eq!(result, PROTO); + } + } + }; + } + + test_roundtrip!(varint, b"\x08\x96\x01", "1: 150\n"); + test_roundtrip!(varint_negative, b"\x08\x0B", "1: 11 # signed: -6\n"); + test_roundtrip!(binary, b"\x32\x03\x01\x02\x03", "6: !binary '010203'\n"); + test_roundtrip!(string, b"\x0A\x05\x68\x65\x6C\x6C\x6F", "1: hello\n"); + test_roundtrip!(nested, b"\x2A\x02\x08\x2A", "5:\n 1: 42\n"); + test_roundtrip!( + nested_twice, + b"\x2A\x04\x2A\x02\x08\x2A", + "5:\n 5:\n 1: 42\n" + ); + test_roundtrip!( + fixed64, + b"\x19\x00\x00\x00\x00\x00\x00\xF0\xBF", + "3: !fixed64 13830554455654793216 # double: -1, i64: -4616189618054758400\n" + ); + test_roundtrip!( + fixed64_positive, + b"\x19\x6E\x86\x1B\xF0\xF9\x21\x09\x40", + "3: !fixed64 4614256650576692846 # double: 3.14159\n" + ); + test_roundtrip!( + 
fixed64_no_float, + b"\x19\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", + "3: !fixed64 18446744073709551615 # i64: -1\n" + ); + test_roundtrip!( + fixed64_positive_no_float, + b"\x19\x01\x00\x00\x00\x00\x00\xF8\x7F", + "3: !fixed64 9221120237041090561\n" + ); + test_roundtrip!( + fixed32, + b"\x15\x00\x00\x80\xBF", + "2: !fixed32 3212836864 # float: -1, i32: -1082130432\n" + ); + test_roundtrip!( + fixed32_positive, + b"\x15\xD0\x0F\x49\x40", + "2: !fixed32 1078530000 # float: 3.14159\n" + ); + test_roundtrip!( + fixed32_no_float, + b"\x15\xFF\xFF\xFF\xFF", + "2: !fixed32 4294967295 # i32: -1\n" + ); + test_roundtrip!( + fixed32_positive_no_float, + b"\x15\x01\x00\xC0\x7F", + "2: !fixed32 2143289345\n" + ); + // From docs: "message Test5 { repeated int32 f = 6 [packed=true]; }" + // With values 3, 270, and 86942 + test_roundtrip!( + repeated_packed, + b"\x32\x06\x03\x8E\x02\x9E\xA7\x05", + "6: !binary 038e029ea705\n" + ); + test_roundtrip!( + repeated_varint, + b"\x08\x01\x08\x02\x08\x03", + "1:\n- 1 # signed: -1\n- 2\n- 3 # signed: -2\n" + ); + + mod reencode { + use super::*; + + #[test] + fn reencode_new_nested_message() { + let result = Protobuf + .reencode(nested::YAML, &TestMetadata::default()) + .unwrap(); + assert_eq!(result, nested::PROTO); + } + + #[test] + fn new_string_attr() { + let result = Protobuf + .reencode(string::YAML, &TestMetadata::default()) + .unwrap(); + assert_eq!(result, string::PROTO); + } + } + + #[test] + fn test_invalid_protobuf() { + let result = Protobuf.prettify(b"\xFF\xFF", &TestMetadata::default()); + assert!(result.is_err()); + } + + #[test] + fn test_no_crash() { + let result = Protobuf.prettify( + b"\n\x13gRPC testing server\x12\x07\n\x05Index\x12\x07\n\x05Empty\x12\x0c\n\nDummyUnary\x12\x0f\n\rSpecificError\x12\r\n\x0bRandomError\x12\x0e\n\x0cHeadersUnary\x12\x11\n\x0fNoResponseUnary", + &TestMetadata::default()).unwrap(); + assert_eq!(result, "1: gRPC testing server\n2:\n- 1: Index\n- 1: Empty\n- 1: DummyUnary\n- 1: SpecificError\n- 1: 
RandomError\n- 1: HeadersUnary\n- 1: NoResponseUnary\n"); + } + + #[test] + fn test_empty_protobuf() { + let result = Protobuf.prettify(b"", &TestMetadata::default()).unwrap(); + assert_eq!(result, "{} # empty protobuf message"); + } +} diff --git a/mitmproxy-contentviews/src/protobuf/yaml_to_pretty.rs b/mitmproxy-contentviews/src/protobuf/yaml_to_pretty.rs new file mode 100644 index 00000000..1ba8013a --- /dev/null +++ b/mitmproxy-contentviews/src/protobuf/yaml_to_pretty.rs @@ -0,0 +1,72 @@ +/// YAML value => prettified text +use crate::protobuf::view_protobuf::tags; +use regex::Captures; + +// Helper method to apply regex replacements to the YAML output +pub(crate) fn apply_replacements(yaml_str: &str) -> anyhow::Result { + // Replace !fixed32 tags with comments showing float and i32 interpretations + let with_fixed32 = tags::FIXED32_RE.replace_all(yaml_str, |caps: &Captures| { + let value = caps[1].parse::().unwrap_or_default(); + let float_value = f32::from_bits(value); + let i32_value = value as i32; + + if !float_value.is_nan() && float_value < 0.0 { + format!( + "{} {} # float: {}, i32: {}", + *tags::FIXED32, + value, + float_value, + i32_value + ) + } else if !float_value.is_nan() { + format!("{} {} # float: {}", *tags::FIXED32, value, float_value) + } else if i32_value < 0 { + format!("{} {} # i32: {}", *tags::FIXED32, value, i32_value) + } else { + format!("{} {}", *tags::FIXED32, value) + } + }); + + // Replace !fixed64 tags with comments showing double and i64 interpretations + let with_fixed64 = tags::FIXED64_RE.replace_all(&with_fixed32, |caps: &Captures| { + let value = caps[1].parse::().unwrap_or_default(); + let double_value = f64::from_bits(value); + let i64_value = value as i64; + + if !double_value.is_nan() && double_value < 0.0 { + format!( + "{} {} # double: {}, i64: {}", + *tags::FIXED64, + value, + double_value, + i64_value + ) + } else if !double_value.is_nan() { + format!("{} {} # double: {}", *tags::FIXED64, value, double_value) + } else 
if i64_value < 0 { + format!("{} {} # i64: {}", *tags::FIXED64, value, i64_value) + } else { + format!("{} {}", *tags::FIXED64, value) + } + }); + + // Replace !varint tags with comments showing signed interpretation if different + let with_varint = tags::VARINT_RE.replace_all(&with_fixed64, |caps: &Captures| { + let unsigned_value = caps[1].parse::().unwrap_or_default(); + let i64_zigzag = decode_zigzag64(unsigned_value); + + // Only show signed value if it's different from unsigned + if i64_zigzag < 0 { + format!("{} # signed: {}", unsigned_value, i64_zigzag) + } else { + unsigned_value.to_string() + } + }); + + Ok(with_varint.to_string()) +} + +// Decode a zigzag-encoded 64-bit integer +fn decode_zigzag64(n: u64) -> i64 { + ((n >> 1) as i64) ^ (-((n & 1) as i64)) +} diff --git a/mitmproxy-contentviews/testdata/protobuf/grpcbin.proto b/mitmproxy-contentviews/testdata/protobuf/grpcbin.proto new file mode 100644 index 00000000..4729bee2 --- /dev/null +++ b/mitmproxy-contentviews/testdata/protobuf/grpcbin.proto @@ -0,0 +1,77 @@ +syntax = "proto3"; + +package grpcbin; + +service GRPCBin { + // This endpoint + rpc Index(EmptyMessage) returns (IndexReply) {} + // Unary endpoint that takes no argument and replies an empty message. 
+ rpc Empty(EmptyMessage) returns (EmptyMessage) {} + // Unary endpoint that replies a received DummyMessage + rpc DummyUnary(DummyMessage) returns (DummyMessage) {} + // Stream endpoint that sends back 10 times the received DummyMessage + rpc DummyServerStream(DummyMessage) returns (stream DummyMessage) {} + // Stream endpoint that receives 10 DummyMessages and replies with the last received one + rpc DummyClientStream(stream DummyMessage) returns (DummyMessage) {} + // Stream endpoint that sends back a received DummyMessage indefinitely (chat mode) + rpc DummyBidirectionalStreamStream(stream DummyMessage) returns (stream DummyMessage) {} + // Unary endpoint that raises a specified (by code) gRPC error + rpc SpecificError(SpecificErrorRequest) returns (EmptyMessage) {} + // Unary endpoint that raises a random gRPC error + rpc RandomError(EmptyMessage) returns (EmptyMessage) {} + // Unary endpoint that returns headers + rpc HeadersUnary(EmptyMessage) returns (HeadersMessage) {} + // Unary endpoint that returns no respnose + rpc NoResponseUnary(EmptyMessage) returns (EmptyMessage) {} +} + +message HeadersMessage { + message Values { + repeated string values = 1; + } + map Metadata = 1; +} + +message SpecificErrorRequest { + uint32 code = 1; + string reason = 2; +} + +message EmptyMessage {} + +message DummyMessage { + message Sub { + string f_string = 1; + } + enum Enum { + ENUM_0 = 0; + ENUM_1 = 1; + ENUM_2 = 2; + } + string f_string = 1; + repeated string f_strings = 2; + int32 f_int32 = 3; + repeated int32 f_int32s = 4; + Enum f_enum = 5; + repeated Enum f_enums = 6; + Sub f_sub = 7; + repeated Sub f_subs = 8; + bool f_bool = 9; + repeated bool f_bools = 10; + int64 f_int64 = 11; + repeated int64 f_int64s= 12; + bytes f_bytes = 13; + repeated bytes f_bytess = 14; + float f_float = 15; + repeated float f_floats = 16; + // TODO: timestamp, duration, oneof, any, maps, fieldmask, wrapper type, struct, listvalue, value, nullvalue, deprecated +} + +message IndexReply { 
+ message Endpoint { + string path = 1; + string description = 2; + } + string description = 1; + repeated Endpoint endpoints = 2; +} diff --git a/mitmproxy-contentviews/testdata/protobuf/nested.proto b/mitmproxy-contentviews/testdata/protobuf/nested.proto new file mode 100644 index 00000000..d3992aa7 --- /dev/null +++ b/mitmproxy-contentviews/testdata/protobuf/nested.proto @@ -0,0 +1,17 @@ +syntax = "proto3"; + +package example.nested; + +service Service { + rpc Method(Empty) returns (Response) {} +} + +message Empty {} + +message Response { + message Nested { + int32 example = 1; + } + int32 example = 1; + Nested nested = 2; +} diff --git a/mitmproxy-contentviews/testdata/protobuf/simple.proto b/mitmproxy-contentviews/testdata/protobuf/simple.proto new file mode 100644 index 00000000..f1eb85bc --- /dev/null +++ b/mitmproxy-contentviews/testdata/protobuf/simple.proto @@ -0,0 +1,5 @@ +syntax = "proto3"; + +message TestMessage { + int32 example = 1; +} diff --git a/mitmproxy-contentviews/testdata/protobuf/simple_package.proto b/mitmproxy-contentviews/testdata/protobuf/simple_package.proto new file mode 100644 index 00000000..fd46b80e --- /dev/null +++ b/mitmproxy-contentviews/testdata/protobuf/simple_package.proto @@ -0,0 +1,18 @@ +syntax = "proto3"; + +package example.simple; + +service Other { + rpc Method(Empty) returns (Response) {} +} + +service Service { + // This endpoint + rpc Method(Empty) returns (Response) {} +} + +message Empty {} + +message Response { + int32 example = 1; +} diff --git a/mitmproxy-contentviews/testdata/protobuf/simple_service.proto b/mitmproxy-contentviews/testdata/protobuf/simple_service.proto new file mode 100644 index 00000000..7b12659e --- /dev/null +++ b/mitmproxy-contentviews/testdata/protobuf/simple_service.proto @@ -0,0 +1,16 @@ +syntax = "proto3"; + +service Other { + rpc Method(Empty) returns (Response) {} +} + +service Service { + // This endpoint + rpc Method(Empty) returns (Response) {} +} + +message Empty {} + +message 
Response { + int32 example = 1; +} diff --git a/mitmproxy-rs/src/contentview.rs b/mitmproxy-rs/src/contentview.rs index f8020635..036e8c95 100644 --- a/mitmproxy-rs/src/contentview.rs +++ b/mitmproxy-rs/src/contentview.rs @@ -1,10 +1,13 @@ use mitmproxy_contentviews::{Metadata, Prettify, Reencode}; use pyo3::{exceptions::PyValueError, prelude::*}; use std::cell::OnceCell; +use std::path::Path; pub struct PythonMetadata<'py> { inner: Bound<'py, PyAny>, content_type: OnceCell>, + protobuf_definitions: OnceCell>, + path: OnceCell>, } impl<'py> PythonMetadata<'py> { @@ -12,6 +15,8 @@ impl<'py> PythonMetadata<'py> { PythonMetadata { inner, content_type: OnceCell::new(), + protobuf_definitions: OnceCell::new(), + path: OnceCell::new(), } } } @@ -34,6 +39,46 @@ impl Metadata for PythonMetadata<'_> { let headers = http_message.getattr("headers").ok()?; headers.get_item(name).ok()?.extract::().ok() } + + fn get_path(&self) -> Option<&str> { + self.path + .get_or_init(|| { + let flow = self.inner.getattr("flow").ok()?; + let request = flow.getattr("request").ok()?; + request.getattr("path").ok()?.extract::().ok() + }) + .as_deref() + } + + fn protobuf_definitions(&self) -> Option<&Path> { + self.protobuf_definitions + .get_or_init(|| { + self.inner + .getattr("protobuf_definitions") + .ok()? 
+ .extract::() + .ok() + .map(std::path::PathBuf::from) + }) + .as_deref() + } + + fn is_http_request(&self) -> bool { + let Ok(http_message) = self.inner.getattr("http_message") else { + return false; + }; + let Ok(flow) = self + .inner + .getattr("flow") + .and_then(|flow| flow.getattr("request")) + else { + return false; + }; + let Ok(request) = flow.getattr("request") else { + return false; + }; + http_message.is(&request) + } } impl<'py> FromPyObject<'py> for PythonMetadata<'py> { @@ -66,7 +111,7 @@ impl Contentview { pub fn prettify(&self, data: Vec, metadata: PythonMetadata) -> PyResult { self.0 .prettify(&data, &metadata) - .map_err(|e| PyValueError::new_err(e.to_string())) + .map_err(|e| PyValueError::new_err(format!("{:?}", e))) } /// Return the priority of this view for rendering data. @@ -112,7 +157,7 @@ impl InteractiveContentview { pub fn reencode(&self, data: &str, metadata: PythonMetadata) -> PyResult> { self.0 .reencode(data, &metadata) - .map_err(|e| PyValueError::new_err(e.to_string())) + .map_err(|e| PyValueError::new_err(format!("{:?}", e))) } fn __repr__(self_: PyRef<'_, Self>) -> PyResult { diff --git a/mitmproxy-rs/src/server/local_redirector.rs b/mitmproxy-rs/src/server/local_redirector.rs index 4b8164f3..6390a9e0 100644 --- a/mitmproxy-rs/src/server/local_redirector.rs +++ b/mitmproxy-rs/src/server/local_redirector.rs @@ -1,4 +1,5 @@ use mitmproxy::intercept_conf::InterceptConf; +use pyo3::exceptions::PyValueError; #[cfg(target_os = "linux")] use mitmproxy::packet_sources::linux::LinuxConf; @@ -38,7 +39,7 @@ impl LocalRedirector { fn describe_spec(spec: &str) -> PyResult { InterceptConf::try_from(spec) .map(|conf| conf.description()) - .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string())) + .map_err(|e| PyValueError::new_err(format!("{:?}", e))) } /// Set a new intercept spec. 
diff --git a/mitmproxy-rs/src/syntax_highlight.rs b/mitmproxy-rs/src/syntax_highlight.rs index 6d30083b..5e9fe6bf 100644 --- a/mitmproxy-rs/src/syntax_highlight.rs +++ b/mitmproxy-rs/src/syntax_highlight.rs @@ -25,7 +25,7 @@ pub fn highlight(text: String, language: &str) -> PyResult Date: Sun, 13 Apr 2025 19:49:06 +0200 Subject: [PATCH 24/26] first review pass for existing protobuf parsing --- .../src/protobuf/raw_to_proto.rs | 4 +-- .../src/protobuf/view_protobuf.rs | 31 ++++++++++++++++--- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/mitmproxy-contentviews/src/protobuf/raw_to_proto.rs b/mitmproxy-contentviews/src/protobuf/raw_to_proto.rs index 7e8b8f3c..fecfe223 100644 --- a/mitmproxy-contentviews/src/protobuf/raw_to_proto.rs +++ b/mitmproxy-contentviews/src/protobuf/raw_to_proto.rs @@ -37,7 +37,7 @@ pub(crate) fn merge_proto_and_descriptor( file_descriptor_proto.message_type[message_idx] = new_proto; /* - XXX: Not sure if this would be the right thing to do here + XXX: Skipping this as it doesn't seem to bring any immediate benefits. 
let dependencies = dependencies .iter() .cloned() @@ -89,7 +89,7 @@ fn create_descriptor_proto( ) -> anyhow::Result { let message = existing .parse_from_bytes(data) - .context("failed to parse protobuf")?; + .with_context(|| format!("failed to parse protobuf: {}", existing.full_name()))?; let mut descriptor = existing.proto().clone(); diff --git a/mitmproxy-contentviews/src/protobuf/view_protobuf.rs b/mitmproxy-contentviews/src/protobuf/view_protobuf.rs index 36d39623..45b0b622 100644 --- a/mitmproxy-contentviews/src/protobuf/view_protobuf.rs +++ b/mitmproxy-contentviews/src/protobuf/view_protobuf.rs @@ -1,5 +1,7 @@ use crate::protobuf::raw_to_proto::new_empty_descriptor; -use crate::protobuf::{proto_to_yaml, raw_to_proto, reencode, yaml_to_pretty}; +use crate::protobuf::{ + existing_proto_definitions, proto_to_yaml, raw_to_proto, reencode, yaml_to_pretty, +}; use crate::{Metadata, Prettify, Reencode}; use anyhow::{Context, Result}; use mitmproxy_highlight::Language; @@ -35,9 +37,10 @@ impl Prettify for Protobuf { Language::Yaml } - fn prettify(&self, data: &[u8], _metadata: &dyn Metadata) -> Result { - // FIXME use new create_new - self.prettify_with_descriptor(data, &new_empty_descriptor(None, "Unknown"), &[]) + fn prettify(&self, data: &[u8], metadata: &dyn Metadata) -> Result { + let (descriptor, dependencies) = existing_proto_definitions::find_best_match(metadata)? 
+ .unwrap_or_else(|| (new_empty_descriptor(None, "Unknown"), vec![])); + self.prettify_with_descriptor(data, &descriptor, &dependencies) } fn render_priority(&self, _data: &[u8], metadata: &dyn Metadata) -> f64 { @@ -211,4 +214,24 @@ mod tests { let result = Protobuf.prettify(b"", &TestMetadata::default()).unwrap(); assert_eq!(result, "{} # empty protobuf message"); } + + #[test] + fn test_existing() { + let metadata = TestMetadata::default().with_protobuf_definitions(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/protobuf/simple.proto" + )); + let result = Protobuf.prettify(varint::PROTO, &metadata).unwrap(); + assert_eq!(result, "example: 150\n"); + } + + #[test] + fn test_existing_mismatch() { + let metadata = TestMetadata::default().with_protobuf_definitions(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/protobuf/simple.proto" + )); + let result = Protobuf.prettify(string::PROTO, &metadata); + assert!(result.is_err()); + } } From 4a2d9b18d796210baba214a67b828780c04e93b4 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Mon, 14 Apr 2025 11:13:05 +0200 Subject: [PATCH 25/26] fix review nits --- .../protobuf/existing_proto_definitions.rs | 31 ++++- .../src/protobuf/proto_to_yaml.rs | 36 ++---- .../src/protobuf/raw_to_proto.rs | 26 +++-- .../src/protobuf/reencode.rs | 9 +- .../src/protobuf/view_grpc.rs | 11 +- .../src/protobuf/view_protobuf.rs | 110 +++++++++--------- .../src/protobuf/yaml_to_pretty.rs | 2 +- 7 files changed, 123 insertions(+), 102 deletions(-) diff --git a/mitmproxy-contentviews/src/protobuf/existing_proto_definitions.rs b/mitmproxy-contentviews/src/protobuf/existing_proto_definitions.rs index ac595da2..5dd6e14a 100644 --- a/mitmproxy-contentviews/src/protobuf/existing_proto_definitions.rs +++ b/mitmproxy-contentviews/src/protobuf/existing_proto_definitions.rs @@ -1,30 +1,49 @@ +use crate::protobuf::raw_to_proto::new_empty_descriptor; use crate::Metadata; use anyhow::Context; use protobuf::reflect::{FileDescriptor, MessageDescriptor}; 
use protobuf_parse::Parser; use std::path::Path; +pub(super) struct DescriptorWithDeps { + pub descriptor: MessageDescriptor, + pub dependencies: Vec, +} + +impl Default for DescriptorWithDeps { + fn default() -> Self { + Self { + descriptor: new_empty_descriptor(None, "Unknown"), + dependencies: vec![], + } + } +} + pub(super) fn find_best_match( metadata: &dyn Metadata, -) -> anyhow::Result)>> { +) -> anyhow::Result> { // Parse existing protobuf definitions if available - let file_descriptors = metadata + let Some(file_descriptors) = metadata .protobuf_definitions() .map(parse_file_descriptor_set) .transpose() - .context("failed to parse proto file(s)")?; - let Some(file_descriptors) = file_descriptors else { + .context("failed to parse proto file(s)")? + else { return Ok(None); }; // Find MessageDescriptor for the RPC. let rpc_info = RpcInfo::from_metadata(metadata); - let Some(message) = find_best_message(&file_descriptors, rpc_info, metadata.is_http_request()) + let Some(descriptor) = + find_best_message(&file_descriptors, rpc_info, metadata.is_http_request()) else { return Ok(None); }; - Ok(Some((message, file_descriptors))) + Ok(Some(DescriptorWithDeps { + descriptor, + dependencies: file_descriptors, + })) } fn find_best_message( diff --git a/mitmproxy-contentviews/src/protobuf/proto_to_yaml.rs b/mitmproxy-contentviews/src/protobuf/proto_to_yaml.rs index 71eeb789..58911d4d 100644 --- a/mitmproxy-contentviews/src/protobuf/proto_to_yaml.rs +++ b/mitmproxy-contentviews/src/protobuf/proto_to_yaml.rs @@ -1,3 +1,4 @@ +use crate::protobuf::view_protobuf::tags; /// Parsed protobuf message => YAML value use protobuf::descriptor::field_descriptor_proto::Type; use protobuf::descriptor::field_descriptor_proto::Type::{ @@ -7,10 +8,9 @@ use protobuf::reflect::{ReflectFieldRef, ReflectValueRef}; use protobuf::MessageDyn; use serde_yaml::value::TaggedValue; use serde_yaml::{Mapping, Number, Value}; -use std::fmt::Write; use std::ops::Deref; -pub(crate) fn 
message_to_yaml(message: &dyn MessageDyn) -> Value { +pub(super) fn message_to_yaml(message: &dyn MessageDyn) -> Value { let mut ret = Mapping::new(); for field in message.descriptor_dyn().fields() { @@ -28,7 +28,7 @@ pub(crate) fn message_to_yaml(message: &dyn MessageDyn) -> Value { let value = match field.get_reflect(message) { ReflectFieldRef::Optional(x) => { if let Some(x) = x.value() { - primitive_type_to_yaml(x, field_type) + value_to_yaml(x, field_type) } else { continue; } @@ -39,7 +39,7 @@ pub(crate) fn message_to_yaml(message: &dyn MessageDyn) -> Value { } Value::Sequence( x.into_iter() - .map(|x| primitive_type_to_yaml(x, field_type)) + .map(|x| value_to_yaml(x, field_type)) .collect(), ) } @@ -49,12 +49,7 @@ pub(crate) fn message_to_yaml(message: &dyn MessageDyn) -> Value { } Value::Mapping( x.into_iter() - .map(|(k, v)| { - ( - primitive_type_to_yaml(k, field_type), - primitive_type_to_yaml(v, field_type), - ) - }) + .map(|(k, v)| (value_to_yaml(k, field_type), value_to_yaml(v, field_type))) .collect(), ) } @@ -64,7 +59,7 @@ pub(crate) fn message_to_yaml(message: &dyn MessageDyn) -> Value { Value::Mapping(ret) } -fn primitive_type_to_yaml(x: ReflectValueRef, field_type: Type) -> Value { +fn value_to_yaml(x: ReflectValueRef, field_type: Type) -> Value { match x { ReflectValueRef::U32(x) => tag_number(Value::Number(Number::from(x)), field_type), ReflectValueRef::U64(x) => tag_number(Value::Number(Number::from(x)), field_type), @@ -75,8 +70,8 @@ fn primitive_type_to_yaml(x: ReflectValueRef, field_type: Type) -> Value { ReflectValueRef::Bool(x) => Value::from(x), ReflectValueRef::String(x) => Value::from(x), ReflectValueRef::Bytes(x) => Value::Tagged(Box::new(TaggedValue { - tag: crate::protobuf::view_protobuf::tags::BINARY.clone(), - value: Value::String(bytes_to_hex_string(x)), + tag: tags::BINARY.clone(), + value: Value::String(data_encoding::HEXLOWER.encode(x)), })), ReflectValueRef::Enum(descriptor, i) => descriptor .value_by_number(i) @@ -89,26 
+84,17 @@ fn primitive_type_to_yaml(x: ReflectValueRef, field_type: Type) -> Value { fn tag_number(value: Value, field_type: Type) -> Value { match field_type { TYPE_UINT64 => Value::Tagged(Box::new(TaggedValue { - tag: crate::protobuf::view_protobuf::tags::VARINT.clone(), + tag: tags::VARINT.clone(), value, })), TYPE_FIXED64 => Value::Tagged(Box::new(TaggedValue { - tag: crate::protobuf::view_protobuf::tags::FIXED64.clone(), + tag: tags::FIXED64.clone(), value, })), TYPE_FIXED32 => Value::Tagged(Box::new(TaggedValue { - tag: crate::protobuf::view_protobuf::tags::FIXED32.clone(), + tag: tags::FIXED32.clone(), value, })), _ => value, } } - -// Convert length-delimited protobuf data to a hex string -fn bytes_to_hex_string(bytes: &[u8]) -> String { - let mut result = String::with_capacity(bytes.len() * 2); - for b in bytes { - let _ = write!(result, "{:02x}", b); - } - result -} diff --git a/mitmproxy-contentviews/src/protobuf/raw_to_proto.rs b/mitmproxy-contentviews/src/protobuf/raw_to_proto.rs index fecfe223..613bf709 100644 --- a/mitmproxy-contentviews/src/protobuf/raw_to_proto.rs +++ b/mitmproxy-contentviews/src/protobuf/raw_to_proto.rs @@ -1,3 +1,4 @@ +use crate::protobuf::existing_proto_definitions::DescriptorWithDeps; use anyhow::Context; use protobuf::descriptor::field_descriptor_proto::Label::LABEL_REPEATED; use protobuf::descriptor::field_descriptor_proto::Type; @@ -17,21 +18,20 @@ enum GuessedFieldType { } /// Create a "merged" MessageDescriptor. Mostly a wrapper around `create_descriptor_proto`. 
-pub(crate) fn merge_proto_and_descriptor( +pub(super) fn merge_proto_and_descriptor( data: &[u8], - existing: &MessageDescriptor, - dependencies: &[FileDescriptor], + desc: &DescriptorWithDeps, ) -> anyhow::Result { - let new_proto = create_descriptor_proto(data, existing)?; + let new_proto = create_descriptor_proto(data, &desc.descriptor)?; let descriptor = { - let mut file_descriptor_proto = existing.file_descriptor_proto().clone(); + let mut file_descriptor_proto = desc.descriptor.file_descriptor_proto().clone(); let message_idx = file_descriptor_proto .message_type .iter() .enumerate() - .filter_map(|(i, d)| (d.name() == existing.name_to_package()).then_some(i)) + .filter_map(|(i, d)| (d.name() == desc.descriptor.name_to_package()).then_some(i)) .next() .context("failed to find existing message descriptor index")?; file_descriptor_proto.message_type[message_idx] = new_proto; @@ -45,10 +45,15 @@ pub(crate) fn merge_proto_and_descriptor( .collect::>(); */ - FileDescriptor::new_dynamic(file_descriptor_proto, dependencies) + FileDescriptor::new_dynamic(file_descriptor_proto, &desc.dependencies) .context("failed to create new dynamic file descriptor")? - .message_by_package_relative_name(existing.name_to_package()) - .with_context(|| format!("did not find {} in descriptor", existing.name_to_package()))? + .message_by_package_relative_name(desc.descriptor.name_to_package()) + .with_context(|| { + format!( + "did not find {} in descriptor", + desc.descriptor.name_to_package() + ) + })? }; Ok(descriptor) @@ -56,6 +61,9 @@ pub(crate) fn merge_proto_and_descriptor( /// Create a new (empty) MessageDescriptor for the given package and name. pub(super) fn new_empty_descriptor(package: Option, name: &str) -> MessageDescriptor { + // Create nested descriptor protos. For example, if the name is OuterMessage.InnerMessage, + // we create a descriptor for InnerMessage and set it as a nested type of OuterMessage. 
+ // This is a bit of a hack, but the best way to get type_name right. let mut parts = name.rsplit("."); let mut head = { let mut descriptor = DescriptorProto::new(); diff --git a/mitmproxy-contentviews/src/protobuf/reencode.rs b/mitmproxy-contentviews/src/protobuf/reencode.rs index 37fb462b..d2daa8c5 100644 --- a/mitmproxy-contentviews/src/protobuf/reencode.rs +++ b/mitmproxy-contentviews/src/protobuf/reencode.rs @@ -1,17 +1,18 @@ use crate::protobuf::view_protobuf::tags; -use crate::Metadata; use anyhow::{bail, Context}; use protobuf::descriptor::field_descriptor_proto::Type; use protobuf::descriptor::field_descriptor_proto::Type::{TYPE_FIXED32, TYPE_FIXED64}; -use protobuf::reflect::{FieldDescriptor, RuntimeFieldType, RuntimeType}; +use protobuf::reflect::{FieldDescriptor, MessageDescriptor, RuntimeFieldType, RuntimeType}; use protobuf::well_known_types::empty::Empty; use protobuf::{MessageDyn, MessageFull, UnknownValue}; use serde_yaml::{Number, Value}; use std::num::ParseIntError; use std::str::FromStr; -pub(crate) fn reencode_yaml(value: Value, _metadata: &dyn Metadata) -> anyhow::Result> { - let descriptor = Empty::descriptor(); +pub(super) fn reencode_yaml( + value: Value, + descriptor: &MessageDescriptor, +) -> anyhow::Result> { let message = descriptor.new_instance(); merge_yaml_into_message(value, message) } diff --git a/mitmproxy-contentviews/src/protobuf/view_grpc.rs b/mitmproxy-contentviews/src/protobuf/view_grpc.rs index 61bbd364..c1a0131c 100644 --- a/mitmproxy-contentviews/src/protobuf/view_grpc.rs +++ b/mitmproxy-contentviews/src/protobuf/view_grpc.rs @@ -1,5 +1,4 @@ use super::{existing_proto_definitions, reencode}; -use crate::protobuf::raw_to_proto::new_empty_descriptor; use crate::{Metadata, Prettify, Protobuf, Reencode}; use anyhow::{bail, Context, Result}; use flate2::read::{DeflateDecoder, GzDecoder}; @@ -22,8 +21,7 @@ impl Prettify for GRPC { fn prettify(&self, mut data: &[u8], metadata: &dyn Metadata) -> Result { let mut protos = vec![]; 
- let (descriptor, dependencies) = existing_proto_definitions::find_best_match(metadata)? - .unwrap_or_else(|| (new_empty_descriptor(None, "Unknown"), vec![])); + let descriptor = existing_proto_definitions::find_best_match(metadata)?.unwrap_or_default(); while !data.is_empty() { let compressed = match data[0] { @@ -59,7 +57,7 @@ impl Prettify for GRPC { } else { proto }; - protos.push(Protobuf.prettify_with_descriptor(proto, &descriptor, &dependencies)?); + protos.push(Protobuf.prettify_with_descriptor(proto, &descriptor)?); data = &data[5 + len..]; } @@ -78,10 +76,13 @@ impl Prettify for GRPC { impl Reencode for GRPC { fn reencode(&self, data: &str, metadata: &dyn Metadata) -> Result> { + let descriptor = existing_proto_definitions::find_best_match(metadata)? + .unwrap_or_default() + .descriptor; let mut ret = vec![]; for document in serde_yaml::Deserializer::from_str(data) { let value = Value::deserialize(document).context("Invalid YAML")?; - let proto = reencode::reencode_yaml(value, metadata)?; + let proto = reencode::reencode_yaml(value, &descriptor)?; ret.push(0); // uncompressed ret.extend(u32::to_be_bytes(proto.len() as u32)); ret.extend(proto); diff --git a/mitmproxy-contentviews/src/protobuf/view_protobuf.rs b/mitmproxy-contentviews/src/protobuf/view_protobuf.rs index 45b0b622..c2ae25db 100644 --- a/mitmproxy-contentviews/src/protobuf/view_protobuf.rs +++ b/mitmproxy-contentviews/src/protobuf/view_protobuf.rs @@ -1,11 +1,10 @@ -use crate::protobuf::raw_to_proto::new_empty_descriptor; +use crate::protobuf::existing_proto_definitions::DescriptorWithDeps; use crate::protobuf::{ existing_proto_definitions, proto_to_yaml, raw_to_proto, reencode, yaml_to_pretty, }; use crate::{Metadata, Prettify, Reencode}; use anyhow::{Context, Result}; use mitmproxy_highlight::Language; -use protobuf::reflect::{FileDescriptor, MessageDescriptor}; use serde_yaml::Value; pub(super) mod tags { @@ -38,9 +37,8 @@ impl Prettify for Protobuf { } fn prettify(&self, data: &[u8], 
metadata: &dyn Metadata) -> Result { - let (descriptor, dependencies) = existing_proto_definitions::find_best_match(metadata)? - .unwrap_or_else(|| (new_empty_descriptor(None, "Unknown"), vec![])); - self.prettify_with_descriptor(data, &descriptor, &dependencies) + let descriptor = existing_proto_definitions::find_best_match(metadata)?.unwrap_or_default(); + self.prettify_with_descriptor(data, &descriptor) } fn render_priority(&self, _data: &[u8], metadata: &dyn Metadata) -> f64 { @@ -52,19 +50,28 @@ impl Prettify for Protobuf { } } +impl Reencode for Protobuf { + fn reencode(&self, data: &str, metadata: &dyn Metadata) -> Result> { + let descriptor = existing_proto_definitions::find_best_match(metadata)? + .unwrap_or_default() + .descriptor; + let value: Value = serde_yaml::from_str(data).context("Invalid YAML")?; + reencode::reencode_yaml(value, &descriptor) + } +} + impl Protobuf { pub(super) fn prettify_with_descriptor( &self, data: &[u8], - descriptor: &MessageDescriptor, - dependencies: &[FileDescriptor], + descriptor: &DescriptorWithDeps, ) -> Result { // Check if data is empty first if data.is_empty() { return Ok("{} # empty protobuf message".to_string()); } - let descriptor = raw_to_proto::merge_proto_and_descriptor(data, descriptor, dependencies)?; + let descriptor = raw_to_proto::merge_proto_and_descriptor(data, descriptor)?; // Parse protobuf and convert to YAML let message = descriptor @@ -77,13 +84,6 @@ impl Protobuf { } } -impl Reencode for Protobuf { - fn reencode(&self, data: &str, metadata: &dyn Metadata) -> Result> { - let value: Value = serde_yaml::from_str(data).context("Invalid YAML")?; - reencode::reencode_yaml(value, metadata) - } -} - #[cfg(test)] mod tests { use super::*; @@ -175,26 +175,6 @@ mod tests { "1:\n- 1 # signed: -1\n- 2\n- 3 # signed: -2\n" ); - mod reencode { - use super::*; - - #[test] - fn reencode_new_nested_message() { - let result = Protobuf - .reencode(nested::YAML, &TestMetadata::default()) - .unwrap(); - 
assert_eq!(result, nested::PROTO); - } - - #[test] - fn new_string_attr() { - let result = Protobuf - .reencode(string::YAML, &TestMetadata::default()) - .unwrap(); - assert_eq!(result, string::PROTO); - } - } - #[test] fn test_invalid_protobuf() { let result = Protobuf.prettify(b"\xFF\xFF", &TestMetadata::default()); @@ -215,23 +195,49 @@ mod tests { assert_eq!(result, "{} # empty protobuf message"); } - #[test] - fn test_existing() { - let metadata = TestMetadata::default().with_protobuf_definitions(concat!( - env!("CARGO_MANIFEST_DIR"), - "/testdata/protobuf/simple.proto" - )); - let result = Protobuf.prettify(varint::PROTO, &metadata).unwrap(); - assert_eq!(result, "example: 150\n"); - } + mod existing_definition { + use super::*; - #[test] - fn test_existing_mismatch() { - let metadata = TestMetadata::default().with_protobuf_definitions(concat!( - env!("CARGO_MANIFEST_DIR"), - "/testdata/protobuf/simple.proto" - )); - let result = Protobuf.prettify(string::PROTO, &metadata); - assert!(result.is_err()); + const VARINT_PRETTY_YAML: &str = "example: 150\n"; + + #[test] + fn prettify() { + let metadata = TestMetadata::default().with_protobuf_definitions(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/protobuf/simple.proto" + )); + let result = Protobuf.prettify(varint::PROTO, &metadata).unwrap(); + assert_eq!(result, VARINT_PRETTY_YAML); + } + + #[test] + fn prettify_mismatch() { + let metadata = TestMetadata::default().with_protobuf_definitions(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/protobuf/simple.proto" + )); + let result = Protobuf.prettify(string::PROTO, &metadata); + assert!(result.is_err()); + } + + #[test] + fn reencode() { + let metadata = TestMetadata::default().with_protobuf_definitions(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/protobuf/simple.proto" + )); + let result = Protobuf.reencode(VARINT_PRETTY_YAML, &metadata).unwrap(); + assert_eq!(result, varint::PROTO); + } + + #[test] + fn reencode_mismatch() { + let metadata = 
TestMetadata::default().with_protobuf_definitions(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/protobuf/simple.proto" + )); + let result = Protobuf.reencode("example: hello", &metadata).unwrap(); + assert_eq!(result, string::PROTO); + } } } diff --git a/mitmproxy-contentviews/src/protobuf/yaml_to_pretty.rs b/mitmproxy-contentviews/src/protobuf/yaml_to_pretty.rs index 1ba8013a..e17748c4 100644 --- a/mitmproxy-contentviews/src/protobuf/yaml_to_pretty.rs +++ b/mitmproxy-contentviews/src/protobuf/yaml_to_pretty.rs @@ -3,7 +3,7 @@ use crate::protobuf::view_protobuf::tags; use regex::Captures; // Helper method to apply regex replacements to the YAML output -pub(crate) fn apply_replacements(yaml_str: &str) -> anyhow::Result { +pub(super) fn apply_replacements(yaml_str: &str) -> anyhow::Result { // Replace !fixed32 tags with comments showing float and i32 interpretations let with_fixed32 = tags::FIXED32_RE.replace_all(yaml_str, |caps: &Captures| { let value = caps[1].parse::().unwrap_or_default(); From 39cd985c0e25d2bdd94fea855c73f9cb2e366dc8 Mon Sep 17 00:00:00 2001 From: Maximilian Hils Date: Mon, 14 Apr 2025 11:22:35 +0200 Subject: [PATCH 26/26] fix more review nits --- .../src/protobuf/test.proto | 5 -- .../src/protobuf/view_grpc.rs | 2 +- .../testdata/protobuf/grpcbin.proto | 77 ------------------- mitmproxy-rs/src/syntax_highlight.rs | 2 +- 4 files changed, 2 insertions(+), 84 deletions(-) delete mode 100644 mitmproxy-contentviews/src/protobuf/test.proto delete mode 100644 mitmproxy-contentviews/testdata/protobuf/grpcbin.proto diff --git a/mitmproxy-contentviews/src/protobuf/test.proto b/mitmproxy-contentviews/src/protobuf/test.proto deleted file mode 100644 index e3e6363c..00000000 --- a/mitmproxy-contentviews/src/protobuf/test.proto +++ /dev/null @@ -1,5 +0,0 @@ -syntax = "proto3"; - -message TestMessage { - int32 example = 1; -} \ No newline at end of file diff --git a/mitmproxy-contentviews/src/protobuf/view_grpc.rs 
b/mitmproxy-contentviews/src/protobuf/view_grpc.rs index c1a0131c..aa7a9f90 100644 --- a/mitmproxy-contentviews/src/protobuf/view_grpc.rs +++ b/mitmproxy-contentviews/src/protobuf/view_grpc.rs @@ -31,7 +31,7 @@ impl Prettify for GRPC { }; let len = match data.get(1..5) { Some(x) => u32::from_be_bytes(x.try_into()?) as usize, - _ => bail!("invalid gRPC: first byte is not a boolean"), + _ => bail!("invalid gRPC: not enough bytes"), }; let Some(proto) = data.get(5..5 + len) else { bail!("Invalid gRPC: not enough data") diff --git a/mitmproxy-contentviews/testdata/protobuf/grpcbin.proto b/mitmproxy-contentviews/testdata/protobuf/grpcbin.proto deleted file mode 100644 index 4729bee2..00000000 --- a/mitmproxy-contentviews/testdata/protobuf/grpcbin.proto +++ /dev/null @@ -1,77 +0,0 @@ -syntax = "proto3"; - -package grpcbin; - -service GRPCBin { - // This endpoint - rpc Index(EmptyMessage) returns (IndexReply) {} - // Unary endpoint that takes no argument and replies an empty message. - rpc Empty(EmptyMessage) returns (EmptyMessage) {} - // Unary endpoint that replies a received DummyMessage - rpc DummyUnary(DummyMessage) returns (DummyMessage) {} - // Stream endpoint that sends back 10 times the received DummyMessage - rpc DummyServerStream(DummyMessage) returns (stream DummyMessage) {} - // Stream endpoint that receives 10 DummyMessages and replies with the last received one - rpc DummyClientStream(stream DummyMessage) returns (DummyMessage) {} - // Stream endpoint that sends back a received DummyMessage indefinitely (chat mode) - rpc DummyBidirectionalStreamStream(stream DummyMessage) returns (stream DummyMessage) {} - // Unary endpoint that raises a specified (by code) gRPC error - rpc SpecificError(SpecificErrorRequest) returns (EmptyMessage) {} - // Unary endpoint that raises a random gRPC error - rpc RandomError(EmptyMessage) returns (EmptyMessage) {} - // Unary endpoint that returns headers - rpc HeadersUnary(EmptyMessage) returns (HeadersMessage) {} - // Unary 
endpoint that returns no respnose - rpc NoResponseUnary(EmptyMessage) returns (EmptyMessage) {} -} - -message HeadersMessage { - message Values { - repeated string values = 1; - } - map Metadata = 1; -} - -message SpecificErrorRequest { - uint32 code = 1; - string reason = 2; -} - -message EmptyMessage {} - -message DummyMessage { - message Sub { - string f_string = 1; - } - enum Enum { - ENUM_0 = 0; - ENUM_1 = 1; - ENUM_2 = 2; - } - string f_string = 1; - repeated string f_strings = 2; - int32 f_int32 = 3; - repeated int32 f_int32s = 4; - Enum f_enum = 5; - repeated Enum f_enums = 6; - Sub f_sub = 7; - repeated Sub f_subs = 8; - bool f_bool = 9; - repeated bool f_bools = 10; - int64 f_int64 = 11; - repeated int64 f_int64s= 12; - bytes f_bytes = 13; - repeated bytes f_bytess = 14; - float f_float = 15; - repeated float f_floats = 16; - // TODO: timestamp, duration, oneof, any, maps, fieldmask, wrapper type, struct, listvalue, value, nullvalue, deprecated -} - -message IndexReply { - message Endpoint { - string path = 1; - string description = 2; - } - string description = 1; - repeated Endpoint endpoints = 2; -} diff --git a/mitmproxy-rs/src/syntax_highlight.rs b/mitmproxy-rs/src/syntax_highlight.rs index 5e9fe6bf..5efb241f 100644 --- a/mitmproxy-rs/src/syntax_highlight.rs +++ b/mitmproxy-rs/src/syntax_highlight.rs @@ -12,7 +12,7 @@ use pyo3::{exceptions::PyValueError, prelude::*}; /// ```python /// from mitmproxy_rs.syntax_highlight import highlight /// highlighted = highlight("key: 42", "yaml") -/// print(highlighted) # [('property', 'key'), ('', ': '), ('number', '42')] +/// print(highlighted) # [('name', 'key'), ('', ': '), ('number', '42')] /// ``` #[pyfunction] pub fn highlight(text: String, language: &str) -> PyResult> {