diff --git a/Cargo.lock b/Cargo.lock index 8ff377a3..5b22755b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -568,9 +568,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.17" +version = "1.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fcb57c740ae1daf453ae85f16e37396f672b039e00d9d866e07ddb24e328e3a" +checksum = "525046617d8376e3db1deffb079e91cef90a89fc3ca5c185bbf8c9ecdd15cd5c" dependencies = [ "shlex", ] @@ -1001,9 +1001,9 @@ dependencies = [ [[package]] name = "deranged" -version = "0.4.1" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28cfac68e08048ae1883171632c2aef3ebc555621ae56fbccce1cbf22dd7f058" +checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e" dependencies = [ "powerfmt", ] @@ -1085,9 +1085,9 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" -version = "0.3.10" +version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" +checksum = "976dd42dc7e85965fe702eb8164f21f450704bdde31faefd6471dba214cb594e" dependencies = [ "libc", "windows-sys 0.59.0", @@ -1377,7 +1377,7 @@ dependencies = [ "futures-core", "futures-sink", "http", - "indexmap 2.8.0", + "indexmap 2.9.0", "slab", "tokio", "tokio-util", @@ -1542,6 +1542,15 @@ dependencies = [ "digest", ] +[[package]] +name = "home" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" +dependencies = [ + "windows-sys 0.59.0", +] + [[package]] name = "hostname" version = "0.4.0" @@ -1823,9 +1832,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.8.0" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3954d50fe15b02142bf25d3b8bdadb634ec3948f103d04ffe3031bc8fe9d7058" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", "hashbrown 0.15.2", @@ -2147,9 +2156,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.8.5" +version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5" +checksum = "ff70ce3e48ae43fa075863cef62e8b43b71a4f2382229920e0df362592919430" dependencies = [ "adler2", "simd-adler32", @@ -2199,7 +2208,6 @@ dependencies = [ "lru_time_cache", "nix 0.29.0", "objc", - "once_cell", "pretty-hex", "prost", "rand 0.9.0", @@ -2214,6 +2222,37 @@ dependencies = [ "windows 0.61.1", ] +[[package]] +name = "mitmproxy-contentviews" +version = "0.12.0-dev" +dependencies = [ + "anyhow", + "criterion", + "data-encoding", + "flate2", + "log", + "mitmproxy-highlight", + "pretty-hex", + "protobuf", + "protobuf-parse", + "regex", + "rmp-serde", + "serde", + "serde_yaml", +] + +[[package]] +name = "mitmproxy-highlight" +version = "0.12.0-dev" +dependencies = [ + "anyhow", + "criterion", + "tree-sitter", + "tree-sitter-highlight", + "tree-sitter-xml", + "tree-sitter-yaml", +] + [[package]] name = "mitmproxy-linux" version = "0.12.0-dev" @@ -2243,7 +2282,7 @@ dependencies = [ "aya-ebpf", "aya-log-ebpf", "mitmproxy-linux-ebpf-common", - "which", + "which 7.0.2", ] [[package]] @@ -2264,8 +2303,9 @@ dependencies = [ "env_logger", "log", "mitmproxy", + "mitmproxy-contentviews", + "mitmproxy-highlight", "nix 0.29.0", - "once_cell", "pyo3", "pyo3-async-runtimes", "pyo3-log", @@ -2407,7 +2447,7 @@ checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" dependencies = [ "crc32fast", "hashbrown 0.15.2", - "indexmap 2.8.0", + "indexmap 2.9.0", "memchr", ] @@ -2468,6 +2508,12 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] 
+name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "percent-encoding" version = "2.3.1" @@ -2701,6 +2747,42 @@ dependencies = [ "prost", ] +[[package]] +name = "protobuf" +version = "3.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d65a1d4ddae7d8b5de68153b48f6aa3bba8cb002b243dbdbc55a5afbc98f99f4" +dependencies = [ + "once_cell", + "protobuf-support", + "thiserror 1.0.69", +] + +[[package]] +name = "protobuf-parse" +version = "3.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4aeaa1f2460f1d348eeaeed86aea999ce98c1bded6f089ff8514c9d9dbdc973" +dependencies = [ + "anyhow", + "indexmap 2.9.0", + "log", + "protobuf", + "protobuf-support", + "tempfile", + "thiserror 1.0.69", + "which 4.4.2", +] + +[[package]] +name = "protobuf-support" +version = "3.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e36c2f31e0a47f9280fb347ef5e461ffcd2c52dd520d8e216b52f93b0b0d7d6" +dependencies = [ + "thiserror 1.0.69", +] + [[package]] name = "pyo3" version = "0.24.1" @@ -2900,9 +2982,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.10" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b8c0c260b63a8219631167be35e6a988e9554dbd323f8bd08439c8ed1302bd1" +checksum = "d2f103c6d277498fbceb16e84d317e2a400f160f46904d5f5410848c829511a3" dependencies = [ "bitflags 2.9.0", ] @@ -2974,6 +3056,28 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rmp" +version = "0.8.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "228ed7c16fa39782c3b3468e974aec2795e9089153cd08ee2e9aefb3613334c4" +dependencies = [ + "byteorder", + "num-traits", + "paste", +] + +[[package]] +name = "rmp-serde" +version = "1.3.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "52e599a477cf9840e92f2cde9a7189e67b42c57532749bf90aea6ec10facd4db" +dependencies = [ + "byteorder", + "rmp", + "serde", +] + [[package]] name = "rustc-demangle" version = "0.1.24" @@ -3106,12 +3210,26 @@ version = "1.0.140" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" dependencies = [ + "indexmap 2.9.0", "itoa", "memchr", "ryu", "serde", ] +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap 2.9.0", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "sharded-slab" version = "0.1.7" @@ -3153,9 +3271,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.14.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd" +checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" [[package]] name = "smoltcp" @@ -3189,6 +3307,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + [[package]] name = "strsim" version = "0.11.1" @@ -3406,9 +3530,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.44.1" +version = "1.44.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f382da615b842244d4b8738c82ed1275e6c5dd90c459a30941cd07080b06c91a" +checksum = 
"e6b88822cbe49de4185e3a4cbf8321dd487cf5fe0c5c65695fef6346371e9c48" dependencies = [ "backtrace", "bytes", @@ -3603,6 +3727,58 @@ dependencies = [ "tracing-log", ] +[[package]] +name = "tree-sitter" +version = "0.25.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9ac5ea5e7f2f1700842ec071401010b9c59bf735295f6e9fa079c3dc035b167" +dependencies = [ + "cc", + "regex", + "regex-syntax 0.8.5", + "serde_json", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-highlight" +version = "0.25.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "076673d82b859652de3e7abe73a4592c173e51dfc9b83eb49f0479fd9fe4631c" +dependencies = [ + "regex", + "streaming-iterator", + "thiserror 2.0.12", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8" + +[[package]] +name = "tree-sitter-xml" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e670041f591d994f54d597ddcd8f4ebc930e282c4c76a42268743b71f0c8b6b3" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-yaml" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0c99f2b92b677f1a18b6b232fa9329afb5758118238a7d0b29cae324ef50d5e" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "try-lock" version = "0.2.5" @@ -3658,6 +3834,12 @@ dependencies = [ "subtle", ] +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + [[package]] name = "untrusted" version = "0.9.0" @@ -3823,6 +4005,18 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082" +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix 0.38.44", +] + [[package]] name = "which" version = "7.0.2" diff --git a/Cargo.toml b/Cargo.toml index 2d0c436b..b2285032 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,8 @@ [workspace] members = [ ".", + "mitmproxy-contentviews", + "mitmproxy-highlight", "mitmproxy-rs", "mitmproxy-linux", "mitmproxy-linux-ebpf", @@ -11,6 +13,8 @@ members = [ ] default-members = [ ".", + "mitmproxy-contentviews", + "mitmproxy-highlight", "mitmproxy-rs", "mitmproxy-linux", "mitmproxy-linux-ebpf-common", @@ -53,7 +57,6 @@ publish.workspace = true [dependencies] anyhow = { version = "1.0.97", features = ["backtrace"] } log = "0.4.27" -once_cell = "1" pretty-hex = "0.4.1" smoltcp = "0.12" tokio = { version = "1.44.1", features = ["macros", "net", "rt-multi-thread", "sync", "time", "io-util", "process"] } @@ -109,7 +112,6 @@ rand = "0.9" criterion = "0.5.1" hickory-server = "0.25.1" - [[bench]] name = "process" harness = false diff --git a/README.md b/README.md index d4cd91af..a2b118b6 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,10 @@ This repository contains mitmproxy's Rust bits, most notably: ### Structure - [`src/`](./src): The `mitmproxy` crate containing most of the "meat". +- [`mitmproxy-contentviews/`](./mitmproxy-contentviews): + Pretty-printers for (HTTP) message bodies. +- [`mitmproxy-highlight/`](./mitmproxy-highlight): + Syntax highlighting backend for mitmproxy and mitmdump. - [`mitmproxy-rs/`](./mitmproxy-rs): The `mitmproxy-rs` Python package, which provides Python bindings for the Rust crate using [PyO3](https://pyo3.rs/). Source and binary distributions are available [on PyPI](https://pypi.org/project/mitmproxy-rs/). 
diff --git a/mitmproxy-contentviews/Cargo.toml b/mitmproxy-contentviews/Cargo.toml new file mode 100644 index 00000000..cdbb0b18 --- /dev/null +++ b/mitmproxy-contentviews/Cargo.toml @@ -0,0 +1,33 @@ +[package] +name = "mitmproxy-contentviews" +license = "MIT" +authors.workspace = true +version.workspace = true +repository.workspace = true +edition.workspace = true +rust-version.workspace = true +publish.workspace = true + +[lints] +workspace = true + +[dependencies] +anyhow = { version = "1.0.97", features = ["backtrace"] } +log = "0.4.27" +data-encoding = "2.8.0" +pretty-hex = "0.4.1" +mitmproxy-highlight = { path = "../mitmproxy-highlight" } +serde = { version = "1.0", features = ["derive"] } +serde_yaml = "0.9" +rmp-serde = "1.1" +protobuf = "3.7.2" +regex = "1.10.3" +flate2 = "1.0" +protobuf-parse = "3.7" + +[dev-dependencies] +criterion = "0.5.1" + +[[bench]] +name = "contentviews" +harness = false diff --git a/mitmproxy-contentviews/benches/contentviews.rs b/mitmproxy-contentviews/benches/contentviews.rs new file mode 100644 index 00000000..0b0305ab --- /dev/null +++ b/mitmproxy-contentviews/benches/contentviews.rs @@ -0,0 +1,60 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use mitmproxy_contentviews::{test::TestMetadata, MsgPack, Prettify, Protobuf, Reencode}; + +fn criterion_benchmark(c: &mut Criterion) { + c.bench_function("protobuf-prettify", |b| { + b.iter(|| { + Protobuf.prettify(black_box(b"\n\x13gRPC testing server\x12\x07\n\x05Index\x12\x07\n\x05Empty\x12\x0c\n\nDummyUnary\x12\x0f\n\rSpecificError\x12\r\n\x0bRandomError\x12\x0e\n\x0cHeadersUnary\x12\x11\n\x0fNoResponseUnary"), &TestMetadata::default()).unwrap() + }) + }); + + c.bench_function("protobuf-reencode", |b| { + b.iter(|| { + Protobuf.reencode( + black_box("1: gRPC testing server\n2:\n- 1: Index\n- 1: Empty\n- 1: DummyUnary\n- 1: SpecificError\n- 1: RandomError\n- 1: HeadersUnary\n- 1: NoResponseUnary\n"), + &TestMetadata::default() + ).unwrap() + }) + }); + + 
const TEST_MSGPACK: &[u8] = &[ + 0x83, // map with 3 elements + 0xa4, 0x6e, 0x61, 0x6d, 0x65, // "name" + 0xa8, 0x4a, 0x6f, 0x68, 0x6e, 0x20, 0x44, 0x6f, 0x65, // "John Doe" + 0xa3, 0x61, 0x67, 0x65, // "age" + 0x1e, // 30 + 0xa4, 0x74, 0x61, 0x67, 0x73, // "tags" + 0x92, // array with 2 elements + 0xa9, 0x64, 0x65, 0x76, 0x65, 0x6c, 0x6f, 0x70, 0x65, 0x72, // "developer" + 0xa4, 0x72, 0x75, 0x73, 0x74, // "rust" + ]; + c.bench_function("msgpack-prettify", |b| { + b.iter(|| { + MsgPack + .prettify(black_box(TEST_MSGPACK), &TestMetadata::default()) + .unwrap() + }) + }); + + c.bench_function("msgpack-reencode", |b| { + b.iter(|| { + MsgPack + .reencode( + black_box( + "\ + name: John Doe\n\ + age: 30\n\ + tags:\n\ + - developer\n\ + - rust\n\ + ", + ), + &TestMetadata::default(), + ) + .unwrap() + }) + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/mitmproxy-contentviews/src/hex_dump.rs b/mitmproxy-contentviews/src/hex_dump.rs new file mode 100644 index 00000000..c96c0443 --- /dev/null +++ b/mitmproxy-contentviews/src/hex_dump.rs @@ -0,0 +1,55 @@ +use crate::hex_stream::is_binary; +use crate::{Metadata, Prettify}; +use pretty_hex::{HexConfig, PrettyHex}; + +pub struct HexDump; + +impl Prettify for HexDump { + fn name(&self) -> &'static str { + "Hex Dump" + } + + fn prettify(&self, data: &[u8], _metadata: &dyn Metadata) -> anyhow::Result { + Ok(format!( + "{:?}", + data.hex_conf(HexConfig { + title: false, + ascii: true, + width: 16, + group: 4, + chunk: 1, + max_bytes: usize::MAX, + display_offset: 0, + }) + )) + } + + fn render_priority(&self, data: &[u8], _metadata: &dyn Metadata) -> f64 { + if is_binary(data) { + 0.5 + } else { + 0.0 + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test::TestMetadata; + + #[test] + fn prettify_simple() { + let result = HexDump.prettify(b"abcd", &TestMetadata::default()).unwrap(); + assert_eq!( + result, + "0000: 61 62 63 64 abcd" + ); + } + + #[test] + fn 
prettify_empty() { + let result = HexDump.prettify(b"", &TestMetadata::default()).unwrap(); + assert_eq!(result, ""); + } +} diff --git a/mitmproxy-contentviews/src/hex_stream.rs b/mitmproxy-contentviews/src/hex_stream.rs new file mode 100644 index 00000000..f6ca0e98 --- /dev/null +++ b/mitmproxy-contentviews/src/hex_stream.rs @@ -0,0 +1,89 @@ +use crate::{Metadata, Prettify, Reencode}; +use anyhow::{Context, Result}; + +pub struct HexStream; + +pub(crate) fn is_binary(data: &[u8]) -> bool { + if data.is_empty() { + return false; + } + let ratio = data + .iter() + .take(100) + .filter(|&&b| b < 9 || (13 < b && b < 32) || b > 126) + .count() as f64 + / data.len().min(100) as f64; + + ratio > 0.3 +} + +impl Prettify for HexStream { + fn name(&self) -> &'static str { + "Hex Stream" + } + + fn prettify(&self, data: &[u8], _metadata: &dyn Metadata) -> Result { + Ok(data_encoding::HEXLOWER.encode(data)) + } + + fn render_priority(&self, data: &[u8], _metadata: &dyn Metadata) -> f64 { + if is_binary(data) { + 0.4 + } else { + 0.0 + } + } +} + +impl Reencode for HexStream { + fn reencode(&self, data: &str, _metadata: &dyn Metadata) -> Result> { + let data = data.trim_end_matches(['\n', '\r']); + if data.len() % 2 != 0 { + anyhow::bail!("Invalid hex string: uneven number of characters"); + } + data_encoding::HEXLOWER_PERMISSIVE + .decode(data.as_bytes()) + .context("Invalid hex string") + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test::TestMetadata; + + #[test] + fn test_hex_stream() { + let result = HexStream + .prettify(b"foo", &TestMetadata::default()) + .unwrap(); + assert_eq!(result, "666f6f"); + } + + #[test] + fn test_hex_stream_empty() { + let result = HexStream.prettify(b"", &TestMetadata::default()).unwrap(); + assert_eq!(result, ""); + } + + #[test] + fn test_hex_stream_reencode() { + let data = "666f6f"; + let result = HexStream.reencode(data, &TestMetadata::default()).unwrap(); + assert_eq!(result, b"foo"); + } + + #[test] + fn 
test_hex_stream_reencode_with_newlines() { + let data = "666f6f\r\n"; + let result = HexStream.reencode(data, &TestMetadata::default()).unwrap(); + assert_eq!(result, b"foo"); + } + + #[test] + fn test_hex_stream_reencode_uneven_chars() { + let data = "666f6"; + let result = HexStream.reencode(data, &TestMetadata::default()); + assert!(result.is_err()); + } +} diff --git a/mitmproxy-contentviews/src/lib.rs b/mitmproxy-contentviews/src/lib.rs new file mode 100644 index 00000000..fdaf5a95 --- /dev/null +++ b/mitmproxy-contentviews/src/lib.rs @@ -0,0 +1,129 @@ +mod hex_dump; +mod hex_stream; +mod msgpack; +mod protobuf; + +pub use hex_dump::HexDump; +pub use hex_stream::HexStream; +pub use msgpack::MsgPack; +pub use protobuf::Protobuf; +pub use protobuf::GRPC; + +use anyhow::Result; +use mitmproxy_highlight::Language; +use std::path::Path; + +pub trait Metadata { + /// The HTTP `content-type` of this message. + fn content_type(&self) -> Option<&str>; + /// Get an HTTP header value by name. + /// `name` is case-insensitive. + fn get_header(&self, name: &str) -> Option; + /// Get the path from the flow's request. + fn get_path(&self) -> Option<&str> { + None + } + /// Check if this is an HTTP request. + fn is_http_request(&self) -> bool { + false + } + /// Get the protobuf definitions for this message. + fn protobuf_definitions(&self) -> Option<&Path> { + None + } +} + +/// See https://docs.mitmproxy.org/dev/api/mitmproxy/contentviews.html +/// for API details. +pub trait Prettify: Send + Sync { + /// The name for this contentview, e.g. `gRPC` or `Protobuf`. + /// Favor brevity. + fn name(&self) -> &str; + + fn instance_name(&self) -> String { + self.name().to_lowercase().replace(" ", "_") + } + + /// The syntax highlighting that should be applied to the prettified output. + /// This is useful for contentviews that prettify to JSON or YAML. + fn syntax_highlight(&self) -> Language { + Language::None + } + + /// Pretty-print `data`. 
+ fn prettify(&self, data: &[u8], metadata: &dyn Metadata) -> Result; + + /// Render priority - typically a float between 0 and 1 for builtin views. + #[allow(unused_variables)] + fn render_priority(&self, data: &[u8], metadata: &dyn Metadata) -> f64 { + 0.0 + } +} + +pub trait Reencode: Send + Sync { + fn reencode(&self, data: &str, metadata: &dyn Metadata) -> Result>; +} + +// no cfg(test) gate because it's used in benchmarks as well +pub mod test { + use crate::Metadata; + use std::path::Path; + + #[derive(Default)] + pub struct TestMetadata { + pub content_type: Option, + pub headers: std::collections::HashMap, + pub protobuf_definitions: Option, + pub path: Option, + pub is_http_request: bool, + } + + impl TestMetadata { + pub fn with_content_type(mut self, content_type: &str) -> Self { + self.content_type = Some(content_type.to_string()); + self + } + + pub fn with_header(mut self, name: &str, value: &str) -> Self { + self.headers.insert(name.to_lowercase(), value.to_string()); + self + } + + pub fn with_path(mut self, path: &str) -> Self { + self.path = Some(path.to_string()); + self + } + + pub fn with_protobuf_definitions>(mut self, definitions: P) -> Self { + self.protobuf_definitions = Some(definitions.as_ref().to_path_buf()); + self + } + + pub fn with_is_http_request(mut self, is_http_request: bool) -> Self { + self.is_http_request = is_http_request; + self + } + } + + impl Metadata for TestMetadata { + fn content_type(&self) -> Option<&str> { + self.content_type.as_deref() + } + + fn get_header(&self, name: &str) -> Option { + self.headers.get(name).cloned() + } + + fn get_path(&self) -> Option<&str> { + self.path.as_deref() + } + + fn protobuf_definitions(&self) -> Option<&Path> { + self.protobuf_definitions.as_deref() + } + + fn is_http_request(&self) -> bool { + self.is_http_request + } + } +} diff --git a/mitmproxy-contentviews/src/msgpack.rs b/mitmproxy-contentviews/src/msgpack.rs new file mode 100644 index 00000000..c4295f82 --- /dev/null +++ 
b/mitmproxy-contentviews/src/msgpack.rs @@ -0,0 +1,148 @@ +use super::{Metadata, Prettify, Reencode}; +use anyhow::{Context, Result}; +use mitmproxy_highlight::Language; +use rmp_serde::{decode, encode}; + +pub struct MsgPack; + +impl Prettify for MsgPack { + fn name(&self) -> &'static str { + "MsgPack" + } + + fn syntax_highlight(&self) -> Language { + Language::Yaml + } + + fn prettify(&self, data: &[u8], _metadata: &dyn Metadata) -> Result { + // Deserialize MsgPack to a serde_yaml::Value + let value: serde_yaml::Value = + decode::from_slice(data).context("Failed to deserialize MsgPack")?; + + // Convert the Value to prettified YAML + serde_yaml::to_string(&value).context("Failed to convert to YAML") + } + + fn render_priority(&self, _data: &[u8], metadata: &dyn Metadata) -> f64 { + match metadata.content_type() { + Some("application/msgpack") => 1.0, + Some("application/x-msgpack") => 1.0, + _ => 0.0, + } + } +} + +impl Reencode for MsgPack { + fn reencode(&self, data: &str, _metadata: &dyn Metadata) -> Result> { + // Parse the YAML string to a serde_yaml::Value + let value: serde_yaml::Value = serde_yaml::from_str(data).context("Invalid YAML")?; + + // Serialize the Value to MsgPack + let mut buf = Vec::new(); + encode::write_named(&mut buf, &value).context("Failed to encode to MsgPack")?; + + Ok(buf) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test::TestMetadata; + + // Hardcoded MsgPack data for a simple object: + // { + // "name": "John Doe", + // "age": 30, + // "tags": ["developer", "rust"] + // } + const TEST_MSGPACK: &[u8] = &[ + 0x83, // map with 3 elements + 0xa4, 0x6e, 0x61, 0x6d, 0x65, // "name" + 0xa8, 0x4a, 0x6f, 0x68, 0x6e, 0x20, 0x44, 0x6f, 0x65, // "John Doe" + 0xa3, 0x61, 0x67, 0x65, // "age" + 0x1e, // 30 + 0xa4, 0x74, 0x61, 0x67, 0x73, // "tags" + 0x92, // array with 2 elements + 0xa9, 0x64, 0x65, 0x76, 0x65, 0x6c, 0x6f, 0x70, 0x65, 0x72, // "developer" + 0xa4, 0x72, 0x75, 0x73, 0x74, // "rust" + ]; + + // Expected YAML 
representation + const TEST_YAML: &str = r#"name: John Doe +age: 30 +tags: +- developer +- rust +"#; + + #[test] + fn test_msgpack_deserialize() { + let result = MsgPack + .prettify(TEST_MSGPACK, &TestMetadata::default()) + .unwrap(); + assert_eq!(result, TEST_YAML); + } + + #[test] + fn test_msgpack_serialize() { + let result = MsgPack + .reencode(TEST_YAML, &TestMetadata::default()) + .unwrap(); + + // Verify the MsgPack data contains the expected values + let value: serde_yaml::Value = decode::from_slice(&result).unwrap(); + + if let serde_yaml::Value::Mapping(map) = value { + assert_eq!( + map.get(serde_yaml::Value::String("name".to_string())), + Some(&serde_yaml::Value::String("John Doe".to_string())) + ); + + assert_eq!( + map.get(serde_yaml::Value::String("age".to_string())), + Some(&serde_yaml::Value::Number(serde_yaml::Number::from(30))) + ); + + if let Some(serde_yaml::Value::Sequence(tags)) = + map.get(serde_yaml::Value::String("tags".to_string())) + { + assert_eq!(tags.len(), 2); + assert_eq!(tags[0], serde_yaml::Value::String("developer".to_string())); + assert_eq!(tags[1], serde_yaml::Value::String("rust".to_string())); + } else { + panic!("tags is not a sequence"); + } + } else { + panic!("value is not a mapping"); + } + } + + #[test] + fn test_msgpack_roundtrip() { + // Deserialize to YAML + let yaml_result = MsgPack + .prettify(TEST_MSGPACK, &TestMetadata::default()) + .unwrap(); + + // Serialize back to MsgPack + let result = MsgPack + .reencode(&yaml_result, &TestMetadata::default()) + .unwrap(); + + // Deserialize both the original and the result to Values for comparison + let original_value: serde_yaml::Value = decode::from_slice(TEST_MSGPACK).unwrap(); + let result_value: serde_yaml::Value = decode::from_slice(&result).unwrap(); + + // Compare the values + assert_eq!(original_value, result_value); + } + + #[test] + fn test_invalid_yaml() { + let err = MsgPack + .reencode("@invalid_yaml", &TestMetadata::default()) + .unwrap_err(); + 
assert_eq!(format!("{err}"), "Invalid YAML"); + } +} diff --git a/mitmproxy-contentviews/src/protobuf/existing_proto_definitions.rs b/mitmproxy-contentviews/src/protobuf/existing_proto_definitions.rs new file mode 100644 index 00000000..5dd6e14a --- /dev/null +++ b/mitmproxy-contentviews/src/protobuf/existing_proto_definitions.rs @@ -0,0 +1,161 @@ +use crate::protobuf::raw_to_proto::new_empty_descriptor; +use crate::Metadata; +use anyhow::Context; +use protobuf::reflect::{FileDescriptor, MessageDescriptor}; +use protobuf_parse::Parser; +use std::path::Path; + +pub(super) struct DescriptorWithDeps { + pub descriptor: MessageDescriptor, + pub dependencies: Vec, +} + +impl Default for DescriptorWithDeps { + fn default() -> Self { + Self { + descriptor: new_empty_descriptor(None, "Unknown"), + dependencies: vec![], + } + } +} + +pub(super) fn find_best_match( + metadata: &dyn Metadata, +) -> anyhow::Result> { + // Parse existing protobuf definitions if available + let Some(file_descriptors) = metadata + .protobuf_definitions() + .map(parse_file_descriptor_set) + .transpose() + .context("failed to parse proto file(s)")? + else { + return Ok(None); + }; + + // Find MessageDescriptor for the RPC. 
+ let rpc_info = RpcInfo::from_metadata(metadata); + let Some(descriptor) = + find_best_message(&file_descriptors, rpc_info, metadata.is_http_request()) + else { + return Ok(None); + }; + + Ok(Some(DescriptorWithDeps { + descriptor, + dependencies: file_descriptors, + })) +} + +fn find_best_message( + fds: &[FileDescriptor], + rpc: Option, + is_request: bool, +) -> Option { + if let Some(rpc) = rpc { + for file in fds { + if file.proto().package() != rpc.package { + continue; + } + for service in file.services() { + if service.proto().name() != rpc.service { + continue; + } + for method in service.methods() { + if method.proto().name() != rpc.method { + continue; + } + + return Some(if is_request { + method.input_type() + } else { + method.output_type() + }); + } + log::info!( + "Found service {} in {}, but no method '{}'.", + rpc.service, + file.name(), + rpc.method + ); + } + } + log::info!("Did not find {rpc} in protobuf definitions."); + } + + let file = fds.first()?; + if let Some(service) = file.services().next() { + if let Some(method) = service.methods().next() { + log::info!( + "Falling back to first defined service in {}: {}", + file.name(), + service.proto().name() + ); + return Some(if is_request { + method.input_type() + } else { + method.output_type() + }); + } + } + if let Some(method) = file.messages().next() { + log::info!( + "Falling back to first defined message in {}: {}", + file.name(), + method.proto().name() + ); + return Some(method); + } + None +} + +#[derive(Debug)] +struct RpcInfo { + package: String, + service: String, + method: String, +} + +impl RpcInfo { + fn from_metadata(metadata: &dyn Metadata) -> Option { + let path = metadata.get_path()?; + if path.contains('?') { + return None; + } + let mut parts = path.trim_start_matches('/').split('/'); + let service_and_package = parts.next()?; + let method = parts.next()?; + if parts.next().is_some() { + return None; + } + let (package, service) = service_and_package + .rsplit_once('.') + 
.unwrap_or(("", service_and_package)); + + Some(Self { + package: package.to_string(), + service: service.to_string(), + method: method.to_string(), + }) + } +} + +impl std::fmt::Display for RpcInfo { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if !self.package.is_empty() { + write!(f, "{}.", self.package)?; + } + write!(f, "{}.{}", self.service, self.method) + } +} + +fn parse_file_descriptor_set(definitions_path: &Path) -> anyhow::Result> { + let mut parser = Parser::new(); + parser.pure(); + if let Some(parent) = definitions_path.parent() { + parser.include(parent); + } + parser.input(definitions_path); + let fds = parser.file_descriptor_set()?; + FileDescriptor::new_dynamic_fds(fds.file, &[]) + .context("failed to create dynamic file descriptors") +} diff --git a/mitmproxy-contentviews/src/protobuf/mod.rs b/mitmproxy-contentviews/src/protobuf/mod.rs new file mode 100644 index 00000000..9a740744 --- /dev/null +++ b/mitmproxy-contentviews/src/protobuf/mod.rs @@ -0,0 +1,10 @@ +mod existing_proto_definitions; +mod proto_to_yaml; +mod raw_to_proto; +mod reencode; +mod view_grpc; +mod view_protobuf; +mod yaml_to_pretty; + +pub use view_grpc::GRPC; +pub use view_protobuf::Protobuf; diff --git a/mitmproxy-contentviews/src/protobuf/proto_to_yaml.rs b/mitmproxy-contentviews/src/protobuf/proto_to_yaml.rs new file mode 100644 index 00000000..58911d4d --- /dev/null +++ b/mitmproxy-contentviews/src/protobuf/proto_to_yaml.rs @@ -0,0 +1,100 @@ +use crate::protobuf::view_protobuf::tags; +/// Parsed protobuf message => YAML value +use protobuf::descriptor::field_descriptor_proto::Type; +use protobuf::descriptor::field_descriptor_proto::Type::{ + TYPE_BYTES, TYPE_FIXED32, TYPE_FIXED64, TYPE_UINT64, +}; +use protobuf::reflect::{ReflectFieldRef, ReflectValueRef}; +use protobuf::MessageDyn; +use serde_yaml::value::TaggedValue; +use serde_yaml::{Mapping, Number, Value}; +use std::ops::Deref; + +pub(super) fn message_to_yaml(message: &dyn MessageDyn) -> 
Value { + let mut ret = Mapping::new(); + + for field in message.descriptor_dyn().fields() { + let key = if field.name().starts_with("unknown_field_") { + Value::from(field.number()) + } else { + Value::from(field.name()) + }; + let field_type = field + .proto() + .type_ + .map(|t| t.enum_value_or(TYPE_BYTES)) + .unwrap_or(TYPE_BYTES); + + let value = match field.get_reflect(message) { + ReflectFieldRef::Optional(x) => { + if let Some(x) = x.value() { + value_to_yaml(x, field_type) + } else { + continue; + } + } + ReflectFieldRef::Repeated(x) => { + if x.is_empty() { + continue; + } + Value::Sequence( + x.into_iter() + .map(|x| value_to_yaml(x, field_type)) + .collect(), + ) + } + ReflectFieldRef::Map(x) => { + if x.is_empty() { + continue; + } + Value::Mapping( + x.into_iter() + .map(|(k, v)| (value_to_yaml(k, field_type), value_to_yaml(v, field_type))) + .collect(), + ) + } + }; + ret.insert(key, value); + } + Value::Mapping(ret) +} + +fn value_to_yaml(x: ReflectValueRef, field_type: Type) -> Value { + match x { + ReflectValueRef::U32(x) => tag_number(Value::Number(Number::from(x)), field_type), + ReflectValueRef::U64(x) => tag_number(Value::Number(Number::from(x)), field_type), + ReflectValueRef::I32(x) => Value::Number(Number::from(x)), + ReflectValueRef::I64(x) => Value::Number(Number::from(x)), + ReflectValueRef::F32(x) => Value::Number(Number::from(x)), + ReflectValueRef::F64(x) => Value::Number(Number::from(x)), + ReflectValueRef::Bool(x) => Value::from(x), + ReflectValueRef::String(x) => Value::from(x), + ReflectValueRef::Bytes(x) => Value::Tagged(Box::new(TaggedValue { + tag: tags::BINARY.clone(), + value: Value::String(data_encoding::HEXLOWER.encode(x)), + })), + ReflectValueRef::Enum(descriptor, i) => descriptor + .value_by_number(i) + .map(|v| Value::String(v.name().to_string())) + .unwrap_or_else(|| Value::Number(Number::from(i))), + ReflectValueRef::Message(m) => message_to_yaml(m.deref()), + } +} + +fn tag_number(value: Value, field_type: Type) -> 
Value { + match field_type { + TYPE_UINT64 => Value::Tagged(Box::new(TaggedValue { + tag: tags::VARINT.clone(), + value, + })), + TYPE_FIXED64 => Value::Tagged(Box::new(TaggedValue { + tag: tags::FIXED64.clone(), + value, + })), + TYPE_FIXED32 => Value::Tagged(Box::new(TaggedValue { + tag: tags::FIXED32.clone(), + value, + })), + _ => value, + } +} diff --git a/mitmproxy-contentviews/src/protobuf/raw_to_proto.rs b/mitmproxy-contentviews/src/protobuf/raw_to_proto.rs new file mode 100644 index 00000000..613bf709 --- /dev/null +++ b/mitmproxy-contentviews/src/protobuf/raw_to_proto.rs @@ -0,0 +1,192 @@ +use crate::protobuf::existing_proto_definitions::DescriptorWithDeps; +use anyhow::Context; +use protobuf::descriptor::field_descriptor_proto::Label::LABEL_REPEATED; +use protobuf::descriptor::field_descriptor_proto::Type; +use protobuf::descriptor::field_descriptor_proto::Type::{ + TYPE_BYTES, TYPE_FIXED32, TYPE_FIXED64, TYPE_STRING, TYPE_UINT64, +}; +use protobuf::descriptor::{DescriptorProto, FieldDescriptorProto, FileDescriptorProto}; +use protobuf::reflect::{FileDescriptor, MessageDescriptor}; +use protobuf::{EnumOrUnknown, MessageDyn, UnknownValueRef}; +/// Existing protobuf definition + raw data => merged protobuf definition +use std::collections::BTreeMap; + +enum GuessedFieldType { + String, + Message(Box), + Unknown, +} + +/// Create a "merged" MessageDescriptor. Mostly a wrapper around `create_descriptor_proto`. 
+pub(super) fn merge_proto_and_descriptor( + data: &[u8], + desc: &DescriptorWithDeps, +) -> anyhow::Result { + let new_proto = create_descriptor_proto(data, &desc.descriptor)?; + + let descriptor = { + let mut file_descriptor_proto = desc.descriptor.file_descriptor_proto().clone(); + + let message_idx = file_descriptor_proto + .message_type + .iter() + .enumerate() + .filter_map(|(i, d)| (d.name() == desc.descriptor.name_to_package()).then_some(i)) + .next() + .context("failed to find existing message descriptor index")?; + file_descriptor_proto.message_type[message_idx] = new_proto; + + /* + XXX: Skipping this as it doesn't seem to bring any immediate benefits. + let dependencies = dependencies + .iter() + .cloned() + .filter(|d| d != existing.file_descriptor()) + .collect::>(); + */ + + FileDescriptor::new_dynamic(file_descriptor_proto, &desc.dependencies) + .context("failed to create new dynamic file descriptor")? + .message_by_package_relative_name(desc.descriptor.name_to_package()) + .with_context(|| { + format!( + "did not find {} in descriptor", + desc.descriptor.name_to_package() + ) + })? + }; + + Ok(descriptor) +} + +/// Create a new (empty) MessageDescriptor for the given package and name. +pub(super) fn new_empty_descriptor(package: Option, name: &str) -> MessageDescriptor { + // Create nested descriptor protos. For example, if the name is OuterMessage.InnerMessage, + // we create a descriptor for InnerMessage and set it as a nested type of OuterMessage. + // This is a bit of a hack, but the best way to get type_name right. 
+ let mut parts = name.rsplit("."); + let mut head = { + let mut descriptor = DescriptorProto::new(); + descriptor.set_name(parts.next().unwrap().to_string()); + descriptor + }; + for p in parts { + let mut descriptor = DescriptorProto::new(); + descriptor.set_name(p.to_string()); + descriptor.nested_type.push(head); + head = descriptor; + } + + let file_descriptor_proto = { + let mut fd = FileDescriptorProto::new(); + fd.package = package; + fd.message_type.push(head); + fd + }; + FileDescriptor::new_dynamic(file_descriptor_proto, &[]) + .unwrap() + .message_by_package_relative_name(name) + .unwrap() +} + +/// Create a DescriptorProto that combines the `existing` MessageDescriptor with (guessed) +/// metadata for all unknown fields in the protobuf `data`. +fn create_descriptor_proto( + data: &[u8], + existing: &MessageDescriptor, +) -> anyhow::Result { + let message = existing + .parse_from_bytes(data) + .with_context(|| format!("failed to parse protobuf: {}", existing.full_name()))?; + + let mut descriptor = existing.proto().clone(); + + let mut field_groups: BTreeMap> = BTreeMap::new(); + for (field_number, value) in message.unknown_fields_dyn().iter() { + field_groups.entry(field_number).or_default().push(value); + } + + for (field_index, field_values) in field_groups.into_iter() { + let name = Some(format!("unknown_field_{}", field_index)); + let mut add_int = |name: Option, typ: Type| { + descriptor.field.push(FieldDescriptorProto { + number: Some(field_index as i32), + name, + type_: Some(EnumOrUnknown::from(typ)), + ..Default::default() + }); + }; + match field_values[0] { + // We can't use float/double here because of NaN handling. 
+ UnknownValueRef::Fixed32(_) => add_int(name, TYPE_FIXED32), + UnknownValueRef::Fixed64(_) => add_int(name, TYPE_FIXED64), + UnknownValueRef::Varint(_) => add_int(name, TYPE_UINT64), + UnknownValueRef::LengthDelimited(_) => { + let field_values = field_values + .iter() + .map(|x| match x { + UnknownValueRef::LengthDelimited(data) => Ok(*data), + _ => Err(anyhow::anyhow!("varying types in protobuf")), + }) + .collect::>>()?; + + match guess_field_type(existing, field_index, &field_values) { + GuessedFieldType::String => add_int(name, TYPE_STRING), + GuessedFieldType::Unknown => add_int(name, TYPE_BYTES), + GuessedFieldType::Message(m) => { + descriptor.field.push(FieldDescriptorProto { + name, + number: Some(field_index as i32), + type_name: Some(format!(".{}.{}", existing.full_name(), m.name())), + type_: Some(EnumOrUnknown::from(Type::TYPE_MESSAGE)), + ..Default::default() + }); + descriptor.nested_type.push(*m); + } + } + } + } + if field_values.len() > 1 { + descriptor + .field + .last_mut() + .expect("we just added this field") + .set_label(LABEL_REPEATED); + } + } + + Ok(descriptor) +} + +/// Given all `values` of a field, guess its type. 
+fn guess_field_type( + parent: &MessageDescriptor, + field_index: u32, + values: &[&[u8]], +) -> GuessedFieldType { + if values.iter().all(|data| { + std::str::from_utf8(data).is_ok_and(|s| { + s.chars() + .all(|c| c.is_ascii_graphic() || c.is_ascii_whitespace()) + }) + }) { + return GuessedFieldType::String; + } + + // Try to parse as a nested message + let existing = new_empty_descriptor( + parent.file_descriptor_proto().package.clone(), + &format!("{}.UnknownField{}", parent.name_to_package(), field_index), + ); + if let Ok(descriptor) = create_descriptor_proto(values[0], &existing) { + if values + .iter() + .skip(1) + .all(|data| descriptor.descriptor_dyn().parse_from_bytes(data).is_ok()) + { + return GuessedFieldType::Message(Box::new(descriptor)); + } + } + + GuessedFieldType::Unknown +} diff --git a/mitmproxy-contentviews/src/protobuf/reencode.rs b/mitmproxy-contentviews/src/protobuf/reencode.rs new file mode 100644 index 00000000..d2daa8c5 --- /dev/null +++ b/mitmproxy-contentviews/src/protobuf/reencode.rs @@ -0,0 +1,156 @@ +use crate::protobuf::view_protobuf::tags; +use anyhow::{bail, Context}; +use protobuf::descriptor::field_descriptor_proto::Type; +use protobuf::descriptor::field_descriptor_proto::Type::{TYPE_FIXED32, TYPE_FIXED64}; +use protobuf::reflect::{FieldDescriptor, MessageDescriptor, RuntimeFieldType, RuntimeType}; +use protobuf::well_known_types::empty::Empty; +use protobuf::{MessageDyn, MessageFull, UnknownValue}; +use serde_yaml::{Number, Value}; +use std::num::ParseIntError; +use std::str::FromStr; + +pub(super) fn reencode_yaml( + value: Value, + descriptor: &MessageDescriptor, +) -> anyhow::Result> { + let message = descriptor.new_instance(); + merge_yaml_into_message(value, message) +} + +fn merge_yaml_into_message( + value: Value, + mut message: Box, +) -> anyhow::Result> { + let Value::Mapping(mapping) = value else { + bail!("YAML is not a mapping"); + }; + + for (key, value) in mapping.into_iter() { + let field_num = match key { + 
Value::String(key) => { + if let Some(field) = message.descriptor_dyn().field_by_name(&key) { + field.number() + } else if let Ok(field_num) = i32::from_str(&key) { + field_num + } else { + bail!("Unknown protobuf field key: {key}"); + } + } + Value::Number(key) => { + let Some(field_num) = key.as_i64() else { + bail!("Invalid protobuf field number: {key}"); + }; + field_num as i32 + } + other => { + bail!("Unexpected key: {other:?}"); + } + } as u32; + + add_field(message.as_mut(), field_num, value)?; + } + + message + .write_to_bytes_dyn() + .context("Failed to serialize protobuf") +} + +fn add_field(message: &mut dyn MessageDyn, field_num: u32, value: Value) -> anyhow::Result<()> { + let value = match value { + Value::Null => return Ok(()), + Value::Sequence(seq) => { + for s in seq.into_iter() { + add_field(message, field_num, s)?; + } + return Ok(()); + } + Value::Tagged(t) => { + // t.tag doesn't work for Match statements + if t.tag == *tags::BINARY { + let value = match t.value { + Value::String(s) => s, + _ => bail!("Binary data is not a string"), + }; + let value = (0..value.len()) + .step_by(2) + .map(|i| u8::from_str_radix(&value[i..i + 2], 16)) + .collect::, ParseIntError>>() + .context("Invalid hex string")?; + UnknownValue::LengthDelimited(value) + } else if t.tag == *tags::FIXED32 { + let value = match t.value { + Value::Number(s) if s.as_u64().is_some() => s.as_u64().unwrap(), + _ => bail!("Fixed32 data is not a u32"), + }; + UnknownValue::Fixed32(value as u32) + } else if t.tag == *tags::FIXED64 { + let value = match t.value { + Value::Number(s) if s.as_u64().is_some() => s.as_u64().unwrap(), + _ => bail!("Fixed64 data is not a u64"), + }; + UnknownValue::Fixed64(value) + } else { + log::info!("Unexpected YAML tag {}, discarding.", t.tag); + return add_field(message, field_num, t.value); + } + } + Value::Bool(b) => UnknownValue::Varint(b as u64), + Value::Number(n) => { + let field = message.descriptor_dyn().field_by_number(field_num); + 
int_value(n, field.as_ref()) + } + Value::String(s) => UnknownValue::LengthDelimited(s.into_bytes()), + Value::Mapping(m) => { + let mut descriptor = Empty::descriptor(); + if let Some(field) = message.descriptor_dyn().field_by_number(field_num) { + if let RuntimeFieldType::Singular(RuntimeType::Message(md)) = + field.runtime_field_type() + { + descriptor = md; + } else if let RuntimeFieldType::Map(_, _) = field.runtime_field_type() { + // TODO: handle maps. + } + } + let child_message = descriptor.new_instance(); + let val = merge_yaml_into_message(Value::Mapping(m), child_message)?; + UnknownValue::LengthDelimited(val) + } + }; + message.mut_unknown_fields_dyn().add_value(field_num, value); + Ok(()) +} + +fn int_value(n: Number, field: Option<&FieldDescriptor>) -> UnknownValue { + if let Some(field) = field { + if let Some(typ) = field.proto().type_.and_then(|t| t.enum_value().ok()) { + match typ { + TYPE_FIXED64 | Type::TYPE_SFIXED64 | Type::TYPE_DOUBLE => { + return if let Some(n) = n.as_u64() { + UnknownValue::Fixed64(n) + } else if let Some(n) = n.as_i64() { + UnknownValue::sfixed64(n) + } else { + UnknownValue::double(n.as_f64().expect("as_f64 never fails")) + } + } + TYPE_FIXED32 | Type::TYPE_SFIXED32 | Type::TYPE_FLOAT => { + return if let Some(n) = n.as_u64() { + UnknownValue::Fixed32(n as u32) + } else if let Some(n) = n.as_i64() { + UnknownValue::sfixed32(n as i32) + } else { + UnknownValue::float(n.as_f64().expect("as_f64 never fails") as f32) + } + } + _ => (), + } + } + } + if let Some(n) = n.as_u64() { + UnknownValue::Varint(n) + } else if let Some(n) = n.as_i64() { + UnknownValue::int64(n) + } else { + UnknownValue::double(n.as_f64().expect("as_f64 never fails")) + } +} diff --git a/mitmproxy-contentviews/src/protobuf/view_grpc.rs b/mitmproxy-contentviews/src/protobuf/view_grpc.rs new file mode 100644 index 00000000..aa7a9f90 --- /dev/null +++ b/mitmproxy-contentviews/src/protobuf/view_grpc.rs @@ -0,0 +1,226 @@ +use 
super::{existing_proto_definitions, reencode}; +use crate::{Metadata, Prettify, Protobuf, Reencode}; +use anyhow::{bail, Context, Result}; +use flate2::read::{DeflateDecoder, GzDecoder}; +use mitmproxy_highlight::Language; +use serde::Deserialize; +use serde_yaml::Value; +use std::io::Read; + +pub struct GRPC; + +impl Prettify for GRPC { + fn name(&self) -> &'static str { + "gRPC" + } + + fn syntax_highlight(&self) -> Language { + Language::Yaml + } + + fn prettify(&self, mut data: &[u8], metadata: &dyn Metadata) -> Result { + let mut protos = vec![]; + + let descriptor = existing_proto_definitions::find_best_match(metadata)?.unwrap_or_default(); + + while !data.is_empty() { + let compressed = match data[0] { + 0 => false, + 1 => true, + _ => bail!("invalid gRPC: first byte is not a boolean"), + }; + let len = match data.get(1..5) { + Some(x) => u32::from_be_bytes(x.try_into()?) as usize, + _ => bail!("invalid gRPC: not enough bytes"), + }; + let Some(proto) = data.get(5..5 + len) else { + bail!("Invalid gRPC: not enough data") + }; + + let mut decompressed = Vec::new(); + let proto = if compressed { + let encoding = metadata.get_header("grpc-encoding").unwrap_or_default(); + match encoding.as_str() { + "deflate" => { + let mut decoder = DeflateDecoder::new(proto); + decoder.read_to_end(&mut decompressed)?; + &decompressed + } + "gzip" => { + let mut decoder = GzDecoder::new(proto); + decoder.read_to_end(&mut decompressed)?; + &decompressed + } + "identity" => proto, + _ => bail!("unsupported compression: {}", encoding), + } + } else { + proto + }; + protos.push(Protobuf.prettify_with_descriptor(proto, &descriptor)?); + data = &data[5 + len..]; + } + + Ok(protos.join("\n---\n\n")) + } + + fn render_priority(&self, _data: &[u8], metadata: &dyn Metadata) -> f64 { + match metadata.content_type() { + Some("application/grpc") => 1.0, + Some("application/grpc+proto") => 1.0, + Some("application/prpc") => 1.0, + _ => 0.0, + } + } +} + +impl Reencode for GRPC { + fn 
reencode(&self, data: &str, metadata: &dyn Metadata) -> Result> { + let descriptor = existing_proto_definitions::find_best_match(metadata)? + .unwrap_or_default() + .descriptor; + let mut ret = vec![]; + for document in serde_yaml::Deserializer::from_str(data) { + let value = Value::deserialize(document).context("Invalid YAML")?; + let proto = reencode::reencode_yaml(value, &descriptor)?; + ret.push(0); // uncompressed + ret.extend(u32::to_be_bytes(proto.len() as u32)); + ret.extend(proto); + } + Ok(ret) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test::TestMetadata; + + const TEST_YAML: &str = "1: 150\n\n---\n\n1: 150\n"; + const TEST_YAML_KNOWN: &str = "example: 150\n\n---\n\nexample: 150\n"; + const TEST_GRPC: &[u8] = &[ + 0, 0, 0, 0, 3, 8, 150, 1, // first message + 0, 0, 0, 0, 3, 8, 150, 1, // second message + ]; + + const TEST_GZIP: &[u8] = &[ + 1, 0, 0, 0, 23, // compressed flag and length + 31, 139, 8, 0, 0, 0, 0, 0, 0, 255, 227, 152, 198, 8, 0, 160, 149, 78, 161, 3, 0, 0, + 0, // gzip data + ]; + + const TEST_DEFLATE: &[u8] = &[ + 1, 0, 0, 0, 5, // compressed flag and length + 227, 152, 198, 8, 0, // deflate data + ]; + + #[test] + fn test_empty() { + let res = GRPC.prettify(&[], &TestMetadata::default()).unwrap(); + assert_eq!(res, ""); + } + + #[test] + fn test_prettify_two_messages() { + let res = GRPC.prettify(TEST_GRPC, &TestMetadata::default()).unwrap(); + assert_eq!(res, TEST_YAML); + } + + #[test] + fn test_prettify_gzip() { + let metadata = TestMetadata::default().with_header("grpc-encoding", "gzip"); + let res = GRPC.prettify(TEST_GZIP, &metadata).unwrap(); + assert_eq!(res, "1: 150\n"); + } + + #[test] + fn test_prettify_deflate() { + let metadata = TestMetadata::default().with_header("grpc-encoding", "deflate"); + let res = GRPC.prettify(TEST_DEFLATE, &metadata).unwrap(); + assert_eq!(res, "1: 150\n"); + } + + #[test] + fn test_reencode_two_messages() { + let res = GRPC.reencode(TEST_YAML, 
&TestMetadata::default()).unwrap(); + assert_eq!(res, TEST_GRPC); + } + + #[test] + fn test_render_priority() { + assert_eq!( + GRPC.render_priority( + b"", + &TestMetadata::default().with_content_type("application/grpc") + ), + 1.0 + ); + assert_eq!( + GRPC.render_priority( + b"", + &TestMetadata::default().with_content_type("text/plain") + ), + 0.0 + ); + } + + #[test] + fn test_existing_proto() { + let metadata = TestMetadata::default().with_protobuf_definitions(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/protobuf/simple.proto" + )); + let res = GRPC.prettify(TEST_GRPC, &metadata).unwrap(); + assert_eq!(res, TEST_YAML_KNOWN); + } + + #[test] + fn test_existing_service_request() { + let metadata = TestMetadata::default() + .with_is_http_request(true) + .with_path("/Service/Method") + .with_protobuf_definitions(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/protobuf/simple_service.proto" + )); + let req = GRPC.prettify(TEST_GRPC, &metadata).unwrap(); + assert_eq!(req, TEST_YAML); + } + + #[test] + fn test_existing_service_response() { + let metadata = TestMetadata::default() + .with_is_http_request(false) + .with_path("/Service/Method") + .with_protobuf_definitions(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/protobuf/simple_service.proto" + )); + let req = GRPC.prettify(TEST_GRPC, &metadata).unwrap(); + assert_eq!(req, TEST_YAML_KNOWN); + } + + #[test] + fn test_existing_package() { + let metadata = TestMetadata::default() + .with_path("/example.simple.Service/Method") + .with_protobuf_definitions(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/protobuf/simple_package.proto" + )); + let req = GRPC.prettify(TEST_GRPC, &metadata).unwrap(); + assert_eq!(req, TEST_YAML_KNOWN); + } + + #[test] + fn test_existing_nested() { + let metadata = TestMetadata::default() + .with_path("/example.nested.Service/Method") + .with_protobuf_definitions(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/protobuf/nested.proto" + )); + let req = 
GRPC.prettify(TEST_GRPC, &metadata).unwrap(); + assert_eq!(req, TEST_YAML_KNOWN); + } +} diff --git a/mitmproxy-contentviews/src/protobuf/view_protobuf.rs b/mitmproxy-contentviews/src/protobuf/view_protobuf.rs new file mode 100644 index 00000000..c2ae25db --- /dev/null +++ b/mitmproxy-contentviews/src/protobuf/view_protobuf.rs @@ -0,0 +1,243 @@ +use crate::protobuf::existing_proto_definitions::DescriptorWithDeps; +use crate::protobuf::{ + existing_proto_definitions, proto_to_yaml, raw_to_proto, reencode, yaml_to_pretty, +}; +use crate::{Metadata, Prettify, Reencode}; +use anyhow::{Context, Result}; +use mitmproxy_highlight::Language; +use serde_yaml::Value; + +pub(super) mod tags { + use regex::Regex; + use serde_yaml::value::Tag; + use std::sync::LazyLock; + + pub static BINARY: LazyLock = LazyLock::new(|| Tag::new("binary")); + pub static VARINT: LazyLock = LazyLock::new(|| Tag::new("varint")); + pub static FIXED32: LazyLock = LazyLock::new(|| Tag::new("fixed32")); + pub static FIXED64: LazyLock = LazyLock::new(|| Tag::new("fixed64")); + + pub static VARINT_RE: LazyLock = + LazyLock::new(|| Regex::new(&format!(r"{} (\d+)", *VARINT)).unwrap()); + pub static FIXED32_RE: LazyLock = + LazyLock::new(|| Regex::new(&format!(r"{} (\d+)", *FIXED32)).unwrap()); + pub static FIXED64_RE: LazyLock = + LazyLock::new(|| Regex::new(&format!(r"{} (\d+)", *FIXED64)).unwrap()); +} + +pub struct Protobuf; + +impl Prettify for Protobuf { + fn name(&self) -> &str { + "Protobuf" + } + + fn syntax_highlight(&self) -> Language { + Language::Yaml + } + + fn prettify(&self, data: &[u8], metadata: &dyn Metadata) -> Result { + let descriptor = existing_proto_definitions::find_best_match(metadata)?.unwrap_or_default(); + self.prettify_with_descriptor(data, &descriptor) + } + + fn render_priority(&self, _data: &[u8], metadata: &dyn Metadata) -> f64 { + match metadata.content_type() { + Some("application/x-protobuf") => 1.0, + Some("application/x-protobuffer") => 1.0, + _ => 0.0, + } + } +} + 
+impl Reencode for Protobuf { + fn reencode(&self, data: &str, metadata: &dyn Metadata) -> Result> { + let descriptor = existing_proto_definitions::find_best_match(metadata)? + .unwrap_or_default() + .descriptor; + let value: Value = serde_yaml::from_str(data).context("Invalid YAML")?; + reencode::reencode_yaml(value, &descriptor) + } +} + +impl Protobuf { + pub(super) fn prettify_with_descriptor( + &self, + data: &[u8], + descriptor: &DescriptorWithDeps, + ) -> Result { + // Check if data is empty first + if data.is_empty() { + return Ok("{} # empty protobuf message".to_string()); + } + + let descriptor = raw_to_proto::merge_proto_and_descriptor(data, descriptor)?; + + // Parse protobuf and convert to YAML + let message = descriptor + .parse_from_bytes(data) + .context("Error parsing protobuf")?; + let yaml_value = proto_to_yaml::message_to_yaml(message.as_ref()); + + let yaml_str = serde_yaml::to_string(&yaml_value).context("Failed to convert to YAML")?; + yaml_to_pretty::apply_replacements(&yaml_str) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test::TestMetadata; + + macro_rules! 
test_roundtrip { + ($name:ident,$proto:literal,$yaml:literal) => { + mod $name { + use super::*; + + pub(super) const PROTO: &[u8] = $proto; + pub(super) const YAML: &str = $yaml; + + #[test] + fn prettify() { + let result = Protobuf.prettify(PROTO, &TestMetadata::default()).unwrap(); + assert_eq!(result, YAML); + } + + #[test] + fn reencode() { + let result = Protobuf.reencode(YAML, &TestMetadata::default()).unwrap(); + assert_eq!(result, PROTO); + } + } + }; + } + + test_roundtrip!(varint, b"\x08\x96\x01", "1: 150\n"); + test_roundtrip!(varint_negative, b"\x08\x0B", "1: 11 # signed: -6\n"); + test_roundtrip!(binary, b"\x32\x03\x01\x02\x03", "6: !binary '010203'\n"); + test_roundtrip!(string, b"\x0A\x05\x68\x65\x6C\x6C\x6F", "1: hello\n"); + test_roundtrip!(nested, b"\x2A\x02\x08\x2A", "5:\n 1: 42\n"); + test_roundtrip!( + nested_twice, + b"\x2A\x04\x2A\x02\x08\x2A", + "5:\n 5:\n 1: 42\n" + ); + test_roundtrip!( + fixed64, + b"\x19\x00\x00\x00\x00\x00\x00\xF0\xBF", + "3: !fixed64 13830554455654793216 # double: -1, i64: -4616189618054758400\n" + ); + test_roundtrip!( + fixed64_positive, + b"\x19\x6E\x86\x1B\xF0\xF9\x21\x09\x40", + "3: !fixed64 4614256650576692846 # double: 3.14159\n" + ); + test_roundtrip!( + fixed64_no_float, + b"\x19\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", + "3: !fixed64 18446744073709551615 # i64: -1\n" + ); + test_roundtrip!( + fixed64_positive_no_float, + b"\x19\x01\x00\x00\x00\x00\x00\xF8\x7F", + "3: !fixed64 9221120237041090561\n" + ); + test_roundtrip!( + fixed32, + b"\x15\x00\x00\x80\xBF", + "2: !fixed32 3212836864 # float: -1, i32: -1082130432\n" + ); + test_roundtrip!( + fixed32_positive, + b"\x15\xD0\x0F\x49\x40", + "2: !fixed32 1078530000 # float: 3.14159\n" + ); + test_roundtrip!( + fixed32_no_float, + b"\x15\xFF\xFF\xFF\xFF", + "2: !fixed32 4294967295 # i32: -1\n" + ); + test_roundtrip!( + fixed32_positive_no_float, + b"\x15\x01\x00\xC0\x7F", + "2: !fixed32 2143289345\n" + ); + // From docs: "message Test5 { repeated int32 f = 6 
[packed=true]; }" + // With values 3, 270, and 86942 + test_roundtrip!( + repeated_packed, + b"\x32\x06\x03\x8E\x02\x9E\xA7\x05", + "6: !binary 038e029ea705\n" + ); + test_roundtrip!( + repeated_varint, + b"\x08\x01\x08\x02\x08\x03", + "1:\n- 1 # signed: -1\n- 2\n- 3 # signed: -2\n" + ); + + #[test] + fn test_invalid_protobuf() { + let result = Protobuf.prettify(b"\xFF\xFF", &TestMetadata::default()); + assert!(result.is_err()); + } + + #[test] + fn test_no_crash() { + let result = Protobuf.prettify( + b"\n\x13gRPC testing server\x12\x07\n\x05Index\x12\x07\n\x05Empty\x12\x0c\n\nDummyUnary\x12\x0f\n\rSpecificError\x12\r\n\x0bRandomError\x12\x0e\n\x0cHeadersUnary\x12\x11\n\x0fNoResponseUnary", + &TestMetadata::default()).unwrap(); + assert_eq!(result, "1: gRPC testing server\n2:\n- 1: Index\n- 1: Empty\n- 1: DummyUnary\n- 1: SpecificError\n- 1: RandomError\n- 1: HeadersUnary\n- 1: NoResponseUnary\n"); + } + + #[test] + fn test_empty_protobuf() { + let result = Protobuf.prettify(b"", &TestMetadata::default()).unwrap(); + assert_eq!(result, "{} # empty protobuf message"); + } + + mod existing_definition { + use super::*; + + const VARINT_PRETTY_YAML: &str = "example: 150\n"; + + #[test] + fn prettify() { + let metadata = TestMetadata::default().with_protobuf_definitions(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/protobuf/simple.proto" + )); + let result = Protobuf.prettify(varint::PROTO, &metadata).unwrap(); + assert_eq!(result, VARINT_PRETTY_YAML); + } + + #[test] + fn prettify_mismatch() { + let metadata = TestMetadata::default().with_protobuf_definitions(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/protobuf/simple.proto" + )); + let result = Protobuf.prettify(string::PROTO, &metadata); + assert!(result.is_err()); + } + + #[test] + fn reencode() { + let metadata = TestMetadata::default().with_protobuf_definitions(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/protobuf/simple.proto" + )); + let result = Protobuf.reencode(VARINT_PRETTY_YAML, 
&metadata).unwrap(); + assert_eq!(result, varint::PROTO); + } + + #[test] + fn reencode_mismatch() { + let metadata = TestMetadata::default().with_protobuf_definitions(concat!( + env!("CARGO_MANIFEST_DIR"), + "/testdata/protobuf/simple.proto" + )); + let result = Protobuf.reencode("example: hello", &metadata).unwrap(); + assert_eq!(result, string::PROTO); + } + } +} diff --git a/mitmproxy-contentviews/src/protobuf/yaml_to_pretty.rs b/mitmproxy-contentviews/src/protobuf/yaml_to_pretty.rs new file mode 100644 index 00000000..e17748c4 --- /dev/null +++ b/mitmproxy-contentviews/src/protobuf/yaml_to_pretty.rs @@ -0,0 +1,72 @@ +/// YAML value => prettified text +use crate::protobuf::view_protobuf::tags; +use regex::Captures; + +// Helper method to apply regex replacements to the YAML output +pub(super) fn apply_replacements(yaml_str: &str) -> anyhow::Result { + // Replace !fixed32 tags with comments showing float and i32 interpretations + let with_fixed32 = tags::FIXED32_RE.replace_all(yaml_str, |caps: &Captures| { + let value = caps[1].parse::().unwrap_or_default(); + let float_value = f32::from_bits(value); + let i32_value = value as i32; + + if !float_value.is_nan() && float_value < 0.0 { + format!( + "{} {} # float: {}, i32: {}", + *tags::FIXED32, + value, + float_value, + i32_value + ) + } else if !float_value.is_nan() { + format!("{} {} # float: {}", *tags::FIXED32, value, float_value) + } else if i32_value < 0 { + format!("{} {} # i32: {}", *tags::FIXED32, value, i32_value) + } else { + format!("{} {}", *tags::FIXED32, value) + } + }); + + // Replace !fixed64 tags with comments showing double and i64 interpretations + let with_fixed64 = tags::FIXED64_RE.replace_all(&with_fixed32, |caps: &Captures| { + let value = caps[1].parse::().unwrap_or_default(); + let double_value = f64::from_bits(value); + let i64_value = value as i64; + + if !double_value.is_nan() && double_value < 0.0 { + format!( + "{} {} # double: {}, i64: {}", + *tags::FIXED64, + value, + double_value, 
+ i64_value + ) + } else if !double_value.is_nan() { + format!("{} {} # double: {}", *tags::FIXED64, value, double_value) + } else if i64_value < 0 { + format!("{} {} # i64: {}", *tags::FIXED64, value, i64_value) + } else { + format!("{} {}", *tags::FIXED64, value) + } + }); + + // Replace !varint tags with comments showing signed interpretation if different + let with_varint = tags::VARINT_RE.replace_all(&with_fixed64, |caps: &Captures| { + let unsigned_value = caps[1].parse::().unwrap_or_default(); + let i64_zigzag = decode_zigzag64(unsigned_value); + + // Only show signed value if it's different from unsigned + if i64_zigzag < 0 { + format!("{} # signed: {}", unsigned_value, i64_zigzag) + } else { + unsigned_value.to_string() + } + }); + + Ok(with_varint.to_string()) +} + +// Decode a zigzag-encoded 64-bit integer +fn decode_zigzag64(n: u64) -> i64 { + ((n >> 1) as i64) ^ (-((n & 1) as i64)) +} diff --git a/mitmproxy-contentviews/testdata/protobuf/nested.proto b/mitmproxy-contentviews/testdata/protobuf/nested.proto new file mode 100644 index 00000000..d3992aa7 --- /dev/null +++ b/mitmproxy-contentviews/testdata/protobuf/nested.proto @@ -0,0 +1,17 @@ +syntax = "proto3"; + +package example.nested; + +service Service { + rpc Method(Empty) returns (Response) {} +} + +message Empty {} + +message Response { + message Nested { + int32 example = 1; + } + int32 example = 1; + Nested nested = 2; +} diff --git a/mitmproxy-contentviews/testdata/protobuf/simple.proto b/mitmproxy-contentviews/testdata/protobuf/simple.proto new file mode 100644 index 00000000..f1eb85bc --- /dev/null +++ b/mitmproxy-contentviews/testdata/protobuf/simple.proto @@ -0,0 +1,5 @@ +syntax = "proto3"; + +message TestMessage { + int32 example = 1; +} diff --git a/mitmproxy-contentviews/testdata/protobuf/simple_package.proto b/mitmproxy-contentviews/testdata/protobuf/simple_package.proto new file mode 100644 index 00000000..fd46b80e --- /dev/null +++ 
b/mitmproxy-contentviews/testdata/protobuf/simple_package.proto @@ -0,0 +1,18 @@ +syntax = "proto3"; + +package example.simple; + +service Other { + rpc Method(Empty) returns (Response) {} +} + +service Service { + // This endpoint + rpc Method(Empty) returns (Response) {} +} + +message Empty {} + +message Response { + int32 example = 1; +} diff --git a/mitmproxy-contentviews/testdata/protobuf/simple_service.proto b/mitmproxy-contentviews/testdata/protobuf/simple_service.proto new file mode 100644 index 00000000..7b12659e --- /dev/null +++ b/mitmproxy-contentviews/testdata/protobuf/simple_service.proto @@ -0,0 +1,16 @@ +syntax = "proto3"; + +service Other { + rpc Method(Empty) returns (Response) {} +} + +service Service { + // This endpoint + rpc Method(Empty) returns (Response) {} +} + +message Empty {} + +message Response { + int32 example = 1; +} diff --git a/mitmproxy-highlight/Cargo.toml b/mitmproxy-highlight/Cargo.toml new file mode 100644 index 00000000..bd36f616 --- /dev/null +++ b/mitmproxy-highlight/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "mitmproxy-highlight" +license = "MIT" +authors.workspace = true +version.workspace = true +repository.workspace = true +edition.workspace = true +rust-version.workspace = true +publish.workspace = true + +[lints] +workspace = true + +[dependencies] +anyhow = { version = "1.0.97", features = ["backtrace"] } +tree-sitter-highlight = "0.25.3" +tree-sitter-yaml = "0.7.0" +tree-sitter-xml = "0.7.0" +tree-sitter = "0.25.3" + +[dev-dependencies] +criterion = "0.5.1" + +[[bench]] +name = "syntax_highlight" +harness = false \ No newline at end of file diff --git a/mitmproxy-highlight/benches/syntax_highlight.rs b/mitmproxy-highlight/benches/syntax_highlight.rs new file mode 100644 index 00000000..bf7d5afe --- /dev/null +++ b/mitmproxy-highlight/benches/syntax_highlight.rs @@ -0,0 +1,57 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use mitmproxy_highlight::Language; + +fn 
criterion_benchmark(c: &mut Criterion) { + c.bench_function("syntax_highlight small", |b| { + b.iter(|| { + Language::Xml + .highlight(black_box( + br#" + + + + + + Bootstrap demo + + +

Hello, world!

+ + "#, + )) + .unwrap() + }) + }); + + let data = "x".repeat(8096); + c.bench_function("syntax_highlight xml", |b| { + b.iter(|| Language::Xml.highlight(black_box(data.as_bytes())).unwrap()) + }); + + // tree_sitter_html is faster, but not by orders of magnitude. + /* + let mut config = HighlightConfiguration::new( + tree_sitter_html::LANGUAGE.into(), + "", + tree_sitter_html::HIGHLIGHTS_QUERY, + "", + "" + ).unwrap(); + let names = config.names().iter().map(|x| x.to_string()).collect::>(); + let tags = names.iter().map(|_| Tag::Text).collect::>(); + config.configure(&names); + + c.bench_function("syntax_highlight html", |b| { + b.iter(|| { + common::highlight( + &config, + &tags, + data.as_bytes(), + ) + }) + }); + */ +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/mitmproxy-highlight/src/common.rs b/mitmproxy-highlight/src/common.rs new file mode 100644 index 00000000..d1225639 --- /dev/null +++ b/mitmproxy-highlight/src/common.rs @@ -0,0 +1,94 @@ +use super::{Chunk, Tag}; +use anyhow::{Context, Result}; +use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter}; + +pub fn highlight( + config: &HighlightConfiguration, + tags: &[Tag], + input: &[u8], +) -> Result> { + let mut highlighter = Highlighter::new(); + let highlights = highlighter + .highlight(config, input, None, |_| None) + .context("failed to highlight")?; + + let mut chunks: Vec = Vec::new(); + let mut tag: Tag = Tag::Text; + + for event in highlights { + let event = event.context("highlighter failure")?; + match event { + HighlightEvent::Source { start, end } => { + let contents = String::from_utf8_lossy(&input[start..end]); + match chunks.last_mut() { + Some(x) if x.0 == tag || contents.trim_ascii().is_empty() => { + x.1.push_str(&contents); + } + _ => chunks.push((tag, contents.to_string())), + } + } + HighlightEvent::HighlightStart(s) => { + tag = tags[s.0]; + } + HighlightEvent::HighlightEnd => { + tag = Tag::Text; + } + } + } 
+ Ok(chunks) +} + +#[cfg(test)] +pub(super) fn test_names_ok( + language: tree_sitter::Language, + highlights_query: &str, + names: &[&str], + tags: &[Tag], +) { + assert_eq!(names.len(), tags.len()); + let config = HighlightConfiguration::new(language, "", highlights_query, "", "").unwrap(); + for &tag in names { + assert!( + config.names().iter().any(|name| name.contains(tag)), + "Invalid tag: {},\nAllowed tags: {:?}", + tag, + config.names() + ); + } +} + +#[allow(unused)] +#[cfg(test)] +pub(super) fn debug(language: tree_sitter::Language, highlights_query: &str, input: &[u8]) { + let mut highlighter = Highlighter::new(); + let mut config = HighlightConfiguration::new(language, "", highlights_query, "", "").unwrap(); + let names = config + .names() + .iter() + .map(|name| name.to_string()) + .collect::>(); + config.configure(&names); + let highlights = highlighter + .highlight(&config, input, None, |_| None) + .unwrap(); + + let mut tag: &str = ""; + for event in highlights { + match event.unwrap() { + HighlightEvent::Source { start, end } => { + let contents = &input[start..end]; + println!( + "{}: {:?}", + tag, + String::from_utf8_lossy(contents).to_string().as_str() + ); + } + HighlightEvent::HighlightStart(s) => { + tag = &names[s.0]; + } + HighlightEvent::HighlightEnd => { + tag = ""; + } + } + } +} diff --git a/mitmproxy-highlight/src/lib.rs b/mitmproxy-highlight/src/lib.rs new file mode 100644 index 00000000..70ec6815 --- /dev/null +++ b/mitmproxy-highlight/src/lib.rs @@ -0,0 +1,98 @@ +use anyhow::bail; +use std::fmt; +use std::fmt::Formatter; +use std::str::FromStr; + +pub mod common; +mod xml; +mod yaml; + +pub type Chunk = (Tag, String); + +pub enum Language { + Xml, + Yaml, + Error, + None, +} + +impl Language { + pub fn highlight(&self, input: &[u8]) -> anyhow::Result> { + match self { + Language::Yaml => yaml::highlight_yaml(input), + Language::Xml => xml::highlight_xml(input), + Language::None => Ok(vec![( + Tag::Text, + 
String::from_utf8_lossy(input).to_string(), + )]), + Language::Error => Ok(vec![( + Tag::Error, + String::from_utf8_lossy(input).to_string(), + )]), + } + } +} + +impl FromStr for Language { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + Ok(match s { + "xml" => Language::Xml, + "yaml" => Language::Yaml, + "none" => Language::None, + "error" => Language::Error, + other => bail!("Unsupported language: {other}"), + }) + } +} + +impl fmt::Display for Language { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "{}", + match self { + Language::Xml => "xml", + Language::Yaml => "yaml", + Language::Error => "error", + Language::None => "none", + } + ) + } +} + +#[derive(Debug, Eq, PartialEq, Copy, Clone)] +pub enum Tag { + Text, // Text that shouldn't be emphasized. + Name, // A tag, such as an HTML tag or a YAML key. + String, // A string value. + Number, // A number value. + Boolean, // A boolean value. + Comment, // A comment. + Error, // An error value. +} + +impl Tag { + pub const VALUES: [Self; 7] = [ + Self::Text, + Self::Name, + Self::String, + Self::Number, + Self::Boolean, + Self::Comment, + Self::Error, + ]; + + pub fn to_str(self) -> &'static str { + match self { + Tag::Text => "", + Tag::Name => "name", + Tag::String => "string", + Tag::Number => "number", + Tag::Boolean => "boolean", + Tag::Comment => "comment", + Tag::Error => "error", + } + } +} diff --git a/mitmproxy-highlight/src/xml.rs b/mitmproxy-highlight/src/xml.rs new file mode 100644 index 00000000..eb3ab939 --- /dev/null +++ b/mitmproxy-highlight/src/xml.rs @@ -0,0 +1,82 @@ +use super::common::highlight; +use super::{Chunk, Tag}; +use anyhow::Result; +use std::sync::LazyLock; +use tree_sitter_highlight::HighlightConfiguration; + +const NAMES: &[&str] = &[ + "tag", //
+ "property", // class or style + "operator", // equal sign between class and value + "comment", // + "punctuation", + "markup", +]; +const TAGS: &[Tag] = &[ + Tag::Name, //
+ Tag::Name, // class or style + Tag::Name, // equal sign between class and value + Tag::Comment, // + Tag::Name, // punctuation + Tag::Text, // markup +]; + +static XML_CONFIG: LazyLock = LazyLock::new(|| { + let mut config = HighlightConfiguration::new( + tree_sitter_xml::LANGUAGE_XML.into(), + "", + tree_sitter_xml::XML_HIGHLIGHT_QUERY, + "", + "", + ) + .expect("failed to build XML syntax highlighter"); + config.configure(NAMES); + config +}); + +pub fn highlight_xml(input: &[u8]) -> Result> { + highlight(&XML_CONFIG, TAGS, input) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::common; + + #[ignore] + #[test] + fn debug() { + common::debug( + tree_sitter_xml::LANGUAGE_XML.into(), + tree_sitter_xml::XML_HIGHLIGHT_QUERY, + b"
Hello
", + ); + } + + #[test] + fn test_tags_ok() { + common::test_names_ok( + tree_sitter_xml::LANGUAGE_XML.into(), + tree_sitter_xml::XML_HIGHLIGHT_QUERY, + NAMES, + TAGS, + ); + } + + #[test] + fn test_highlight_xml() { + let input = b"
Hello
"; + let chunks = highlight_xml(input).unwrap(); + assert_eq!( + chunks, + vec![ + (Tag::Name, "
".to_string()), + (Tag::Text, "Hello".to_string()), + (Tag::Name, "
".to_string()), + (Tag::Comment, "".to_string()) + ] + ); + } +} diff --git a/mitmproxy-highlight/src/yaml.rs b/mitmproxy-highlight/src/yaml.rs new file mode 100644 index 00000000..fe0b79d2 --- /dev/null +++ b/mitmproxy-highlight/src/yaml.rs @@ -0,0 +1,81 @@ +use super::common::highlight; +use super::{Chunk, Tag}; +use anyhow::Result; +use std::sync::LazyLock; +use tree_sitter_highlight::HighlightConfiguration; + +const NAMES: &[&str] = &[ + "boolean", // YAML booleans + "string", // YAML strings + "number", // YAML numbers + "comment", // # comment + "type", // !fixed32 type annotations + "property", // key: +]; +const TAGS: &[Tag] = &[ + Tag::Boolean, + Tag::String, + Tag::Number, + Tag::Comment, + Tag::Name, + Tag::Name, +]; + +static YAML_CONFIG: LazyLock = LazyLock::new(|| { + let mut config = HighlightConfiguration::new( + tree_sitter_yaml::LANGUAGE.into(), + "", + tree_sitter_yaml::HIGHLIGHTS_QUERY, + "", + "", + ) + .expect("failed to build YAML syntax highlighter"); + config.configure(NAMES); + config +}); + +pub fn highlight_yaml(input: &[u8]) -> Result> { + highlight(&YAML_CONFIG, TAGS, input) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::common; + + #[test] + fn test_tags_ok() { + common::test_names_ok( + tree_sitter_yaml::LANGUAGE.into(), + tree_sitter_yaml::HIGHLIGHTS_QUERY, + NAMES, + TAGS, + ); + } + + #[test] + fn test_highlight_yaml() { + let input = b"\ + string: \"value\"\n\ + bool: true\n\ + number: !fixed32 42 # comment\n\ + "; + let chunks = highlight_yaml(input).unwrap(); + assert_eq!( + chunks, + vec![ + (Tag::Name, "string".to_string()), + (Tag::Text, ": ".to_string()), + (Tag::String, "\"value\"\n".to_string()), + (Tag::Name, "bool".to_string()), + (Tag::Text, ": ".to_string()), + (Tag::Boolean, "true\n".to_string()), + (Tag::Name, "number".to_string()), + (Tag::Text, ": ".to_string()), + (Tag::Name, "!fixed32 ".to_string()), + (Tag::Number, "42 ".to_string()), + (Tag::Comment, "# comment\n".to_string()), + ] + ); + } +} 
diff --git a/mitmproxy-rs/Cargo.toml b/mitmproxy-rs/Cargo.toml index 076fdc08..2735d6d0 100644 --- a/mitmproxy-rs/Cargo.toml +++ b/mitmproxy-rs/Cargo.toml @@ -17,10 +17,11 @@ crate-type = ["lib", "cdylib"] [dependencies] mitmproxy = { path = "../" } +mitmproxy-highlight = { path = "../mitmproxy-highlight" } +mitmproxy-contentviews = { path = "../mitmproxy-contentviews" } anyhow = { version = "1.0.97", features = ["backtrace"] } data-encoding = "2.8.0" log = "0.4.27" -once_cell = "1" pyo3 = { version = "0.24", features = ["abi3", "abi3-py312", "anyhow"] } pyo3-async-runtimes = { version = "0.24", features = ["tokio-runtime", "testing", "attributes"] } pyo3-log = "0.12" diff --git a/mitmproxy-rs/mitmproxy_rs/__init__.pyi b/mitmproxy-rs/mitmproxy_rs/__init__.pyi index 75071a38..1afa64af 100644 --- a/mitmproxy-rs/mitmproxy_rs/__init__.pyi +++ b/mitmproxy-rs/mitmproxy_rs/__init__.pyi @@ -2,7 +2,7 @@ from __future__ import annotations from typing import Any, Literal from typing import final, overload, TypeVar -from . import certs, dns, local, process_info, tun, udp, wireguard +from . 
import certs, contentviews, dns, local, process_info, tun, udp, wireguard, syntax_highlight T = TypeVar("T") @@ -57,9 +57,11 @@ class Stream: __all__ = [ "certs", + "contentviews", "dns", "local", "process_info", + "syntax_highlight", "tun", "udp", "wireguard", diff --git a/mitmproxy-rs/mitmproxy_rs/contentviews.pyi b/mitmproxy-rs/mitmproxy_rs/contentviews.pyi new file mode 100644 index 00000000..65911af5 --- /dev/null +++ b/mitmproxy-rs/mitmproxy_rs/contentviews.pyi @@ -0,0 +1,35 @@ +from __future__ import annotations + +from typing import ClassVar, final, Literal + +class Contentview: + name: ClassVar[str] + + syntax_highlight: ClassVar[Literal["xml", "yaml", "none", "error"]] + + def prettify(self, data: bytes, metadata) -> str: + pass + + def render_priority(self, data: bytes, metadata) -> float: + pass + +@final +class InteractiveContentview(Contentview): + def reencode(self, data: str, metadata) -> bytes: + pass + +hex_dump: Contentview +hex_stream: InteractiveContentview +msgpack: InteractiveContentview +protobuf: InteractiveContentview +grpc: InteractiveContentview + +__all__ = [ + "Contentview", + "InteractiveContentview", + "hex_dump", + "hex_stream", + "msgpack", + "protobuf", + "grpc", +] diff --git a/mitmproxy-rs/mitmproxy_rs/syntax_highlight.pyi b/mitmproxy-rs/mitmproxy_rs/syntax_highlight.pyi new file mode 100644 index 00000000..61ef248b --- /dev/null +++ b/mitmproxy-rs/mitmproxy_rs/syntax_highlight.pyi @@ -0,0 +1,15 @@ +from __future__ import annotations + +from typing import Literal + + +def highlight(text: str, language: Literal["xml", "yaml", "error", "none"]) -> list[tuple[str, str]]: + pass + +def tags() -> list[str]: + pass + +__all__ = [ + "highlight", + "tags", +] diff --git a/mitmproxy-rs/src/contentview.rs b/mitmproxy-rs/src/contentview.rs new file mode 100644 index 00000000..036e8c95 --- /dev/null +++ b/mitmproxy-rs/src/contentview.rs @@ -0,0 +1,169 @@ +use mitmproxy_contentviews::{Metadata, Prettify, Reencode}; +use 
pyo3::{exceptions::PyValueError, prelude::*}; +use std::cell::OnceCell; +use std::path::Path; + +pub struct PythonMetadata<'py> { + inner: Bound<'py, PyAny>, + content_type: OnceCell>, + protobuf_definitions: OnceCell>, + path: OnceCell>, +} + +impl<'py> PythonMetadata<'py> { + pub fn new(inner: Bound<'py, PyAny>) -> Self { + PythonMetadata { + inner, + content_type: OnceCell::new(), + protobuf_definitions: OnceCell::new(), + path: OnceCell::new(), + } + } +} + +impl Metadata for PythonMetadata<'_> { + fn content_type(&self) -> Option<&str> { + self.content_type + .get_or_init(|| { + self.inner + .getattr("content_type") + .ok()? + .extract::() + .ok() + }) + .as_deref() + } + + fn get_header(&self, name: &str) -> Option { + let http_message = self.inner.getattr("http_message").ok()?; + let headers = http_message.getattr("headers").ok()?; + headers.get_item(name).ok()?.extract::().ok() + } + + fn get_path(&self) -> Option<&str> { + self.path + .get_or_init(|| { + let flow = self.inner.getattr("flow").ok()?; + let request = flow.getattr("request").ok()?; + request.getattr("path").ok()?.extract::().ok() + }) + .as_deref() + } + + fn protobuf_definitions(&self) -> Option<&Path> { + self.protobuf_definitions + .get_or_init(|| { + self.inner + .getattr("protobuf_definitions") + .ok()? 
+ .extract::() + .ok() + .map(std::path::PathBuf::from) + }) + .as_deref() + } + + /// Whether the message being rendered is the flow's request: true only when `http_message` is the same Python object as `flow.request`, false if any of those attributes cannot be read. + fn is_http_request(&self) -> bool { + let Ok(http_message) = self.inner.getattr("http_message") else { + return false; + }; + // Bind the flow itself; `request` is resolved from it below. Resolving `request` here as well made the next lookup read `flow.request.request`, which fails, so the method always returned false. + let Ok(flow) = self.inner.getattr("flow") else { + return false; + }; + let Ok(request) = flow.getattr("request") else { + return false; + }; + http_message.is(&request) + } +} + +impl<'py> FromPyObject<'py> for PythonMetadata<'py> { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { + Ok(PythonMetadata::new(ob.clone())) + } +} + +#[pyclass(frozen, module = "mitmproxy_rs.contentviews", subclass)] +pub struct Contentview(&'static dyn Prettify); + +impl Contentview { + pub fn new<'py>( + py: Python<'py>, + contentview: &'static dyn Prettify, + ) -> PyResult> { + Contentview(contentview).into_pyobject(py) + } +} + +#[pymethods] +impl Contentview { + /// The name of this contentview. + #[getter] + pub fn name(&self) -> &str { + self.0.name() + } + + /// Pretty-print an (encoded) message. + pub fn prettify(&self, data: Vec, metadata: PythonMetadata) -> PyResult { + self.0 + .prettify(&data, &metadata) + .map_err(|e| PyValueError::new_err(format!("{:?}", e))) + } + + /// Return the priority of this view for rendering data. + pub fn render_priority(&self, data: Vec, metadata: PythonMetadata) -> PyResult { + Ok(self.0.render_priority(&data, &metadata)) + } + + /// Optional syntax highlighting that should be applied to the prettified output. 
+ #[getter] + pub fn syntax_highlight(&self) -> String { + self.0.syntax_highlight().to_string() + } + + fn __lt__(&self, py: Python<'_>, other: PyObject) -> PyResult { + Ok(self.name() < other.getattr(py, "name")?.extract::(py)?.as_str()) + } + + fn __repr__(&self) -> PyResult { + Ok(format!( + "", + self.0.name() + )) + } +} + +#[pyclass(frozen, module = "mitmproxy_rs.contentviews", extends=Contentview)] +pub struct InteractiveContentview(&'static dyn Reencode); + +impl InteractiveContentview { + /// Argument passed twice because of https://github.com/rust-lang/rust/issues/65991 + pub fn new<'py, T: Prettify + Reencode>( + py: Python<'py>, + cv: &'static T, + ) -> PyResult> { + let cls = + PyClassInitializer::from(Contentview(cv)).add_subclass(InteractiveContentview(cv)); + Bound::new(py, cls) + } +} + +#[pymethods] +impl InteractiveContentview { + pub fn reencode(&self, data: &str, metadata: PythonMetadata) -> PyResult> { + self.0 + .reencode(data, &metadata) + .map_err(|e| PyValueError::new_err(format!("{:?}", e))) + } + + fn __repr__(self_: PyRef<'_, Self>) -> PyResult { + Ok(format!( + "", + self_.as_super().name() + )) + } +} diff --git a/mitmproxy-rs/src/dns_resolver.rs b/mitmproxy-rs/src/dns_resolver.rs index f50115ca..6a473cf9 100644 --- a/mitmproxy-rs/src/dns_resolver.rs +++ b/mitmproxy-rs/src/dns_resolver.rs @@ -1,8 +1,8 @@ use mitmproxy::dns::{ResolveError, DNS_SERVERS}; -use once_cell::sync::OnceCell; use pyo3::exceptions::socket::gaierror; use pyo3::prelude::*; use pyo3::types::PyAny; +use std::sync::OnceLock; use std::{net::IpAddr, net::SocketAddr, sync::Arc}; /// A DNS resolver backed by [hickory-dns](https://github.com/hickory-dns/hickory-dns). 
@@ -78,10 +78,10 @@ pub fn get_system_dns_servers() -> PyResult> { }) } -struct AddrInfoErrorConst(&'static str, OnceCell); +struct AddrInfoErrorConst(&'static str, OnceLock); impl AddrInfoErrorConst { const fn new(identifier: &'static str) -> Self { - AddrInfoErrorConst(identifier, OnceCell::new()) + AddrInfoErrorConst(identifier, OnceLock::new()) } fn get(&self) -> isize { *self.1.get_or_init(|| { diff --git a/mitmproxy-rs/src/lib.rs b/mitmproxy-rs/src/lib.rs index bde795bd..44e9e195 100644 --- a/mitmproxy-rs/src/lib.rs +++ b/mitmproxy-rs/src/lib.rs @@ -1,27 +1,30 @@ extern crate core; -use std::sync::RwLock; +use std::sync::{LazyLock, Mutex}; -use once_cell::sync::Lazy; +use crate::contentview::{Contentview, InteractiveContentview}; +use mitmproxy_contentviews::{Prettify, Reencode}; use pyo3::{exceptions::PyException, prelude::*}; +mod contentview; mod dns_resolver; mod process_info; mod server; mod stream; +mod syntax_highlight; pub mod task; mod udp_client; mod util; -static LOGGER_INITIALIZED: Lazy> = Lazy::new(|| RwLock::new(false)); +static LOGGER_INITIALIZED: LazyLock> = LazyLock::new(|| Mutex::new(false)); fn init_logger() -> PyResult<()> { - if *LOGGER_INITIALIZED.read().unwrap() { + if *LOGGER_INITIALIZED.lock().unwrap() { // logger already initialized Ok(()) } else if pyo3_log::try_init().is_ok() { // logger successfully initialized - *LOGGER_INITIALIZED.write().unwrap() = true; + *LOGGER_INITIALIZED.lock().unwrap() = true; Ok(()) } else { // logger was not initialized and could not be initialized @@ -81,6 +84,26 @@ mod mitmproxy_rs { use crate::util::{genkey, pubkey}; } + #[pymodule] + mod contentviews { + use super::*; + #[pymodule_export] + use crate::contentview::Contentview; + #[pymodule_export] + use crate::contentview::InteractiveContentview; + use mitmproxy_contentviews::{HexDump, HexStream, MsgPack, Protobuf, GRPC}; + + #[pymodule_init] + fn init(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_contentview(&HexDump)?; + 
m.add_interactive_contentview(&HexStream)?; + m.add_interactive_contentview(&MsgPack)?; + m.add_interactive_contentview(&Protobuf)?; + m.add_interactive_contentview(&GRPC)?; + Ok(()) + } + } + #[pymodule_export] use crate::stream::Stream; @@ -104,4 +127,28 @@ mod mitmproxy_rs { Ok(()) } + + #[pymodule] + mod syntax_highlight { + #[pymodule_export] + use crate::syntax_highlight::highlight; + #[pymodule_export] + use crate::syntax_highlight::tags; + } +} + +trait AddContentview { + fn add_contentview(&self, cv: &'static T) -> PyResult<()>; + fn add_interactive_contentview(&self, i: &'static T) -> PyResult<()>; +} + +impl AddContentview for Bound<'_, PyModule> { + fn add_contentview(&self, cv: &'static T) -> PyResult<()> { + let view = Contentview::new(self.py(), cv)?; + self.add(cv.instance_name(), view) + } + fn add_interactive_contentview(&self, cv: &'static T) -> PyResult<()> { + let view = InteractiveContentview::new(self.py(), cv)?; + self.add(cv.instance_name(), view) + } } diff --git a/mitmproxy-rs/src/server/local_redirector.rs b/mitmproxy-rs/src/server/local_redirector.rs index 4b8164f3..6390a9e0 100644 --- a/mitmproxy-rs/src/server/local_redirector.rs +++ b/mitmproxy-rs/src/server/local_redirector.rs @@ -1,4 +1,5 @@ use mitmproxy::intercept_conf::InterceptConf; +use pyo3::exceptions::PyValueError; #[cfg(target_os = "linux")] use mitmproxy::packet_sources::linux::LinuxConf; @@ -38,7 +39,7 @@ impl LocalRedirector { fn describe_spec(spec: &str) -> PyResult { InterceptConf::try_from(spec) .map(|conf| conf.description()) - .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string())) + .map_err(|e| PyValueError::new_err(format!("{:?}", e))) } /// Set a new intercept spec. 
diff --git a/mitmproxy-rs/src/stream.rs b/mitmproxy-rs/src/stream.rs index f55585ba..50084afb 100644 --- a/mitmproxy-rs/src/stream.rs +++ b/mitmproxy-rs/src/stream.rs @@ -186,7 +186,7 @@ impl Stream { } _ => (), }, - TunnelInfo::None {} => (), + TunnelInfo::None => (), } match default { Some(x) => Ok(x), diff --git a/mitmproxy-rs/src/syntax_highlight.rs b/mitmproxy-rs/src/syntax_highlight.rs new file mode 100644 index 00000000..5efb241f --- /dev/null +++ b/mitmproxy-rs/src/syntax_highlight.rs @@ -0,0 +1,39 @@ +#[allow(unused_imports)] +use anyhow::{anyhow, Result}; +use std::str::FromStr; + +use mitmproxy_highlight::{Language, Tag}; +use pyo3::{exceptions::PyValueError, prelude::*}; + +/// Transform text into a list of tagged chunks. +/// +/// Example: +/// +/// ```python +/// from mitmproxy_rs.syntax_highlight import highlight +/// highlighted = highlight("key: 42", "yaml") +/// print(highlighted) # [('name', 'key'), ('', ': '), ('number', '42')] +/// ``` +#[pyfunction] +pub fn highlight(text: String, language: &str) -> PyResult> { + let language = Language::from_str(language)?; + language + .highlight(text.as_bytes()) + .map(|chunks| { + chunks + .into_iter() + .map(|(tag, text)| (tag.to_str(), text)) + .collect() + }) + .map_err(|e| PyValueError::new_err(format!("{:?}", e))) +} + +/// Return the names of all tags that highlighting can emit, omitting the empty name used for plain text. 
+#[pyfunction] +pub fn tags() -> PyResult> { + Ok(Tag::VALUES + .iter() + .map(|tag| tag.to_str()) + .filter(|&x| !x.is_empty()) + .collect()) +} diff --git a/src/dns.rs b/src/dns.rs index 18d45df6..25887edc 100644 --- a/src/dns.rs +++ b/src/dns.rs @@ -1,21 +1,20 @@ -use hickory_resolver::config::{LookupIpStrategy, ResolveHosts}; -use hickory_resolver::lookup_ip::LookupIp; -use hickory_resolver::system_conf::read_system_conf; -use hickory_resolver::TokioResolver; -use once_cell::sync::Lazy; -use std::net::IpAddr; -use std::net::SocketAddr; - use hickory_resolver::config::NameServerConfig; use hickory_resolver::config::ResolverConfig; +use hickory_resolver::config::{LookupIpStrategy, ResolveHosts}; +use hickory_resolver::lookup_ip::LookupIp; use hickory_resolver::name_server::TokioConnectionProvider; pub use hickory_resolver::proto::op::Query; pub use hickory_resolver::proto::op::ResponseCode; use hickory_resolver::proto::xfer::Protocol; use hickory_resolver::proto::ProtoError; +use hickory_resolver::system_conf::read_system_conf; pub use hickory_resolver::ResolveError; +use hickory_resolver::TokioResolver; +use std::net::IpAddr; +use std::net::SocketAddr; +use std::sync::LazyLock; -pub static DNS_SERVERS: Lazy, ResolveError>> = Lazy::new(|| { +pub static DNS_SERVERS: LazyLock, ResolveError>> = LazyLock::new(|| { let (config, _opts) = read_system_conf()?; Ok(config .name_servers() diff --git a/src/processes/mod.rs b/src/processes/mod.rs index 4ba0794c..8801fcf4 100644 --- a/src/processes/mod.rs +++ b/src/processes/mod.rs @@ -34,8 +34,8 @@ pub struct ProcessInfo { pub type ProcessList = Vec; #[cfg(any(windows, target_os = "macos"))] -pub static ICON_CACHE: once_cell::sync::Lazy> = - once_cell::sync::Lazy::new(|| std::sync::Mutex::new(IconCache::default())); +pub static ICON_CACHE: std::sync::LazyLock> = + std::sync::LazyLock::new(|| std::sync::Mutex::new(IconCache::default())); pub mod bench { #[cfg(target_os = "macos")] diff --git a/src/processes/windows_list.rs 
b/src/processes/windows_list.rs index 663a8be7..ba536595 100644 --- a/src/processes/windows_list.rs +++ b/src/processes/windows_list.rs @@ -5,10 +5,9 @@ use std::iter; use std::mem::size_of; use std::os::windows::prelude::{OsStrExt, OsStringExt}; use std::path::{Path, PathBuf}; -use std::sync::Mutex; +use std::sync::{LazyLock, Mutex}; use anyhow::{anyhow, Result}; -use once_cell::sync::Lazy; use windows::core::w; use windows::core::{BOOL, PCWSTR, PWSTR}; use windows::Win32::Foundation::{CloseHandle, HANDLE, HWND, LPARAM, MAX_PATH}; @@ -85,8 +84,8 @@ pub fn enumerate_pids() -> Result> { Ok(pids) } -pub static DISPLAY_NAME_CACHE: Lazy> = - Lazy::new(|| Mutex::new(DisplayNameCache::default())); +pub static DISPLAY_NAME_CACHE: LazyLock> = + LazyLock::new(|| Mutex::new(DisplayNameCache::default())); #[derive(Default)] pub struct DisplayNameCache(HashMap>);