diff --git a/.github/workflows/cli-release.yaml b/.github/workflows/cli-release.yaml index 3513072a..3213e727 100644 --- a/.github/workflows/cli-release.yaml +++ b/.github/workflows/cli-release.yaml @@ -4,13 +4,13 @@ name: "[impit-cli] Test & Build" on: push: branches: - - "**" + - "master" tags-ignore: - "impit-cli-*" paths: - "impit-cli/**" - "impit/**" - pull_request: + workflow_dispatch: env: CRATE_NAME: impit-cli diff --git a/.github/workflows/format.yaml b/.github/workflows/format.yaml new file mode 100644 index 00000000..090737d9 --- /dev/null +++ b/.github/workflows/format.yaml @@ -0,0 +1,35 @@ +name: Check formatting +on: + push: + branches: + - master + pull_request: + +env: + RUSTFLAGS: "--cfg reqwest_unstable" + +jobs: + fmt: + name: rustfmt + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + components: rustfmt + + - name: Rustfmt Check + uses: actions-rust-lang/rustfmt@v1 + clippy: + name: clippy + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + components: clippy + + - name: Run clippy + run: cargo clippy --all --manifest-path=./Cargo.toml -- -D warnings \ No newline at end of file diff --git a/impit-cli/Cargo.toml b/impit-cli/Cargo.toml index c5163c62..7c4a275f 100644 --- a/impit-cli/Cargo.toml +++ b/impit-cli/Cargo.toml @@ -9,13 +9,16 @@ edition = "2021" clap = { version = "4.5.21", features = ["derive"] } impit = { path="../impit" } tokio = { version="1.41.1", features = ["full"] } -aws-lc-rs = { version = "1.11.1", features = ["bindgen"] } +aws-lc-rs = { version = "1.11.1" } [target.x86_64-unknown-linux-musl.dependencies] openssl = { version = "*", features = ["vendored"] } +aws-lc-rs = { version = "1.11.1", features = ["bindgen"] } [target.aarch64-unknown-linux-musl.dependencies] openssl = { version = "*", features = ["vendored"] } +aws-lc-rs = { version = "1.11.1", features = ["bindgen"] } [target.arm-unknown-linux-musleabi.dependencies] openssl = { version = "*", features = ["vendored"] } +aws-lc-rs = { version = "1.11.1", features = ["bindgen"] } diff --git a/impit-cli/src/headers.rs b/impit-cli/src/headers.rs index aee0932e..34bae7a1 100644 --- a/impit-cli/src/headers.rs +++ b/impit-cli/src/headers.rs @@ -11,4 +11,4 @@ pub(crate) fn process_headers(headers: Vec) -> HashMap { } map -} \ No newline at end of file +} diff --git a/impit-cli/src/main.rs b/impit-cli/src/main.rs index da095e14..16dafdd1 100644 --- a/impit-cli/src/main.rs +++ b/impit-cli/src/main.rs @@ -1,7 +1,11 @@ use std::ffi::OsString; use clap::{Parser, ValueEnum}; -use impit::{impit::{Impit, RedirectBehavior}, emulation::Browser as ImpitBrowser, request::RequestOptions}; +use impit::{ + emulation::Browser as ImpitBrowser, + impit::{Impit, RedirectBehavior}, + request::RequestOptions, +}; mod headers; @@ -9,19 +13,19 @@ mod headers; enum Browser { Chrome, Firefox, - Impit + Impit, } #[derive(Parser, Debug, Clone, Copy, ValueEnum)] enum Method { - GET, - POST, - PUT, - DELETE, - PATCH, - HEAD, - OPTIONS, - TRACE + Get, + Post, + Put, + Delete, + Patch, + Head, + Options, + Trace, } /// CLI interface for the impit library. @@ -30,31 +34,31 @@ enum Method { #[command(about, long_about = None)] struct CliArgs { /// Method to use for the request. - #[arg(short='X', long, default_value = "get")] + #[arg(short = 'X', long, default_value = "get")] method: Method, /// HTTP headers to add to the request. - #[arg(short='H', long)] + #[arg(short = 'H', long)] headers: Vec, - + /// What browser to use for the request. - #[arg(short='A', long, default_value = "impit")] + #[arg(short = 'A', long, default_value = "impit")] impersonate: Browser, /// If set, impit will ignore TLS errors. - #[arg(short='k', long, action)] + #[arg(short = 'k', long, action)] ignore_tls_errors: bool, - + /// If set, impit will fallback to vanilla HTTP if the impersonated browser fails. - #[arg(short='f', long, action)] + #[arg(short = 'f', long, action)] fallback: bool, /// Proxy to use for the request. - #[arg(short='x', long="proxy")] + #[arg(short = 'x', long = "proxy")] proxy: Option, - + /// Maximum time in seconds to wait for the request to complete. - #[arg(short='m', long="max-time")] + #[arg(short = 'm', long = "max-time")] max_time: Option, /// Data to send with the request. @@ -62,19 +66,19 @@ struct CliArgs { data: Option, /// Enforce the use of HTTP/3 for the request. Note that if the server does not support HTTP/3, the request will fail. - #[arg(long="http3-only", action)] + #[arg(long = "http3-only", action)] http3_prior_knowledge: bool, - + /// Enable the use of HTTP/3. This will attempt to use HTTP/3, but fall back to earlier versions of HTTP if the server does not support it. - #[arg(long="http3", action)] + #[arg(long = "http3", action)] enable_http3: bool, /// Follow redirects - #[arg(short='L', long="location", action)] + #[arg(short = 'L', long = "location", action)] follow_redirects: bool, - + /// Follow redirects - #[arg(long="max-redirs", default_value = "50")] + #[arg(long = "max-redirs", default_value = "50")] maximum_redirects: usize, /// URL of the request to make @@ -92,7 +96,7 @@ async fn main() { client = match args.impersonate { Browser::Chrome => client.with_browser(ImpitBrowser::Chrome), Browser::Firefox => client.with_browser(ImpitBrowser::Firefox), - Browser::Impit => client + Browser::Impit => client, }; if args.proxy.is_some() { @@ -109,17 +113,13 @@ async fn main() { client = client.with_redirect(RedirectBehavior::ManualRedirect); } - let body: Option> = match args.data { - Some(data) => Some(data.into_string().unwrap().into_bytes()), - None => None - }; + let body: Option> = args + .data + .map(|data| data.into_string().unwrap().into_bytes()); let mut client = client.build(); - let timeout = match args.max_time { - Some(time) => Some(std::time::Duration::from_secs(time)), - None => None - }; + let timeout = args.max_time.map(std::time::Duration::from_secs); let options = RequestOptions { headers: headers::process_headers(args.headers), @@ -128,15 +128,15 @@ async fn main() { }; let response = match args.method { - Method::GET => client.get(args.url, Some(options)).await.unwrap(), - Method::POST => client.post(args.url, body, Some(options)).await.unwrap(), - Method::PUT => client.put(args.url, body, Some(options)).await.unwrap(), - Method::DELETE => client.delete(args.url, Some(options)).await.unwrap(), - Method::PATCH => client.patch(args.url, body, Some(options)).await.unwrap(), - Method::HEAD => client.head(args.url, Some(options)).await.unwrap(), - Method::OPTIONS => client.options(args.url, Some(options)).await.unwrap(), - Method::TRACE => client.trace(args.url, Some(options)).await.unwrap(), + Method::Get => client.get(args.url, Some(options)).await.unwrap(), + Method::Post => client.post(args.url, body, Some(options)).await.unwrap(), + Method::Put => client.put(args.url, body, Some(options)).await.unwrap(), + Method::Delete => client.delete(args.url, Some(options)).await.unwrap(), + Method::Patch => client.patch(args.url, body, Some(options)).await.unwrap(), + Method::Head => client.head(args.url, Some(options)).await.unwrap(), + Method::Options => client.options(args.url, Some(options)).await.unwrap(), + Method::Trace => client.trace(args.url, Some(options)).await.unwrap(), }; print!("{}", response.text().await.unwrap()); -} \ No newline at end of file +} diff --git a/impit/examples/basic.rs b/impit/examples/basic.rs index d290c15e..4b603ac8 100644 --- a/impit/examples/basic.rs +++ b/impit/examples/basic.rs @@ -1,8 +1,8 @@ -use impit::impit::Impit; use impit::emulation::Browser; - - #[tokio::main] - async fn main() { +use impit::impit::Impit; + +#[tokio::main] +async fn main() { let mut impit = Impit::builder() .with_browser(Browser::Firefox) .with_http3() @@ -18,4 +18,4 @@ use impit::emulation::Browser; println!("{:#?}", e); } } - } \ No newline at end of file +} diff --git a/impit/src/http3.rs b/impit/src/http3.rs index 3be691ed..9c012544 100644 --- a/impit/src/http3.rs +++ b/impit/src/http3.rs @@ -4,11 +4,11 @@ use hickory_proto::error::ProtoError; use hickory_proto::rr::rdata::svcb::SvcParamValue; use hickory_proto::rr::RData; -use tokio::net::TcpStream as TokioTcpStream; use hickory_client::client::{AsyncClient, ClientHandle}; use hickory_client::proto::iocompat::AsyncIoTokioAsStd; use hickory_client::rr::Name; use hickory_client::tcp::TcpClientStream; +use tokio::net::TcpStream as TokioTcpStream; /// A struct encapsulating the components required to make HTTP/3 requests. pub struct H3Engine { @@ -17,7 +17,7 @@ pub struct H3Engine { /// The background task that processes DNS queries. bg_join_handle: tokio::task::JoinHandle>, /// A map of hosts that support HTTP/3. - /// + /// /// This is populated by the DNS queries and manual calls to `set_h3_support` (based on the `Alt-Svc` header). /// Implicitly used as a cache for the DNS queries. h3_alt_svc: HashMap, @@ -30,38 +30,39 @@ impl H3Engine { TcpClientStream::>::new(([8, 8, 8, 8], 53).into()); let (client, bg) = AsyncClient::new(stream, sender, None).await.unwrap(); - let bg_join_handle= tokio::spawn(bg); + let bg_join_handle = tokio::spawn(bg); - H3Engine { - client, + H3Engine { + client, bg_join_handle, h3_alt_svc: HashMap::new(), } } - pub async fn host_supports_h3(self: &mut Self, host: &String) -> bool { + pub async fn host_supports_h3(&mut self, host: &String) -> bool { if let Some(supports_h3) = self.h3_alt_svc.get(host) { return supports_h3.to_owned(); } let domain_name = Name::from_utf8(host).unwrap(); - - let response = self.client.query( - domain_name, - hickory_proto::rr::DNSClass::IN, - hickory_proto::rr::RecordType::HTTPS - ).await; - - let dns_h3_support = response.is_ok_and(|response | { + + let response = self + .client + .query( + domain_name, + hickory_proto::rr::DNSClass::IN, + hickory_proto::rr::RecordType::HTTPS, + ) + .await; + + let dns_h3_support = response.is_ok_and(|response| { response.answers().iter().any(|answer| { if let RData::HTTPS(data) = answer.data().unwrap() { return data.svc_params().iter().any(|param| { if let SvcParamValue::Alpn(alpn_protocols) = param.1.clone() { - return alpn_protocols.0.iter().any(|alpn| { - alpn == "h3" - }) + return alpn_protocols.0.iter().any(|alpn| alpn == "h3"); } - + false }); } @@ -73,7 +74,7 @@ impl H3Engine { dns_h3_support } - pub fn set_h3_support(self: &mut Self, host: &String, supports_h3: bool) { + pub fn set_h3_support(&mut self, host: &String, supports_h3: bool) { if self.h3_alt_svc.contains_key(host) { return; } @@ -86,4 +87,4 @@ impl Drop for H3Engine { fn drop(&mut self) { self.bg_join_handle.abort(); } -} \ No newline at end of file +} diff --git a/impit/src/http_headers/mod.rs b/impit/src/http_headers/mod.rs index 28c03ed9..537eaaed 100644 --- a/impit/src/http_headers/mod.rs +++ b/impit/src/http_headers/mod.rs @@ -1,6 +1,6 @@ -use std::{collections::HashMap, str::FromStr}; -use reqwest::header::{HeaderMap, HeaderName, HeaderValue}; use crate::emulation::Browser; +use reqwest::header::{HeaderMap, HeaderName, HeaderValue}; +use std::{collections::HashMap, str::FromStr}; mod statics; @@ -20,49 +20,52 @@ impl HttpHeaders { } } -impl Into for HttpHeaders { - fn into(self) -> HeaderMap { +impl From for HeaderMap { + fn from(val: HttpHeaders) -> Self { let mut headers = HeaderMap::new(); - let header_values = match self.context.browser { + let header_values = match val.context.browser { Some(Browser::Chrome) => statics::CHROME_HEADERS, Some(Browser::Firefox) => statics::FIREFOX_HEADERS, - None => &[] + None => &[], }; - let pseudo_headers_order: &[&str] = match self.context.browser { + let pseudo_headers_order: &[&str] = match val.context.browser { Some(Browser::Chrome) => statics::CHROME_PSEUDOHEADERS_ORDER.as_ref(), Some(Browser::Firefox) => statics::FIREFOX_PSEUDOHEADERS_ORDER.as_ref(), - None => &[] + None => &[], }; - if pseudo_headers_order.len() != 0 { - std::env::set_var("IMPIT_H2_PSEUDOHEADERS_ORDER", pseudo_headers_order.join(",")); + if !pseudo_headers_order.is_empty() { + std::env::set_var( + "IMPIT_H2_PSEUDOHEADERS_ORDER", + pseudo_headers_order.join(","), + ); } let mut used_custom_headers: Vec = vec![]; // TODO: don't use HTTP2 headers for HTTP1.1 for (name, impersonated_value) in header_values { - let value: &str = match self.context.custom_headers.get(*name) { + let value: &str = match val.context.custom_headers.get(*name) { Some(custom_value) => { used_custom_headers.push(name.to_string()); custom_value.as_str() - }, + } None => impersonated_value, }; headers.append( - HeaderName::from_str(name).unwrap(), - HeaderValue::from_str(value).unwrap() + HeaderName::from_str(name).unwrap(), + HeaderValue::from_str(value).unwrap(), ); } - self.context.custom_headers.iter().for_each(|(name, value)| { + val.context.custom_headers.iter().for_each(|(name, value)| { if !used_custom_headers.contains(name) { headers.append( - HeaderName::from_str(name).unwrap(), - HeaderValue::from_str(value).unwrap() + HeaderName::from_str(name).unwrap(), + HeaderValue::from_str(value).unwrap(), ); } }); @@ -81,22 +84,22 @@ pub struct HttpHeadersBuilder { impl HttpHeadersBuilder { // TODO: Enforce `with_host` to be called before `build` - pub fn with_host (&mut self, host: &String) -> &mut Self { + pub fn with_host(&mut self, host: &String) -> &mut Self { self.host = host.to_owned(); self } - pub fn with_browser (&mut self, browser: &Option) -> &mut Self { + pub fn with_browser(&mut self, browser: &Option) -> &mut Self { self.browser = browser.to_owned(); self } - pub fn with_https (&mut self, https: bool) -> &mut Self { + pub fn with_https(&mut self, https: bool) -> &mut Self { self.https = https; self } - pub fn with_custom_headers (&mut self, custom_headers: &HashMap) -> &mut Self { + pub fn with_custom_headers(&mut self, custom_headers: &HashMap) -> &mut Self { self.custom_headers = custom_headers.to_owned(); self } @@ -105,5 +108,3 @@ impl HttpHeadersBuilder { HttpHeaders::new(self) } } - - diff --git a/impit/src/http_headers/statics.rs b/impit/src/http_headers/statics.rs index 22d6d18d..e08c3c48 100644 --- a/impit/src/http_headers/statics.rs +++ b/impit/src/http_headers/statics.rs @@ -2,7 +2,7 @@ // Note that not all requests are made the same: // - on forced (Ctrl+R) reloads, Chrome sets Cache-Control: max-age=0 // - when the URL is in the address bar (but not submitted yet), Chrome sets `Purpose: prefetch` and `Sec-Purpose: prefetch` -pub static CHROME_HEADERS: &'static [(&'static str, &'static str)] = &[ +pub static CHROME_HEADERS: &[(&str, &str)] = &[ ("sec-ch-ua", "\"Google Chrome\";v=\"125\", \"Chromium\";v=\"125\", \"Not.A/Brand\";v=\"24\""), ("sec-ch-ua-mobile", "?0"), ("sec-ch-ua-platform", "Linux"), @@ -17,9 +17,16 @@ pub static CHROME_HEADERS: &'static [(&'static str, &'static str)] = &[ ("accept-language", "en-US,en;q=0.9"), ]; -pub static CHROME_PSEUDOHEADERS_ORDER : [&'static str; 6] = [":method", ":authority", ":scheme", ":path", ":protocol", ":status"]; +pub static CHROME_PSEUDOHEADERS_ORDER: [&str; 6] = [ + ":method", + ":authority", + ":scheme", + ":path", + ":protocol", + ":status", +]; -pub static FIREFOX_HEADERS: &'static [(&'static str, &'static str)] = &[ +pub static FIREFOX_HEADERS: &[(&str, &str)] = &[ ("User-Agent", "Mozilla/5.0 (X11; Linux x86_64; rv:128.0) Gecko/20100101 Firefox/128.0"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8"), ("Accept-Language", "en,cs;q=0.7,en-US;q=0.3"), @@ -33,4 +40,11 @@ pub static FIREFOX_HEADERS: &'static [(&'static str, &'static str)] = &[ ("Priority", "u=0, i"), ]; -pub static FIREFOX_PSEUDOHEADERS_ORDER : [&'static str; 6] = [":method", ":path", ":authority", ":scheme", ":protocol", ":status"]; \ No newline at end of file +pub static FIREFOX_PSEUDOHEADERS_ORDER: [&str; 6] = [ + ":method", + ":path", + ":authority", + ":scheme", + ":protocol", + ":status", +]; diff --git a/impit/src/impit.rs b/impit/src/impit.rs index f86d92b1..d448a244 100644 --- a/impit/src/impit.rs +++ b/impit/src/impit.rs @@ -1,65 +1,67 @@ -use std::{str::FromStr, time::Duration}; use log::debug; use reqwest::{Method, Response, Version}; +use std::{str::FromStr, time::Duration}; use url::Url; -use crate::{http3::H3Engine, http_headers::HttpHeaders, tls, request::RequestOptions, emulation::Browser}; +use crate::{ + emulation::Browser, http3::H3Engine, http_headers::HttpHeaders, request::RequestOptions, tls, +}; /// Error types that can be returned by the [`Impit`] struct. -/// +/// /// The `ErrorType` enum is used to represent the different types of errors that can occur when making requests. /// The `RequestError` variant is used to wrap the `reqwest::Error` type. #[derive(Debug)] pub enum ErrorType { - /// The URL couldn't be parsed. - UrlParsingError, - /// The URL is missing the hostname. - UrlMissingHostnameError, - /// The URL uses an unsupported protocol. - UrlProtocolError, - /// The request was made with `http3_prior_knowledge`, but HTTP/3 usage wasn't enabled. - Http3Disabled, - /// `reqwest::Error` variant. See the nested error for more details. - RequestError(reqwest::Error), + /// The URL couldn't be parsed. + UrlParsingError, + /// The URL is missing the hostname. + UrlMissingHostnameError, + /// The URL uses an unsupported protocol. + UrlProtocolError, + /// The request was made with `http3_prior_knowledge`, but HTTP/3 usage wasn't enabled. + Http3Disabled, + /// `reqwest::Error` variant. See the nested error for more details. + RequestError(reqwest::Error), } /// Impit is the main struct used to make (impersonated) requests. -/// +/// /// It uses `reqwest::Client` to make requests and holds info about the impersonated browser. -/// +/// /// To create a new [`Impit`] instance, use the [`Impit::builder()`](ImpitBuilder) method. pub struct Impit { - pub(self) base_client: reqwest::Client, - pub(self) h3_client: Option, - h3_engine: Option, - config: ImpitBuilder, + pub(self) base_client: reqwest::Client, + pub(self) h3_client: Option, + h3_engine: Option, + config: ImpitBuilder, } impl Default for Impit { - fn default() -> Self { - ImpitBuilder::default().build() - } + fn default() -> Self { + ImpitBuilder::default().build() + } } /// Customizes the behavior of the [`Impit`] struct when following redirects. -/// +/// /// The `RedirectBehavior` enum is used to specify how the client should handle redirects. #[derive(Debug, Clone)] pub enum RedirectBehavior { - /// Follow up to `usize` redirects. - /// - /// If the number of redirects is exceeded, the client will return an error. - FollowRedirect(usize), - /// Don't follow any redirects. - /// - /// The client will return the response for the first request, even with the `3xx` status code. - ManualRedirect, + /// Follow up to `usize` redirects. + /// + /// If the number of redirects is exceeded, the client will return an error. + FollowRedirect(usize), + /// Don't follow any redirects. + /// + /// The client will return the response for the first request, even with the `3xx` status code. + ManualRedirect, } /// A builder struct used to create a new [`Impit`] instance. -/// +/// /// The builder allows setting the browser to impersonate, ignoring TLS errors, setting a proxy, and other options. -/// +/// /// ### Example /// ```rust /// let mut impit = Impit::builder() @@ -69,354 +71,403 @@ pub enum RedirectBehavior { /// .with_default_timeout(Duration::from_secs(10)) /// .with_http3() /// .build(); -/// +/// /// let response = impit.get("https://example.com".to_string(), None).await; /// ``` #[derive(Debug, Clone)] pub struct ImpitBuilder { - browser: Option, - ignore_tls_errors: bool, - vanilla_fallback: bool, - proxy_url: String, - request_timeout: Duration, - max_http_version: Version, - redirect: RedirectBehavior, + browser: Option, + ignore_tls_errors: bool, + vanilla_fallback: bool, + proxy_url: String, + request_timeout: Duration, + max_http_version: Version, + redirect: RedirectBehavior, } impl Default for ImpitBuilder { - fn default() -> Self { - ImpitBuilder { - browser: None, - ignore_tls_errors: false, - vanilla_fallback: true, - proxy_url: String::from_str("").unwrap(), - request_timeout: Duration::from_secs(30), - max_http_version: Version::HTTP_2, - redirect: RedirectBehavior::FollowRedirect(10), + fn default() -> Self { + ImpitBuilder { + browser: None, + ignore_tls_errors: false, + vanilla_fallback: true, + proxy_url: String::from_str("").unwrap(), + request_timeout: Duration::from_secs(30), + max_http_version: Version::HTTP_2, + redirect: RedirectBehavior::FollowRedirect(10), + } } - } } impl ImpitBuilder { - /// Sets the browser to impersonate. - /// - /// The [`Browser`] enum is used to set the HTTP headers, TLS behaviour and other markers to impersonate a specific browser. - /// - /// If not used, the client will use the default `reqwest` fingerprints. - pub fn with_browser(mut self, browser: Browser) -> Self { - self.browser = Some(browser); - self - } - - /// If set to true, the client will ignore TLS-related errors. - pub fn with_ignore_tls_errors(mut self, ignore_tls_errors: bool) -> Self { - self.ignore_tls_errors = ignore_tls_errors; - self - } - - /// If set to `true`, the client will retry the request without impersonation - /// if the impersonated browser encounters an error. - pub fn with_fallback_to_vanilla(mut self, vanilla_fallback: bool) -> Self { - self.vanilla_fallback = vanilla_fallback; - self - } - - /// Sets the proxy URL to use for requests. - /// - /// Note that this proxy will be used for all the requests - /// made by the built [`Impit`] instance. - pub fn with_proxy(mut self, proxy_url: String) -> Self { - self.proxy_url = proxy_url; - self - } - - /// Sets the default timeout for requests. - /// - /// This setting can be overridden when making the request by using the `RequestOptions` struct. - pub fn with_default_timeout(mut self, timeout: Duration) -> Self { - self.request_timeout = timeout; - self - } - - /// Enables HTTP/3 usage for requests. - /// - /// `impit` currently supports HTTP/3 negotiation via the HTTPS DNS record and the `Alt-Svc` header. - /// To enforce HTTP/3 usage, use the `http3_prior_knowledge` option in the `RequestOptions` struct when - /// making the request. - /// - /// Note that this feature is experimental and may not work as expected with all servers. - pub fn with_http3(mut self) -> Self { - self.max_http_version = Version::HTTP_3; - self - } - - /// Sets the desired redirect behavior. - /// - /// By default, the client will follow up to 10 redirects. - /// By passing the `RedirectBehavior::ManualRedirect` option, the client will not follow any redirects - /// (i.e. it will return the response for the first request, with the 3xx status code). - pub fn with_redirect(mut self, behavior: RedirectBehavior) -> Self { - self.redirect = behavior; - self - } - - /// Builds the [`Impit`] instance. - pub fn build(self) -> Impit { - Impit::new(self) - } -} - -impl Impit { - pub fn builder() -> ImpitBuilder { - ImpitBuilder::default() - } + /// Sets the browser to impersonate. + /// + /// The [`Browser`] enum is used to set the HTTP headers, TLS behaviour and other markers to impersonate a specific browser. + /// + /// If not used, the client will use the default `reqwest` fingerprints. + pub fn with_browser(mut self, browser: Browser) -> Self { + self.browser = Some(browser); + self + } - fn new_reqwest_client(config: &ImpitBuilder) -> Result { - let mut client = reqwest::Client::builder(); - let mut tls_config_builder = tls::TlsConfig::builder(); - let mut tls_config_builder = tls_config_builder.with_browser(config.browser); + /// If set to true, the client will ignore TLS-related errors. + pub fn with_ignore_tls_errors(mut self, ignore_tls_errors: bool) -> Self { + self.ignore_tls_errors = ignore_tls_errors; + self + } - if config.max_http_version == Version::HTTP_3 { - tls_config_builder = tls_config_builder.with_http3(); + /// If set to `true`, the client will retry the request without impersonation + /// if the impersonated browser encounters an error. + pub fn with_fallback_to_vanilla(mut self, vanilla_fallback: bool) -> Self { + self.vanilla_fallback = vanilla_fallback; + self } - tls_config_builder = tls_config_builder.with_ignore_tls_errors(config.ignore_tls_errors); + /// Sets the proxy URL to use for requests. + /// + /// Note that this proxy will be used for all the requests + /// made by the built [`Impit`] instance. + pub fn with_proxy(mut self, proxy_url: String) -> Self { + self.proxy_url = proxy_url; + self + } - let tls_config = tls_config_builder.build(); + /// Sets the default timeout for requests. + /// + /// This setting can be overridden when making the request by using the `RequestOptions` struct. + pub fn with_default_timeout(mut self, timeout: Duration) -> Self { + self.request_timeout = timeout; + self + } - client = client - .danger_accept_invalid_certs(config.ignore_tls_errors) - .danger_accept_invalid_hostnames(config.ignore_tls_errors) - .use_preconfigured_tls(tls_config) - .cookie_store(true) - .timeout(config.request_timeout); + /// Enables HTTP/3 usage for requests. + /// + /// `impit` currently supports HTTP/3 negotiation via the HTTPS DNS record and the `Alt-Svc` header. + /// To enforce HTTP/3 usage, use the `http3_prior_knowledge` option in the `RequestOptions` struct when + /// making the request. + /// + /// Note that this feature is experimental and may not work as expected with all servers. + pub fn with_http3(mut self) -> Self { + self.max_http_version = Version::HTTP_3; + self + } - if config.max_http_version == Version::HTTP_3 { - client = client.http3_prior_knowledge(); + /// Sets the desired redirect behavior. + /// + /// By default, the client will follow up to 10 redirects. + /// By passing the `RedirectBehavior::ManualRedirect` option, the client will not follow any redirects + /// (i.e. it will return the response for the first request, with the 3xx status code). + pub fn with_redirect(mut self, behavior: RedirectBehavior) -> Self { + self.redirect = behavior; + self } - if config.proxy_url.len() > 0 { - client = client.proxy( - reqwest::Proxy::all(&config.proxy_url) - .expect("The proxy_url option should be a valid URL.") - ); + /// Builds the [`Impit`] instance. + pub fn build(self) -> Impit { + Impit::new(self) } +} - match config.redirect { - RedirectBehavior::FollowRedirect(max) => { - client = client.redirect(reqwest::redirect::Policy::limited(max)); - }, - RedirectBehavior::ManualRedirect => { - client = client.redirect(reqwest::redirect::Policy::none()); - }, +impl Impit { + pub fn builder() -> ImpitBuilder { + ImpitBuilder::default() } - client.build() - } + fn new_reqwest_client(config: &ImpitBuilder) -> Result { + let mut client = reqwest::Client::builder(); + let mut tls_config_builder = tls::TlsConfig::builder(); + let mut tls_config_builder = tls_config_builder.with_browser(config.browser); - /// Creates a new [`Impit`] instance based on the options stored in the [`ImpitBuilder`] instance. - fn new(config: ImpitBuilder) -> Self { - let mut h3_client: Option = None; - let mut base_client = Self::new_reqwest_client(&config).unwrap(); + if config.max_http_version == Version::HTTP_3 { + tls_config_builder = tls_config_builder.with_http3(); + } - if config.max_http_version == Version::HTTP_3 { - h3_client = Some(base_client); - base_client = Self::new_reqwest_client(&ImpitBuilder { - max_http_version: Version::HTTP_2, - ..config.clone() - }).unwrap(); - } + tls_config_builder = tls_config_builder.with_ignore_tls_errors(config.ignore_tls_errors); - Impit { - base_client, - h3_client, - config, - h3_engine: None, - } - } + let tls_config = tls_config_builder.build(); + + client = client + .danger_accept_invalid_certs(config.ignore_tls_errors) + .danger_accept_invalid_hostnames(config.ignore_tls_errors) + .use_preconfigured_tls(tls_config) + .cookie_store(true) + .timeout(config.request_timeout); - fn parse_url(&self, url: String) -> Result { - let url = Url::parse(&url); + if config.max_http_version == Version::HTTP_3 { + client = client.http3_prior_knowledge(); + } + + if !config.proxy_url.is_empty() { + client = client.proxy( + reqwest::Proxy::all(&config.proxy_url) + .expect("The proxy_url option should be a valid URL."), + ); + } - if url.is_err() { - return Err(ErrorType::UrlParsingError); + match config.redirect { + RedirectBehavior::FollowRedirect(max) => { + client = client.redirect(reqwest::redirect::Policy::limited(max)); + } + RedirectBehavior::ManualRedirect => { + client = client.redirect(reqwest::redirect::Policy::none()); + } + } + + client.build() } - let url = url.unwrap(); - if url.host_str().is_none() { - return Err(ErrorType::UrlMissingHostnameError); + /// Creates a new [`Impit`] instance based on the options stored in the [`ImpitBuilder`] instance. + fn new(config: ImpitBuilder) -> Self { + let mut h3_client: Option = None; + let mut base_client = Self::new_reqwest_client(&config).unwrap(); + + if config.max_http_version == Version::HTTP_3 { + h3_client = Some(base_client); + base_client = Self::new_reqwest_client(&ImpitBuilder { + max_http_version: Version::HTTP_2, + ..config.clone() + }) + .unwrap(); + } + + Impit { + base_client, + h3_client, + config, + h3_engine: None, + } } - let protocol = url.scheme(); + fn parse_url(&self, url: String) -> Result { + let url = Url::parse(&url); + + if url.is_err() { + return Err(ErrorType::UrlParsingError); + } + let url = url.unwrap(); + + if url.host_str().is_none() { + return Err(ErrorType::UrlMissingHostnameError); + } - return match protocol { - "http" => Ok(url), - "https" => Ok(url), - _ => Err(ErrorType::UrlProtocolError), - }; - } + let protocol = url.scheme(); - async fn should_use_h3(self: &mut Self, host: &String) -> bool { - if self.config.max_http_version < Version::HTTP_3 { - debug!("HTTP/3 is disabled, falling back to TCP-based requests."); - return false; + match protocol { + "http" => Ok(url), + "https" => Ok(url), + _ => Err(ErrorType::UrlProtocolError), + } } - if let None = &self.h3_engine { - self.h3_engine = Some(H3Engine::init().await); + async fn should_use_h3(&mut self, host: &String) -> bool { + if self.config.max_http_version < Version::HTTP_3 { + debug!("HTTP/3 is disabled, falling back to TCP-based requests."); + return false; + } + + if self.h3_engine.is_none() { + self.h3_engine = Some(H3Engine::init().await); + } + + self.h3_engine + .as_mut() + .unwrap() + .host_supports_h3(host) + .await } - self.h3_engine.as_mut().unwrap().host_supports_h3(host).await - } + async fn make_request( + &mut self, + method: Method, + url: String, + body: Option>, + options: Option, + ) -> Result { + let options = options.unwrap_or_default(); + + if options.http3_prior_knowledge && self.config.max_http_version < Version::HTTP_3 { + return Err(ErrorType::Http3Disabled); + } + + let parsed_url = self + .parse_url(url.clone()) + .expect("URL should be a valid URL"); + let host = parsed_url.host_str().unwrap().to_string(); + + let h3 = options.http3_prior_knowledge || self.should_use_h3(&host).await; + + let headers = HttpHeaders::get_builder() + .with_browser(&self.config.browser) + .with_host(&host) + .with_https(parsed_url.scheme() == "https") + .with_custom_headers(&options.headers) + .build(); + + let client = if h3 { + debug!("Using QUIC for request to {}", url); + self.h3_client.as_ref().unwrap() + } else { + debug!("{} doesn't seem to have HTTP3 support", url); + &self.base_client + }; + + let mut request = client + .request(method.clone(), parsed_url) + .headers(headers.into()); + + if h3 { + request = request.version(Version::HTTP_3); + } + + if let Some(timeout) = options.timeout { + request = request.timeout(timeout); + } + + request = match body { + Some(body) => request.body(body), + None => request, + }; - async fn make_request(&mut self, method: Method, url: String, body: Option>, options: Option) -> Result { - let options = options.unwrap_or_default(); + let response = request.send().await; - if options.http3_prior_knowledge && self.config.max_http_version < Version::HTTP_3 { - return Err(ErrorType::Http3Disabled); + if response.is_err() { + return Err(ErrorType::RequestError(response.err().unwrap())); + } + + let response = response.unwrap(); + + if !h3 { + if let Some(h3_engine) = self.h3_engine.as_mut() { + h3_engine.set_h3_support(&host, false); + + if let Some(alt_svc) = response.headers().get("Alt-Svc") { + let alt_svc = alt_svc.to_str().unwrap(); + if alt_svc.contains("h3") { + debug!( + "{} supports HTTP/3 (alt-svc header), adding to Alt-Svc cache", + host + ); + h3_engine.set_h3_support(&host, true); + } + } + } + } + + Ok(response) } - let parsed_url = self.parse_url(url.clone()) - .expect("URL should be a valid URL"); - let host = parsed_url.host_str().unwrap().to_string(); - - let h3 = options.http3_prior_knowledge || self.should_use_h3(&host).await; - - let headers = HttpHeaders::get_builder() - .with_browser(&self.config.browser) - .with_host(&host) - .with_https(parsed_url.scheme() == "https") - .with_custom_headers(&options.headers) - .build(); - - let client = if h3 { - debug!("Using QUIC for request to {}", url); - self.h3_client.as_ref().unwrap() - } else { - debug!("{} doesn't seem to have HTTP3 support", url); - &self.base_client - }; - - let mut request = client - .request(method.clone(), parsed_url) - .headers(headers.into()); - - if h3 { - request = request.version(Version::HTTP_3); + /// Makes a `GET` request to the specified URL. + /// + /// The `url` parameter should be a valid URL. + /// Additional options like `headers`, `timeout` or HTTP/3 usage can be passed via the `RequestOptions` struct. + /// + /// If the request is successful, the `reqwest::Response` struct is returned. + pub async fn get( + &mut self, + url: String, + options: Option, + ) -> Result { + self.make_request(Method::GET, url, None, options).await } - if let Some(timeout) = options.timeout { - request = request.timeout(timeout); + /// Makes a `HEAD` request to the specified URL. + /// + /// The `url` parameter should be a valid URL. + /// Additional options like `headers`, `timeout` or HTTP/3 usage can be passed via the `RequestOptions` struct. + /// + /// If the request is successful, the `reqwest::Response` struct is returned. + pub async fn head( + &mut self, + url: String, + options: Option, + ) -> Result { + self.make_request(Method::HEAD, url, None, options).await } - request = match body { - Some(body) => request.body(body), - None => request - }; + /// Makes an OPTIONS request to the specified URL. + /// + /// The `url` parameter should be a valid URL. + /// Additional options like `headers`, `timeout` or HTTP/3 usage can be passed via the `RequestOptions` struct. + /// + /// If the request is successful, the `reqwest::Response` struct is returned. + pub async fn options( + &mut self, + url: String, + options: Option, + ) -> Result { + self.make_request(Method::OPTIONS, url, None, options).await + } - let response = request.send().await; + /// Makes a `TRACE` request to the specified URL. + /// + /// The `url` parameter should be a valid URL. + /// Additional options like `headers`, `timeout` or HTTP/3 usage can be passed via the `RequestOptions` struct. + /// + /// If the request is successful, the `reqwest::Response` struct is returned. + pub async fn trace( + &mut self, + url: String, + options: Option, + ) -> Result { + self.make_request(Method::TRACE, url, None, options).await + } - if response.is_err() { - return Err(ErrorType::RequestError(response.err().unwrap())); + /// Makes a `DELETE` request to the specified URL. + /// + /// The `url` parameter should be a valid URL. + /// Additional options like `headers`, `timeout` or HTTP/3 usage can be passed via the `RequestOptions` struct. + /// + /// If the request is successful, the `reqwest::Response` struct is returned. + pub async fn delete( + &mut self, + url: String, + options: Option, + ) -> Result { + self.make_request(Method::DELETE, url, None, options).await } - - let response = response.unwrap(); - - if !h3 { - if let Some(h3_engine) = self.h3_engine.as_mut() { - h3_engine.set_h3_support(&host, false); - - if let Some(alt_svc) = response.headers().get("Alt-Svc") { - let alt_svc = alt_svc.to_str().unwrap(); - if alt_svc.contains("h3") { - debug!("{} supports HTTP/3 (alt-svc header), adding to Alt-Svc cache", host); - h3_engine.set_h3_support(&host, true); - } - } - } + + /// Makes a `POST` request to the specified URL. + /// + /// The `url` parameter should be a valid URL. + /// Additional options like `headers`, `timeout` or HTTP/3 usage can be passed via the `RequestOptions` struct. + /// + /// If the request is successful, the `reqwest::Response` struct is returned. + pub async fn post( + &mut self, + url: String, + body: Option>, + options: Option, + ) -> Result { + self.make_request(Method::POST, url, body, options).await + } + + /// Makes a `PUT` request to the specified URL. + /// + /// The `url` parameter should be a valid URL. + /// Additional options like `headers`, `timeout` or HTTP/3 usage can be passed via the `RequestOptions` struct. + /// + /// If the request is successful, the `reqwest::Response` struct is returned. + pub async fn put( + &mut self, + url: String, + body: Option>, + options: Option, + ) -> Result { + self.make_request(Method::PUT, url, body, options).await } - - Ok(response) - } - - /// Makes a `GET` request to the specified URL. - /// - /// The `url` parameter should be a valid URL. - /// Additional options like `headers`, `timeout` or HTTP/3 usage can be passed via the `RequestOptions` struct. - /// - /// If the request is successful, the `reqwest::Response` struct is returned. - pub async fn get(&mut self, url: String, options: Option) -> Result { - self.make_request(Method::GET, url, None, options).await - } - - /// Makes a `HEAD` request to the specified URL. - /// - /// The `url` parameter should be a valid URL. - /// Additional options like `headers`, `timeout` or HTTP/3 usage can be passed via the `RequestOptions` struct. - /// - /// If the request is successful, the `reqwest::Response` struct is returned. - pub async fn head(&mut self, url: String, options: Option) -> Result { - self.make_request(Method::HEAD, url, None, options).await - } - - /// Makes an OPTIONS request to the specified URL. - /// - /// The `url` parameter should be a valid URL. - /// Additional options like `headers`, `timeout` or HTTP/3 usage can be passed via the `RequestOptions` struct. - /// - /// If the request is successful, the `reqwest::Response` struct is returned. - pub async fn options(&mut self, url: String, options: Option) -> Result { - self.make_request(Method::OPTIONS, url, None, options).await - } - - /// Makes a `TRACE` request to the specified URL. - /// - /// The `url` parameter should be a valid URL. - /// Additional options like `headers`, `timeout` or HTTP/3 usage can be passed via the `RequestOptions` struct. - /// - /// If the request is successful, the `reqwest::Response` struct is returned. - pub async fn trace(&mut self, url: String, options: Option) -> Result { - self.make_request(Method::TRACE, url, None, options).await - } - - /// Makes a `DELETE` request to the specified URL. - /// - /// The `url` parameter should be a valid URL. - /// Additional options like `headers`, `timeout` or HTTP/3 usage can be passed via the `RequestOptions` struct. - /// - /// If the request is successful, the `reqwest::Response` struct is returned. - pub async fn delete(&mut self, url: String, options: Option) -> Result { - self.make_request(Method::DELETE, url, None, options).await - } - - /// Makes a `POST` request to the specified URL. - /// - /// The `url` parameter should be a valid URL. - /// Additional options like `headers`, `timeout` or HTTP/3 usage can be passed via the `RequestOptions` struct. - /// - /// If the request is successful, the `reqwest::Response` struct is returned. - pub async fn post(&mut self, url: String, body: Option>, options: Option) -> Result { - self.make_request(Method::POST, url, body, options).await - } - - /// Makes a `PUT` request to the specified URL. - /// - /// The `url` parameter should be a valid URL. - /// Additional options like `headers`, `timeout` or HTTP/3 usage can be passed via the `RequestOptions` struct. - /// - /// If the request is successful, the `reqwest::Response` struct is returned. - pub async fn put(&mut self, url: String, body: Option>, options: Option) -> Result { - self.make_request(Method::PUT, url, body, options).await - } - - /// Makes a `PATCH` request to the specified URL. - /// - /// The `url` parameter should be a valid URL. - /// Additional options like `headers`, `timeout` or HTTP/3 usage can be passed via the `RequestOptions` struct. - /// - /// If the request is successful, the `reqwest::Response` struct is returned. - pub async fn patch(&mut self, url: String, body: Option>, options: Option) -> Result { - self.make_request(Method::PATCH, url, body, options).await - } - -} \ No newline at end of file + + /// Makes a `PATCH` request to the specified URL. + /// + /// The `url` parameter should be a valid URL. + /// Additional options like `headers`, `timeout` or HTTP/3 usage can be passed via the `RequestOptions` struct. + /// + /// If the request is successful, the `reqwest::Response` struct is returned. + pub async fn patch( + &mut self, + url: String, + body: Option>, + options: Option, + ) -> Result { + self.make_request(Method::PATCH, url, body, options).await + } +} diff --git a/impit/src/lib.rs b/impit/src/lib.rs index 75ec0344..a0ec2bd5 100644 --- a/impit/src/lib.rs +++ b/impit/src/lib.rs @@ -1,9 +1,9 @@ //! # impit | browser impersonation made simple -//! +//! //! impit is a `rust` library that allows you to impersonate a browser and make requests to websites. It is built on top of `reqwest`, `rustls` and `tokio` and supports HTTP/1.1, HTTP/2, and HTTP/3. -//! +//! //! The library provides a simple API for making requests to websites, and it also allows you to customize the request headers, use proxies, custom timeouts and more. -//! +//! //! ```rust //! use impit::impit::Impit; //! use impit::emulation::Browser; @@ -14,9 +14,9 @@ //! .with_browser(Browser::Firefox) //! .with_http3() //! .build(); -//! +//! //! let response = impit.get(String::from("https://example.com"), None).await; -//! +//! //! match response { //! Ok(response) => { //! println!("{}", response.text().await.unwrap()); @@ -27,40 +27,40 @@ //! } //! } //! ``` -//! +//! //! ### Other projects -//! +//! //! If you are looking for a command-line tool that allows you to make requests to websites, check out the [`impit-cli`](https://github.com/apify/impit-cli/) project. -//! +//! //! If you'd prefer to use `impit` from a Node.js application, check out the [`impit-node`](https://github.com/apify/impit-node) repository, or download the package from npm: //! ```bash //! npm install impit //! ``` -//! +//! //! ### Usage from Rust -//! -//! Technically speaking, the `impit` project is a somewhat thin wrapper around `reqwest` that provides a more ergonomic API for making requests to websites. +//! +//! Technically speaking, the `impit` project is a somewhat thin wrapper around `reqwest` that provides a more ergonomic API for making requests to websites. //! The real strength of `impit` is that it uses patched versions of `rustls` and other libraries that allow it to make browser-like requests. -//! +//! //! Note that if you want to use this library in your rust project, you have to add the following dependencies to your `Cargo.toml` file: //! ```toml //! [dependencies] //! impit = { git="https://github.com/apify/impit.git", branch="master" } -//! +//! //! [patch.crates-io] //! rustls = { git="https://github.com/apify/rustls.git", branch="impit-patch" } //! h2 = { git="https://github.com/apify/h2.git", branch="impit-patch" } //! ``` -//! +//! //! Without the patched dependencies, the project won't build. -//! +//! //! Note that you also have to build your project with `rustflags = "--cfg reqwest_unstable"`, otherwise, the build will also fail. //! This is because `impit` uses unstable features of `reqwest` (namely `http3` support), which are not available in the stable version of the library. #![deny(unused_crate_dependencies)] mod http_headers; -mod tls; mod response_parsing; +mod tls; pub(crate) mod http3; @@ -72,22 +72,22 @@ pub mod request; /// Contains browser emulation-related types and functions. pub mod emulation { - - /// The `Browser` enum is used to specify the browser that should be impersonated. - /// - /// It can be passed as a parameter to [`ImpitBuilder::with_browser`](crate::impit::ImpitBuilder::with_browser) - /// to use the browser emulation with the built [`Impit`](crate::impit::Impit) instance. - #[derive(PartialEq, Debug, Clone, Copy, Default)] - pub enum Browser { - #[default] - Chrome, - Firefox, - } + + /// The `Browser` enum is used to specify the browser that should be impersonated. + /// + /// It can be passed as a parameter to [`ImpitBuilder::with_browser`](crate::impit::ImpitBuilder::with_browser) + /// to use the browser emulation with the built [`Impit`](crate::impit::Impit) instance. + #[derive(PartialEq, Debug, Clone, Copy, Default)] + pub enum Browser { + #[default] + Chrome, + Firefox, + } } /// Various utility functions and types. pub mod utils { - pub use crate::response_parsing::decode; - pub use crate::response_parsing::ContentType; - pub use encoding::all as encodings; -} \ No newline at end of file + pub use crate::response_parsing::decode; + pub use crate::response_parsing::ContentType; + pub use encoding::all as encodings; +} diff --git a/impit/src/request.rs b/impit/src/request.rs index 2fadfc6f..617e81b9 100644 --- a/impit/src/request.rs +++ b/impit/src/request.rs @@ -1,28 +1,18 @@ use std::{collections::HashMap, time::Duration}; /// A struct that holds the request options. -/// +/// /// Unlike the [`ImpitBuilder`](crate::impit::ImpitBuilder) struct, these options are specific to a single request. -/// +/// /// Used by the [`Impit`](crate::impit::Impit) struct's methods. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] pub struct RequestOptions { - /// A `HashMap` that holds custom HTTP headers. These are added to the default headers and should never overwrite them. - pub headers: HashMap, - /// The timeout for the request. This option overrides the global [`Impit`] timeout. - pub timeout: Option, - /// Enforce the use of HTTP/3 for this request. This will cause broken responses from servers that don't support HTTP/3. - /// - /// If [`ImpitBuilder::with_http3`](crate::impit::ImpitBuilder::with_http3) wasn't called, this option will cause [`ErrorType::Http3Disabled`](crate::impit::ErrorType::Http3Disabled) errors. - pub http3_prior_knowledge: bool, + /// A `HashMap` that holds custom HTTP headers. These are added to the default headers and should never overwrite them. + pub headers: HashMap, + /// The timeout for the request. This option overrides the global [`Impit`] timeout. + pub timeout: Option, + /// Enforce the use of HTTP/3 for this request. This will cause broken responses from servers that don't support HTTP/3. + /// + /// If [`ImpitBuilder::with_http3`](crate::impit::ImpitBuilder::with_http3) wasn't called, this option will cause [`ErrorType::Http3Disabled`](crate::impit::ErrorType::Http3Disabled) errors. + pub http3_prior_knowledge: bool, } - -impl Default for RequestOptions { - fn default() -> Self { - RequestOptions { - headers: HashMap::new(), - timeout: None, - http3_prior_knowledge: false, - } - } -} \ No newline at end of file diff --git a/impit/src/response_parsing/mod.rs b/impit/src/response_parsing/mod.rs index 5da277ef..f2e31dd2 100644 --- a/impit/src/response_parsing/mod.rs +++ b/impit/src/response_parsing/mod.rs @@ -2,9 +2,9 @@ use encoding::Encoding; /// Implements the BOM sniffing algorithm to detect the encoding of the response. /// If the BOM sniffing algorithm fails, the function returns `None`. -/// +/// /// See more details at https://encoding.spec.whatwg.org/#bom-sniff -fn bom_sniffing(bytes: &Vec) -> Option { +fn bom_sniffing(bytes: &[u8]) -> Option { if bytes.len() < 3 { return None; } @@ -25,26 +25,32 @@ fn bom_sniffing(bytes: &Vec) -> Option { } /// A lazy implementation of the BOM sniffing algorithm, using `scraper` to parse the HTML and extract the encoding. -/// +/// /// See more details at https://html.spec.whatwg.org/#prescan-a-byte-stream-to-determine-its-encoding -fn prescan_bytestream(bytes: &Vec) -> Option { +fn prescan_bytestream(bytes: &[u8]) -> Option { if bytes.len() < 4 { return None; } let limit = std::cmp::min(1024, bytes.len()); - let ascii_body = encoding::all::ASCII.decode(&bytes[0..limit], encoding::DecoderTrap::Replace).unwrap(); + let ascii_body = encoding::all::ASCII + .decode(&bytes[0..limit], encoding::DecoderTrap::Replace) + .unwrap(); let dom = scraper::Html::parse_document(&ascii_body); - let meta = dom.select(&scraper::Selector::parse("meta[charset]").unwrap()).next(); + let meta = dom + .select(&scraper::Selector::parse("meta[charset]").unwrap()) + .next(); if let Some(meta) = meta { let charset = meta.value().attr("charset").unwrap(); return encoding::label::encoding_from_whatwg_label(charset); } - let meta = dom.select(&scraper::Selector::parse("meta[http-equiv=content-type]").unwrap()).next(); + let meta = dom + .select(&scraper::Selector::parse("meta[http-equiv=content-type]").unwrap()) + .next(); if let Some(meta) = meta { let content = meta.value().attr("content").unwrap(); @@ -53,69 +59,76 @@ fn prescan_bytestream(bytes: &Vec) -> Option { return match content_type { Ok(content_type) => content_type.into(), Err(_) => None, - } + }; } None } /// Converts a vector of bytes to a [`String`] using the provided encoding. -/// -/// If the encoding is not provided, the function tries to detect it using the BOM sniffing algorithm +/// +/// If the encoding is not provided, the function tries to detect it using the BOM sniffing algorithm /// and the byte stream prescanning algorithm. -/// +/// /// ### Example -/// +/// /// ```rust /// let bytes = vec![0x48, 0x65, 0x6C, 0x6C, 0x6F]; /// let string = decode(&bytes, None); -/// +/// /// assert_eq!(string, "Hello"); // By default, the function uses the UTF-8 encoding. -/// +/// /// let bytes = vec![0xFE, 0xFF, 0x00, 0x48, 0x00, 0x65, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0x6F]; /// let string = decode(&bytes, None); -/// +/// /// assert_eq!(string, "Hello"); // The function detects the UTF-16BE encoding using the BOM sniffing algorithm. -/// +/// /// let bytes = vec![0x9e, 0x6c, 0x75, 0x9d, 0x6f, 0x75, 0xe8, 0x6b, 0xfd, 0x20, 0x6b, 0xf9, 0xf2]; /// let string = decode(&bytes, Some(encoding::all::WINDOWS_1250)); -/// +/// /// assert_eq!(string, "žluťoučký kůň"); // The function uses the Windows-1250 encoding. /// ``` -pub fn decode(bytes: &Vec, encoding_prior_knowledge: Option) -> String { +pub fn decode(bytes: &[u8], encoding_prior_knowledge: Option) -> String { let mut encoding: encoding::EncodingRef = encoding::all::UTF_8; if let Some(enc) = encoding_prior_knowledge { encoding = enc; - } else if let Some(enc) = bom_sniffing(&bytes) { + } else if let Some(enc) = bom_sniffing(bytes) { encoding = enc; - } else if let Some(enc) = prescan_bytestream(&bytes) { + } else if let Some(enc) = prescan_bytestream(bytes) { encoding = enc; } - return encoding.decode(&bytes, encoding::DecoderTrap::Strict).unwrap(); + encoding + .decode(bytes, encoding::DecoderTrap::Strict) + .unwrap() } /// A struct that represents the contents of the `Content-Type` header. -/// +/// /// The struct is used to extract the charset from the `Content-Type` header and convert it to an [`encoding::EncodingRef`]. -/// +/// /// ### Example -/// ```rust +/// ```rust /// let content_type = ContentType::from("text/html; charset=cp1250").unwrap(); -/// +/// /// decode(&bytes, content_type.into()); /// ``` pub struct ContentType { charset: String, } +/// Error enum for the `ContentType` struct operations. +pub enum ContentTypeError { + InvalidContentType, +} + impl ContentType { - pub fn from(content_type: &str) -> Result { + pub fn from(content_type: &str) -> Result { let parts: Vec<&str> = content_type.split("charset=").collect(); - if parts.len() != 2 || parts[1].len() == 0 { - return Err(()); + if parts.len() != 2 || parts[1].is_empty() { + return Err(ContentTypeError::InvalidContentType); } Ok(ContentType { @@ -124,8 +137,8 @@ impl ContentType { } } -impl Into> for ContentType { - fn into(self) -> Option { - encoding::label::encoding_from_whatwg_label(self.charset.as_str()) +impl From for Option { + fn from(val: ContentType) -> Self { + encoding::label::encoding_from_whatwg_label(val.charset.as_str()) } -} \ No newline at end of file +} diff --git a/impit/src/tls/ffdhe.rs b/impit/src/tls/ffdhe.rs index 07c5745f..ba0ead79 100644 --- a/impit/src/tls/ffdhe.rs +++ b/impit/src/tls/ffdhe.rs @@ -23,8 +23,6 @@ pub const FFDHE2048_KX_GROUP: FfdheKxGroup = FfdheKxGroup(NamedGroup::FFDHE2048, ffdhe_groups::FFDHE2048); pub const FFDHE3072_KX_GROUP: FfdheKxGroup = FfdheKxGroup(NamedGroup::FFDHE3072, ffdhe_groups::FFDHE3072); -pub const FFDHE4096_KX_GROUP: FfdheKxGroup = - FfdheKxGroup(NamedGroup::FFDHE4096, ffdhe_groups::FFDHE4096); static FFDHE_CIPHER_SUITES: &[rustls::SupportedCipherSuite] = &[ TLS_DHE_RSA_WITH_AES_128_GCM_SHA256, @@ -54,9 +52,7 @@ pub struct FfdheKxGroup(pub NamedGroup, pub FfdheGroup<'static>); impl SupportedKxGroup for FfdheKxGroup { fn start(&self) -> Result, rustls::Error> { let mut x = vec![0; 64]; - ffdhe_provider() - .secure_random - .fill(&mut x)?; + ffdhe_provider().secure_random.fill(&mut x)?; let x = BigUint::from_bytes_be(&x); let p = BigUint::from_bytes_be(self.1.p); @@ -118,4 +114,4 @@ fn to_bytes_be_with_len(n: BigUint, len_bytes: usize) -> Vec { bytes.resize(len_bytes, 0); bytes.reverse(); bytes -} \ No newline at end of file +} diff --git a/impit/src/tls/mod.rs b/impit/src/tls/mod.rs index d5031873..cee1b5b1 100644 --- a/impit/src/tls/mod.rs +++ b/impit/src/tls/mod.rs @@ -1,5 +1,5 @@ -mod statics; mod ffdhe; +mod statics; use std::sync::Arc; @@ -14,122 +14,124 @@ use rustls::RootCertStore; pub struct TlsConfig {} impl TlsConfig { - pub fn builder() -> TlsConfigBuilder { - TlsConfigBuilder::default() - } + pub fn builder() -> TlsConfigBuilder { + TlsConfigBuilder::default() + } } #[derive(Debug, Clone, Copy)] pub struct TlsConfigBuilder { - browser: Option, - max_http_version: Version, - ignore_tls_errors: bool, + browser: Option, + max_http_version: Version, + ignore_tls_errors: bool, } impl Default for TlsConfigBuilder { - fn default() -> Self { - TlsConfigBuilder { - browser: None, - max_http_version: Version::HTTP_2, - ignore_tls_errors: false, - } - } + fn default() -> Self { + TlsConfigBuilder { + browser: None, + max_http_version: Version::HTTP_2, + ignore_tls_errors: false, + } + } } impl TlsConfigBuilder { - fn get_ech_mode(self) -> rustls::client::EchMode { - let (public_key, _) = statics::GREASE_HPKE_SUITE - .generate_key_pair() - .unwrap(); - - EchGreaseConfig::new(statics::GREASE_HPKE_SUITE, public_key).into() - } - - pub fn with_browser(&mut self, browser: Option) -> &mut Self { - self.browser = browser; - self - } - - pub fn with_http3(&mut self) -> &mut Self { - self.max_http_version = Version::HTTP_3; - self - } - - pub fn with_ignore_tls_errors(&mut self, ignore_tls_errors: bool) -> &mut Self { - self.ignore_tls_errors = ignore_tls_errors; - self - } - - pub fn build(self) -> rustls::ClientConfig { - let mut root_store = RootCertStore::empty(); - root_store.extend( - webpki_roots::TLS_SERVER_ROOTS.iter().cloned(), - ); - - let mut config = match self.browser { - Some(browser) => { - let rustls_browser = match browser { - Browser::Chrome => RusTLSBrowser { browser_type: BrowserType::Chrome, version: 125 }, - Browser::Firefox => RusTLSBrowser { browser_type: BrowserType::Firefox, version: 125 }, + fn get_ech_mode(self) -> rustls::client::EchMode { + let (public_key, _) = statics::GREASE_HPKE_SUITE.generate_key_pair().unwrap(); + + EchGreaseConfig::new(statics::GREASE_HPKE_SUITE, public_key).into() + } + + pub fn with_browser(&mut self, browser: Option) -> &mut Self { + self.browser = browser; + self + } + + pub fn with_http3(&mut self) -> &mut Self { + self.max_http_version = Version::HTTP_3; + self + } + + pub fn with_ignore_tls_errors(&mut self, ignore_tls_errors: bool) -> &mut Self { + self.ignore_tls_errors = ignore_tls_errors; + self + } + + pub fn build(self) -> rustls::ClientConfig { + let mut root_store = RootCertStore::empty(); + root_store.extend(webpki_roots::TLS_SERVER_ROOTS.iter().cloned()); + + let mut config = match self.browser { + Some(browser) => { + let rustls_browser = match browser { + Browser::Chrome => RusTLSBrowser { + browser_type: BrowserType::Chrome, + version: 125, + }, + Browser::Firefox => RusTLSBrowser { + browser_type: BrowserType::Firefox, + version: 125, + }, + }; + + let mut crypto_provider = CryptoProvider::builder() + .with_browser_emulator(&rustls_browser) + .build(); + + if browser == Browser::Firefox { + crypto_provider.kx_groups = vec![ + X25519, + SECP256R1, + SECP384R1, + // TODO : add SECPR521R1 + &ffdhe::FFDHE2048_KX_GROUP, + &ffdhe::FFDHE3072_KX_GROUP, + ]; + } + + let mut config: rustls::ClientConfig = + rustls::ClientConfig::builder_with_provider(crypto_provider.into()) + // TODO - use the ECH extension consistently + .with_ech(self.get_ech_mode()) + .unwrap() + .with_root_certificates(root_store) + .with_browser_emulator(&rustls_browser) + .with_no_client_auth(); + + if self.ignore_tls_errors { + config + .dangerous() + .set_certificate_verifier(Arc::new(NoVerifier::new(Some(rustls_browser)))); + } + + config + } + None => { + let crypto_provider = CryptoProvider::builder().build(); + + let mut config: rustls::ClientConfig = + rustls::ClientConfig::builder_with_provider(crypto_provider.into()) + // TODO - use the ECH extension consistently + .with_ech(self.get_ech_mode()) + .unwrap() + .with_root_certificates(root_store) + .with_no_client_auth(); + + if self.ignore_tls_errors { + config + .dangerous() + .set_certificate_verifier(Arc::new(NoVerifier::new(None))); + } + + config + } }; - let mut crypto_provider = CryptoProvider::builder() - .with_browser_emulator(&rustls_browser) - .build(); - - match browser { - Browser::Firefox => { - crypto_provider.kx_groups = vec![ - X25519, - SECP256R1, - SECP384R1, - // TODO : add SECPR521R1 - &ffdhe::FFDHE2048_KX_GROUP, - &ffdhe::FFDHE3072_KX_GROUP, - ]; - }, - _ => {} - } + if self.max_http_version == Version::HTTP_3 { + config.alpn_protocols = vec![b"h3".to_vec()]; + }; - let mut config: rustls::ClientConfig = rustls::ClientConfig::builder_with_provider( - crypto_provider.into(), - ) - // TODO - use the ECH extension consistently - .with_ech(self.get_ech_mode()).unwrap() - .with_root_certificates(root_store) - .with_browser_emulator(&rustls_browser) - .with_no_client_auth(); - - if self.ignore_tls_errors { - config.dangerous().set_certificate_verifier(Arc::new(NoVerifier::new(Some(rustls_browser)))); - } - config - }, - None => { - let crypto_provider = CryptoProvider::builder() - .build(); - - let mut config: rustls::ClientConfig = rustls::ClientConfig::builder_with_provider( - crypto_provider.into(), - ) - // TODO - use the ECH extension consistently - .with_ech(self.get_ech_mode()).unwrap() - .with_root_certificates(root_store) - .with_no_client_auth(); - - if self.ignore_tls_errors { - config.dangerous().set_certificate_verifier(Arc::new(NoVerifier::new(None))); - } - - config - } - }; - - if self.max_http_version == Version::HTTP_3 { - config.alpn_protocols = vec![b"h3".to_vec()]; - }; - - config - } -} \ No newline at end of file + } +} diff --git a/impit/src/tls/statics.rs b/impit/src/tls/statics.rs index 0535efd1..9e4bfa64 100644 --- a/impit/src/tls/statics.rs +++ b/impit/src/tls/statics.rs @@ -1,3 +1,3 @@ -use rustls::crypto::{aws_lc_rs, hpke::Hpke,}; +use rustls::crypto::{aws_lc_rs, hpke::Hpke}; -pub static GREASE_HPKE_SUITE: &dyn Hpke = aws_lc_rs::hpke::DH_KEM_X25519_HKDF_SHA256_AES_128; \ No newline at end of file +pub static GREASE_HPKE_SUITE: &dyn Hpke = aws_lc_rs::hpke::DH_KEM_X25519_HKDF_SHA256_AES_128;