From 34d4796eb7a3c30ac5ada01e1798ca67fdb51982 Mon Sep 17 00:00:00 2001 From: Aram Grigoryan <132480+aram356@users.noreply.github.com> Date: Tue, 8 Jul 2025 22:02:52 -0700 Subject: [PATCH 01/20] Moved handlers to separate files --- crates/common/src/advertiser.rs | 287 +++++++++++++++ crates/common/src/gdpr.rs | 56 ++- crates/common/src/geo.rs | 80 ++++ crates/common/src/lib.rs | 5 + crates/common/src/prebid.rs | 114 +++++- crates/common/src/privacy.rs | 30 ++ crates/common/src/publisher.rs | 126 +++++++ crates/common/src/why.rs | 31 ++ crates/fastly/src/main.rs | 631 ++------------------------------ 9 files changed, 756 insertions(+), 604 deletions(-) create mode 100644 crates/common/src/advertiser.rs create mode 100644 crates/common/src/geo.rs create mode 100644 crates/common/src/publisher.rs diff --git a/crates/common/src/advertiser.rs b/crates/common/src/advertiser.rs new file mode 100644 index 0000000..4ceb043 --- /dev/null +++ b/crates/common/src/advertiser.rs @@ -0,0 +1,287 @@ +//! Ad serving and advertiser integration functionality. +//! +//! This module handles ad requests, including GDPR consent checking, +//! synthetic ID generation, visitor tracking, and communication with +//! external ad partners. + +use std::env; + +use error_stack::Report; +use fastly::http::{header, StatusCode}; +use fastly::{KVStore, Request, Response}; + +use crate::constants::{ + HEADER_SYNTHETIC_TRUSTED_SERVER, HEADER_X_COMPRESS_HINT, HEADER_X_CONSENT_ADVERTISING, + HEADER_X_FORWARDED_FOR, +}; +use crate::error::TrustedServerError; +use crate::gdpr::{get_consent_from_request, GdprConsent}; +use crate::geo::get_dma_code; +use crate::models::AdResponse; +use crate::settings::Settings; +use crate::synthetic::generate_synthetic_id; + +/// Handles ad creative requests. +/// +/// Processes ad requests with synthetic ID and consent checking. 
+/// +/// # Errors +/// +/// Returns a [`TrustedServerError`] if: +/// - Synthetic ID generation fails +/// - Backend communication fails +/// - Response creation fails +pub fn handle_ad_request( + settings: &Settings, + mut req: Request, +) -> Result> { + // Check GDPR consent to determine if we should serve personalized or non-personalized ads + let _consent = match get_consent_from_request(&req) { + Some(c) => c, + None => { + log::debug!("No GDPR consent found in ad request, using default"); + GdprConsent::default() + } + }; + let advertising_consent = req + .get_header(HEADER_X_CONSENT_ADVERTISING) + .and_then(|h| h.to_str().ok()) + .map(|v| v == "true") + .unwrap_or(false); + + // Add DMA code extraction + let dma_code = get_dma_code(&mut req); + + log::info!("Client location - DMA Code: {:?}", dma_code); + + // Log headers for debugging + let client_ip = req + .get_client_ip_addr() + .map(|ip| ip.to_string()) + .unwrap_or_else(|| "Unknown".to_string()); + let x_forwarded_for = req + .get_header(HEADER_X_FORWARDED_FOR) + .map(|h| h.to_str().unwrap_or("Unknown")); + + log::info!("Client IP: {}", client_ip); + log::info!("X-Forwarded-For: {}", x_forwarded_for.unwrap_or("None")); + log::info!("Advertising consent: {}", advertising_consent); + + // Generate synthetic ID only if we have consent + let synthetic_id = if advertising_consent { + generate_synthetic_id(settings, &req)? 
+ } else { + // Use a generic ID for non-personalized ads + "non-personalized".to_string() + }; + + // Only track visits if we have consent + if advertising_consent { + // Increment visit counter in KV store + log::info!("Opening KV store: {}", settings.synthetic.counter_store); + if let Ok(Some(store)) = KVStore::open(settings.synthetic.counter_store.as_str()) { + log::info!("Fetching current count for synthetic ID: {}", synthetic_id); + let current_count: i32 = store + .lookup(&synthetic_id) + .map(|mut val| match String::from_utf8(val.take_body_bytes()) { + Ok(s) => { + log::info!("Value from KV store: {}", s); + Some(s) + } + Err(e) => { + log::error!("Error converting bytes to string: {}", e); + None + } + }) + .map(|opt_s| { + log::info!("Parsing string value: {:?}", opt_s); + opt_s.and_then(|s| s.parse().ok()) + }) + .unwrap_or_else(|_| { + log::info!("No existing count found, starting at 0"); + None + }) + .unwrap_or(0); + + let new_count = current_count + 1; + log::info!("Incrementing count from {} to {}", current_count, new_count); + + if let Err(e) = store.insert(&synthetic_id, new_count.to_string().as_bytes()) { + log::error!("Error updating KV store: {:?}", e); + } + } + } + + // Modify the ad server URL construction to include DMA code if available + let ad_server_url = if advertising_consent { + let mut url = settings + .ad_server + .sync_url + .replace("{{synthetic_id}}", &synthetic_id); + if let Some(dma) = dma_code { + url = format!("{}&dma={}", url, dma); + } + url + } else { + // Use a different URL or parameter for non-personalized ads + settings + .ad_server + .sync_url + .replace("{{synthetic_id}}", "non-personalized") + }; + + log::info!("Sending request to backend: {}", ad_server_url); + + // Add header logging here + let mut ad_req = Request::get(ad_server_url); + + // Add consent information to the ad request + ad_req.set_header( + HEADER_X_CONSENT_ADVERTISING, + if advertising_consent { "true" } else { "false" }, + ); + + 
log::info!("Request headers to Equativ:"); + for (name, value) in ad_req.get_headers() { + log::info!(" {}: {:?}", name, value); + } + + match ad_req.send(settings.ad_server.ad_partner_url.as_str()) { + Ok(mut res) => { + log::info!( + "Received response from backend with status: {}", + res.get_status() + ); + + // Extract Fastly PoP from the Compute environment + let fastly_pop = env::var("FASTLY_POP").unwrap_or_else(|_| "unknown".to_string()); + let fastly_cache_generation = + env::var("FASTLY_CACHE_GENERATION").unwrap_or_else(|_| "unknown".to_string()); + let fastly_customer_id = + env::var("FASTLY_CUSTOMER_ID").unwrap_or_else(|_| "unknown".to_string()); + let fastly_hostname = + env::var("FASTLY_HOSTNAME").unwrap_or_else(|_| "unknown".to_string()); + let fastly_region = env::var("FASTLY_REGION").unwrap_or_else(|_| "unknown".to_string()); + let fastly_service_id = + env::var("FASTLY_SERVICE_ID").unwrap_or_else(|_| "unknown".to_string()); + // let fastly_service_version = env::var("FASTLY_SERVICE_VERSION").unwrap_or_else(|_| "unknown".to_string()); + let fastly_trace_id = + env::var("FASTLY_TRACE_ID").unwrap_or_else(|_| "unknown".to_string()); + + log::info!("Fastly Jason PoP: {}", fastly_pop); + log::info!("Fastly Compute Variables:"); + log::info!(" - FASTLY_CACHE_GENERATION: {}", fastly_cache_generation); + log::info!(" - FASTLY_CUSTOMER_ID: {}", fastly_customer_id); + log::info!(" - FASTLY_HOSTNAME: {}", fastly_hostname); + log::info!(" - FASTLY_POP: {}", fastly_pop); + log::info!(" - FASTLY_REGION: {}", fastly_region); + log::info!(" - FASTLY_SERVICE_ID: {}", fastly_service_id); + //log::info!(" - FASTLY_SERVICE_VERSION: {}", fastly_service_version); + log::info!(" - FASTLY_TRACE_ID: {}", fastly_trace_id); + + // Log all response headers + log::info!("Response headers from Equativ:"); + for (name, value) in res.get_headers() { + log::info!(" {}: {:?}", name, value); + } + + if res.get_status().is_success() { + let body = res.take_body_str(); + 
log::info!("Backend response body: {}", body); + + // Parse the JSON response and extract opid + if let Ok(ad_response) = serde_json::from_str::(&body) { + // Look for the callback with type "impression" + if let Some(callback) = ad_response + .callbacks + .iter() + .find(|c| c.callback_type == "impression") + { + // Extract opid from the URL + if let Some(opid) = callback + .url + .split('&') + .find(|¶m| param.starts_with("opid=")) + .and_then(|param| param.split('=').nth(1)) + { + log::info!("Found opid: {}", opid); + + // Store in opid KV store + log::info!( + "Attempting to open KV store: {}", + settings.synthetic.opid_store + ); + match KVStore::open(settings.synthetic.opid_store.as_str()) { + Ok(Some(store)) => { + log::info!("Successfully opened KV store"); + match store.insert(&synthetic_id, opid.as_bytes()) { + Ok(_) => log::info!( + "Successfully stored opid {} for synthetic ID: {}", + opid, + synthetic_id + ), + Err(e) => { + log::error!("Error storing opid in KV store: {:?}", e) + } + } + } + Ok(None) => { + log::warn!( + "KV store returned None: {}", + settings.synthetic.opid_store + ); + } + Err(e) => { + log::error!( + "Error opening KV store {}: {:?}", + settings.synthetic.opid_store, + e + ); + } + } + } else { + log::warn!("Could not extract opid from impression callback URL"); + } + } else { + log::warn!("No impression callback found in ad response"); + } + } else { + log::warn!("Could not parse JSON response to extract opid"); + } + + let synthetic_header = req + .get_header(HEADER_SYNTHETIC_TRUSTED_SERVER) + .map(|h| h.to_str().unwrap_or("")); + log::info!( + "Returning response with Synthetic header: {:?}", + synthetic_header + ); + log::info!("Advertising consent: {}", advertising_consent); + + // Return the response to the client + Ok(Response::from_body(body) + .with_status(res.get_status()) + .with_header(header::CONTENT_TYPE, "application/json") + .with_header("X-Synthetic-ID", &synthetic_id) + .with_header( + "X-Consent-Advertising", + 
if advertising_consent { "true" } else { "false" }, + ) + .with_header("X-Fastly-PoP", &fastly_pop) + .with_header(HEADER_X_COMPRESS_HINT, "on")) + } else { + Ok(Response::from_status(res.get_status()) + .with_header(header::CONTENT_TYPE, "application/json") + .with_header(HEADER_X_COMPRESS_HINT, "on") + .with_body("{}")) + } + } + Err(e) => { + log::error!("Error making backend request: {:?}", e); + Ok(Response::from_status(StatusCode::NO_CONTENT) + .with_header(header::CONTENT_TYPE, "application/json") + .with_header(HEADER_X_COMPRESS_HINT, "on") + .with_body("{}")) + } + } +} diff --git a/crates/common/src/gdpr.rs b/crates/common/src/gdpr.rs index f7f27d9..9b2ee7b 100644 --- a/crates/common/src/gdpr.rs +++ b/crates/common/src/gdpr.rs @@ -3,13 +3,15 @@ //! This module provides functionality for managing GDPR consent, including //! consent tracking, data subject requests, and compliance with EU privacy regulations. +use error_stack::{Report, ResultExt}; use fastly::http::{header, Method, StatusCode}; -use fastly::{Error, Request, Response}; +use fastly::{Request, Response}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use crate::constants::HEADER_X_SUBJECT_ID; use crate::cookies; +use crate::error::TrustedServerError; use crate::settings::Settings; /// GDPR consent information for a user. @@ -112,22 +114,41 @@ pub fn create_consent_cookie(settings: &Settings, consent: &GdprConsent) -> Stri /// /// # Errors /// -/// Returns a Fastly [`Error`] if response creation fails. 
-pub fn handle_consent_request(settings: &Settings, req: Request) -> Result { +/// Returns a [`TrustedServerError`] if: +/// - JSON serialization/deserialization fails +/// - Response creation fails +pub fn handle_consent_request( + settings: &Settings, + req: Request, +) -> Result> { match *req.get_method() { Method::GET => { // Return current consent status let consent = get_consent_from_request(&req).unwrap_or_default(); + let json_body = serde_json::to_string(&consent) + .change_context(TrustedServerError::GdprConsent { + message: "Failed to serialize consent data".to_string(), + })?; + Ok(Response::from_status(StatusCode::OK) .with_header(header::CONTENT_TYPE, "application/json") - .with_body(serde_json::to_string(&consent)?)) + .with_body(json_body)) } Method::POST => { // Update consent preferences - let consent: GdprConsent = serde_json::from_slice(req.into_body_bytes().as_slice())?; + let consent: GdprConsent = serde_json::from_slice(req.into_body_bytes().as_slice()) + .change_context(TrustedServerError::GdprConsent { + message: "Failed to parse consent request body".to_string(), + })?; + + let json_body = serde_json::to_string(&consent) + .change_context(TrustedServerError::GdprConsent { + message: "Failed to serialize consent response".to_string(), + })?; + let mut response = Response::from_status(StatusCode::OK) .with_header(header::CONTENT_TYPE, "application/json") - .with_body(serde_json::to_string(&consent)?); + .with_body(json_body); response.set_header( header::SET_COOKIE, @@ -152,8 +173,13 @@ pub fn handle_consent_request(settings: &Settings, req: Request) -> Result Result { +/// Returns a [`TrustedServerError`] if: +/// - Header value extraction fails +/// - JSON serialization fails +pub fn handle_data_subject_request( + _settings: &Settings, + req: Request, +) -> Result> { match *req.get_method() { Method::GET => { // Handle data access request @@ -163,11 +189,21 @@ pub fn handle_data_subject_request(_settings: &Settings, req: Request) -> Result 
// TODO: Implement actual data retrieval from KV store // For now, return empty user data - data.insert(synthetic_id.to_str()?.to_string(), UserData::default()); + let id_str = synthetic_id + .to_str() + .change_context(TrustedServerError::InvalidHeaderValue { + message: "Invalid subject ID header value".to_string(), + })?; + data.insert(id_str.to_string(), UserData::default()); + + let json_body = serde_json::to_string(&data) + .change_context(TrustedServerError::GdprConsent { + message: "Failed to serialize user data".to_string(), + })?; Ok(Response::from_status(StatusCode::OK) .with_header(header::CONTENT_TYPE, "application/json") - .with_body(serde_json::to_string(&data)?)) + .with_body(json_body)) } else { Ok(Response::from_status(StatusCode::BAD_REQUEST).with_body("Missing subject ID")) } diff --git a/crates/common/src/geo.rs b/crates/common/src/geo.rs new file mode 100644 index 0000000..1f12885 --- /dev/null +++ b/crates/common/src/geo.rs @@ -0,0 +1,80 @@ +//! Geographic location utilities for the trusted server. +//! +//! This module provides functions for extracting and handling geographic +//! information from incoming requests, particularly DMA (Designated Market Area) codes. + +use fastly::geo::geo_lookup; +use fastly::Request; + +use crate::constants::{ + HEADER_X_GEO_CITY, HEADER_X_GEO_CONTINENT, HEADER_X_GEO_COORDINATES, HEADER_X_GEO_COUNTRY, + HEADER_X_GEO_INFO_AVAILABLE, HEADER_X_GEO_METRO_CODE, +}; + +/// Extracts the DMA (Designated Market Area) code from the request's geolocation data. +/// +/// This function: +/// 1. Checks if running in Fastly environment +/// 2. Performs geo lookup based on client IP +/// 3. Sets various geo headers on the request +/// 4. 
Returns the metro code (DMA) if available +/// +/// # Arguments +/// +/// * `req` - The request to extract DMA code from +/// +/// # Returns +/// +/// The DMA/metro code as a string if available, None otherwise +pub fn get_dma_code(req: &mut Request) -> Option { + // Debug: Check if we're running in Fastly environment + log::info!("Fastly Environment Check:"); + log::info!( + " FASTLY_POP: {}", + std::env::var("FASTLY_POP").unwrap_or_else(|_| "not in Fastly".to_string()) + ); + log::info!( + " FASTLY_REGION: {}", + std::env::var("FASTLY_REGION").unwrap_or_else(|_| "not in Fastly".to_string()) + ); + + // Get detailed geo information using geo_lookup + if let Some(geo) = req.get_client_ip_addr().and_then(geo_lookup) { + log::info!("Geo Information Found:"); + + // Set all available geo information in headers + let city = geo.city(); + req.set_header(HEADER_X_GEO_CITY, city); + log::info!(" City: {}", city); + + let country = geo.country_code(); + req.set_header(HEADER_X_GEO_COUNTRY, country); + log::info!(" Country: {}", country); + + req.set_header(HEADER_X_GEO_CONTINENT, format!("{:?}", geo.continent())); + log::info!(" Continent: {:?}", geo.continent()); + + req.set_header( + HEADER_X_GEO_COORDINATES, + format!("{},{}", geo.latitude(), geo.longitude()), + ); + log::info!(" Location: ({}, {})", geo.latitude(), geo.longitude()); + + // Get and set the metro code (DMA) + let metro_code = geo.metro_code(); + req.set_header(HEADER_X_GEO_METRO_CODE, metro_code.to_string()); + log::info!("Found DMA/Metro code: {}", metro_code); + return Some(metro_code.to_string()); + } else { + log::info!("No geo information available for the request"); + req.set_header(HEADER_X_GEO_INFO_AVAILABLE, "false"); + } + + // If no metro code is found, log all request headers for debugging + log::info!("No DMA/Metro code found. 
All request headers:"); + for (name, value) in req.get_headers() { + log::info!(" {}: {:?}", name, value); + } + + None +} diff --git a/crates/common/src/lib.rs b/crates/common/src/lib.rs index d425c2c..07fe80f 100644 --- a/crates/common/src/lib.rs +++ b/crates/common/src/lib.rs @@ -5,10 +5,12 @@ //! //! # Modules //! +//! - [`advertiser`]: Ad serving and advertiser integration functionality //! - [`constants`]: Application-wide constants and configuration values //! - [`cookies`]: Cookie parsing and generation utilities //! - [`error`]: Error types and error handling utilities //! - [`gdpr`]: GDPR consent management and TCF string parsing +//! - [`geo`]: Geographic location utilities and DMA code extraction //! - [`models`]: Data models for ad serving and callbacks //! - [`prebid`]: Prebid integration and real-time bidding support //! - [`privacy`]: Privacy utilities and helpers @@ -18,13 +20,16 @@ //! - [`test_support`]: Testing utilities and mocks //! - [`why`]: Debugging and introspection utilities +pub mod advertiser; pub mod constants; pub mod cookies; pub mod error; pub mod gdpr; +pub mod geo; pub mod models; pub mod prebid; pub mod privacy; +pub mod publisher; pub mod settings; pub mod synthetic; pub mod templates; diff --git a/crates/common/src/prebid.rs b/crates/common/src/prebid.rs index b998986..ec129c0 100644 --- a/crates/common/src/prebid.rs +++ b/crates/common/src/prebid.rs @@ -4,16 +4,17 @@ //! to enable header bidding and real-time ad auctions. 
use error_stack::Report; -use fastly::http::{header, Method}; +use fastly::http::{header, Method, StatusCode}; use fastly::{Error, Request, Response}; use serde_json::json; use crate::constants::{ - HEADER_SYNTHETIC_FRESH, HEADER_SYNTHETIC_TRUSTED_SERVER, HEADER_X_FORWARDED_FOR, + HEADER_SYNTHETIC_FRESH, HEADER_SYNTHETIC_TRUSTED_SERVER, HEADER_X_COMPRESS_HINT, + HEADER_X_CONSENT_ADVERTISING, HEADER_X_FORWARDED_FOR, }; use crate::error::TrustedServerError; use crate::settings::Settings; -use crate::synthetic::generate_synthetic_id; +use crate::synthetic::{generate_synthetic_id, get_or_generate_synthetic_id}; /// Represents a request to the Prebid Server with all necessary parameters pub struct PrebidRequest { @@ -198,6 +199,113 @@ impl PrebidRequest { } } +/// Handles the prebid test route with detailed error logging. +/// +/// This endpoint is used to test Prebid Server integration by: +/// 1. Checking consent status +/// 2. Generating synthetic IDs (if consent is given) +/// 3. Creating a PrebidRequest +/// 4. Sending the bid request to Prebid Server +/// 5. 
Returning the response with appropriate headers +/// +/// # Errors +/// +/// Returns a [`TrustedServerError`] if: +/// - Synthetic ID generation fails +/// - PrebidRequest creation fails +/// - Communication with Prebid Server fails +pub async fn handle_prebid_test( + settings: &Settings, + mut req: Request, +) -> Result> { + log::info!("Starting prebid test request handling"); + + // Check consent status from headers + let advertising_consent = req + .get_header(HEADER_X_CONSENT_ADVERTISING) + .and_then(|h| h.to_str().ok()) + .map(|v| v == "true") + .unwrap_or(false); + + // Calculate fresh ID and synthetic ID only if we have advertising consent + let (fresh_id, synthetic_id) = if advertising_consent { + let fresh = generate_synthetic_id(settings, &req)?; + let synth = get_or_generate_synthetic_id(settings, &req)?; + (fresh, synth) + } else { + // Use non-personalized IDs when no consent + ( + "non-personalized".to_string(), + "non-personalized".to_string(), + ) + }; + + log::info!( + "Existing Trusted Server header: {:?}", + req.get_header(HEADER_SYNTHETIC_TRUSTED_SERVER) + ); + log::info!("Generated Fresh ID: {}", &fresh_id); + log::info!("Using Trusted Server ID: {}", synthetic_id); + log::info!("Advertising consent: {}", advertising_consent); + + // Set both IDs as headers + req.set_header(HEADER_SYNTHETIC_FRESH, &fresh_id); + req.set_header(HEADER_SYNTHETIC_TRUSTED_SERVER, &synthetic_id); + req.set_header( + HEADER_X_CONSENT_ADVERTISING, + if advertising_consent { "true" } else { "false" }, + ); + + log::info!( + "Using Trusted Server ID: {}, Fresh ID: {}", + synthetic_id, + fresh_id + ); + + let prebid_req = PrebidRequest::new(settings, &req)?; + log::info!( + "Successfully created PrebidRequest with synthetic ID: {}", + prebid_req.synthetic_id + ); + + log::info!("Attempting to send bid request to Prebid Server at prebid_backend"); + + match prebid_req.send_bid_request(settings, &req).await { + Ok(mut prebid_response) => { + log::info!("Received response 
from Prebid Server"); + log::info!("Response status: {}", prebid_response.get_status()); + + log::info!("Response headers:"); + for (name, value) in prebid_response.get_headers() { + log::info!(" {}: {:?}", name, value); + } + + let body = prebid_response.take_body_str(); + log::info!("Response body: {}", body); + + Ok(Response::from_status(StatusCode::OK) + .with_header(header::CONTENT_TYPE, "application/json") + .with_header("X-Prebid-Test", "true") + .with_header("X-Synthetic-ID", &prebid_req.synthetic_id) + .with_header( + "X-Consent-Advertising", + if advertising_consent { "true" } else { "false" }, + ) + .with_header(HEADER_X_COMPRESS_HINT, "on") + .with_body(body)) + } + Err(e) => { + log::error!("Error sending bid request: {:?}", e); + log::error!("Backend name used: prebid_backend"); + + // Convert Fastly Error to TrustedServerError + Err(Report::new(TrustedServerError::Prebid { + message: format!("Failed to send bid request: {}", e), + })) + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/common/src/privacy.rs b/crates/common/src/privacy.rs index e34251b..4a25fcc 100644 --- a/crates/common/src/privacy.rs +++ b/crates/common/src/privacy.rs @@ -1,3 +1,33 @@ +//! Privacy policy handling. +//! +//! This module provides the privacy policy template and handler function. + +use error_stack::Report; +use fastly::http::{header, StatusCode}; +use fastly::{Request, Response}; + +use crate::constants::HEADER_X_COMPRESS_HINT; +use crate::error::TrustedServerError; +use crate::settings::Settings; + +/// Handles privacy policy page requests. +/// +/// Returns the privacy policy HTML page. +/// +/// # Errors +/// +/// This function currently doesn't return errors, but returns a `Result` for consistency +/// and future extensibility. 
+pub fn handle_privacy_policy( + _settings: &Settings, + _req: Request, +) -> Result> { + Ok(Response::from_status(StatusCode::OK) + .with_body(PRIVACY_TEMPLATE) + .with_header(header::CONTENT_TYPE, "text/html") + .with_header(HEADER_X_COMPRESS_HINT, "on")) +} + pub const PRIVACY_TEMPLATE: &str = r#" diff --git a/crates/common/src/publisher.rs b/crates/common/src/publisher.rs new file mode 100644 index 0000000..8c6b02b --- /dev/null +++ b/crates/common/src/publisher.rs @@ -0,0 +1,126 @@ +use error_stack::Report; +use fastly::http::{header, StatusCode}; +use fastly::{Request, Response}; + +use crate::constants::{ + HEADER_SYNTHETIC_FRESH, HEADER_SYNTHETIC_TRUSTED_SERVER, HEADER_X_GEO_CITY, + HEADER_X_GEO_CONTINENT, HEADER_X_GEO_COORDINATES, HEADER_X_GEO_COUNTRY, + HEADER_X_GEO_INFO_AVAILABLE, HEADER_X_GEO_METRO_CODE, +}; +use crate::cookies::create_synthetic_cookie; +use crate::error::TrustedServerError; +use crate::gdpr::{get_consent_from_request, GdprConsent}; +use crate::geo::get_dma_code; +use crate::settings::Settings; +use crate::synthetic::{generate_synthetic_id, get_or_generate_synthetic_id}; +use crate::templates::HTML_TEMPLATE; + +/// Handles the main page request. +/// +/// Serves the main page with synthetic ID generation and ad integration. 
+/// +/// # Errors +/// +/// Returns a [`TrustedServerError`] if: +/// - Synthetic ID generation fails +/// - Response creation fails +pub fn handle_main_page( + settings: &Settings, + mut req: Request, +) -> Result> { + log::info!( + "Using ad_partner_url: {}, counter_store: {}", + settings.ad_server.ad_partner_url, + settings.synthetic.counter_store, + ); + + // Add DMA code check to main page as well + let dma_code = get_dma_code(&mut req); + log::info!("Main page - DMA Code: {:?}", dma_code); + + // Check GDPR consent before proceeding + let consent = match get_consent_from_request(&req) { + Some(c) => c, + None => { + log::debug!("No GDPR consent found, using default"); + GdprConsent::default() + } + }; + if !consent.functional { + // Return a version of the page without tracking + return Ok(Response::from_status(StatusCode::OK) + .with_body( + HTML_TEMPLATE.replace("fetch('/prebid-test')", "console.log('Tracking disabled')"), + ) + .with_header(header::CONTENT_TYPE, "text/html") + .with_header(header::CACHE_CONTROL, "no-store, private")); + } + + // Calculate fresh ID first using the synthetic module + let fresh_id = generate_synthetic_id(settings, &req)?; + + // Check for existing Trusted Server ID in this specific order: + // 1. X-Synthetic-Trusted-Server header + // 2. Cookie + // 3. 
Fall back to fresh ID + let synthetic_id = get_or_generate_synthetic_id(settings, &req)?; + + log::info!( + "Existing Trusted Server header: {:?}", + req.get_header(HEADER_SYNTHETIC_TRUSTED_SERVER) + ); + log::info!("Generated Fresh ID: {}", &fresh_id); + log::info!("Using Trusted Server ID: {}", synthetic_id); + + // Create response with the main page HTML + let mut response = Response::from_status(StatusCode::OK) + .with_body(HTML_TEMPLATE) + .with_header(header::CONTENT_TYPE, "text/html") + .with_header(HEADER_SYNTHETIC_FRESH, fresh_id.as_str()) // Fresh ID always changes + .with_header(HEADER_SYNTHETIC_TRUSTED_SERVER, &synthetic_id) // Trusted Server ID remains stable + .with_header( + header::ACCESS_CONTROL_EXPOSE_HEADERS, + "X-Geo-City, X-Geo-Country, X-Geo-Continent, X-Geo-Coordinates, X-Geo-Metro-Code, X-Geo-Info-Available" + ) + .with_header(header::ACCESS_CONTROL_ALLOW_ORIGIN, "*") + .with_header("x-compress-hint", "on"); + + // Copy geo headers from request to response + for header_name in &[ + HEADER_X_GEO_CITY, + HEADER_X_GEO_COUNTRY, + HEADER_X_GEO_CONTINENT, + HEADER_X_GEO_COORDINATES, + HEADER_X_GEO_METRO_CODE, + HEADER_X_GEO_INFO_AVAILABLE, + ] { + if let Some(value) = req.get_header(header_name) { + response.set_header(header_name, value); + } + } + + // Only set cookies if we have consent + if consent.functional { + response.set_header( + header::SET_COOKIE, + create_synthetic_cookie(settings, &synthetic_id), + ); + } + + // Debug: Print all request headers + log::info!("All Request Headers:"); + for (name, value) in req.get_headers() { + log::info!("{}: {:?}", name, value); + } + + // Debug: Print the response headers + log::info!("Response Headers:"); + for (name, value) in response.get_headers() { + log::info!("{}: {:?}", name, value); + } + + // Prevent caching + response.set_header(header::CACHE_CONTROL, "no-store, private"); + + Ok(response) +} diff --git a/crates/common/src/why.rs b/crates/common/src/why.rs index 40c1246..0015922 100644 
--- a/crates/common/src/why.rs +++ b/crates/common/src/why.rs @@ -1,3 +1,34 @@ +//! Why Trusted Server page handling. +//! +//! This module provides the "Why Trusted Server" explanation page. + +use error_stack::Report; +use fastly::http::{header, StatusCode}; +use fastly::{Request, Response}; + +use crate::constants::HEADER_X_COMPRESS_HINT; +use crate::error::TrustedServerError; +use crate::settings::Settings; + +/// Handles "Why Trusted Server" page requests. +/// +/// Returns the Why Trusted Server HTML page that explains the purpose and benefits +/// of the trusted server approach. +/// +/// # Errors +/// +/// This function currently doesn't return errors, but returns a `Result` for consistency +/// and future extensibility. +pub fn handle_why_trusted_server( + _settings: &Settings, + _req: Request, +) -> Result> { + Ok(Response::from_status(StatusCode::OK) + .with_body(WHY_TEMPLATE) + .with_header(header::CONTENT_TYPE, "text/html") + .with_header(HEADER_X_COMPRESS_HINT, "on")) +} + pub const WHY_TEMPLATE: &str = r#" diff --git a/crates/fastly/src/main.rs b/crates/fastly/src/main.rs index e32d593..7295b5e 100644 --- a/crates/fastly/src/main.rs +++ b/crates/fastly/src/main.rs @@ -1,36 +1,23 @@ -use std::env; - -use fastly::geo::geo_lookup; use fastly::http::{header, Method, StatusCode}; -use fastly::KVStore; use fastly::{Error, Request, Response}; use log::LevelFilter::Info; -use serde_json::json; mod error; use crate::error::to_error_response; -use trusted_server_common::constants::{ - HEADER_SYNTHETIC_FRESH, HEADER_SYNTHETIC_TRUSTED_SERVER, HEADER_X_COMPRESS_HINT, - HEADER_X_CONSENT_ADVERTISING, HEADER_X_FORWARDED_FOR, HEADER_X_GEO_CITY, - HEADER_X_GEO_CONTINENT, HEADER_X_GEO_COORDINATES, HEADER_X_GEO_COUNTRY, - HEADER_X_GEO_INFO_AVAILABLE, HEADER_X_GEO_METRO_CODE, -}; -use trusted_server_common::cookies::create_synthetic_cookie; -// Note: TrustedServerError is used internally by the common crate -use trusted_server_common::gdpr::{ - get_consent_from_request, 
handle_consent_request, handle_data_subject_request, GdprConsent, -}; -use trusted_server_common::models::AdResponse; -use trusted_server_common::prebid::PrebidRequest; -use trusted_server_common::privacy::PRIVACY_TEMPLATE; +use trusted_server_common::advertiser::handle_ad_request; +use trusted_server_common::constants::HEADER_X_COMPRESS_HINT; +use trusted_server_common::gdpr::{handle_consent_request, handle_data_subject_request}; +use trusted_server_common::prebid::handle_prebid_test; +use trusted_server_common::privacy::handle_privacy_policy; +use trusted_server_common::publisher::handle_main_page; use trusted_server_common::settings::Settings; -use trusted_server_common::synthetic::{generate_synthetic_id, get_or_generate_synthetic_id}; -use trusted_server_common::templates::HTML_TEMPLATE; -use trusted_server_common::why::WHY_TEMPLATE; +use trusted_server_common::why::handle_why_trusted_server; #[fastly::main] fn main(req: Request) -> Result { + log_fastly::init_simple("mylogs", Info); + let settings = match Settings::new() { Ok(s) => s, Err(e) => { @@ -40,583 +27,45 @@ fn main(req: Request) -> Result { }; log::info!("Settings {settings:?}"); - futures::executor::block_on(async { - log::info!( - "FASTLY_SERVICE_VERSION: {}", - std::env::var("FASTLY_SERVICE_VERSION").unwrap_or_else(|_| String::new()) - ); - - match (req.get_method(), req.get_path()) { - (&Method::GET, "/") => handle_main_page(&settings, req), - (&Method::GET, "/ad-creative") => handle_ad_request(&settings, req), - (&Method::GET, "/prebid-test") => handle_prebid_test(&settings, req).await, - (&Method::GET, "/gdpr/consent") => handle_consent_request(&settings, req), - (&Method::POST, "/gdpr/consent") => handle_consent_request(&settings, req), - (&Method::GET, "/gdpr/data") => handle_data_subject_request(&settings, req), - (&Method::DELETE, "/gdpr/data") => handle_data_subject_request(&settings, req), - (&Method::GET, "/privacy-policy") => Ok(Response::from_status(StatusCode::OK) - 
.with_body(PRIVACY_TEMPLATE) - .with_header(header::CONTENT_TYPE, "text/html") - .with_header(HEADER_X_COMPRESS_HINT, "on")), - (&Method::GET, "/why-trusted-server") => Ok(Response::from_status(StatusCode::OK) - .with_body(WHY_TEMPLATE) - .with_header(header::CONTENT_TYPE, "text/html") - .with_header(HEADER_X_COMPRESS_HINT, "on")), - _ => Ok(Response::from_status(StatusCode::NOT_FOUND) - .with_body("Not Found") - .with_header(header::CONTENT_TYPE, "text/plain") - .with_header(HEADER_X_COMPRESS_HINT, "on")), - } - }) -} - -fn get_dma_code(req: &mut Request) -> Option { - // Debug: Check if we're running in Fastly environment - log::info!("Fastly Environment Check:"); - log::info!( - " FASTLY_POP: {}", - std::env::var("FASTLY_POP").unwrap_or_else(|_| "not in Fastly".to_string()) - ); - log::info!( - " FASTLY_REGION: {}", - std::env::var("FASTLY_REGION").unwrap_or_else(|_| "not in Fastly".to_string()) - ); - - // Get detailed geo information using geo_lookup - if let Some(geo) = req.get_client_ip_addr().and_then(geo_lookup) { - log::info!("Geo Information Found:"); - - // Set all available geo information in headers - let city = geo.city(); - req.set_header(HEADER_X_GEO_CITY, city); - log::info!(" City: {}", city); - - let country = geo.country_code(); - req.set_header(HEADER_X_GEO_COUNTRY, country); - log::info!(" Country: {}", country); - - req.set_header(HEADER_X_GEO_CONTINENT, format!("{:?}", geo.continent())); - log::info!(" Continent: {:?}", geo.continent()); - - req.set_header( - HEADER_X_GEO_COORDINATES, - format!("{},{}", geo.latitude(), geo.longitude()), - ); - log::info!(" Location: ({}, {})", geo.latitude(), geo.longitude()); - - // Get and set the metro code (DMA) - let metro_code = geo.metro_code(); - req.set_header(HEADER_X_GEO_METRO_CODE, metro_code.to_string()); - log::info!("Found DMA/Metro code: {}", metro_code); - return Some(metro_code.to_string()); - } else { - log::info!("No geo information available for the request"); - 
req.set_header(HEADER_X_GEO_INFO_AVAILABLE, "false"); - } - - // If no metro code is found, log all request headers for debugging - log::info!("No DMA/Metro code found. All request headers:"); - for (name, value) in req.get_headers() { - log::info!(" {}: {:?}", name, value); - } - - None + futures::executor::block_on(route_request(settings, req)) } -/// Handles the main page request. -/// -/// Serves the main page with synthetic ID generation and ad integration. +/// Routes incoming requests to appropriate handlers. /// -/// # Errors -/// -/// Returns a Fastly [`Error`] if response creation fails. -fn handle_main_page(settings: &Settings, mut req: Request) -> Result { - log::info!( - "Using ad_partner_url: {}, counter_store: {}", - settings.ad_server.ad_partner_url, - settings.synthetic.counter_store, - ); - - log_fastly::init_simple("mylogs", Info); - - // Add DMA code check to main page as well - let dma_code = get_dma_code(&mut req); - log::info!("Main page - DMA Code: {:?}", dma_code); - - // Check GDPR consent before proceeding - let consent = match get_consent_from_request(&req) { - Some(c) => c, - None => { - log::debug!("No GDPR consent found, using default"); - GdprConsent::default() - } - }; - if !consent.functional { - // Return a version of the page without tracking - return Ok(Response::from_status(StatusCode::OK) - .with_body( - HTML_TEMPLATE.replace("fetch('/prebid-test')", "console.log('Tracking disabled')"), - ) - .with_header(header::CONTENT_TYPE, "text/html") - .with_header(header::CACHE_CONTROL, "no-store, private")); - } - - // Calculate fresh ID first using the synthetic module - let fresh_id = match generate_synthetic_id(settings, &req) { - Ok(id) => id, - Err(e) => return Ok(to_error_response(e)), - }; - - // Check for existing Trusted Server ID in this specific order: - // 1. X-Synthetic-Trusted-Server header - // 2. Cookie - // 3. 
Fall back to fresh ID - let synthetic_id = match get_or_generate_synthetic_id(settings, &req) { - Ok(id) => id, - Err(e) => return Ok(to_error_response(e)), - }; - +/// This function implements the application's routing logic, matching HTTP methods +/// and paths to their corresponding handler functions. +async fn route_request(settings: Settings, req: Request) -> Result<Response, Error> { log::info!( - "Existing Trusted Server header: {:?}", - req.get_header(HEADER_SYNTHETIC_TRUSTED_SERVER) + "FASTLY_SERVICE_VERSION: {}", + ::std::env::var("FASTLY_SERVICE_VERSION").unwrap_or_else(|_| String::new()) ); - log::info!("Generated Fresh ID: {}", &fresh_id); - log::info!("Using Trusted Server ID: {}", synthetic_id); - - // Create response with the main page HTML - let mut response = Response::from_status(StatusCode::OK) - .with_body(HTML_TEMPLATE) - .with_header(header::CONTENT_TYPE, "text/html") - .with_header(HEADER_SYNTHETIC_FRESH, fresh_id.as_str()) // Fresh ID always changes - .with_header(HEADER_SYNTHETIC_TRUSTED_SERVER, &synthetic_id) // Trusted Server ID remains stable - .with_header( - header::ACCESS_CONTROL_EXPOSE_HEADERS, - "X-Geo-City, X-Geo-Country, X-Geo-Continent, X-Geo-Coordinates, X-Geo-Metro-Code, X-Geo-Info-Available" - ) - .with_header(header::ACCESS_CONTROL_ALLOW_ORIGIN, "*") - .with_header("x-compress-hint", "on"); - - // Copy geo headers from request to response - for header_name in &[ - "X-Geo-City", - "X-Geo-Country", - "X-Geo-Continent", - "X-Geo-Coordinates", - "X-Geo-Metro-Code", - "X-Geo-Info-Available", - ] { - if let Some(value) = req.get_header(*header_name) { - response.set_header(*header_name, value); - } - } - - // Only set cookies if we have consent - if consent.functional { - response.set_header( - header::SET_COOKIE, - create_synthetic_cookie(settings, &synthetic_id), - ); - } - - // Debug: Print all request headers - log::info!("All Request Headers:"); - for (name, value) in req.get_headers() { - log::info!("{}: {:?}", name, value); - } - - // 
Debug: Print the response headers - log::info!("Response Headers:"); - for (name, value) in response.get_headers() { - log::info!("{}: {:?}", name, value); - } - - // Prevent caching - response.set_header(header::CACHE_CONTROL, "no-store, private"); - - Ok(response) -} - -/// Handles ad creative requests. -/// -/// Processes ad requests with synthetic ID and consent checking. -/// -/// # Errors -/// -/// Returns a Fastly [`Error`] if response creation fails. -fn handle_ad_request(settings: &Settings, mut req: Request) -> Result { - // Check GDPR consent to determine if we should serve personalized or non-personalized ads - let _consent = match get_consent_from_request(&req) { - Some(c) => c, - None => { - log::debug!("No GDPR consent found in ad request, using default"); - GdprConsent::default() - } - }; - let advertising_consent = req - .get_header(HEADER_X_CONSENT_ADVERTISING) - .and_then(|h| h.to_str().ok()) - .map(|v| v == "true") - .unwrap_or(false); - - // Add DMA code extraction - let dma_code = get_dma_code(&mut req); - - log::info!("Client location - DMA Code: {:?}", dma_code); - - // Log headers for debugging - let client_ip = req - .get_client_ip_addr() - .map(|ip| ip.to_string()) - .unwrap_or_else(|| "Unknown".to_string()); - let x_forwarded_for = req - .get_header(HEADER_X_FORWARDED_FOR) - .map(|h| h.to_str().unwrap_or("Unknown")); - log::info!("Client IP: {}", client_ip); - log::info!("X-Forwarded-For: {}", x_forwarded_for.unwrap_or("None")); - log::info!("Advertising consent: {}", advertising_consent); - - // Generate synthetic ID only if we have consent - let synthetic_id = if advertising_consent { - match generate_synthetic_id(settings, &req) { - Ok(id) => id, - Err(e) => return Ok(to_error_response(e)), - } - } else { - // Use a generic ID for non-personalized ads - "non-personalized".to_string() + let result = match (req.get_method(), req.get_path()) { + // Main application routes + (&Method::GET, "/") => handle_main_page(&settings, req), + 
(&Method::GET, "/ad-creative") => handle_ad_request(&settings, req), + (&Method::GET, "/prebid-test") => handle_prebid_test(&settings, req).await, + + // GDPR compliance routes + (&Method::GET | &Method::POST, "/gdpr/consent") => handle_consent_request(&settings, req), + (&Method::GET | &Method::DELETE, "/gdpr/data") => handle_data_subject_request(&settings, req), + + // Static content pages + (&Method::GET, "/privacy-policy") => handle_privacy_policy(&settings, req), + (&Method::GET, "/why-trusted-server") => handle_why_trusted_server(&settings, req), + + // Catch-all 404 handler + _ => return Ok(not_found_response()), }; - // Only track visits if we have consent - if advertising_consent { - // Increment visit counter in KV store - log::info!("Opening KV store: {}", settings.synthetic.counter_store); - if let Ok(Some(store)) = KVStore::open(settings.synthetic.counter_store.as_str()) { - log::info!("Fetching current count for synthetic ID: {}", synthetic_id); - let current_count: i32 = store - .lookup(&synthetic_id) - .map(|mut val| match String::from_utf8(val.take_body_bytes()) { - Ok(s) => { - log::info!("Value from KV store: {}", s); - Some(s) - } - Err(e) => { - log::error!("Error converting bytes to string: {}", e); - None - } - }) - .map(|opt_s| { - log::info!("Parsing string value: {:?}", opt_s); - opt_s.and_then(|s| s.parse().ok()) - }) - .unwrap_or_else(|_| { - log::info!("No existing count found, starting at 0"); - None - }) - .unwrap_or(0); - - let new_count = current_count + 1; - log::info!("Incrementing count from {} to {}", current_count, new_count); - - if let Err(e) = store.insert(&synthetic_id, new_count.to_string().as_bytes()) { - log::error!("Error updating KV store: {:?}", e); - } - } - } - - // Modify the ad server URL construction to include DMA code if available - let ad_server_url = if advertising_consent { - let mut url = settings - .ad_server - .sync_url - .replace("{{synthetic_id}}", &synthetic_id); - if let Some(dma) = dma_code { - url = 
format!("{}&dma={}", url, dma); - } - url - } else { - // Use a different URL or parameter for non-personalized ads - settings - .ad_server - .sync_url - .replace("{{synthetic_id}}", "non-personalized") - }; - - log::info!("Sending request to backend: {}", ad_server_url); - - // Add header logging here - let mut ad_req = Request::get(ad_server_url); - - // Add consent information to the ad request - ad_req.set_header( - HEADER_X_CONSENT_ADVERTISING, - if advertising_consent { "true" } else { "false" }, - ); - - log::info!("Request headers to Equativ:"); - for (name, value) in ad_req.get_headers() { - log::info!(" {}: {:?}", name, value); - } - - match ad_req.send(settings.ad_server.ad_partner_url.as_str()) { - Ok(mut res) => { - log::info!( - "Received response from backend with status: {}", - res.get_status() - ); - - // Extract Fastly PoP from the Compute environment - let fastly_pop = env::var("FASTLY_POP").unwrap_or_else(|_| "unknown".to_string()); - let fastly_cache_generation = - env::var("FASTLY_CACHE_GENERATION").unwrap_or_else(|_| "unknown".to_string()); - let fastly_customer_id = - env::var("FASTLY_CUSTOMER_ID").unwrap_or_else(|_| "unknown".to_string()); - let fastly_hostname = - env::var("FASTLY_HOSTNAME").unwrap_or_else(|_| "unknown".to_string()); - let fastly_region = env::var("FASTLY_REGION").unwrap_or_else(|_| "unknown".to_string()); - let fastly_service_id = - env::var("FASTLY_SERVICE_ID").unwrap_or_else(|_| "unknown".to_string()); - // let fastly_service_version = env::var("FASTLY_SERVICE_VERSION").unwrap_or_else(|_| "unknown".to_string()); - let fastly_trace_id = - env::var("FASTLY_TRACE_ID").unwrap_or_else(|_| "unknown".to_string()); - - log::info!("Fastly Jason PoP: {}", fastly_pop); - log::info!("Fastly Compute Variables:"); - log::info!(" - FASTLY_CACHE_GENERATION: {}", fastly_cache_generation); - log::info!(" - FASTLY_CUSTOMER_ID: {}", fastly_customer_id); - log::info!(" - FASTLY_HOSTNAME: {}", fastly_hostname); - log::info!(" - FASTLY_POP: 
{}", fastly_pop); - log::info!(" - FASTLY_REGION: {}", fastly_region); - log::info!(" - FASTLY_SERVICE_ID: {}", fastly_service_id); - //log::info!(" - FASTLY_SERVICE_VERSION: {}", fastly_service_version); - log::info!(" - FASTLY_TRACE_ID: {}", fastly_trace_id); - - // Log all response headers - log::info!("Response headers from Equativ:"); - for (name, value) in res.get_headers() { - log::info!(" {}: {:?}", name, value); - } - - if res.get_status().is_success() { - let body = res.take_body_str(); - log::info!("Backend response body: {}", body); - - // Parse the JSON response and extract opid - if let Ok(ad_response) = serde_json::from_str::<AdResponse>(&body) { - // Look for the callback with type "impression" - if let Some(callback) = ad_response - .callbacks - .iter() - .find(|c| c.callback_type == "impression") - { - // Extract opid from the URL - if let Some(opid) = callback - .url - .split('&') - .find(|&param| param.starts_with("opid=")) - .and_then(|param| param.split('=').nth(1)) - { - log::info!("Found opid: {}", opid); - - // Store in opid KV store - log::info!( - "Attempting to open KV store: {}", - settings.synthetic.opid_store - ); - match KVStore::open(settings.synthetic.opid_store.as_str()) { - Ok(Some(store)) => { - log::info!("Successfully opened KV store"); - match store.insert(&synthetic_id, opid.as_bytes()) { - Ok(_) => log::info!( - "Successfully stored opid {} for synthetic ID: {}", - opid, - synthetic_id - ), - Err(e) => { - log::error!("Error storing opid in KV store: {:?}", e) - } - } - } - Ok(None) => { - log::warn!( - "KV store returned None: {}", - settings.synthetic.opid_store - ); - } - Err(e) => { - log::error!( - "Error opening KV store '{}': {:?}", - settings.synthetic.opid_store, - e - ); - } - }; - } - } - } - - // Return the JSON response with CORS headers - let mut response = Response::from_status(StatusCode::OK) - .with_header(header::CONTENT_TYPE, "application/json") - .with_header(header::CACHE_CONTROL, "no-store, private") - 
.with_header(header::ACCESS_CONTROL_ALLOW_ORIGIN, "*") - .with_header( - header::ACCESS_CONTROL_EXPOSE_HEADERS, - "X-Geo-City, X-Geo-Country, X-Geo-Continent, X-Geo-Coordinates, X-Geo-Metro-Code, X-Geo-Info-Available" - ) - .with_header(HEADER_X_COMPRESS_HINT, "on") - .with_body(body); - - // Copy geo headers from request to response - for header_name in &[ - HEADER_X_GEO_CITY, - HEADER_X_GEO_COUNTRY, - HEADER_X_GEO_CONTINENT, - HEADER_X_GEO_COORDINATES, - HEADER_X_GEO_METRO_CODE, - HEADER_X_GEO_INFO_AVAILABLE, - ] { - if let Some(value) = req.get_header(header_name) { - response.set_header(header_name, value); - } - } - - // Attach PoP info to the response - //response.set_header("X-Debug-Fastly-PoP", &fastly_pop); - //log::info!("Added X-Debug-Fastly-PoP: {}", fastly_pop); - - Ok(response) - } else { - log::warn!("Backend returned non-success status"); - Ok(Response::from_status(StatusCode::NO_CONTENT) - .with_header(header::CONTENT_TYPE, "application/json") - .with_header(HEADER_X_COMPRESS_HINT, "on") - .with_body("{}")) - } - } - Err(e) => { - log::error!("Error making backend request: {:?}", e); - Ok(Response::from_status(StatusCode::NO_CONTENT) - .with_header(header::CONTENT_TYPE, "application/json") - .with_header(HEADER_X_COMPRESS_HINT, "on") - .with_body("{}")) - } - } + // Convert any errors to HTTP error responses + result.map_or_else(|e| Ok(to_error_response(e)), Ok) } -/// Handles the prebid test route with detailed error logging -async fn handle_prebid_test(settings: &Settings, mut req: Request) -> Result { - log::info!("Starting prebid test request handling"); - - // Check consent status from headers - let advertising_consent = req - .get_header(HEADER_X_CONSENT_ADVERTISING) - .and_then(|h| h.to_str().ok()) - .map(|v| v == "true") - .unwrap_or(false); - - // Calculate fresh ID and synthetic ID only if we have advertising consent - let (fresh_id, synthetic_id) = if advertising_consent { - match ( - generate_synthetic_id(settings, &req), - 
get_or_generate_synthetic_id(settings, &req), - ) { - (Ok(fresh), Ok(synth)) => (fresh, synth), - (Err(e), _) | (_, Err(e)) => { - log::error!("Failed to generate IDs: {:?}", e); - return Ok(Response::from_status(StatusCode::INTERNAL_SERVER_ERROR) - .with_header(header::CONTENT_TYPE, "application/json") - .with_body_json(&json!({ - "error": "Failed to generate IDs", - "details": format!("{:?}", e) - }))?); - } - } - } else { - // Use non-personalized IDs when no consent - ( - "non-personalized".to_string(), - "non-personalized".to_string(), - ) - }; - - log::info!( - "Existing Trusted Server header: {:?}", - req.get_header(HEADER_SYNTHETIC_TRUSTED_SERVER) - ); - log::info!("Generated Fresh ID: {}", &fresh_id); - log::info!("Using Trusted Server ID: {}", synthetic_id); - log::info!("Advertising consent: {}", advertising_consent); - - // Set both IDs as headers - req.set_header(HEADER_SYNTHETIC_FRESH, &fresh_id); - req.set_header(HEADER_SYNTHETIC_TRUSTED_SERVER, &synthetic_id); - req.set_header( - HEADER_X_CONSENT_ADVERTISING, - if advertising_consent { "true" } else { "false" }, - ); - - log::info!( - "Using Trusted Server ID: {}, Fresh ID: {}", - synthetic_id, - fresh_id - ); - - let prebid_req = match PrebidRequest::new(settings, &req) { - Ok(req) => { - log::info!( - "Successfully created PrebidRequest with synthetic ID: {}", - req.synthetic_id - ); - req - } - Err(e) => { - log::error!("Error creating PrebidRequest: {:?}", e); - return Ok(Response::from_status(StatusCode::INTERNAL_SERVER_ERROR) - .with_header(header::CONTENT_TYPE, "application/json") - .with_body_json(&json!({ - "error": "Failed to create prebid request", - "details": format!("{:?}", e) - }))?); - } - }; - - log::info!("Attempting to send bid request to Prebid Server at prebid_backend"); - - match prebid_req.send_bid_request(settings, &req).await { - Ok(mut prebid_response) => { - log::info!("Received response from Prebid Server"); - log::info!("Response status: {}", 
prebid_response.get_status()); - - log::info!("Response headers:"); - for (name, value) in prebid_response.get_headers() { - log::info!(" {}: {:?}", name, value); - } - - let body = prebid_response.take_body_str(); - log::info!("Response body: {}", body); - - Ok(Response::from_status(StatusCode::OK) - .with_header(header::CONTENT_TYPE, "application/json") - .with_header("X-Prebid-Test", "true") - .with_header("X-Synthetic-ID", &prebid_req.synthetic_id) - .with_header( - "X-Consent-Advertising", - if advertising_consent { "true" } else { "false" }, - ) - .with_header(HEADER_X_COMPRESS_HINT, "on") - .with_body(body)) - } - Err(e) => { - log::error!("Error sending bid request: {:?}", e); - log::error!("Backend name used: prebid_backend"); - Ok(Response::from_status(StatusCode::INTERNAL_SERVER_ERROR) - .with_header(header::CONTENT_TYPE, "application/json") - .with_body_json(&json!({ - "error": "Failed to send bid request", - "details": format!("{:?}", e), - "backend": "prebid_backend" - }))?) - } - } +/// Creates a standard 404 Not Found response. 
+fn not_found_response() -> Response { + Response::from_status(StatusCode::NOT_FOUND) + .with_body("Not Found") + .with_header(header::CONTENT_TYPE, "text/plain") + .with_header(HEADER_X_COMPRESS_HINT, "on") } From 887000487f8b6a0c50f43c6d5b893ebb0784f6cf Mon Sep 17 00:00:00 2001 From: Aram Grigoryan <132480+aram356@users.noreply.github.com> Date: Wed, 9 Jul 2025 09:39:02 -0700 Subject: [PATCH 02/20] Fixed formatting --- crates/common/src/gdpr.rs | 35 +++++++++++++++++++---------------- crates/common/src/prebid.rs | 2 +- crates/fastly/src/main.rs | 10 ++++++---- 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/crates/common/src/gdpr.rs b/crates/common/src/gdpr.rs index 9b2ee7b..4a72653 100644 --- a/crates/common/src/gdpr.rs +++ b/crates/common/src/gdpr.rs @@ -125,11 +125,12 @@ pub fn handle_consent_request( Method::GET => { // Return current consent status let consent = get_consent_from_request(&req).unwrap_or_default(); - let json_body = serde_json::to_string(&consent) - .change_context(TrustedServerError::GdprConsent { + let json_body = serde_json::to_string(&consent).change_context( + TrustedServerError::GdprConsent { message: "Failed to serialize consent data".to_string(), - })?; - + }, + )?; + Ok(Response::from_status(StatusCode::OK) .with_header(header::CONTENT_TYPE, "application/json") .with_body(json_body)) @@ -140,12 +141,13 @@ pub fn handle_consent_request( .change_context(TrustedServerError::GdprConsent { message: "Failed to parse consent request body".to_string(), })?; - - let json_body = serde_json::to_string(&consent) - .change_context(TrustedServerError::GdprConsent { + + let json_body = serde_json::to_string(&consent).change_context( + TrustedServerError::GdprConsent { message: "Failed to serialize consent response".to_string(), - })?; - + }, + )?; + let mut response = Response::from_status(StatusCode::OK) .with_header(header::CONTENT_TYPE, "application/json") .with_body(json_body); @@ -189,17 +191,18 @@ pub fn 
handle_data_subject_request( // TODO: Implement actual data retrieval from KV store // For now, return empty user data - let id_str = synthetic_id - .to_str() - .change_context(TrustedServerError::InvalidHeaderValue { + let id_str = synthetic_id.to_str().change_context( + TrustedServerError::InvalidHeaderValue { message: "Invalid subject ID header value".to_string(), - })?; + }, + )?; data.insert(id_str.to_string(), UserData::default()); - let json_body = serde_json::to_string(&data) - .change_context(TrustedServerError::GdprConsent { + let json_body = serde_json::to_string(&data).change_context( + TrustedServerError::GdprConsent { message: "Failed to serialize user data".to_string(), - })?; + }, + )?; Ok(Response::from_status(StatusCode::OK) .with_header(header::CONTENT_TYPE, "application/json") diff --git a/crates/common/src/prebid.rs b/crates/common/src/prebid.rs index ec129c0..19b5e8a 100644 --- a/crates/common/src/prebid.rs +++ b/crates/common/src/prebid.rs @@ -297,7 +297,7 @@ pub async fn handle_prebid_test( Err(e) => { log::error!("Error sending bid request: {:?}", e); log::error!("Backend name used: prebid_backend"); - + // Convert Fastly Error to TrustedServerError Err(Report::new(TrustedServerError::Prebid { message: format!("Failed to send bid request: {}", e), diff --git a/crates/fastly/src/main.rs b/crates/fastly/src/main.rs index 7295b5e..5dc630b 100644 --- a/crates/fastly/src/main.rs +++ b/crates/fastly/src/main.rs @@ -45,15 +45,17 @@ async fn route_request(settings: Settings, req: Request) -> Result handle_main_page(&settings, req), (&Method::GET, "/ad-creative") => handle_ad_request(&settings, req), (&Method::GET, "/prebid-test") => handle_prebid_test(&settings, req).await, - + // GDPR compliance routes (&Method::GET | &Method::POST, "/gdpr/consent") => handle_consent_request(&settings, req), - (&Method::GET | &Method::DELETE, "/gdpr/data") => handle_data_subject_request(&settings, req), - + (&Method::GET | &Method::DELETE, "/gdpr/data") => { + 
handle_data_subject_request(&settings, req) + } + // Static content pages (&Method::GET, "/privacy-policy") => handle_privacy_policy(&settings, req), (&Method::GET, "/why-trusted-server") => handle_why_trusted_server(&settings, req), - + // Catch-all 404 handler _ => return Ok(not_found_response()), }; From 9fcb0e0031f92db25d30ad8226c9808dca259bba Mon Sep 17 00:00:00 2001 From: Aram Grigoryan <132480+aram356@users.noreply.github.com> Date: Wed, 9 Jul 2025 15:02:39 -0700 Subject: [PATCH 03/20] Initalize logger with formatting --- Cargo.lock | 11 +++++++++++ crates/fastly/Cargo.toml | 2 ++ crates/fastly/src/main.rs | 26 ++++++++++++++++++++++++-- 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c929cb4..2e066e9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -499,6 +499,15 @@ dependencies = [ "wit-bindgen-rt 0.42.1", ] +[[package]] +name = "fern" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4316185f709b23713e41e3195f90edef7fb00c3ed4adc79769cf09cc762a3b29" +dependencies = [ + "log", +] + [[package]] name = "fnv" version = "1.0.7" @@ -1604,8 +1613,10 @@ dependencies = [ name = "trusted-server-fastly" version = "0.1.0" dependencies = [ + "chrono", "error-stack", "fastly", + "fern", "futures", "log", "log-fastly", diff --git a/crates/fastly/Cargo.toml b/crates/fastly/Cargo.toml index d98e2b5..618bd06 100644 --- a/crates/fastly/Cargo.toml +++ b/crates/fastly/Cargo.toml @@ -4,8 +4,10 @@ version = "0.1.0" edition = "2021" [dependencies] +chrono = "0.4.41" error-stack = "0.5" fastly = "0.11.5" +fern = "0.7.1" futures = "0.3" log = "0.4.20" log-fastly = "0.11.5" diff --git a/crates/fastly/src/main.rs b/crates/fastly/src/main.rs index 5dc630b..705344f 100644 --- a/crates/fastly/src/main.rs +++ b/crates/fastly/src/main.rs @@ -1,6 +1,6 @@ use fastly::http::{header, Method, StatusCode}; use fastly::{Error, Request, Response}; -use log::LevelFilter::Info; +use log_fastly::Logger; mod 
error; use crate::error::to_error_response; @@ -16,7 +16,7 @@ use trusted_server_common::why::handle_why_trusted_server; #[fastly::main] fn main(req: Request) -> Result { - log_fastly::init_simple("mylogs", Info); + init_logger(); let settings = match Settings::new() { Ok(s) => s, @@ -71,3 +71,25 @@ fn not_found_response() -> Response { .with_header(header::CONTENT_TYPE, "text/plain") .with_header(HEADER_X_COMPRESS_HINT, "on") } + +fn init_logger() { + let logger = Logger::builder() + .default_endpoint("tslog") + // .echo_stdout(true) + .max_level(log::LevelFilter::Debug) + .build() + .expect("Failed to build Logger"); + + fern::Dispatch::new() + .format(|out, message, record| { + out.finish(format_args!( + "{} {} {}", + chrono::Local::now().to_rfc3339_opts(chrono::SecondsFormat::Millis, true), + record.level(), + message + )) + }) + .chain(Box::new(logger) as Box) + .apply() + .expect("Failed to initialize logger"); +} From 67ce18bc44570c9a1c509078afba1d84d7690658 Mon Sep 17 00:00:00 2001 From: Aram Grigoryan <132480+aram356@users.noreply.github.com> Date: Wed, 9 Jul 2025 15:15:52 -0700 Subject: [PATCH 04/20] Use env variables during build --- Cargo.lock | 59 +++++++++++++++++++++++++++--- crates/common/Cargo.toml | 1 + crates/common/build.rs | 34 ++++++++++++++++- crates/common/src/lib.rs | 1 + crates/common/src/settings.rs | 47 ------------------------ crates/common/src/settings_data.rs | 58 +++++++++++++++++++++++++++++ crates/fastly/src/main.rs | 3 +- 7 files changed, 148 insertions(+), 55 deletions(-) create mode 100644 crates/common/src/settings_data.rs diff --git a/Cargo.lock b/Cargo.lock index 2e066e9..5fb6b4a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -179,7 +179,7 @@ dependencies = [ "rust-ini", "serde", "serde_json", - "toml", + "toml 0.8.23", "winnow", "yaml-rust2", ] @@ -1318,6 +1318,15 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_spanned" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"40734c41988f7306bb04f0ecf60ec0f3f1caa34290e4e8ea471dcd3346483b83" +dependencies = [ + "serde", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -1556,11 +1565,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" dependencies = [ "serde", - "serde_spanned", - "toml_datetime", + "serde_spanned 0.6.9", + "toml_datetime 0.6.11", "toml_edit", ] +[[package]] +name = "toml" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f271e09bde39ab52250160a67e88577e0559ad77e9085de6e9051a2c4353f8f8" +dependencies = [ + "indexmap", + "serde", + "serde_spanned 1.0.0", + "toml_datetime 0.7.0", + "toml_parser", + "toml_writer", + "winnow", +] + [[package]] name = "toml_datetime" version = "0.6.11" @@ -1570,6 +1594,15 @@ dependencies = [ "serde", ] +[[package]] +name = "toml_datetime" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bade1c3e902f58d73d3f294cd7f20391c1cb2fbcb643b73566bc773971df91e3" +dependencies = [ + "serde", +] + [[package]] name = "toml_edit" version = "0.22.27" @@ -1578,11 +1611,26 @@ checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ "indexmap", "serde", - "serde_spanned", - "toml_datetime", + "serde_spanned 0.6.9", + "toml_datetime 0.6.11", + "winnow", +] + +[[package]] +name = "toml_parser" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5c1c469eda89749d2230d8156a5969a69ffe0d6d01200581cdc6110674d293e" +dependencies = [ "winnow", ] +[[package]] +name = "toml_writer" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b679217f2848de74cabd3e8fc5e6d66f40b7da40f8e1954d92054d9010690fd5" + [[package]] name = "trusted-server-common" version = "0.1.0" @@ -1606,6 +1654,7 @@ dependencies = [ "sha2 0.10.9", "temp-env", "tokio", + 
"toml 0.9.0", "url", ] diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml index 2cae701..5952e0b 100644 --- a/crates/common/Cargo.toml +++ b/crates/common/Cargo.toml @@ -35,6 +35,7 @@ config = "0.15.11" derive_more = { version = "1.0", features = ["display", "error"] } error-stack = "0.5" http = "1.3.1" +toml = "0.9.0" [dev-dependencies] regex = "1.1.1" diff --git a/crates/common/build.rs b/crates/common/build.rs index 24b9108..33a9534 100644 --- a/crates/common/build.rs +++ b/crates/common/build.rs @@ -6,10 +6,20 @@ mod settings; use serde_json::Value; use std::collections::HashSet; +use std::fs; +use std::path::Path; + +const TRUSTED_SERVER_INIT_CONFIG_PATH: &str = "../../trusted-server.toml"; +const TRUSTED_SERVER_OUTPUT_CONFIG_PATH: &str = "../../target/trusted-server-out.toml"; fn main() { - // Watch the settings.rs file for changes - println!("cargo:rerun-if-changed=../../trusted-server.toml"); + merge_toml(); + rerun_if_changed(); +} + +fn rerun_if_changed() { + // Watch the root trusted-server.toml file for changes + println!("cargo:rerun-if-changed={}", TRUSTED_SERVER_INIT_CONFIG_PATH); // Create a default Settings instance and convert to JSON to discover all fields let default_settings = settings::Settings::default(); @@ -27,6 +37,26 @@ fn main() { } } +fn merge_toml() { + // Get the OUT_DIR where we'll copy the config file + let dest_path = Path::new(TRUSTED_SERVER_OUTPUT_CONFIG_PATH); + + // Read init config + let init_config_path = Path::new(TRUSTED_SERVER_INIT_CONFIG_PATH); + let toml_content = fs::read_to_string(init_config_path) + .expect(&format!("Failed to read {:?}", init_config_path)); + + // For build time: use from_toml to parse with environment variables + let settings = settings::Settings::from_toml(&toml_content) + .expect("Failed to parse settings at build time"); + + // Write the merged settings to the output directory as TOML + let merged_toml = + toml::to_string_pretty(&settings).expect("Failed to serialize settings to 
TOML"); + + fs::write(&dest_path, merged_toml).expect(&format!("Failed to write {:?}", dest_path)); +} + fn collect_env_vars(value: &Value, env_vars: &mut HashSet, path: Vec) { if let Value::Object(map) = value { for (key, val) in map { diff --git a/crates/common/src/lib.rs b/crates/common/src/lib.rs index 07fe80f..d4001d2 100644 --- a/crates/common/src/lib.rs +++ b/crates/common/src/lib.rs @@ -31,6 +31,7 @@ pub mod prebid; pub mod privacy; pub mod publisher; pub mod settings; +pub mod settings_data; pub mod synthetic; pub mod templates; pub mod test_support; diff --git a/crates/common/src/settings.rs b/crates/common/src/settings.rs index 05f372e..7403549 100644 --- a/crates/common/src/settings.rs +++ b/crates/common/src/settings.rs @@ -45,33 +45,6 @@ pub struct Settings { #[allow(unused)] impl Settings { - /// Creates a new [`Settings`] instance from the embedded configuration file. - /// - /// Loads the configuration from the embedded `trusted-server.toml` file - /// and applies any environment variable overrides. - /// - /// # Errors - /// - /// - [`TrustedServerError::InvalidUtf8`] if the embedded TOML file contains invalid UTF-8 - /// - [`TrustedServerError::Configuration`] if the configuration is invalid or missing required fields - /// - [`TrustedServerError::InsecureSecretKey`] if the secret key is set to the default value - pub fn new() -> Result> { - let toml_bytes = include_bytes!("../../../trusted-server.toml"); - let toml_str = - str::from_utf8(toml_bytes).change_context(TrustedServerError::InvalidUtf8 { - message: "embedded trusted-server.toml file".to_string(), - })?; - - let settings = Self::from_toml(toml_str)?; - - // Validate that the secret key is not the default - if settings.synthetic.secret_key == "secret-key" { - return Err(Report::new(TrustedServerError::InsecureSecretKey)); - } - - Ok(settings) - } - /// Creates a new [`Settings`] instance from a TOML string. 
/// /// Parses the provided TOML configuration and applies any environment @@ -109,26 +82,6 @@ mod tests { use crate::test_support::tests::crate_test_settings_str; - #[test] - fn test_settings_new() { - // Test that Settings::new() loads successfully - let settings = Settings::new(); - assert!(settings.is_ok(), "Settings should load from embedded TOML"); - - let settings = settings.unwrap(); - // Verify basic structure is loaded - assert!(!settings.ad_server.ad_partner_url.is_empty()); - assert!(!settings.ad_server.sync_url.is_empty()); - assert!(!settings.publisher.domain.is_empty()); - assert!(!settings.publisher.cookie_domain.is_empty()); - assert!(!settings.publisher.origin_url.is_empty()); - assert!(!settings.prebid.server_url.is_empty()); - assert!(!settings.synthetic.counter_store.is_empty()); - assert!(!settings.synthetic.opid_store.is_empty()); - assert!(!settings.synthetic.secret_key.is_empty()); - assert!(!settings.synthetic.template.is_empty()); - } - #[test] fn test_settings_from_valid_toml() { let toml_str = crate_test_settings_str(); diff --git a/crates/common/src/settings_data.rs b/crates/common/src/settings_data.rs new file mode 100644 index 0000000..e67bf3c --- /dev/null +++ b/crates/common/src/settings_data.rs @@ -0,0 +1,58 @@ +use core::str; +use error_stack::{Report, ResultExt}; + +use crate::error::TrustedServerError; +use crate::settings::Settings; + +const SETTINGS_DATA: &[u8] = include_bytes!("../../../target/trusted-server-out.toml"); + +/// Creates a new [`Settings`] instance from the embedded configuration file. +/// +/// Loads the configuration from the embedded `trusted-server.toml` file +/// and applies any environment variable overrides.
+/// +/// # Errors +/// +/// - [`TrustedServerError::InvalidUtf8`] if the embedded TOML file contains invalid UTF-8 +/// - [`TrustedServerError::Configuration`] if the configuration is invalid or missing required fields +/// - [`TrustedServerError::InsecureSecretKey`] if the secret key is set to the default value +pub fn get_settings() -> Result> { + let toml_bytes = SETTINGS_DATA; + let toml_str = str::from_utf8(toml_bytes).change_context(TrustedServerError::InvalidUtf8 { + message: "embedded trusted-server.toml file".to_string(), + })?; + + let settings = Settings::from_toml(toml_str)?; + + // Validate that the secret key is not the default + if settings.synthetic.secret_key == "secret-key" { + return Err(Report::new(TrustedServerError::InsecureSecretKey)); + } + + Ok(settings) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_get_settings() { + // Test that get_settings() loads successfully + let settings = get_settings(); + assert!(settings.is_ok(), "Settings should load from embedded TOML"); + + let settings = settings.unwrap(); + // Verify basic structure is loaded + assert!(!settings.ad_server.ad_partner_url.is_empty()); + assert!(!settings.ad_server.sync_url.is_empty()); + assert!(!settings.publisher.domain.is_empty()); + assert!(!settings.publisher.cookie_domain.is_empty()); + assert!(!settings.publisher.origin_url.is_empty()); + assert!(!settings.prebid.server_url.is_empty()); + assert!(!settings.synthetic.counter_store.is_empty()); + assert!(!settings.synthetic.opid_store.is_empty()); + assert!(!settings.synthetic.secret_key.is_empty()); + assert!(!settings.synthetic.template.is_empty()); + } +} diff --git a/crates/fastly/src/main.rs b/crates/fastly/src/main.rs index 705344f..7695a10 100644 --- a/crates/fastly/src/main.rs +++ b/crates/fastly/src/main.rs @@ -12,13 +12,14 @@ use trusted_server_common::prebid::handle_prebid_test; use trusted_server_common::privacy::handle_privacy_policy; use
trusted_server_common::publisher::handle_main_page; use trusted_server_common::settings::Settings; +use trusted_server_common::settings_data::get_settings; use trusted_server_common::why::handle_why_trusted_server; #[fastly::main] fn main(req: Request) -> Result { init_logger(); - let settings = match Settings::new() { + let settings = match get_settings() { Ok(s) => s, Err(e) => { log::error!("Failed to load settings: {:?}", e); From 433554c4855a7fbaab296b3c9d42188bcc700a77 Mon Sep 17 00:00:00 2001 From: Aram Grigoryan <132480+aram356@users.noreply.github.com> Date: Wed, 9 Jul 2025 15:17:44 -0700 Subject: [PATCH 05/20] Fixed clippy --- crates/common/build.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/common/build.rs b/crates/common/build.rs index 33a9534..d7b020b 100644 --- a/crates/common/build.rs +++ b/crates/common/build.rs @@ -44,7 +44,7 @@ fn merge_toml() { // Read init config let init_config_path = Path::new(TRUSTED_SERVER_INIT_CONFIG_PATH); let toml_content = fs::read_to_string(init_config_path) - .expect(&format!("Failed to read {:?}", init_config_path)); + .unwrap_or_else(|_| panic!("Failed to read {:?}", init_config_path)); // For build time: use from_toml to parse with environment variables let settings = settings::Settings::from_toml(&toml_content) @@ -54,7 +54,7 @@ fn merge_toml() { let merged_toml = toml::to_string_pretty(&settings).expect("Failed to serialize settings to TOML"); - fs::write(&dest_path, merged_toml).expect(&format!("Failed to write {:?}", dest_path)); + fs::write(dest_path, merged_toml).unwrap_or_else(|_| panic!("Failed to write {:?}", dest_path)); } fn collect_env_vars(value: &Value, env_vars: &mut HashSet, path: Vec) { From 21b03f1371199692cfb5cc388ebac9bc44d39123 Mon Sep 17 00:00:00 2001 From: Aram Grigoryan <132480+aram356@users.noreply.github.com> Date: Wed, 9 Jul 2025 15:20:29 -0700 Subject: [PATCH 06/20] Use updated depedencies --- Cargo.lock | 71 ++++++++-------------------------------- 
crates/common/Cargo.toml | 4 +-- 2 files changed, 16 insertions(+), 59 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5fb6b4a..d912a5c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -138,9 +138,9 @@ checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" [[package]] name = "cc" -version = "1.2.27" +version = "1.2.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d487aa071b5f64da6f19a3e848e3578944b726ee5a4854b82172f02aa876bfdc" +checksum = "5c1599538de2394445747c8cf7935946e3cc27e9625f889d979bfb2aaf569362" dependencies = [ "shlex", ] @@ -167,9 +167,9 @@ dependencies = [ [[package]] name = "config" -version = "0.15.11" +version = "0.15.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "595aae20e65c3be792d05818e8c63025294ac3cb7e200f11459063a352a6ef80" +checksum = "5b1eb4fb07bc7f012422df02766c7bd5971effb894f573865642f06fa3265440" dependencies = [ "async-trait", "convert_case", @@ -179,7 +179,7 @@ dependencies = [ "rust-ini", "serde", "serde_json", - "toml 0.8.23", + "toml", "winnow", "yaml-rust2", ] @@ -332,18 +332,18 @@ dependencies = [ [[package]] name = "derive_more" -version = "1.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a9b99b9cbbe49445b21764dc0625032a89b145a2642e67603e1c936f5458d05" +checksum = "093242cf7570c207c83073cf82f79706fe7b8317e98620a47d5be7c3d8497678" dependencies = [ "derive_more-impl", ] [[package]] name = "derive_more-impl" -version = "1.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22" +checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3" dependencies = [ "proc-macro2", "quote", @@ -1309,15 +1309,6 @@ dependencies = [ "syn 2.0.104", ] -[[package]] -name = "serde_spanned" -version = "0.6.9" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" -dependencies = [ - "serde", -] - [[package]] name = "serde_spanned" version = "1.0.0" @@ -1533,9 +1524,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.46.0" +version = "1.46.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1140bb80481756a8cbe10541f37433b459c5aa1e727b4c020fbfebdc25bf3ec4" +checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17" dependencies = [ "backtrace", "bytes", @@ -1558,18 +1549,6 @@ dependencies = [ "syn 2.0.104", ] -[[package]] -name = "toml" -version = "0.8.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" -dependencies = [ - "serde", - "serde_spanned 0.6.9", - "toml_datetime 0.6.11", - "toml_edit", -] - [[package]] name = "toml" version = "0.9.0" @@ -1578,22 +1557,13 @@ checksum = "f271e09bde39ab52250160a67e88577e0559ad77e9085de6e9051a2c4353f8f8" dependencies = [ "indexmap", "serde", - "serde_spanned 1.0.0", - "toml_datetime 0.7.0", + "serde_spanned", + "toml_datetime", "toml_parser", "toml_writer", "winnow", ] -[[package]] -name = "toml_datetime" -version = "0.6.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" -dependencies = [ - "serde", -] - [[package]] name = "toml_datetime" version = "0.7.0" @@ -1603,19 +1573,6 @@ dependencies = [ "serde", ] -[[package]] -name = "toml_edit" -version = "0.22.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" -dependencies = [ - "indexmap", - "serde", - "serde_spanned 0.6.9", - "toml_datetime 0.6.11", - "winnow", -] - [[package]] name = "toml_parser" version = "1.0.0" @@ -1654,7 +1611,7 @@ dependencies = [ "sha2 0.10.9", "temp-env", "tokio", - "toml 0.9.0", + "toml", 
"url", ] diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml index 5952e0b..26ecf96 100644 --- a/crates/common/Cargo.toml +++ b/crates/common/Cargo.toml @@ -12,7 +12,7 @@ license = "Apache-2.0" chrono = "0.4" config = "0.15.11" cookie = "0.18.1" -derive_more = { version = "1.0", features = ["display", "error"] } +derive_more = { version = "2.0", features = ["display", "error"] } error-stack = "0.5" fastly = "0.11.5" futures = "0.3" @@ -32,7 +32,7 @@ url = "2.4.1" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0.91" config = "0.15.11" -derive_more = { version = "1.0", features = ["display", "error"] } +derive_more = { version = "2.0", features = ["display", "error"] } error-stack = "0.5" http = "1.3.1" toml = "0.9.0" From b0001efef20fad675437fa78d00e7eb18b42cc6e Mon Sep 17 00:00:00 2001 From: Aram Grigoryan <132480+aram356@users.noreply.github.com> Date: Wed, 9 Jul 2025 15:24:12 -0700 Subject: [PATCH 07/20] Proxy publisher content --- .env.dev | 3 ++ crates/common/Cargo.toml | 5 +- crates/common/src/error.rs | 43 +++++++++------- crates/common/src/publisher.rs | 32 +++++++++++- crates/common/src/settings.rs | 86 +++++++++++++++++++++++++++++++ crates/common/src/test_support.rs | 25 ++------- crates/fastly/src/main.rs | 49 +++++++++++------- fastly.toml | 3 ++ trusted-server.toml | 1 + 9 files changed, 184 insertions(+), 63 deletions(-) diff --git a/.env.dev b/.env.dev index 029284b..cf3a91b 100644 --- a/.env.dev +++ b/.env.dev @@ -1,3 +1,6 @@ +# [publisher] +TRUSTED_SERVER__PUBLISHER__ORIGIN_URL=http://127.0.0.1:9090 + # [ad_server] TRUSTED_SERVER__AD_SERVER__AD_PARTNER_URL=http://127.0.0.1:10180 diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml index 26ecf96..24f35a0 100644 --- a/crates/common/Cargo.toml +++ b/crates/common/Cargo.toml @@ -12,7 +12,7 @@ license = "Apache-2.0" chrono = "0.4" config = "0.15.11" cookie = "0.18.1" -derive_more = { version = "2.0", features = ["display", "error"] } +derive_more = { version 
= "1.0", features = ["display", "error"] } error-stack = "0.5" fastly = "0.11.5" futures = "0.3" @@ -32,10 +32,11 @@ url = "2.4.1" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0.91" config = "0.15.11" -derive_more = { version = "2.0", features = ["display", "error"] } +derive_more = { version = "1.0", features = ["display", "error"] } error-stack = "0.5" http = "1.3.1" toml = "0.9.0" +url = "2.4.1" [dev-dependencies] regex = "1.1.1" diff --git a/crates/common/src/error.rs b/crates/common/src/error.rs index 65a0c47..75dba7c 100644 --- a/crates/common/src/error.rs +++ b/crates/common/src/error.rs @@ -19,38 +19,42 @@ pub enum TrustedServerError { #[display("Configuration error: {message}")] Configuration { message: String }, + /// GDPR consent handling error. + #[display("GDPR consent error: {message}")] + GdprConsent { message: String }, + + /// Key-value store operation failed. + #[display("KV store error: {store_name} - {message}")] + KvStore { store_name: String, message: String }, + /// The synthetic secret key is using the insecure default value. #[display("Synthetic secret key is set to the default value - this is insecure")] InsecureSecretKey, + /// HTTP header value creation failed. + #[display("Invalid HTTP header value: {message}")] + InvalidHeaderValue { message: String }, + /// Invalid UTF-8 data encountered. #[display("Invalid UTF-8 data: {message}")] InvalidUtf8 { message: String }, - /// HTTP header value creation failed. - #[display("Invalid HTTP header value: {message}")] - InvalidHeaderValue { message: String }, + /// Prebid integration error. + #[display("Prebid error: {message}")] + Prebid { message: String }, + + /// Proxying a request to the publisher origin failed. + #[display("Proxy error: {message}")] + Proxy { message: String }, /// Settings parsing or validation failed. #[display("Settings error: {message}")] Settings { message: String }, - /// GDPR consent handling error.
- #[display("GDPR consent error: {message}")] - GdprConsent { message: String }, - /// Synthetic ID generation or validation failed. #[display("Synthetic ID error: {message}")] SyntheticId { message: String }, - /// Prebid integration error. - #[display("Prebid error: {message}")] - Prebid { message: String }, - - /// Key-value store operation failed. - #[display("KV store error: {store_name} - {message}")] - KvStore { store_name: String, message: String }, - /// Template rendering error. #[display("Template error: {message}")] Template { message: String }, @@ -72,13 +76,14 @@ impl IntoHttpResponse for TrustedServerError { fn status_code(&self) -> StatusCode { match self { Self::Configuration { .. } | Self::Settings { .. } => StatusCode::INTERNAL_SERVER_ERROR, + Self::GdprConsent { .. } => StatusCode::BAD_REQUEST, Self::InsecureSecretKey => StatusCode::INTERNAL_SERVER_ERROR, - Self::InvalidUtf8 { .. } => StatusCode::BAD_REQUEST, Self::InvalidHeaderValue { .. } => StatusCode::BAD_REQUEST, - Self::GdprConsent { .. } => StatusCode::BAD_REQUEST, - Self::SyntheticId { .. } => StatusCode::INTERNAL_SERVER_ERROR, - Self::Prebid { .. } => StatusCode::BAD_GATEWAY, + Self::InvalidUtf8 { .. } => StatusCode::BAD_REQUEST, Self::KvStore { .. } => StatusCode::SERVICE_UNAVAILABLE, + Self::Prebid { .. } => StatusCode::BAD_GATEWAY, + Self::Proxy { .. } => StatusCode::BAD_GATEWAY, + Self::SyntheticId { .. } => StatusCode::INTERNAL_SERVER_ERROR, Self::Template { .. } => StatusCode::INTERNAL_SERVER_ERROR, } } diff --git a/crates/common/src/publisher.rs b/crates/common/src/publisher.rs index 8c6b02b..c03429e 100644 --- a/crates/common/src/publisher.rs +++ b/crates/common/src/publisher.rs @@ -1,4 +1,4 @@ -use error_stack::Report; +use error_stack::{Report, ResultExt}; use fastly::http::{header, StatusCode}; use fastly::{Request, Response}; @@ -124,3 +124,33 @@ pub fn handle_main_page( Ok(response) } + +/// Proxies requests to the publisher's origin server. 
+/// +/// This function forwards incoming requests to the configured origin URL, +/// preserving headers and request body. It's used as a fallback for routes +/// not explicitly handled by the trusted server. +/// +/// # Errors +/// +/// Returns a [`TrustedServerError`] if: +/// - The proxy request fails +/// - The origin backend is unreachable +pub fn handle_publisher_request( + settings: &Settings, + mut req: Request, +) -> Result> { + log::info!("Proxying request to publisher_origin"); + + // Extract host from the origin_url using the Publisher's origin_host method + let host = settings.publisher.origin_host(); + + log::info!("Setting host header to: {}", host); + req.set_header("host", host); + + // Send the request to the origin backend + req.send(&settings.publisher.origin_backend) + .change_context(TrustedServerError::Proxy { + message: "Failed to proxy request".to_string(), + }) +} diff --git a/crates/common/src/settings.rs b/crates/common/src/settings.rs index 7403549..602f451 100644 --- a/crates/common/src/settings.rs +++ b/crates/common/src/settings.rs @@ -3,6 +3,7 @@ use core::str; use config::{Config, Environment, File, FileFormat}; use error_stack::{Report, ResultExt}; use serde::{Deserialize, Serialize}; +use url::Url; use crate::error::TrustedServerError; @@ -19,9 +20,37 @@ pub struct AdServer { pub struct Publisher { pub domain: String, pub cookie_domain: String, + pub origin_backend: String, pub origin_url: String, } +impl Publisher { + /// Extracts the host (including port if present) from the origin_url. 
+ /// + /// # Examples + /// + /// ``` + /// # use trusted_server_common::settings::Publisher; + /// let publisher = Publisher { + /// domain: "example.com".to_string(), + /// cookie_domain: ".example.com".to_string(), + /// origin_url: "https://origin.example.com:8080".to_string(), + /// }; + /// assert_eq!(publisher.origin_host(), "origin.example.com:8080"); + /// ``` + pub fn origin_host(&self) -> String { + Url::parse(&self.origin_url) + .ok() + .and_then(|url| { + url.host_str().map(|host| match url.port() { + Some(port) => format!("{}:{}", host, port), + None => host.to_string(), + }) + }) + .unwrap_or_else(|| self.origin_url.clone()) + } +} + #[derive(Debug, Default, Deserialize, Serialize)] pub struct Prebid { pub server_url: String, @@ -212,4 +241,61 @@ mod tests { }, ); } + + #[test] + fn test_publisher_origin_host() { + // Test with full URL including port + let publisher = Publisher { + domain: "example.com".to_string(), + cookie_domain: ".example.com".to_string(), + origin_backend: "publisher_origin".to_string(), + origin_url: "https://origin.example.com:8080".to_string(), + }; + assert_eq!(publisher.origin_host(), "origin.example.com:8080"); + + // Test with URL without port (default HTTPS port) + let publisher = Publisher { + domain: "example.com".to_string(), + cookie_domain: ".example.com".to_string(), + origin_backend: "publisher_origin".to_string(), + origin_url: "https://origin.example.com".to_string(), + }; + assert_eq!(publisher.origin_host(), "origin.example.com"); + + // Test with HTTP URL with explicit port + let publisher = Publisher { + domain: "example.com".to_string(), + cookie_domain: ".example.com".to_string(), + origin_backend: "publisher_origin".to_string(), + origin_url: "http://localhost:9090".to_string(), + }; + assert_eq!(publisher.origin_host(), "localhost:9090"); + + // Test with URL without protocol (fallback to original) + let publisher = Publisher { + domain: "example.com".to_string(), + cookie_domain: 
".example.com".to_string(), + origin_backend: "publisher_origin".to_string(), + origin_url: "localhost:9090".to_string(), + }; + assert_eq!(publisher.origin_host(), "localhost:9090"); + + // Test with IPv4 address + let publisher = Publisher { + domain: "example.com".to_string(), + cookie_domain: ".example.com".to_string(), + origin_backend: "publisher_origin".to_string(), + origin_url: "http://192.168.1.1:8080".to_string(), + }; + assert_eq!(publisher.origin_host(), "192.168.1.1:8080"); + + // Test with IPv6 address + let publisher = Publisher { + domain: "example.com".to_string(), + cookie_domain: ".example.com".to_string(), + origin_backend: "publisher_origin".to_string(), + origin_url: "http://[::1]:8080".to_string(), + }; + assert_eq!(publisher.origin_host(), "[::1]:8080"); + } } diff --git a/crates/common/src/test_support.rs b/crates/common/src/test_support.rs index 67f7262..4ac6a6d 100644 --- a/crates/common/src/test_support.rs +++ b/crates/common/src/test_support.rs @@ -1,6 +1,6 @@ #[cfg(test)] pub mod tests { - use crate::settings::{AdServer, Prebid, Publisher, Settings, Synthetic}; + use crate::settings::Settings; pub fn crate_test_settings_str() -> String { r#" @@ -11,6 +11,7 @@ pub mod tests { [publisher] domain = "test-publisher.com" cookie_domain = ".test-publisher.com" + origin_backend = "publisher_origin" origin_url= "https://origin.test-publisher.com" [prebid] @@ -25,25 +26,7 @@ pub mod tests { } pub fn create_test_settings() -> Settings { - Settings { - ad_server: AdServer { - ad_partner_url: "https://test-adpartner.com".into(), - sync_url: "https://test-adpartner.com/synthetic_id={{synthetic_id}}".to_string(), - }, - publisher: Publisher { - domain: "test-publisher.com".to_string(), - cookie_domain: ".test-publisher.com".to_string(), - origin_url: "origin.test-publisher.com".to_string(), - }, - prebid: Prebid { - server_url: "https://test-prebid.com/openrtb2/auction".to_string(), - }, - synthetic: Synthetic { - counter_store: 
"test_counter_store".to_string(), - opid_store: "test-opid-store".to_string(), - secret_key: "test-secret-key".to_string(), - template: "{{client_ip}}:{{user_agent}}:{{first_party_id}}:{{auth_user_id}}:{{publisher_domain}}:{{accept_language}}".to_string(), - }, - } + let toml_str = crate_test_settings_str(); + Settings::from_toml(&toml_str).expect("Invalid config") } } diff --git a/crates/fastly/src/main.rs b/crates/fastly/src/main.rs index 7695a10..39f1332 100644 --- a/crates/fastly/src/main.rs +++ b/crates/fastly/src/main.rs @@ -1,20 +1,19 @@ -use fastly::http::{header, Method, StatusCode}; +use fastly::http::Method; use fastly::{Error, Request, Response}; use log_fastly::Logger; -mod error; -use crate::error::to_error_response; - use trusted_server_common::advertiser::handle_ad_request; -use trusted_server_common::constants::HEADER_X_COMPRESS_HINT; use trusted_server_common::gdpr::{handle_consent_request, handle_data_subject_request}; use trusted_server_common::prebid::handle_prebid_test; use trusted_server_common::privacy::handle_privacy_policy; -use trusted_server_common::publisher::handle_main_page; +use trusted_server_common::publisher::handle_publisher_request; use trusted_server_common::settings::Settings; use trusted_server_common::settings_data::get_settings; use trusted_server_common::why::handle_why_trusted_server; +mod error; +use crate::error::to_error_response; + #[fastly::main] fn main(req: Request) -> Result { init_logger(); @@ -33,17 +32,22 @@ fn main(req: Request) -> Result { /// Routes incoming requests to appropriate handlers. /// -/// This function implements the application's routing logic, matching HTTP methods -/// and paths to their corresponding handler functions. +/// This function implements the application's routing logic. It first checks +/// for known routes, and if none match, it proxies the request to the +/// publisher's origin server as a fallback. 
async fn route_request(settings: Settings, req: Request) -> Result { log::info!( "FASTLY_SERVICE_VERSION: {}", ::std::env::var("FASTLY_SERVICE_VERSION").unwrap_or_else(|_| String::new()) ); - let result = match (req.get_method(), req.get_path()) { + // Get path and method for routing + let path = req.get_path(); + let method = req.get_method(); + + // Match known routes and handle them + let result = match (method, path) { // Main application routes - (&Method::GET, "/") => handle_main_page(&settings, req), (&Method::GET, "/ad-creative") => handle_ad_request(&settings, req), (&Method::GET, "/prebid-test") => handle_prebid_test(&settings, req).await, @@ -57,22 +61,27 @@ async fn route_request(settings: Settings, req: Request) -> Result handle_privacy_policy(&settings, req), (&Method::GET, "/why-trusted-server") => handle_why_trusted_server(&settings, req), - // Catch-all 404 handler - _ => return Ok(not_found_response()), + // No known route matched, proxy to publisher origin as fallback + _ => { + log::info!( + "No known route matched for path: {}, proxying to publisher origin", + path + ); + + match handle_publisher_request(&settings, req) { + Ok(response) => Ok(response), + Err(e) => { + log::error!("Failed to proxy to publisher origin: {:?}", e); + Err(e) + } + } + } }; // Convert any errors to HTTP error responses result.map_or_else(|e| Ok(to_error_response(e)), Ok) } -/// Creates a standard 404 Not Found response. 
-fn not_found_response() -> Response { - Response::from_status(StatusCode::NOT_FOUND) - .with_body("Not Found") - .with_header(header::CONTENT_TYPE, "text/plain") - .with_header(HEADER_X_COMPRESS_HINT, "on") -} - fn init_logger() { let logger = Logger::builder() .default_endpoint("tslog") diff --git a/fastly.toml b/fastly.toml index 1a7181a..54b472f 100644 --- a/fastly.toml +++ b/fastly.toml @@ -17,6 +17,9 @@ build = """ [local_server.backends] [local_server.backends.ad_partner_url] url = "http://127.0.0.1:10180/" + + [local_server.backends.publisher_origin] + url = "http://localhost:9090" [local_server.kv_stores] [[local_server.kv_stores.counter_store]] diff --git a/trusted-server.toml b/trusted-server.toml index 587607e..607b716 100644 --- a/trusted-server.toml +++ b/trusted-server.toml @@ -1,6 +1,7 @@ [publisher] domain = "test-publisher.com" cookie_domain = ".test-publisher.com" +origin_backend = "publisher_origin" origin_url = "https://origin.test-publisher.com" [ad_server] From d4618ef248c7fd09bca0075837619fd7d5213eec Mon Sep 17 00:00:00 2001 From: Aram Grigoryan <132480+aram356@users.noreply.github.com> Date: Wed, 9 Jul 2025 20:08:52 -0700 Subject: [PATCH 08/20] Fixed Cargo.lock --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d912a5c..3183dab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -332,18 +332,18 @@ dependencies = [ [[package]] name = "derive_more" -version = "2.0.1" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "093242cf7570c207c83073cf82f79706fe7b8317e98620a47d5be7c3d8497678" +checksum = "4a9b99b9cbbe49445b21764dc0625032a89b145a2642e67603e1c936f5458d05" dependencies = [ "derive_more-impl", ] [[package]] name = "derive_more-impl" -version = "2.0.1" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3" +checksum = 
"cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22" dependencies = [ "proc-macro2", "quote", From 0540e0769dfa0b7bbf5c06bfb7fb79ede09c3c7e Mon Sep 17 00:00:00 2001 From: Aram Grigoryan <132480+aram356@users.noreply.github.com> Date: Wed, 9 Jul 2025 20:16:13 -0700 Subject: [PATCH 09/20] Fixed warning --- crates/common/src/settings.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/common/src/settings.rs b/crates/common/src/settings.rs index 602f451..a85efc4 100644 --- a/crates/common/src/settings.rs +++ b/crates/common/src/settings.rs @@ -38,6 +38,7 @@ impl Publisher { /// }; /// assert_eq!(publisher.origin_host(), "origin.example.com:8080"); /// ``` + #[allow(dead_code)] pub fn origin_host(&self) -> String { Url::parse(&self.origin_url) .ok() From f9f8eb1dd2e5a85d0f621051eb12dc9db9b15ad2 Mon Sep 17 00:00:00 2001 From: Aram Grigoryan <132480+aram356@users.noreply.github.com> Date: Wed, 9 Jul 2025 21:54:05 -0700 Subject: [PATCH 10/20] Replace origin host --- .env.dev | 2 +- Cargo.lock | 20 ++ crates/common/Cargo.toml | 1 + crates/common/src/publisher.rs | 587 ++++++++++++++++++++++++++++++++- fastly.toml | 1 + 5 files changed, 602 insertions(+), 9 deletions(-) diff --git a/.env.dev b/.env.dev index cf3a91b..81b35c6 100644 --- a/.env.dev +++ b/.env.dev @@ -1,5 +1,5 @@ # [publisher] -TRUSTED_SERVER__PUBLISHER__ORIGIN_URL=http://127.0.0.1:9090 +TRUSTED_SERVER__PUBLISHER__ORIGIN_URL=http://localhost:9090 # [ad_server] TRUSTED_SERVER__AD_SERVER__AD_PARTNER_URL=http://127.0.0.1:10180 diff --git a/Cargo.lock b/Cargo.lock index 3183dab..b08a3b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -238,6 +238,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + [[package]] name = "crunchy" version = "0.2.4" @@ -508,6 +517,16 @@ dependencies = [ "log", ] 
+[[package]] +name = "flate2" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "fnv" version = "1.0.7" @@ -1598,6 +1617,7 @@ dependencies = [ "derive_more", "error-stack", "fastly", + "flate2", "futures", "handlebars", "hex", diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml index 24f35a0..d5a40b2 100644 --- a/crates/common/Cargo.toml +++ b/crates/common/Cargo.toml @@ -15,6 +15,7 @@ cookie = "0.18.1" derive_more = { version = "1.0", features = ["display", "error"] } error-stack = "0.5" fastly = "0.11.5" +flate2 = "1.0" futures = "0.3" handlebars = "6.3.2" hex = "0.4.3" diff --git a/crates/common/src/publisher.rs b/crates/common/src/publisher.rs index c03429e..a32b269 100644 --- a/crates/common/src/publisher.rs +++ b/crates/common/src/publisher.rs @@ -1,11 +1,11 @@ use error_stack::{Report, ResultExt}; use fastly::http::{header, StatusCode}; use fastly::{Request, Response}; +use flate2::read::{GzDecoder, ZlibDecoder}; +use std::io::Read; use crate::constants::{ - HEADER_SYNTHETIC_FRESH, HEADER_SYNTHETIC_TRUSTED_SERVER, HEADER_X_GEO_CITY, - HEADER_X_GEO_CONTINENT, HEADER_X_GEO_COORDINATES, HEADER_X_GEO_COUNTRY, - HEADER_X_GEO_INFO_AVAILABLE, HEADER_X_GEO_METRO_CODE, + HEADER_SYNTHETIC_FRESH, HEADER_SYNTHETIC_TRUSTED_SERVER, HEADER_X_FORWARDED_FOR, HEADER_X_GEO_CITY, HEADER_X_GEO_CONTINENT, HEADER_X_GEO_COORDINATES, HEADER_X_GEO_COUNTRY, HEADER_X_GEO_INFO_AVAILABLE, HEADER_X_GEO_METRO_CODE }; use crate::cookies::create_synthetic_cookie; use crate::error::TrustedServerError; @@ -142,15 +142,586 @@ pub fn handle_publisher_request( ) -> Result> { log::info!("Proxying request to publisher_origin"); + // Extract the request host from the incoming request + let request_host = req + .get_header(header::HOST) + .map(|h| h.to_str().unwrap_or_default()) + .unwrap_or_default() + 
.to_string(); + + // Extract the protocol from X-Forwarded-Proto header before moving req + let request_scheme = req + .get_header("x-forwarded-proto") + .and_then(|h| h.to_str().ok()) + .unwrap_or("http") + .to_string(); + + log::info!("Request host: {}", request_host); + // Extract host from the origin_url using the Publisher's origin_host method - let host = settings.publisher.origin_host(); + let origin_host = settings.publisher.origin_host(); - log::info!("Setting host header to: {}", host); - req.set_header("host", host); + log::info!("Setting host header to: {}", origin_host); + req.set_header("host", &origin_host); // Send the request to the origin backend - req.send(&settings.publisher.origin_backend) + let mut response = req + .send(&settings.publisher.origin_backend) .change_context(TrustedServerError::Proxy { message: "Failed to proxy request".to_string(), - }) + })?; + + // Log all response headers for debugging + log::info!("Response headers:"); + for (name, value) in response.get_headers() { + log::info!(" {}: {:?}", name, value); + } + + // Check if the response has a text-based content type that we should process + let content_type = response + .get_header(header::CONTENT_TYPE) + .map(|h| h.to_str().unwrap_or_default()) + .unwrap_or_default(); + + let should_process = content_type.contains("text/html") + || content_type.contains("text/css") + || content_type.contains("text/javascript") + || content_type.contains("application/javascript") + || content_type.contains("application/json"); + + if should_process && !request_host.is_empty() { + // Check if the response is compressed + let content_encoding = response + .get_header(header::CONTENT_ENCODING) + .map(|h| h.to_str().unwrap_or_default()) + .unwrap_or_default() + .to_lowercase(); + + // Log response details for debugging + log::info!( + "Processing response - Content-Type: {}, Content-Encoding: {}, Request Host: {}, Origin Host: {}", + content_type, content_encoding, request_host, origin_host
+ ); + + // Get the response body as bytes + let body_bytes = response.take_body_bytes(); + + // Check if we got an empty body + if body_bytes.is_empty() { + log::warn!("Response body is empty, nothing to process"); + return Ok(response); + } + + log::info!("Response body size: {} bytes", body_bytes.len()); + + // Decompress the body if needed + let decompressed_body = match content_encoding.as_str() { + "gzip" => { + let mut decoder = GzDecoder::new(&body_bytes[..]); + let mut decompressed = Vec::new(); + match decoder.read_to_end(&mut decompressed) { + Ok(_) => { + log::info!("Successfully decompressed gzip content"); + decompressed + } + Err(e) => { + log::warn!("Failed to decompress gzip content: {}. Content might already be decompressed by Fastly", e); + // Try using the original bytes + body_bytes + } + } + } + "deflate" => { + let mut decoder = ZlibDecoder::new(&body_bytes[..]); + let mut decompressed = Vec::new(); + match decoder.read_to_end(&mut decompressed) { + Ok(_) => { + log::info!("Successfully decompressed deflate content"); + decompressed + } + Err(e) => { + log::warn!("Failed to decompress deflate content: {}. 
Content might already be decompressed by Fastly", e); + // Try using the original bytes + body_bytes + } + } + } + _ => { + log::warn!( + "Unsupported content encoding: {}, passing through", + content_encoding + ); + body_bytes + } + }; + + // Try to convert to UTF-8 using lossy conversion to handle more cases + let body_str = String::from_utf8_lossy(&decompressed_body); + + // Use the extracted function to perform URL replacement + let modified_body = replace_origin_urls( + &body_str, + &origin_host, + &settings.publisher.origin_url, + &request_host, + &request_scheme, + ); + + // Set the modified body back + response.set_body(modified_body); + + // Remove headers that are no longer valid after modification + response.remove_header(header::CONTENT_LENGTH); + response.remove_header(header::CONTENT_ENCODING); + + log::info!("Completed processing response body"); + } else { + log::info!( + "Skipping response processing - should_process: {}, request_host: '{}'", + should_process, + request_host + ); + } + + Ok(response) +} + +/// Replaces origin URLs in content with request URLs. +/// +/// This function performs the URL replacement logic used in `handle_publisher_request`. +/// It replaces both the origin host and full origin URL with their request equivalents. 
+/// +/// # Arguments +/// +/// * `content` - The content to process +/// * `origin_host` - The origin hostname (e.g., "origin.example.com") +/// * `origin_url` - The full origin URL (e.g., "https://origin.example.com") +/// * `request_host` - The request hostname (e.g., "test.example.com") +/// * `request_scheme` - The request scheme ("http" or "https") +/// +/// # Returns +/// +/// The content with all origin references replaced +pub fn replace_origin_urls( + content: &str, + origin_host: &str, + origin_url: &str, + request_host: &str, + request_scheme: &str, +) -> String { + let request_url = format!("{}://{}", request_scheme, request_host); + + log::info!("Replacing {} with {}", origin_url, request_url); + + // Start with the content + let mut result = content.to_string(); + + // Replace full URLs first (more specific) + result = result.replace(origin_url, &request_url); + + // Also try with http if origin was https (in case of mixed content) + if origin_url.starts_with("https://") { + let http_origin_url = origin_url.replace("https://", "http://"); + result = result.replace(&http_origin_url, &request_url); + } + + // Replace protocol-relative URLs (//example.com) + let protocol_relative_origin = format!("//{}", origin_host); + let protocol_relative_request = format!("//{}", request_host); + result = result.replace(&protocol_relative_origin, &protocol_relative_request); + + // Replace host in various contexts + // This handles cases like: "host": "origin.example.com" in JSON + result = result.replace(origin_host, request_host); + + // Log if replacements were made + if result != content { + log::debug!("URL replacements made in content"); + } + + result +} + +#[cfg(test)] +mod tests { + use super::*; + use fastly::http::Method; + + fn create_test_settings() -> Settings { + Settings { + publisher: crate::settings::Publisher { + domain: "example.com".to_string(), + cookie_domain: ".example.com".to_string(), + origin_backend: "test_origin".to_string(), + 
origin_url: "https://origin.example.com".to_string(), + }, + ad_server: crate::settings::AdServer { + ad_partner_url: "https://ad.example.com".to_string(), + sync_url: "https://sync.example.com".to_string(), + }, + synthetic: crate::settings::Synthetic { + counter_store: "test_counter".to_string(), + opid_store: "test_opid_store".to_string(), + secret_key: "test_secret_key".to_string(), + template: "{{user_agent}}+{{ip}}".to_string(), + }, + prebid: crate::settings::Prebid { + server_url: "https://prebid.example.com".to_string(), + }, + } + } + + #[test] + fn test_replace_origin_urls() { + let test_cases = vec![ + ( + // Test HTML content + r#" + + + Link + + "#, + r#" + + + Link + + "#, + "https", + ), + ( + // Test JavaScript content + r#"const API_URL = 'https://origin.example.com/api'; + fetch('https://origin.example.com/data') + .then(res => res.json()); + window.location = 'https://origin.example.com/redirect';"#, + r#"const API_URL = 'https://test.example.com/api'; + fetch('https://test.example.com/data') + .then(res => res.json()); + window.location = 'https://test.example.com/redirect';"#, + "https", + ), + ( + // Test CSS content + r#".hero { + background: url('https://origin.example.com/hero.jpg'); + } + @import url('https://origin.example.com/fonts.css');"#, + r#".hero { + background: url('https://test.example.com/hero.jpg'); + } + @import url('https://test.example.com/fonts.css');"#, + "https", + ), + ( + // Test JSON API response + r#"{ + "api_endpoint": "https://origin.example.com/v1", + "assets_url": "https://origin.example.com/assets", + "websocket": "wss://origin.example.com/ws" + }"#, + r#"{ + "api_endpoint": "https://test.example.com/v1", + "assets_url": "https://test.example.com/assets", + "websocket": "wss://test.example.com/ws" + }"#, + "https", + ), + ( + // Test HTTP scheme + r#"HTTP Link"#, + r#"HTTP Link"#, + "http", + ), + ]; + + for (input, expected, scheme) in test_cases { + let result = replace_origin_urls( + input, + 
"origin.example.com", + "https://origin.example.com", + "test.example.com", + scheme, + ); + assert_eq!(result, expected); + } + } + + #[test] + fn test_replace_origin_urls_with_port() { + let content = r#"Link"#; + let result = replace_origin_urls( + content, + "origin.example.com:8080", + "https://origin.example.com:8080", + "test.example.com:9090", + "https", + ); + assert_eq!( + result, + r#"Link"# + ); + } + + #[test] + fn test_replace_origin_urls_mixed_protocols() { + let content = r#" + HTTPS + HTTP + + "#; + + // When replacing with HTTPS, both http and https URLs are replaced + let result = replace_origin_urls( + content, + "origin.example.com", + "https://origin.example.com", + "test.example.com", + "https", + ); + + assert!(result.contains("https://test.example.com/secure")); + assert!(result.contains("https://test.example.com/insecure")); // HTTP also replaced to HTTPS + assert!(result.contains("//test.example.com/protocol-relative.jpg")); + } + + #[test] + fn test_handle_publisher_request_extracts_headers() { + // Test that the function correctly extracts host and scheme from request headers + let mut req = Request::new(Method::GET, "https://test.example.com/page"); + req.set_header("host", "test.example.com"); + req.set_header("x-forwarded-proto", "https"); + + // Extract headers like the function does + let request_host = req + .get_header("host") + .map(|h| h.to_str().unwrap_or_default()) + .unwrap_or_default() + .to_string(); + + let request_scheme = req + .get_header("x-forwarded-proto") + .and_then(|h| h.to_str().ok()) + .unwrap_or("https") + .to_string(); + + assert_eq!(request_host, "test.example.com"); + assert_eq!(request_scheme, "https"); + } + + #[test] + fn test_handle_publisher_request_default_https_scheme() { + // Test default HTTPS when x-forwarded-proto is missing + let mut req = Request::new(Method::GET, "https://test.example.com/page"); + req.set_header("host", "test.example.com"); + // No x-forwarded-proto header + + let 
request_scheme = req + .get_header("x-forwarded-proto") + .and_then(|h| h.to_str().ok()) + .unwrap_or("https"); + + assert_eq!(request_scheme, "https"); + } + + #[test] + fn test_handle_publisher_request_http_scheme() { + // Test HTTP scheme detection + let mut req = Request::new(Method::GET, "http://test.example.com/page"); + req.set_header("host", "test.example.com"); + req.set_header("x-forwarded-proto", "http"); + + let request_scheme = req + .get_header("x-forwarded-proto") + .and_then(|h| h.to_str().ok()) + .unwrap_or("https"); + + assert_eq!(request_scheme, "http"); + } + + #[test] + fn test_content_type_detection() { + // Test which content types should be processed + let test_cases = vec![ + ("text/html", true), + ("text/html; charset=utf-8", true), + ("text/css", true), + ("text/javascript", true), + ("application/javascript", true), + ("application/json", true), + ("application/json; charset=utf-8", true), + ("image/jpeg", false), + ("image/png", false), + ("application/pdf", false), + ("video/mp4", false), + ("application/octet-stream", false), + ]; + + for (content_type, should_process) in test_cases { + let result = content_type.contains("text/html") + || content_type.contains("text/css") + || content_type.contains("text/javascript") + || content_type.contains("application/javascript") + || content_type.contains("application/json"); + + assert_eq!( + result, should_process, + "Content-Type '{}' should_process: expected {}, got {}", + content_type, should_process, result + ); + } + } + + #[test] + fn test_handle_main_page_gdpr_consent() { + let settings = create_test_settings(); + let req = Request::new(Method::GET, "https://example.com/"); + + // Without GDPR consent, tracking should be disabled + let response = handle_main_page(&settings, req).unwrap(); + assert_eq!(response.get_status(), StatusCode::OK); + // Note: Would need to verify response body contains disabled tracking + } + + #[test] + fn test_publisher_origin_host_extraction() { + let 
settings = create_test_settings(); + let origin_host = settings.publisher.origin_host(); + assert_eq!(origin_host, "origin.example.com"); + + // Test with port + let mut settings_with_port = create_test_settings(); + settings_with_port.publisher.origin_url = "https://origin.example.com:8080".to_string(); + assert_eq!( + settings_with_port.publisher.origin_host(), + "origin.example.com:8080" + ); + } + + #[test] + fn test_invalid_utf8_handling() { + // Test that invalid UTF-8 bytes are handled gracefully + let invalid_utf8_bytes = vec![0xFF, 0xFE, 0xFD]; // Invalid UTF-8 sequence + + // Verify these bytes cannot be converted to a valid UTF-8 string + assert!(String::from_utf8(invalid_utf8_bytes.clone()).is_err()); + + // In the actual function, invalid UTF-8 would be passed through unchanged + // This test verifies our approach is sound + } + + #[test] + fn test_utf8_conversion_edge_cases() { + // Test various UTF-8 edge cases + let test_cases = vec![ + // Valid UTF-8 with special characters + (vec![0xE2, 0x98, 0x83], true), // ☃ (snowman) + (vec![0xF0, 0x9F, 0x98, 0x80], true), // 😀 (emoji) + // Invalid UTF-8 sequences + (vec![0xFF, 0xFE], false), // Invalid start byte + (vec![0xC0, 0x80], false), // Overlong encoding + (vec![0xED, 0xA0, 0x80], false), // Surrogate half + ]; + + for (bytes, should_be_valid) in test_cases { + let result = String::from_utf8(bytes.clone()); + assert_eq!( + result.is_ok(), + should_be_valid, + "UTF-8 validation failed for bytes: {:?}", + bytes + ); + } + } + + #[test] + fn test_content_encoding_detection() { + // Test that we properly handle responses with various content encodings + let test_encodings = vec!["gzip", "deflate", "br", "identity", ""]; + + for encoding in test_encodings { + let mut req = Request::new(Method::GET, "https://test.example.com/page"); + req.set_header("accept-encoding", "gzip, deflate, br"); + + if !encoding.is_empty() { + req.set_header("content-encoding", encoding); + } + + let content_encoding = req + 
.get_header("content-encoding") + .map(|h| h.to_str().unwrap_or_default()) + .unwrap_or_default(); + + assert_eq!(content_encoding, encoding); + } + } + + #[test] + fn test_compressed_content_handling() { + // Test the overall flow with compressed content + // In production, Fastly handles decompression/recompression automatically + + let compressed_html = r#" + + + "#; + + let expected_html = r#" + + + "#; + + let result = replace_origin_urls( + compressed_html, + "origin.example.com", + "https://origin.example.com", + "test.example.com", + "https", + ); + + assert_eq!(result, expected_html); + } + + #[test] + fn test_replace_origin_urls_comprehensive() { + // Test comprehensive URL replacement scenarios + let content = r#" + + Link + + + + + + + + + {"api": "https://origin.example.com/api", "host": "origin.example.com"} + + + fetch('https://origin.example.com/data'); + const host = 'origin.example.com'; + "#; + + let result = replace_origin_urls( + content, + "origin.example.com", + "https://origin.example.com", + "test.example.com", + "https", + ); + + // Verify all replacements + assert!(result.contains(r#"href="https://test.example.com/page""#)); + assert!(result.contains(r#"src="https://test.example.com/image.jpg""#)); // HTTP upgraded + assert!(result.contains(r#"src="//test.example.com/script.js""#)); + assert!(result.contains(r#""api": "https://test.example.com/api""#)); + assert!(result.contains(r#""host": "test.example.com""#)); + assert!(result.contains(r#"fetch('https://test.example.com/data')"#)); + assert!(result.contains(r#"const host = 'test.example.com'"#)); + + // Ensure no origin references remain + assert!(!result.contains("origin.example.com")); + } } diff --git a/fastly.toml b/fastly.toml index 54b472f..197bcf8 100644 --- a/fastly.toml +++ b/fastly.toml @@ -20,6 +20,7 @@ build = """ [local_server.backends.publisher_origin] url = "http://localhost:9090" + override_host = "localhost:9090" [local_server.kv_stores] 
[[local_server.kv_stores.counter_store]] From ce4a8902b3d9551d79c6729787ac885c0ab01f92 Mon Sep 17 00:00:00 2001 From: Aram Grigoryan <132480+aram356@users.noreply.github.com> Date: Wed, 9 Jul 2025 22:33:12 -0700 Subject: [PATCH 11/20] Implemented stream replacer --- crates/common/src/lib.rs | 2 + crates/common/src/publisher.rs | 475 +++++++++--------------- crates/common/src/streaming_replacer.rs | 323 ++++++++++++++++ 3 files changed, 494 insertions(+), 306 deletions(-) create mode 100644 crates/common/src/streaming_replacer.rs diff --git a/crates/common/src/lib.rs b/crates/common/src/lib.rs index d4001d2..a9a8068 100644 --- a/crates/common/src/lib.rs +++ b/crates/common/src/lib.rs @@ -15,6 +15,7 @@ //! - [`prebid`]: Prebid integration and real-time bidding support //! - [`privacy`]: Privacy utilities and helpers //! - [`settings`]: Configuration management and validation +//! - [`streaming_replacer`]: Streaming URL replacement for large responses //! - [`synthetic`]: Synthetic ID generation using HMAC //! - [`templates`]: Handlebars template handling //! 
- [`test_support`]: Testing utilities and mocks @@ -32,6 +33,7 @@ pub mod privacy; pub mod publisher; pub mod settings; pub mod settings_data; +pub mod streaming_replacer; pub mod synthetic; pub mod templates; pub mod test_support; diff --git a/crates/common/src/publisher.rs b/crates/common/src/publisher.rs index a32b269..8ee5de4 100644 --- a/crates/common/src/publisher.rs +++ b/crates/common/src/publisher.rs @@ -1,17 +1,22 @@ use error_stack::{Report, ResultExt}; use fastly::http::{header, StatusCode}; -use fastly::{Request, Response}; +use fastly::{Body, Request, Response}; use flate2::read::{GzDecoder, ZlibDecoder}; -use std::io::Read; +use flate2::write::{GzEncoder, ZlibEncoder}; +use flate2::Compression; +use std::io::{Read, Write}; use crate::constants::{ - HEADER_SYNTHETIC_FRESH, HEADER_SYNTHETIC_TRUSTED_SERVER, HEADER_X_FORWARDED_FOR, HEADER_X_GEO_CITY, HEADER_X_GEO_CONTINENT, HEADER_X_GEO_COORDINATES, HEADER_X_GEO_COUNTRY, HEADER_X_GEO_INFO_AVAILABLE, HEADER_X_GEO_METRO_CODE + HEADER_SYNTHETIC_FRESH, HEADER_SYNTHETIC_TRUSTED_SERVER, HEADER_X_FORWARDED_FOR, + HEADER_X_GEO_CITY, HEADER_X_GEO_CONTINENT, HEADER_X_GEO_COORDINATES, HEADER_X_GEO_COUNTRY, + HEADER_X_GEO_INFO_AVAILABLE, HEADER_X_GEO_METRO_CODE, }; use crate::cookies::create_synthetic_cookie; use crate::error::TrustedServerError; use crate::gdpr::{get_consent_from_request, GdprConsent}; use crate::geo::get_dma_code; use crate::settings::Settings; +use crate::streaming_replacer::StreamingReplacer; use crate::synthetic::{generate_synthetic_id, get_or_generate_synthetic_id}; use crate::templates::HTML_TEMPLATE; @@ -125,6 +130,142 @@ pub fn handle_main_page( Ok(response) } +/// Process response body in streaming fashion with compression preservation +fn process_response_streaming( + body: Body, + content_encoding: &str, + origin_host: &str, + origin_url: &str, + request_host: &str, + request_scheme: &str, +) -> Result> { + const CHUNK_SIZE: usize = 8192; // 8KB chunks + + // Create the streaming 
replacer + let mut replacer = + StreamingReplacer::new(origin_host, origin_url, request_host, request_scheme); + + // Create output body + let mut output_body = Body::new(); + + // Determine if content needs decompression/recompression + let is_compressed = matches!(content_encoding, "gzip" | "deflate"); + + if is_compressed { + // For compressed content, we need to: + // 1. Decompress the entire content + // 2. Process it + // 3. Recompress it + // This is necessary because compression algorithms need to see the full content + + log::info!( + "Processing compressed content with encoding: {}", + content_encoding + ); + + // First, decompress everything + let mut decompressed = Vec::new(); + let mut decoder: Box = match content_encoding { + "gzip" => Box::new(GzDecoder::new(body)), + "deflate" => Box::new(ZlibDecoder::new(body)), + _ => unreachable!(), + }; + + decoder + .read_to_end(&mut decompressed) + .change_context(TrustedServerError::Proxy { + message: format!("Failed to decompress {} content", content_encoding), + })?; + + log::info!("Decompressed {} bytes", decompressed.len()); + + // Process the decompressed content in chunks + let mut processed = Vec::new(); + let chunks = decompressed.chunks(CHUNK_SIZE); + let total_chunks = chunks.len(); + + for (i, chunk) in chunks.enumerate() { + let is_last = i == total_chunks - 1; + let result = replacer.process_chunk(chunk, is_last); + processed.extend_from_slice(&result); + } + + log::info!("Processed {} bytes", processed.len()); + + // Recompress with the same encoding + match content_encoding { + "gzip" => { + log::info!("Recompressing as gzip"); + let mut encoder = GzEncoder::new(&mut output_body, Compression::default()); + encoder + .write_all(&processed) + .change_context(TrustedServerError::Proxy { + message: "Failed to write gzip data".to_string(), + })?; + encoder.finish().change_context(TrustedServerError::Proxy { + message: "Failed to finish gzip compression".to_string(), + })?; + } + "deflate" => { + 
log::info!("Recompressing as deflate"); + let mut encoder = ZlibEncoder::new(&mut output_body, Compression::default()); + encoder + .write_all(&processed) + .change_context(TrustedServerError::Proxy { + message: "Failed to write deflate data".to_string(), + })?; + encoder.finish().change_context(TrustedServerError::Proxy { + message: "Failed to finish deflate compression".to_string(), + })?; + } + _ => unreachable!(), + } + } else { + // For uncompressed content, we can truly stream + log::info!("Processing uncompressed content"); + + let mut buffer = vec![0u8; CHUNK_SIZE]; + let mut body_reader = body; + + loop { + match body_reader.read(&mut buffer) { + Ok(0) => { + // End of stream - process any remaining data + let final_chunk = replacer.process_chunk(&[], true); + if !final_chunk.is_empty() { + output_body.write_all(&final_chunk).change_context( + TrustedServerError::Proxy { + message: "Failed to write final chunk".to_string(), + }, + )?; + } + break; + } + Ok(n) => { + // Process this chunk + let processed = replacer.process_chunk(&buffer[..n], false); + if !processed.is_empty() { + output_body.write_all(&processed).change_context( + TrustedServerError::Proxy { + message: "Failed to write processed chunk".to_string(), + }, + )?; + } + } + Err(e) => { + log::error!("Error reading body: {}", e); + return Err(Report::new(TrustedServerError::Proxy { + message: format!("Failed to read body: {}", e), + })); + } + } + } + } + + log::info!("Streaming processing complete"); + Ok(output_body) +} + /// Proxies requests to the publisher's origin server. 
/// /// This function forwards incoming requests to the configured origin URL, @@ -203,78 +344,39 @@ pub fn handle_publisher_request( content_type, content_encoding, request_host, origin_host ); - // Get the response body as bytes - let body_bytes = response.take_body_bytes(); - - // Check if we got an empty body - if body_bytes.is_empty() { - log::warn!("Response body is empty, nothing to process"); - return Ok(response); - } - - log::info!("Response body size: {} bytes", body_bytes.len()); - - // Decompress the body if needed - let decompressed_body = match content_encoding.as_str() { - "gzip" => { - let mut decoder = GzDecoder::new(&body_bytes[..]); - let mut decompressed = Vec::new(); - match decoder.read_to_end(&mut decompressed) { - Ok(_) => { - log::info!("Successfully decompressed gzip content"); - decompressed - } - Err(e) => { - log::warn!("Failed to decompress gzip content: {}. Content might already be decompressed by Fastly", e); - // Try using the original bytes - body_bytes - } - } - } - "deflate" => { - let mut decoder = ZlibDecoder::new(&body_bytes[..]); - let mut decompressed = Vec::new(); - match decoder.read_to_end(&mut decompressed) { - Ok(_) => { - log::info!("Successfully decompressed deflate content"); - decompressed - } - Err(e) => { - log::warn!("Failed to decompress deflate content: {}. 
Content might already be decompressed by Fastly", e); - // Try using the original bytes - body_bytes - } - } - } - _ => { - log::warn!( - "Unsupported content encoding: {}, passing through", - content_encoding - ); - body_bytes - } - }; - - // Try to convert to UTF-8 using lossy conversion to handle more cases - let body_str = String::from_utf8_lossy(&decompressed_body); + // Take the response body for streaming processing + let body = response.take_body(); - // Use the extracted function to perform URL replacement - let modified_body = replace_origin_urls( - &body_str, + // Process the body using streaming approach + match process_response_streaming( + body, + &content_encoding, &origin_host, &settings.publisher.origin_url, &request_host, &request_scheme, - ); + ) { + Ok(processed_body) => { + // Set the processed body back + response.set_body(processed_body); - // Set the modified body back - response.set_body(modified_body); + // Remove Content-Length as the size has likely changed + response.remove_header(header::CONTENT_LENGTH); - // Remove headers that are no longer valid after modification - response.remove_header(header::CONTENT_LENGTH); - response.remove_header(header::CONTENT_ENCODING); + // Keep Content-Encoding header since we're returning compressed content + log::info!( + "Preserved Content-Encoding: {} for compressed response", + content_encoding + ); - log::info!("Completed processing response body"); + log::info!("Completed streaming processing of response body"); + } + Err(e) => { + log::error!("Failed to process response body: {:?}", e); + // Return an error response + return Err(e); + } + } } else { log::info!( "Skipping response processing - should_process: {}, request_host: '{}'", @@ -286,62 +388,6 @@ pub fn handle_publisher_request( Ok(response) } -/// Replaces origin URLs in content with request URLs. -/// -/// This function performs the URL replacement logic used in `handle_publisher_request`. 
-/// It replaces both the origin host and full origin URL with their request equivalents. -/// -/// # Arguments -/// -/// * `content` - The content to process -/// * `origin_host` - The origin hostname (e.g., "origin.example.com") -/// * `origin_url` - The full origin URL (e.g., "https://origin.example.com") -/// * `request_host` - The request hostname (e.g., "test.example.com") -/// * `request_scheme` - The request scheme ("http" or "https") -/// -/// # Returns -/// -/// The content with all origin references replaced -pub fn replace_origin_urls( - content: &str, - origin_host: &str, - origin_url: &str, - request_host: &str, - request_scheme: &str, -) -> String { - let request_url = format!("{}://{}", request_scheme, request_host); - - log::info!("Replacing {} with {}", origin_url, request_url); - - // Start with the content - let mut result = content.to_string(); - - // Replace full URLs first (more specific) - result = result.replace(origin_url, &request_url); - - // Also try with http if origin was https (in case of mixed content) - if origin_url.starts_with("https://") { - let http_origin_url = origin_url.replace("https://", "http://"); - result = result.replace(&http_origin_url, &request_url); - } - - // Replace protocol-relative URLs (//example.com) - let protocol_relative_origin = format!("//{}", origin_host); - let protocol_relative_request = format!("//{}", request_host); - result = result.replace(&protocol_relative_origin, &protocol_relative_request); - - // Replace host in various contexts - // This handles cases like: "host": "origin.example.com" in JSON - result = result.replace(origin_host, request_host); - - // Log if replacements were made - if result != content { - log::debug!("URL replacements made in content"); - } - - result -} - #[cfg(test)] mod tests { use super::*; @@ -371,121 +417,6 @@ mod tests { } } - #[test] - fn test_replace_origin_urls() { - let test_cases = vec![ - ( - // Test HTML content - r#" - - - Link - - "#, - r#" - - - Link - - 
"#, - "https", - ), - ( - // Test JavaScript content - r#"const API_URL = 'https://origin.example.com/api'; - fetch('https://origin.example.com/data') - .then(res => res.json()); - window.location = 'https://origin.example.com/redirect';"#, - r#"const API_URL = 'https://test.example.com/api'; - fetch('https://test.example.com/data') - .then(res => res.json()); - window.location = 'https://test.example.com/redirect';"#, - "https", - ), - ( - // Test CSS content - r#".hero { - background: url('https://origin.example.com/hero.jpg'); - } - @import url('https://origin.example.com/fonts.css');"#, - r#".hero { - background: url('https://test.example.com/hero.jpg'); - } - @import url('https://test.example.com/fonts.css');"#, - "https", - ), - ( - // Test JSON API response - r#"{ - "api_endpoint": "https://origin.example.com/v1", - "assets_url": "https://origin.example.com/assets", - "websocket": "wss://origin.example.com/ws" - }"#, - r#"{ - "api_endpoint": "https://test.example.com/v1", - "assets_url": "https://test.example.com/assets", - "websocket": "wss://test.example.com/ws" - }"#, - "https", - ), - ( - // Test HTTP scheme - r#"HTTP Link"#, - r#"HTTP Link"#, - "http", - ), - ]; - - for (input, expected, scheme) in test_cases { - let result = replace_origin_urls( - input, - "origin.example.com", - "https://origin.example.com", - "test.example.com", - scheme, - ); - assert_eq!(result, expected); - } - } - - #[test] - fn test_replace_origin_urls_with_port() { - let content = r#"Link"#; - let result = replace_origin_urls( - content, - "origin.example.com:8080", - "https://origin.example.com:8080", - "test.example.com:9090", - "https", - ); - assert_eq!( - result, - r#"Link"# - ); - } - - #[test] - fn test_replace_origin_urls_mixed_protocols() { - let content = r#" - HTTPS - HTTP - - "#; - - // When replacing with HTTPS, both http and https URLs are replaced - let result = replace_origin_urls( - content, - "origin.example.com", - "https://origin.example.com", - 
"test.example.com", - "https", - ); - - assert!(result.contains("https://test.example.com/secure")); - assert!(result.contains("https://test.example.com/insecure")); // HTTP also replaced to HTTPS - assert!(result.contains("//test.example.com/protocol-relative.jpg")); - } - #[test] fn test_handle_publisher_request_extracts_headers() { // Test that the function correctly extracts host and scheme from request headers @@ -656,72 +587,4 @@ mod tests { assert_eq!(content_encoding, encoding); } } - - #[test] - fn test_compressed_content_handling() { - // Test the overall flow with compressed content - // In production, Fastly handles decompression/recompression automatically - - let compressed_html = r#" - - - "#; - - let expected_html = r#" - - - "#; - - let result = replace_origin_urls( - compressed_html, - "origin.example.com", - "https://origin.example.com", - "test.example.com", - "https", - ); - - assert_eq!(result, expected_html); - } - - #[test] - fn test_replace_origin_urls_comprehensive() { - // Test comprehensive URL replacement scenarios - let content = r#" - - Link - - - - - - - - - {"api": "https://origin.example.com/api", "host": "origin.example.com"} - - - fetch('https://origin.example.com/data'); - const host = 'origin.example.com'; - "#; - - let result = replace_origin_urls( - content, - "origin.example.com", - "https://origin.example.com", - "test.example.com", - "https", - ); - - // Verify all replacements - assert!(result.contains(r#"href="https://test.example.com/page""#)); - assert!(result.contains(r#"src="https://test.example.com/image.jpg""#)); // HTTP upgraded - assert!(result.contains(r#"src="//test.example.com/script.js""#)); - assert!(result.contains(r#""api": "https://test.example.com/api""#)); - assert!(result.contains(r#""host": "test.example.com""#)); - assert!(result.contains(r#"fetch('https://test.example.com/data')"#)); - assert!(result.contains(r#"const host = 'test.example.com'"#)); - - // Ensure no origin references remain - 
assert!(!result.contains("origin.example.com")); - } } diff --git a/crates/common/src/streaming_replacer.rs b/crates/common/src/streaming_replacer.rs new file mode 100644 index 0000000..de2d15c --- /dev/null +++ b/crates/common/src/streaming_replacer.rs @@ -0,0 +1,323 @@ +//! Streaming URL replacer for processing large responses. +//! +//! This module provides functionality for replacing origin URLs with request URLs +//! in streaming fashion, handling content that may be split across multiple chunks. + +/// A streaming replacer that processes content in chunks +pub struct StreamingReplacer { + origin_host: String, + origin_url: String, + request_host: String, + request_url: String, + // Buffer to handle partial matches at chunk boundaries + overlap_buffer: Vec, + // Maximum pattern length to determine overlap size + max_pattern_length: usize, +} + +impl StreamingReplacer { + /// Creates a new `StreamingReplacer` instance. + /// + /// # Arguments + /// + /// * `origin_host` - The origin hostname (e.g., "origin.example.com") + /// * `origin_url` - The full origin URL (e.g., "https://origin.example.com") + /// * `request_host` - The request hostname (e.g., "test.example.com") + /// * `request_scheme` - The request scheme ("http" or "https") + pub fn new( + origin_host: &str, + origin_url: &str, + request_host: &str, + request_scheme: &str, + ) -> Self { + let request_url = format!("{}://{}", request_scheme, request_host); + + // Calculate the maximum pattern length we need to buffer + let patterns = vec![ + origin_url.len(), + origin_host.len(), + format!("//{}", origin_host).len(), + // Account for HTTP variant if origin is HTTPS + if origin_url.starts_with("https://") { + origin_url.replace("https://", "http://").len() + } else { + 0 + }, + ]; + + let max_pattern_length = patterns.into_iter().max().unwrap_or(0); + + Self { + origin_host: origin_host.to_string(), + origin_url: origin_url.to_string(), + request_host: request_host.to_string(), + request_url, + 
overlap_buffer: Vec::with_capacity(max_pattern_length), + max_pattern_length, + } + } + + /// Process a chunk of data and return the processed output + pub fn process_chunk(&mut self, chunk: &[u8], is_last_chunk: bool) -> Vec { + // Combine overlap buffer with new chunk + let mut combined = self.overlap_buffer.clone(); + combined.extend_from_slice(chunk); + + // Convert to string for processing (using lossy conversion) + let content = String::from_utf8_lossy(&combined); + + // Determine how much content to process + let process_end = if is_last_chunk { + content.len() + } else { + // Keep the last max_pattern_length characters for the next chunk + content.len().saturating_sub(self.max_pattern_length) + }; + + if process_end == 0 { + // Not enough data to process yet + self.overlap_buffer = combined; + return Vec::new(); + } + + // Process the content up to process_end + let to_process = &content[..process_end]; + + // Use the replace_origin_urls method + let processed = self.replace_origin_urls( + to_process, + self.request_url.split("://").nth(0).unwrap_or("https"), + ); + + // Save the overlap for the next chunk + if !is_last_chunk { + self.overlap_buffer = combined[process_end..].to_vec(); + } else { + self.overlap_buffer.clear(); + } + + processed.into_bytes() + } + + /// Replaces origin URLs in content with request URLs. + /// + /// This function performs the URL replacement logic. + /// It replaces both the origin host and full origin URL with their request equivalents. 
+ /// + /// # Arguments + /// + /// * `content` - The content to process + /// * `request_scheme` - The request scheme ("http" or "https") + /// + /// # Returns + /// + /// The content with all origin references replaced + pub fn replace_origin_urls(&self, content: &str, request_scheme: &str) -> String { + let request_url = format!("{}://{}", request_scheme, self.request_host); + + log::info!("Replacing {} with {}", self.origin_url, request_url); + + // Start with the content + let mut result = content.to_string(); + + // Replace full URLs first (more specific) + result = result.replace(&self.origin_url, &request_url); + + // Also try with http if origin was https (in case of mixed content) + if self.origin_url.starts_with("https://") { + let http_origin_url = self.origin_url.replace("https://", "http://"); + result = result.replace(&http_origin_url, &request_url); + } + + // Replace protocol-relative URLs (//example.com) + let protocol_relative_origin = format!("//{}", self.origin_host); + let protocol_relative_request = format!("//{}", self.request_host); + result = result.replace(&protocol_relative_origin, &protocol_relative_request); + + // Replace host in various contexts + // This handles cases like: "host": "origin.example.com" in JSON + result = result.replace(&self.origin_host, &self.request_host); + + // Log if replacements were made + if result != content { + log::debug!("URL replacements made in content"); + } + + result + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_streaming_replacer_basic() { + let mut replacer = StreamingReplacer::new( + "origin.example.com", + "https://origin.example.com", + "test.example.com", + "https", + ); + + let input = b"Visit https://origin.example.com for more info"; + let processed = replacer.process_chunk(input, true); + let result = String::from_utf8(processed).unwrap(); + + assert_eq!(result, "Visit https://test.example.com for more info"); + } + + #[test] + fn 
test_streaming_replacer_chunks() { + let mut replacer = StreamingReplacer::new( + "origin.example.com", + "https://origin.example.com", + "test.example.com", + "https", + ); + + // Test that patterns split across chunks are handled correctly + let chunk1 = b"Visit https://origin.exam"; + let chunk2 = b"ple.com for more info"; + + let processed1 = replacer.process_chunk(chunk1, false); + let processed2 = replacer.process_chunk(chunk2, true); + + let result = String::from_utf8([processed1, processed2].concat()).unwrap(); + assert_eq!(result, "Visit https://test.example.com for more info"); + } + + #[test] + fn test_streaming_replacer_multiple_patterns() { + let mut replacer = StreamingReplacer::new( + "origin.example.com", + "https://origin.example.com", + "test.example.com", + "https", + ); + + let input = + b"Link and //origin.example.com/resource"; + let processed = replacer.process_chunk(input, true); + let result = String::from_utf8(processed).unwrap(); + + assert!(result.contains("https://test.example.com")); + assert!(result.contains("//test.example.com/resource")); + } + + #[test] + fn test_streaming_replacer_edge_cases() { + let mut replacer = StreamingReplacer::new( + "origin.example.com", + "https://origin.example.com", + "test.example.com", + "https", + ); + + // Empty chunk + let processed = replacer.process_chunk(b"", true); + assert!(processed.is_empty()); + + // Very small chunks + let chunks = [ + b"h".as_ref(), + b"t".as_ref(), + b"t".as_ref(), + b"p".as_ref(), + b"s".as_ref(), + b":".as_ref(), + b"/".as_ref(), + b"/".as_ref(), + b"origin.example.com".as_ref(), + ]; + + let mut result = Vec::new(); + for (i, chunk) in chunks.iter().enumerate() { + let is_last = i == chunks.len() - 1; + let processed = replacer.process_chunk(chunk, is_last); + result.extend(processed); + } + + let result_str = String::from_utf8(result).unwrap(); + assert_eq!(result_str, "https://test.example.com"); + } + + #[test] + fn test_replace_origin_urls_comprehensive() { + let 
replacer = StreamingReplacer::new( + "origin.example.com", + "https://origin.example.com", + "test.example.com", + "https", + ); + + // Test comprehensive URL replacement scenarios + let content = r#" + + Link + + + + + + + + + {"api": "https://origin.example.com/api", "host": "origin.example.com"} + "#; + + let result = replacer.replace_origin_urls(content, "https"); + + // Verify all patterns were replaced + assert!(result.contains("https://test.example.com/page")); + assert!(result.contains("https://test.example.com/image.jpg")); + assert!(result.contains("//test.example.com/script.js")); + assert!(result.contains(r#""api": "https://test.example.com/api""#)); + assert!(result.contains(r#""host": "test.example.com""#)); + + // Ensure no origin URLs remain + assert!(!result.contains("origin.example.com")); + } + + #[test] + fn test_replace_origin_urls_with_port() { + let replacer = StreamingReplacer::new( + "origin.example.com:8080", + "https://origin.example.com:8080", + "test.example.com:9090", + "https", + ); + + let content = + "Visit https://origin.example.com:8080/api or //origin.example.com:8080/resource"; + let result = replacer.replace_origin_urls(content, "https"); + + assert_eq!( + result, + "Visit https://test.example.com:9090/api or //test.example.com:9090/resource" + ); + } + + #[test] + fn test_replace_origin_urls_mixed_protocols() { + let replacer = StreamingReplacer::new( + "origin.example.com", + "https://origin.example.com", + "test.example.com", + "http", + ); + + let content = r#" + HTTPS Link + HTTP Link + + "#; + + let result = replacer.replace_origin_urls(content, "http"); + + // When request is HTTP, all URLs should be replaced with HTTP + assert!(result.contains("http://test.example.com")); + assert!(!result.contains("https://test.example.com")); + assert!(result.contains("//test.example.com/script.js")); + } +} From 212bc8dcacf7378681c7f0b52064b52540a817b4 Mon Sep 17 00:00:00 2001 From: Aram Grigoryan 
<132480+aram356@users.noreply.github.com> Date: Wed, 9 Jul 2025 22:41:48 -0700 Subject: [PATCH 12/20] Fixed streaming --- crates/common/src/publisher.rs | 216 ++++++++++++++++++++------------- 1 file changed, 129 insertions(+), 87 deletions(-) diff --git a/crates/common/src/publisher.rs b/crates/common/src/publisher.rs index 8ee5de4..645ceed 100644 --- a/crates/common/src/publisher.rs +++ b/crates/common/src/publisher.rs @@ -130,6 +130,52 @@ pub fn handle_main_page( Ok(response) } +/// Generic streaming processor that reads from a source, processes through replacer, and writes to output +fn stream_process( + mut reader: R, + writer: &mut W, + replacer: &mut StreamingReplacer, + chunk_size: usize, +) -> Result<(), Report> { + let mut buffer = vec![0u8; chunk_size]; + + loop { + match reader.read(&mut buffer) { + Ok(0) => { + // End of stream - process any remaining data + let final_chunk = replacer.process_chunk(&[], true); + if !final_chunk.is_empty() { + writer + .write_all(&final_chunk) + .change_context(TrustedServerError::Proxy { + message: "Failed to write final chunk".to_string(), + })?; + } + break; + } + Ok(n) => { + // Process this chunk + let processed = replacer.process_chunk(&buffer[..n], false); + if !processed.is_empty() { + writer + .write_all(&processed) + .change_context(TrustedServerError::Proxy { + message: "Failed to write processed chunk".to_string(), + })?; + } + } + Err(e) => { + log::error!("Error reading from stream: {}", e); + return Err(Report::new(TrustedServerError::Proxy { + message: format!("Failed to read from stream: {}", e), + })); + } + } + } + + Ok(()) +} + /// Process response body in streaming fashion with compression preservation fn process_response_streaming( body: Body, @@ -152,69 +198,42 @@ fn process_response_streaming( let is_compressed = matches!(content_encoding, "gzip" | "deflate"); if is_compressed { - // For compressed content, we need to: - // 1. Decompress the entire content - // 2. Process it - // 3. 
Recompress it - // This is necessary because compression algorithms need to see the full content + // For compressed content, we stream through: + // 1. Decompress chunks + // 2. Process them + // 3. Recompress and write to output log::info!( "Processing compressed content with encoding: {}", content_encoding ); - // First, decompress everything - let mut decompressed = Vec::new(); - let mut decoder: Box = match content_encoding { - "gzip" => Box::new(GzDecoder::new(body)), - "deflate" => Box::new(ZlibDecoder::new(body)), - _ => unreachable!(), - }; - - decoder - .read_to_end(&mut decompressed) - .change_context(TrustedServerError::Proxy { - message: format!("Failed to decompress {} content", content_encoding), - })?; - - log::info!("Decompressed {} bytes", decompressed.len()); - - // Process the decompressed content in chunks - let mut processed = Vec::new(); - let chunks = decompressed.chunks(CHUNK_SIZE); - let total_chunks = chunks.len(); - - for (i, chunk) in chunks.enumerate() { - let is_last = i == total_chunks - 1; - let result = replacer.process_chunk(chunk, is_last); - processed.extend_from_slice(&result); - } - - log::info!("Processed {} bytes", processed.len()); - - // Recompress with the same encoding match content_encoding { "gzip" => { - log::info!("Recompressing as gzip"); - let mut encoder = GzEncoder::new(&mut output_body, Compression::default()); - encoder - .write_all(&processed) - .change_context(TrustedServerError::Proxy { - message: "Failed to write gzip data".to_string(), - })?; - encoder.finish().change_context(TrustedServerError::Proxy { + // Create gzip decompressor + let decoder = GzDecoder::new(body); + // Create gzip compressor + let mut encoder = GzEncoder::new(output_body, Compression::default()); + + // Stream through the pipeline + stream_process(decoder, &mut encoder, &mut replacer, CHUNK_SIZE)?; + + // Finish compression and get the output body + output_body = encoder.finish().change_context(TrustedServerError::Proxy { message: 
"Failed to finish gzip compression".to_string(), })?; } "deflate" => { - log::info!("Recompressing as deflate"); - let mut encoder = ZlibEncoder::new(&mut output_body, Compression::default()); - encoder - .write_all(&processed) - .change_context(TrustedServerError::Proxy { - message: "Failed to write deflate data".to_string(), - })?; - encoder.finish().change_context(TrustedServerError::Proxy { + // Create deflate decompressor + let decoder = ZlibDecoder::new(body); + // Create deflate compressor + let mut encoder = ZlibEncoder::new(output_body, Compression::default()); + + // Stream through the pipeline + stream_process(decoder, &mut encoder, &mut replacer, CHUNK_SIZE)?; + + // Finish compression and get the output body + output_body = encoder.finish().change_context(TrustedServerError::Proxy { message: "Failed to finish deflate compression".to_string(), })?; } @@ -224,42 +243,8 @@ fn process_response_streaming( // For uncompressed content, we can truly stream log::info!("Processing uncompressed content"); - let mut buffer = vec![0u8; CHUNK_SIZE]; - let mut body_reader = body; - - loop { - match body_reader.read(&mut buffer) { - Ok(0) => { - // End of stream - process any remaining data - let final_chunk = replacer.process_chunk(&[], true); - if !final_chunk.is_empty() { - output_body.write_all(&final_chunk).change_context( - TrustedServerError::Proxy { - message: "Failed to write final chunk".to_string(), - }, - )?; - } - break; - } - Ok(n) => { - // Process this chunk - let processed = replacer.process_chunk(&buffer[..n], false); - if !processed.is_empty() { - output_body.write_all(&processed).change_context( - TrustedServerError::Proxy { - message: "Failed to write processed chunk".to_string(), - }, - )?; - } - } - Err(e) => { - log::error!("Error reading body: {}", e); - return Err(Report::new(TrustedServerError::Proxy { - message: format!("Failed to read body: {}", e), - })); - } - } - } + // Stream directly from body to output + stream_process(body, &mut 
output_body, &mut replacer, CHUNK_SIZE)?; } log::info!("Streaming processing complete"); @@ -566,6 +551,63 @@ mod tests { } } + #[test] + fn test_streaming_compressed_content() { + use flate2::write::GzEncoder; + use flate2::Compression; + use std::io::Write; + + // Create some HTML content with origin URLs + let original_content = r#" + + + Link + "#; + + // Compress the content + let mut compressed = Vec::new(); + { + let mut encoder = GzEncoder::new(&mut compressed, Compression::default()); + encoder.write_all(original_content.as_bytes()).unwrap(); + encoder.finish().unwrap(); + } + + // Create a Body from compressed data + let body = Body::from(compressed); + + // Process the compressed body + let result = process_response_streaming( + body, + "gzip", + "origin.example.com", + "https://origin.example.com", + "test.example.com", + "https", + ); + + assert!(result.is_ok()); + let processed_body = result.unwrap(); + + // The body should still be compressed + // In a real test, we'd decompress and verify the content + // For now, just check that we got a body back + let bytes = processed_body.into_bytes(); + assert!(!bytes.is_empty()); + + // Decompress to verify content was transformed + use flate2::read::GzDecoder; + use std::io::Read; + let mut decoder = GzDecoder::new(&bytes[..]); + let mut decompressed = String::new(); + decoder.read_to_string(&mut decompressed).unwrap(); + + // Verify URLs were replaced + assert!(decompressed.contains("https://test.example.com/style.css")); + assert!(decompressed.contains("https://test.example.com/app.js")); + assert!(decompressed.contains("https://test.example.com/page")); + assert!(!decompressed.contains("origin.example.com")); + } + #[test] fn test_content_encoding_detection() { // Test that we properly handle responses with various content encodings From 2cfb096fd786e92937589a8f942f7db83d83dfcb Mon Sep 17 00:00:00 2001 From: Aram Grigoryan <132480+aram356@users.noreply.github.com> Date: Tue, 15 Jul 2025 10:45:53 -0700 
Subject: [PATCH 13/20] Detect TLS connection --- .env.dev | 6 +- crates/common/src/publisher.rs | 124 ++++++++++++++++++++++++++++++--- trusted-server.toml | 16 ++--- 3 files changed, 126 insertions(+), 20 deletions(-) diff --git a/.env.dev b/.env.dev index 81b35c6..dc7a6f2 100644 --- a/.env.dev +++ b/.env.dev @@ -1,9 +1,9 @@ -# [publisher] -TRUSTED_SERVER__PUBLISHER__ORIGIN_URL=http://localhost:9090 - # [ad_server] TRUSTED_SERVER__AD_SERVER__AD_PARTNER_URL=http://127.0.0.1:10180 +# [publisher] +TRUSTED_SERVER__PUBLISHER__ORIGIN_URL=http://localhost:9090 + # [synthetic] TRUSTED_SERVER__SYNTHETIC__COUNTER_STORE=counter_store TRUSTED_SERVER__SYNTHETIC__OPID_STORE=opid_store diff --git a/crates/common/src/publisher.rs b/crates/common/src/publisher.rs index 645ceed..de5c3e8 100644 --- a/crates/common/src/publisher.rs +++ b/crates/common/src/publisher.rs @@ -7,8 +7,8 @@ use flate2::Compression; use std::io::{Read, Write}; use crate::constants::{ - HEADER_SYNTHETIC_FRESH, HEADER_SYNTHETIC_TRUSTED_SERVER, HEADER_X_FORWARDED_FOR, - HEADER_X_GEO_CITY, HEADER_X_GEO_CONTINENT, HEADER_X_GEO_COORDINATES, HEADER_X_GEO_COUNTRY, + HEADER_SYNTHETIC_FRESH, HEADER_SYNTHETIC_TRUSTED_SERVER, HEADER_X_GEO_CITY, + HEADER_X_GEO_CONTINENT, HEADER_X_GEO_COORDINATES, HEADER_X_GEO_COUNTRY, HEADER_X_GEO_INFO_AVAILABLE, HEADER_X_GEO_METRO_CODE, }; use crate::cookies::create_synthetic_cookie; @@ -20,6 +20,65 @@ use crate::streaming_replacer::StreamingReplacer; use crate::synthetic::{generate_synthetic_id, get_or_generate_synthetic_id}; use crate::templates::HTML_TEMPLATE; +/// Detects the request scheme (HTTP or HTTPS) using Fastly SDK methods and headers. +/// +/// Tries multiple methods in order of reliability: +/// 1. Fastly SDK TLS detection methods (most reliable) +/// 2. Forwarded header (RFC 7239) +/// 3. X-Forwarded-Proto header +/// 4. Fastly-SSL header (least reliable, can be spoofed) +/// 5. Default to HTTP +fn detect_request_scheme(req: &Request) -> String { + // 1. 
First try Fastly SDK's built-in TLS detection methods + // These are the most reliable as they check the actual connection + if let Some(tls_protocol) = req.get_tls_protocol() { + // If we have a TLS protocol, the connection is definitely HTTPS + log::debug!("TLS protocol detected: {}", tls_protocol); + return "https".to_string(); + } + + // Also check TLS cipher - if present, connection is HTTPS + if req.get_tls_cipher_openssl_name().is_some() { + log::debug!("TLS cipher detected, using HTTPS"); + return "https".to_string(); + } + + // 2. Try the Forwarded header (RFC 7239) + if let Some(forwarded) = req.get_header("forwarded") { + if let Ok(forwarded_str) = forwarded.to_str() { + // Parse the Forwarded header + // Format: Forwarded: for=192.0.2.60;proto=https;by=203.0.113.43 + if forwarded_str.contains("proto=https") { + return "https".to_string(); + } else if forwarded_str.contains("proto=http") { + return "http".to_string(); + } + } + } + + // 3. Try X-Forwarded-Proto header + if let Some(proto) = req.get_header("x-forwarded-proto") { + if let Ok(proto_str) = proto.to_str() { + let proto_lower = proto_str.to_lowercase(); + if proto_lower == "https" || proto_lower == "http" { + return proto_lower; + } + } + } + + // 4. Check Fastly-SSL header (can be spoofed by clients, use as last resort) + if let Some(ssl) = req.get_header("fastly-ssl") { + if let Ok(ssl_str) = ssl.to_str() { + if ssl_str == "1" || ssl_str.to_lowercase() == "true" { + return "https".to_string(); + } + } + } + + // Default to HTTP (changed from HTTPS based on your settings file) + "http".to_string() +} + /// Handles the main page request. /// /// Serves the main page with synthetic ID generation and ad integration. 
@@ -275,14 +334,21 @@ pub fn handle_publisher_request( .unwrap_or_default() .to_string(); - // Extract the protocol from X-Forwarded-Proto header before moving req - let request_scheme = req - .get_header(HEADER_X_FORWARDED_FOR) - .and_then(|h| h.to_str().ok()) - .unwrap_or("http") - .to_string(); + // Detect the request scheme using multiple methods + let request_scheme = detect_request_scheme(&req); + + // Log detection details for debugging + log::info!( + "Scheme detection - TLS Protocol: {:?}, TLS Cipher: {:?}, Forwarded: {:?}, X-Forwarded-Proto: {:?}, Fastly-SSL: {:?}, Result: {}", + req.get_tls_protocol(), + req.get_tls_cipher_openssl_name(), + req.get_header("forwarded"), + req.get_header("x-forwarded-proto"), + req.get_header("fastly-ssl"), + request_scheme + ); - log::info!("Request host: {}", request_host); + log::info!("Request host: {}, scheme: {}", request_host, request_scheme); // Extract host from the origin_url using the Publisher's origin_host method let origin_host = settings.publisher.origin_host(); @@ -402,6 +468,46 @@ mod tests { } } + #[test] + fn test_detect_request_scheme() { + // Note: In tests, we can't mock the TLS methods on Request, so we test header fallbacks + + // Test Forwarded header with HTTPS + let mut req = Request::new(Method::GET, "https://test.example.com/page"); + req.set_header("forwarded", "for=192.0.2.60;proto=https;by=203.0.113.43"); + assert_eq!(detect_request_scheme(&req), "https"); + + // Test Forwarded header with HTTP + let mut req = Request::new(Method::GET, "http://test.example.com/page"); + req.set_header("forwarded", "for=192.0.2.60;proto=http;by=203.0.113.43"); + assert_eq!(detect_request_scheme(&req), "http"); + + // Test X-Forwarded-Proto with HTTPS + let mut req = Request::new(Method::GET, "https://test.example.com/page"); + req.set_header("x-forwarded-proto", "https"); + assert_eq!(detect_request_scheme(&req), "https"); + + // Test X-Forwarded-Proto with HTTP + let mut req = Request::new(Method::GET, 
"http://test.example.com/page"); + req.set_header("x-forwarded-proto", "http"); + assert_eq!(detect_request_scheme(&req), "http"); + + // Test Fastly-SSL header + let mut req = Request::new(Method::GET, "https://test.example.com/page"); + req.set_header("fastly-ssl", "1"); + assert_eq!(detect_request_scheme(&req), "https"); + + // Test default to HTTP when no headers present + let req = Request::new(Method::GET, "https://test.example.com/page"); + assert_eq!(detect_request_scheme(&req), "http"); + + // Test priority: Forwarded takes precedence over X-Forwarded-Proto + let mut req = Request::new(Method::GET, "https://test.example.com/page"); + req.set_header("forwarded", "proto=https"); + req.set_header("x-forwarded-proto", "http"); + assert_eq!(detect_request_scheme(&req), "https"); + } + #[test] fn test_handle_publisher_request_extracts_headers() { // Test that the function correctly extracts host and scheme from request headers diff --git a/trusted-server.toml b/trusted-server.toml index 607b716..885f8dc 100644 --- a/trusted-server.toml +++ b/trusted-server.toml @@ -1,9 +1,3 @@ -[publisher] -domain = "test-publisher.com" -cookie_domain = ".test-publisher.com" -origin_backend = "publisher_origin" -origin_url = "https://origin.test-publisher.com" - [ad_server] ad_partner_url = "equativ_ad_api_2" sync_url = "https://adapi-srv-eu.smartadserver.com/ac?pgid=2040327&fmtid=137675&synthetic_id={{synthetic_id}}" @@ -11,9 +5,15 @@ sync_url = "https://adapi-srv-eu.smartadserver.com/ac?pgid=2040327&fmtid=137675& [prebid] server_url = "http://68.183.113.79:8000/openrtb2/auction" +[publisher] +domain = "test-publisher.com" +cookie_domain = ".test-publisher.com" +origin_backend = "publisher_origin" +origin_url = "https://origin.test-publisher.com" + [synthetic] -counter_store = "jevans_synth_id_counter" -opid_store = "jevans_synth_id_opid" +counter_store = "counter_store" +opid_store = "opid_store" secret_key = "trusted-server" # Possible values # - "client_ip" From 
9862ec6e19569fd3d3e103071055c8da355a82f8 Mon Sep 17 00:00:00 2001 From: Aram Grigoryan <132480+aram356@users.noreply.github.com> Date: Tue, 15 Jul 2025 12:16:21 -0700 Subject: [PATCH 14/20] Updated packages --- Cargo.lock | 54 ++++++++++++++++++++++++++++------------ crates/common/Cargo.toml | 4 +-- 2 files changed, 40 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cca6992..05dddf7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -141,9 +141,9 @@ dependencies = [ [[package]] name = "brotli" -version = "3.5.0" +version = "8.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d640d25bc63c50fb1f0b545ffd80207d2e10a4c965530809b40ba3386825c391" +checksum = "9991eea70ea4f293524138648e41ee89b0b2b12ddef3b255effa43c8056e0e0d" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -152,9 +152,9 @@ dependencies = [ [[package]] name = "brotli-decompressor" -version = "2.5.1" +version = "5.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" +checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -276,9 +276,9 @@ dependencies = [ [[package]] name = "crc32fast" -version = "1.4.2" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" dependencies = [ "cfg-if", ] @@ -381,7 +381,16 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a9b99b9cbbe49445b21764dc0625032a89b145a2642e67603e1c936f5458d05" dependencies = [ - "derive_more-impl", + "derive_more-impl 1.0.0", +] + +[[package]] +name = "derive_more" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"093242cf7570c207c83073cf82f79706fe7b8317e98620a47d5be7c3d8497678" +dependencies = [ + "derive_more-impl 2.0.1", ] [[package]] @@ -396,6 +405,18 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "derive_more-impl" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.104", + "unicode-xid", +] + [[package]] name = "digest" version = "0.9.0" @@ -1624,9 +1645,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.9.0" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f271e09bde39ab52250160a67e88577e0559ad77e9085de6e9051a2c4353f8f8" +checksum = "ed0aee96c12fa71097902e0bb061a5e1ebd766a6636bb605ba401c45c1650eac" dependencies = [ "indexmap", "serde", @@ -1648,18 +1669,18 @@ dependencies = [ [[package]] name = "toml_parser" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5c1c469eda89749d2230d8156a5969a69ffe0d6d01200581cdc6110674d293e" +checksum = "97200572db069e74c512a14117b296ba0a80a30123fbbb5aa1f4a348f639ca30" dependencies = [ "winnow", ] [[package]] name = "toml_writer" -version = "1.0.0" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b679217f2848de74cabd3e8fc5e6d66f40b7da40f8e1954d92054d9010690fd5" +checksum = "fcc842091f2def52017664b53082ecbbeb5c7731092bad69d2c63050401dfd64" [[package]] name = "trusted-server-common" @@ -1669,7 +1690,8 @@ dependencies = [ "chrono", "config", "cookie", - "derive_more", + "derive_more 1.0.0", + "derive_more 2.0.1", "error-stack", "fastly", "flate2", @@ -1985,9 +2007,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.7.11" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"74c7b26e3480b707944fc872477815d29a8e429d2f93a1ce000f5fa84a15cbcd" +checksum = "f3edebf492c8125044983378ecb5766203ad3b4c2f7a922bd7dd207f6d443e95" dependencies = [ "memchr", ] diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml index 83656de..6661fdf 100644 --- a/crates/common/Cargo.toml +++ b/crates/common/Cargo.toml @@ -9,11 +9,11 @@ publish = false license = "Apache-2.0" [dependencies] -brotli = "3.3" +brotli = "8.0" chrono = "0.4" config = "0.15.11" cookie = "0.18.1" -derive_more = { version = "1.0", features = ["display", "error"] } +derive_more = { version = "2.0", features = ["display", "error"] } error-stack = "0.5" fastly = "0.11.5" flate2 = "1.0" From 4c4278349f6fd0ff30111ad958bf90ef09354acb Mon Sep 17 00:00:00 2001 From: Aram Grigoryan <132480+aram356@users.noreply.github.com> Date: Wed, 16 Jul 2025 09:21:04 -0700 Subject: [PATCH 15/20] Removed unnecessary files --- crates/common/src/settings.rs.orig | 255 ----------------------------- crates/fastly/src/main.rs.orig | 111 ------------- 2 files changed, 366 deletions(-) delete mode 100644 crates/common/src/settings.rs.orig delete mode 100644 crates/fastly/src/main.rs.orig diff --git a/crates/common/src/settings.rs.orig b/crates/common/src/settings.rs.orig deleted file mode 100644 index 31173e1..0000000 --- a/crates/common/src/settings.rs.orig +++ /dev/null @@ -1,255 +0,0 @@ -use core::str; - -use config::{Config, Environment, File, FileFormat}; -use error_stack::{Report, ResultExt}; -use serde::{Deserialize, Serialize}; - -use crate::error::TrustedServerError; - -pub const ENVIRONMENT_VARIABLE_PREFIX: &str = "TRUSTED_SERVER"; -pub const ENVIRONMENT_VARIABLE_SEPARATOR: &str = "__"; - -#[derive(Debug, Default, Deserialize, Serialize)] -pub struct AdServer { - pub ad_partner_backend: String, - pub sync_url: String, -} - -#[derive(Debug, Default, Deserialize, Serialize)] -pub struct Publisher { - pub domain: String, - pub cookie_domain: String, - pub origin_url: String, -} - -#[derive(Debug, 
Default, Deserialize, Serialize)] -pub struct Prebid { - pub server_url: String, -} - -#[derive(Debug, Default, Deserialize, Serialize)] -#[allow(unused)] -pub struct GamAdUnit { - pub name: String, - pub size: String, -} - -#[derive(Debug, Default, Deserialize, Serialize)] -#[allow(unused)] -pub struct Gam { - pub publisher_id: String, - pub server_url: String, - pub ad_units: Vec, -} - -#[allow(unused)] -#[derive(Debug, Default, Deserialize, Serialize)] -pub struct Synthetic { - pub counter_store: String, - pub opid_store: String, - pub secret_key: String, - pub template: String, -} - -#[derive(Debug, Default, Deserialize, Serialize)] -pub struct Settings { - pub ad_server: AdServer, - pub publisher: Publisher, - pub prebid: Prebid, - pub gam: Gam, - pub synthetic: Synthetic, -} - -#[allow(unused)] -impl Settings { - /// Creates a new [`Settings`] instance from a TOML string. - /// - /// Parses the provided TOML configuration and applies any environment - /// variable overrides using the `TRUSTED_SERVER__` prefix. 
- /// - /// # Errors - /// - /// - [`TrustedServerError::Configuration`] if the TOML is invalid or missing required fields - pub fn from_toml(toml_str: &str) -> Result> { - let environment = Environment::default() - .prefix(ENVIRONMENT_VARIABLE_PREFIX) - .separator(ENVIRONMENT_VARIABLE_SEPARATOR); - - let toml = File::from_str(toml_str, FileFormat::Toml); - let config = Config::builder() - .add_source(toml) - .add_source(environment) - .build() - .change_context(TrustedServerError::Configuration { - message: "Failed to build configuration".to_string(), - })?; - // You can deserialize (and thus freeze) the entire configuration as - config - .try_deserialize() - .change_context(TrustedServerError::Configuration { - message: "Failed to deserialize configuration".to_string(), - }) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use regex::Regex; - - use crate::test_support::tests::crate_test_settings_str; - - #[test] -<<<<<<< HEAD -======= - fn test_settings_new() { - // Test that Settings::new() loads successfully - let settings = Settings::new(); - assert!(settings.is_ok(), "Settings should load from embedded TOML"); - - let settings = settings.unwrap(); - // Verify basic structure is loaded - assert!(!settings.ad_server.ad_partner_backend.is_empty()); - assert!(!settings.ad_server.sync_url.is_empty()); - assert!(!settings.publisher.domain.is_empty()); - assert!(!settings.publisher.cookie_domain.is_empty()); - assert!(!settings.publisher.origin_url.is_empty()); - assert!(!settings.prebid.server_url.is_empty()); - assert!(!settings.synthetic.counter_store.is_empty()); - assert!(!settings.synthetic.opid_store.is_empty()); - assert!(!settings.synthetic.secret_key.is_empty()); - assert!(!settings.synthetic.template.is_empty()); - } - - #[test] ->>>>>>> feature/init-logger - fn test_settings_from_valid_toml() { - let toml_str = crate_test_settings_str(); - let settings = Settings::from_toml(&toml_str); - - assert!(settings.is_ok()); - - let settings = 
settings.expect("should parse valid TOML"); - assert_eq!( - settings.ad_server.ad_partner_backend, - "https://test-adpartner.com" - ); - assert_eq!( - settings.ad_server.sync_url, - "https://test-adpartner.com/synthetic_id={{synthetic_id}}" - ); - assert_eq!( - settings.prebid.server_url, - "https://test-prebid.com/openrtb2/auction" - ); - assert_eq!(settings.publisher.domain, "test-publisher.com"); - assert_eq!(settings.publisher.cookie_domain, ".test-publisher.com"); - assert_eq!( - settings.publisher.origin_url, - "https://origin.test-publisher.com" - ); - assert_eq!(settings.synthetic.counter_store, "test-counter-store"); - assert_eq!(settings.synthetic.opid_store, "test-opid-store"); - assert_eq!(settings.synthetic.secret_key, "test-secret-key"); - assert!(settings.synthetic.template.contains("{{client_ip}}")); - } - - #[test] - fn test_settings_missing_required_fields() { - let re = Regex::new(r"ad_partner_backend = .*").unwrap(); - let toml_str = crate_test_settings_str(); - let toml_str = re.replace(&toml_str, ""); - - let settings = Settings::from_toml(&toml_str); - assert!( - settings.is_err(), - "Should fail when required fields are missing" - ); - } - - #[test] - fn test_settings_empty_toml() { - let toml_str = ""; - let settings = Settings::from_toml(toml_str); - - assert!(settings.is_err(), "Should fail with empty TOML"); - } - - #[test] - fn test_settings_invalid_toml_syntax() { - let re = Regex::new(r"\]").unwrap(); - let toml_str = crate_test_settings_str(); - let toml_str = re.replace(&toml_str, ""); - - let settings = Settings::from_toml(&toml_str); - assert!(settings.is_err(), "Should fail with invalid TOML syntax"); - } - - #[test] - fn test_settings_partial_config() { - let re = Regex::new(r"\[ad_server\]").unwrap(); - let toml_str = crate_test_settings_str(); - let toml_str = re.replace(&toml_str, ""); - - let settings = Settings::from_toml(&toml_str); - assert!(settings.is_err(), "Should fail when sections are missing"); - } - - #[test] - fn 
test_settings_extra_fields() { - let toml_str = crate_test_settings_str() + "\nhello = 1"; - - let settings = Settings::from_toml(&toml_str); - assert!(settings.is_ok(), "Extra fields should be ignored"); - } - - #[test] - fn test_set_env() { - let re = Regex::new(r"ad_partner_backend = .*").unwrap(); - let toml_str = crate_test_settings_str(); - let toml_str = re.replace(&toml_str, ""); - - temp_env::with_var( - format!( - "{}{}AD_SERVER{}AD_PARTNER_BACKEND", - ENVIRONMENT_VARIABLE_PREFIX, - ENVIRONMENT_VARIABLE_SEPARATOR, - ENVIRONMENT_VARIABLE_SEPARATOR - ), - Some("https://change-ad.com/serve"), - || { - let settings = Settings::from_toml(&toml_str); - - assert!(settings.is_ok(), "Settings should load from embedded TOML"); - assert_eq!( - settings.unwrap().ad_server.ad_partner_backend, - "https://change-ad.com/serve" - ); - }, - ); - } - - #[test] - fn test_override_env() { - let toml_str = crate_test_settings_str(); - - temp_env::with_var( - format!( - "{}{}AD_SERVER{}AD_PARTNER_BACKEND", - ENVIRONMENT_VARIABLE_PREFIX, - ENVIRONMENT_VARIABLE_SEPARATOR, - ENVIRONMENT_VARIABLE_SEPARATOR - ), - Some("https://change-ad.com/serve"), - || { - let settings = Settings::from_toml(&toml_str); - - assert!(settings.is_ok(), "Settings should load from embedded TOML"); - assert_eq!( - settings.unwrap().ad_server.ad_partner_backend, - "https://change-ad.com/serve" - ); - }, - ); - } -} diff --git a/crates/fastly/src/main.rs.orig b/crates/fastly/src/main.rs.orig deleted file mode 100644 index c648beb..0000000 --- a/crates/fastly/src/main.rs.orig +++ /dev/null @@ -1,111 +0,0 @@ -use fastly::http::{header, Method, StatusCode}; -use fastly::{Error, Request, Response}; -use log_fastly::Logger; - -mod error; -use crate::error::to_error_response; - -use trusted_server_common::advertiser::handle_ad_request; -use trusted_server_common::constants::HEADER_X_COMPRESS_HINT; -use trusted_server_common::gam::{ - handle_gam_custom_url, handle_gam_golden_url, handle_gam_render, 
handle_gam_test, -}; -use trusted_server_common::gdpr::{handle_consent_request, handle_data_subject_request}; -use trusted_server_common::prebid::handle_prebid_test; -use trusted_server_common::privacy::handle_privacy_policy; -use trusted_server_common::publisher::handle_main_page; -use trusted_server_common::settings::Settings; -<<<<<<< HEAD -use trusted_server_common::settings_data::get_settings; -======= -use trusted_server_common::templates::GAM_TEST_TEMPLATE; ->>>>>>> feature/init-logger -use trusted_server_common::why::handle_why_trusted_server; - -#[fastly::main] -fn main(req: Request) -> Result { - init_logger(); - - let settings = match get_settings() { - Ok(s) => s, - Err(e) => { - log::error!("Failed to load settings: {:?}", e); - return Ok(to_error_response(e)); - } - }; - log::info!("Settings {settings:?}"); - - futures::executor::block_on(route_request(settings, req)) -} - -/// Routes incoming requests to appropriate handlers. -/// -/// This function implements the application's routing logic, matching HTTP methods -/// and paths to their corresponding handler functions. 
-async fn route_request(settings: Settings, req: Request) -> Result { - log::info!( - "FASTLY_SERVICE_VERSION: {}", - ::std::env::var("FASTLY_SERVICE_VERSION").unwrap_or_else(|_| String::new()) - ); - - let result = match (req.get_method(), req.get_path()) { - // Main application routes - (&Method::GET, "/") => handle_main_page(&settings, req), - (&Method::GET, "/ad-creative") => handle_ad_request(&settings, req), - (&Method::GET, "/prebid-test") => handle_prebid_test(&settings, req).await, - (&Method::GET, "/gam-test") => handle_gam_test(&settings, req).await, - (&Method::GET, "/gam-golden-url") => handle_gam_golden_url(&settings, req).await, - (&Method::POST, "/gam-test-custom-url") => handle_gam_custom_url(&settings, req).await, - (&Method::GET, "/gam-render") => handle_gam_render(&settings, req).await, - (&Method::GET, "/gam-test-page") => Ok(Response::from_status(StatusCode::OK) - .with_body(GAM_TEST_TEMPLATE) - .with_header(header::CONTENT_TYPE, "text/html") - .with_header("x-compress-hint", "on")), - - // GDPR compliance routes - (&Method::GET | &Method::POST, "/gdpr/consent") => handle_consent_request(&settings, req), - (&Method::GET | &Method::DELETE, "/gdpr/data") => { - handle_data_subject_request(&settings, req) - } - - // Static content pages - (&Method::GET, "/privacy-policy") => handle_privacy_policy(&settings, req), - (&Method::GET, "/why-trusted-server") => handle_why_trusted_server(&settings, req), - - // Catch-all 404 handler - _ => return Ok(not_found_response()), - }; - - // Convert any errors to HTTP error responses - result.map_or_else(|e| Ok(to_error_response(e)), Ok) -} - -/// Creates a standard 404 Not Found response. 
-fn not_found_response() -> Response { - Response::from_status(StatusCode::NOT_FOUND) - .with_body("Not Found") - .with_header(header::CONTENT_TYPE, "text/plain") - .with_header(HEADER_X_COMPRESS_HINT, "on") -} - -fn init_logger() { - let logger = Logger::builder() - .default_endpoint("tslog") - // .echo_stdout(true) - .max_level(log::LevelFilter::Debug) - .build() - .expect("Failed to build Logger"); - - fern::Dispatch::new() - .format(|out, message, record| { - out.finish(format_args!( - "{} {} {}", - chrono::Local::now().to_rfc3339_opts(chrono::SecondsFormat::Millis, true), - record.level(), - message - )) - }) - .chain(Box::new(logger) as Box) - .apply() - .expect("Failed to initialize logger"); -} From 1fd1fcca3085a1bb3070c6ac5d7680eb111f83e2 Mon Sep 17 00:00:00 2001 From: Aram Grigoryan <132480+aram356@users.noreply.github.com> Date: Wed, 16 Jul 2025 19:49:49 -0700 Subject: [PATCH 16/20] Output logs to stdout --- crates/fastly/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/fastly/src/main.rs b/crates/fastly/src/main.rs index 8be6395..1b4149a 100644 --- a/crates/fastly/src/main.rs +++ b/crates/fastly/src/main.rs @@ -99,7 +99,7 @@ async fn route_request(settings: Settings, req: Request) -> Result Date: Wed, 16 Jul 2025 19:52:16 -0700 Subject: [PATCH 17/20] Output logs to stdout --- crates/fastly/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/fastly/src/main.rs b/crates/fastly/src/main.rs index ee993de..1e71749 100644 --- a/crates/fastly/src/main.rs +++ b/crates/fastly/src/main.rs @@ -87,7 +87,7 @@ fn not_found_response() -> Response { fn init_logger() { let logger = Logger::builder() .default_endpoint("tslog") - // .echo_stdout(true) + .echo_stdout(true) .max_level(log::LevelFilter::Debug) .build() .expect("Failed to build Logger"); From 4247e30cd4f8abfc4716b4453f94be15edaaa482 Mon Sep 17 00:00:00 2001 From: Aram Grigoryan <132480+aram356@users.noreply.github.com> Date: Mon, 21 Jul 2025 
17:00:49 -0700 Subject: [PATCH 18/20] Simplify streaming replacer logic --- crates/common/src/publisher.rs | 168 ++++-- crates/common/src/streaming_replacer.rs | 675 +++++++++++++++++++----- 2 files changed, 653 insertions(+), 190 deletions(-) diff --git a/crates/common/src/publisher.rs b/crates/common/src/publisher.rs index 8c24259..7d5b181 100644 --- a/crates/common/src/publisher.rs +++ b/crates/common/src/publisher.rs @@ -1,10 +1,11 @@ +use brotli::enc::{writer::CompressorWriter, BrotliEncoderParams}; +use brotli::Decompressor; use error_stack::{Report, ResultExt}; use fastly::http::{header, StatusCode}; use fastly::{Body, Request, Response}; use flate2::read::{GzDecoder, ZlibDecoder}; use flate2::write::{GzEncoder, ZlibEncoder}; use flate2::Compression; -use std::io::{Read, Write}; use crate::constants::{ HEADER_SYNTHETIC_FRESH, HEADER_SYNTHETIC_TRUSTED_SERVER, HEADER_X_GEO_CITY, @@ -16,7 +17,7 @@ use crate::error::TrustedServerError; use crate::gdpr::{get_consent_from_request, GdprConsent}; use crate::geo::get_dma_code; use crate::settings::Settings; -use crate::streaming_replacer::StreamingReplacer; +use crate::streaming_replacer::{create_url_replacer, stream_process}; use crate::synthetic::{generate_synthetic_id, get_or_generate_synthetic_id}; use crate::templates::HTML_TEMPLATE; @@ -189,52 +190,6 @@ pub fn handle_main_page( Ok(response) } -/// Generic streaming processor that reads from a source, processes through replacer, and writes to output -fn stream_process( - mut reader: R, - writer: &mut W, - replacer: &mut StreamingReplacer, - chunk_size: usize, -) -> Result<(), Report> { - let mut buffer = vec![0u8; chunk_size]; - - loop { - match reader.read(&mut buffer) { - Ok(0) => { - // End of stream - process any remaining data - let final_chunk = replacer.process_chunk(&[], true); - if !final_chunk.is_empty() { - writer - .write_all(&final_chunk) - .change_context(TrustedServerError::Proxy { - message: "Failed to write final chunk".to_string(), - })?; - } - 
break; - } - Ok(n) => { - // Process this chunk - let processed = replacer.process_chunk(&buffer[..n], false); - if !processed.is_empty() { - writer - .write_all(&processed) - .change_context(TrustedServerError::Proxy { - message: "Failed to write processed chunk".to_string(), - })?; - } - } - Err(e) => { - log::error!("Error reading from stream: {}", e); - return Err(Report::new(TrustedServerError::Proxy { - message: format!("Failed to read from stream: {}", e), - })); - } - } - } - - Ok(()) -} - /// Process response body in streaming fashion with compression preservation fn process_response_streaming( body: Body, @@ -246,15 +201,14 @@ fn process_response_streaming( ) -> Result> { const CHUNK_SIZE: usize = 8192; // 8KB chunks - // Create the streaming replacer - let mut replacer = - StreamingReplacer::new(origin_host, origin_url, request_host, request_scheme); + // Create the streaming replacer for URL replacements + let mut replacer = create_url_replacer(origin_host, origin_url, request_host, request_scheme); // Create output body let mut output_body = Body::new(); // Determine if content needs decompression/recompression - let is_compressed = matches!(content_encoding, "gzip" | "deflate"); + let is_compressed = matches!(content_encoding, "gzip" | "deflate" | "br"); if is_compressed { // For compressed content, we stream through: @@ -275,7 +229,11 @@ fn process_response_streaming( let mut encoder = GzEncoder::new(output_body, Compression::default()); // Stream through the pipeline - stream_process(decoder, &mut encoder, &mut replacer, CHUNK_SIZE)?; + stream_process(decoder, &mut encoder, &mut replacer, CHUNK_SIZE).map_err(|e| { + Report::new(TrustedServerError::Proxy { + message: format!("Failed to process stream: {}", e), + }) + })?; // Finish compression and get the output body output_body = encoder.finish().change_context(TrustedServerError::Proxy { @@ -289,13 +247,43 @@ fn process_response_streaming( let mut encoder = ZlibEncoder::new(output_body, 
Compression::default()); // Stream through the pipeline - stream_process(decoder, &mut encoder, &mut replacer, CHUNK_SIZE)?; + stream_process(decoder, &mut encoder, &mut replacer, CHUNK_SIZE).map_err(|e| { + Report::new(TrustedServerError::Proxy { + message: format!("Failed to process stream: {}", e), + }) + })?; // Finish compression and get the output body output_body = encoder.finish().change_context(TrustedServerError::Proxy { message: "Failed to finish deflate compression".to_string(), })?; } + "br" => { + // Create Brotli decompressor + let decoder = Decompressor::new(body, 4096); // 4KB buffer + + // Create Brotli compressor with reasonable parameters + // Quality 4 gives good balance of speed and compression + let params = BrotliEncoderParams { + quality: 4, + lgwin: 22, // 4MB window + ..Default::default() + }; + + // Create Brotli compressor writer + let mut encoder = CompressorWriter::with_params(output_body, 4096, ¶ms); + + // Stream through the pipeline + stream_process(decoder, &mut encoder, &mut replacer, CHUNK_SIZE).map_err(|e| { + Report::new(TrustedServerError::Proxy { + message: format!("Failed to process Brotli stream: {}", e), + }) + })?; + + // Finish compression and get the output body + // Note: into_inner() returns the inner writer (Body), not a Result + output_body = encoder.into_inner(); + } _ => unreachable!(), } } else { @@ -303,7 +291,11 @@ fn process_response_streaming( log::info!("Processing uncompressed content"); // Stream directly from body to output - stream_process(body, &mut output_body, &mut replacer, CHUNK_SIZE)?; + stream_process(body, &mut output_body, &mut replacer, CHUNK_SIZE).map_err(|e| { + Report::new(TrustedServerError::Proxy { + message: format!("Failed to process stream: {}", e), + }) + })?; } log::info!("Streaming processing complete"); @@ -375,9 +367,7 @@ pub fn handle_publisher_request( .map(|h| h.to_str().unwrap_or_default()) .unwrap_or_default(); - let should_process = content_type.contains("text/html") - || 
content_type.contains("text/css") - || content_type.contains("text/javascript") + let should_process = content_type.contains("text/") || content_type.contains("application/javascript") || content_type.contains("application/json"); @@ -691,6 +681,68 @@ mod tests { assert!(!decompressed.contains("origin.example.com")); } + #[test] + fn test_streaming_brotli_content() { + use brotli::enc::writer::CompressorWriter; + use brotli::enc::BrotliEncoderParams; + use std::io::Write; + + // Create some HTML content with origin URLs + let original_content = r#" + + + Link + "#; + + // Compress the content with Brotli + let mut compressed = Vec::new(); + { + let params = BrotliEncoderParams { + quality: 4, + lgwin: 22, + ..Default::default() + }; + let mut encoder = CompressorWriter::with_params(&mut compressed, 4096, ¶ms); + encoder.write_all(original_content.as_bytes()).unwrap(); + // encoder is dropped here, which finishes the compression + } + + // Create a Body from compressed data + let body = Body::from(compressed); + + // Process the compressed body + let result = process_response_streaming( + body, + "br", + "origin.example.com", + "https://origin.example.com", + "test.example.com", + "https", + ); + + assert!(result.is_ok()); + let processed_body = result.unwrap(); + + // The body should still be compressed + // In a real test, we'd decompress and verify the content + // For now, just check that we got a body back + let bytes = processed_body.into_bytes(); + assert!(!bytes.is_empty()); + + // Decompress to verify content was transformed + use brotli::Decompressor; + use std::io::Read; + let mut decoder = Decompressor::new(&bytes[..], 4096); + let mut decompressed = String::new(); + decoder.read_to_string(&mut decompressed).unwrap(); + + // Verify URLs were replaced + assert!(decompressed.contains("https://test.example.com/style.css")); + assert!(decompressed.contains("https://test.example.com/app.js")); + assert!(decompressed.contains("https://test.example.com/page")); 
+ assert!(!decompressed.contains("origin.example.com")); + } + #[test] fn test_content_encoding_detection() { // Test that we properly handle responses with various content encodings diff --git a/crates/common/src/streaming_replacer.rs b/crates/common/src/streaming_replacer.rs index de2d15c..3dc606e 100644 --- a/crates/common/src/streaming_replacer.rs +++ b/crates/common/src/streaming_replacer.rs @@ -1,14 +1,23 @@ -//! Streaming URL replacer for processing large responses. +//! Generic streaming replacer for processing large content. //! -//! This module provides functionality for replacing origin URLs with request URLs +//! This module provides functionality for replacing patterns in content //! in streaming fashion, handling content that may be split across multiple chunks. -/// A streaming replacer that processes content in chunks +use std::io::{self, Read, Write}; + +/// A replacement pattern configuration +#[derive(Debug, Clone)] +pub struct Replacement { + /// The string to find + pub find: String, + /// The string to replace it with + pub replace_with: String, +} + +/// A generic streaming replacer that processes content in chunks pub struct StreamingReplacer { - origin_host: String, - origin_url: String, - request_host: String, - request_url: String, + /// List of replacements to apply + replacements: Vec, // Buffer to handle partial matches at chunk boundaries overlap_buffer: Vec, // Maximum pattern length to determine overlap size @@ -16,135 +25,211 @@ pub struct StreamingReplacer { } impl StreamingReplacer { - /// Creates a new `StreamingReplacer` instance. + /// Creates a new `StreamingReplacer` with the given replacements. 
/// /// # Arguments /// - /// * `origin_host` - The origin hostname (e.g., "origin.example.com") - /// * `origin_url` - The full origin URL (e.g., "https://origin.example.com") - /// * `request_host` - The request hostname (e.g., "test.example.com") - /// * `request_scheme` - The request scheme ("http" or "https") - pub fn new( - origin_host: &str, - origin_url: &str, - request_host: &str, - request_scheme: &str, - ) -> Self { - let request_url = format!("{}://{}", request_scheme, request_host); - + /// * `replacements` - List of string replacements to perform + pub fn new(replacements: Vec) -> Self { // Calculate the maximum pattern length we need to buffer - let patterns = vec![ - origin_url.len(), - origin_host.len(), - format!("//{}", origin_host).len(), - // Account for HTTP variant if origin is HTTPS - if origin_url.starts_with("https://") { - origin_url.replace("https://", "http://").len() - } else { - 0 - }, - ]; - - let max_pattern_length = patterns.into_iter().max().unwrap_or(0); + let max_pattern_length = replacements.iter().map(|r| r.find.len()).max().unwrap_or(0); Self { - origin_host: origin_host.to_string(), - origin_url: origin_url.to_string(), - request_host: request_host.to_string(), - request_url, + replacements, overlap_buffer: Vec::with_capacity(max_pattern_length), max_pattern_length, } } + /// Creates a new `StreamingReplacer` with a single replacement. 
+ /// + /// # Arguments + /// + /// * `find` - The string to find + /// * `replace_with` - The string to replace it with + pub fn new_single(find: &str, replace_with: &str) -> Self { + Self::new(vec![Replacement { + find: find.to_string(), + replace_with: replace_with.to_string(), + }]) + } + /// Process a chunk of data and return the processed output pub fn process_chunk(&mut self, chunk: &[u8], is_last_chunk: bool) -> Vec { // Combine overlap buffer with new chunk let mut combined = self.overlap_buffer.clone(); combined.extend_from_slice(chunk); - // Convert to string for processing (using lossy conversion) - let content = String::from_utf8_lossy(&combined); + if combined.is_empty() { + return Vec::new(); + } // Determine how much content to process - let process_end = if is_last_chunk { - content.len() + let process_end_bytes = if is_last_chunk { + combined.len() } else { - // Keep the last max_pattern_length characters for the next chunk - content.len().saturating_sub(self.max_pattern_length) + // To avoid splitting patterns, we need to be careful about where we cut. + // We want to keep at least (max_pattern_length - 1) bytes for overlap. 
+ if combined.len() <= self.max_pattern_length { + // Not enough data to process safely + 0 + } else { + // Start with a safe boundary + let mut boundary = combined.len().saturating_sub(self.max_pattern_length - 1); + + // Check if we might be splitting a pattern at this boundary + // by looking for pattern starts near the boundary + let check_start = boundary.saturating_sub(self.max_pattern_length); + let check_end = (boundary + self.max_pattern_length).min(combined.len()); + + if let Ok(check_str) = std::str::from_utf8(&combined[check_start..check_end]) { + // Look for any pattern that would be split by our boundary + for replacement in &self.replacements { + if let Some(pos) = check_str.find(&replacement.find) { + let pattern_start = check_start + pos; + let pattern_end = pattern_start + replacement.find.len(); + + // If the pattern crosses our boundary, adjust the boundary + if pattern_start < boundary && pattern_end > boundary { + boundary = pattern_start; + break; + } + } + } + } + + boundary + } }; - if process_end == 0 { + if process_end_bytes == 0 { // Not enough data to process yet self.overlap_buffer = combined; return Vec::new(); } - // Process the content up to process_end - let to_process = &content[..process_end]; - - // Use the replace_origin_urls method - let processed = self.replace_origin_urls( - to_process, - self.request_url.split("://").nth(0).unwrap_or("https"), - ); - - // Save the overlap for the next chunk - if !is_last_chunk { - self.overlap_buffer = combined[process_end..].to_vec(); - } else { - self.overlap_buffer.clear(); + // Find a valid UTF-8 boundary at or before process_end_bytes + let mut adjusted_end_bytes = process_end_bytes; + while adjusted_end_bytes > 0 { + // Check if this is a valid UTF-8 boundary + if let Ok(s) = std::str::from_utf8(&combined[..adjusted_end_bytes]) { + // Valid UTF-8 up to this point, process it + let mut processed = s.to_string(); + + // Apply all replacements + for replacement in &self.replacements { + 
processed = processed.replace(&replacement.find, &replacement.replace_with); + } + + // Save the overlap for the next chunk + if !is_last_chunk { + self.overlap_buffer = combined[adjusted_end_bytes..].to_vec(); + } else { + self.overlap_buffer.clear(); + } + + return processed.into_bytes(); + } + adjusted_end_bytes -= 1; } - processed.into_bytes() + // This should never happen, but handle it gracefully + self.overlap_buffer = combined; + Vec::new() } - /// Replaces origin URLs in content with request URLs. - /// - /// This function performs the URL replacement logic. - /// It replaces both the origin host and full origin URL with their request equivalents. - /// - /// # Arguments - /// - /// * `content` - The content to process - /// * `request_scheme` - The request scheme ("http" or "https") - /// - /// # Returns - /// - /// The content with all origin references replaced - pub fn replace_origin_urls(&self, content: &str, request_scheme: &str) -> String { - let request_url = format!("{}://{}", request_scheme, self.request_host); - - log::info!("Replacing {} with {}", self.origin_url, request_url); + /// Reset the internal buffer (useful when reusing the replacer) + pub fn reset(&mut self) { + self.overlap_buffer.clear(); + } +} - // Start with the content - let mut result = content.to_string(); +/// Process a stream through a StreamingReplacer +/// +/// This function reads from a source, processes chunks through the replacer, +/// and writes the results to the output. +/// +/// # Arguments +/// +/// * `reader` - The input stream to read from +/// * `writer` - The output stream to write to +/// * `replacer` - The streaming replacer to use for processing +/// * `chunk_size` - The size of chunks to read at a time +/// +/// # Returns +/// +/// Returns `Ok(())` on success, or an `io::Error` if reading/writing fails. 
+pub fn stream_process( + mut reader: R, + writer: &mut W, + replacer: &mut StreamingReplacer, + chunk_size: usize, +) -> io::Result<()> { + let mut buffer = vec![0u8; chunk_size]; + + loop { + match reader.read(&mut buffer)? { + 0 => { + // End of stream - process any remaining data + let final_chunk = replacer.process_chunk(&[], true); + if !final_chunk.is_empty() { + writer.write_all(&final_chunk)?; + } + break; + } + n => { + // Process this chunk + let processed = replacer.process_chunk(&buffer[..n], false); + if !processed.is_empty() { + writer.write_all(&processed)?; + } + } + } + } - // Replace full URLs first (more specific) - result = result.replace(&self.origin_url, &request_url); + writer.flush()?; + Ok(()) +} - // Also try with http if origin was https (in case of mixed content) - if self.origin_url.starts_with("https://") { - let http_origin_url = self.origin_url.replace("https://", "http://"); - result = result.replace(&http_origin_url, &request_url); - } +/// Helper function to create a StreamingReplacer for URL replacements +pub fn create_url_replacer( + origin_host: &str, + origin_url: &str, + request_host: &str, + request_scheme: &str, +) -> StreamingReplacer { + let request_url = format!("{}://{}", request_scheme, request_host); - // Replace protocol-relative URLs (//example.com) - let protocol_relative_origin = format!("//{}", self.origin_host); - let protocol_relative_request = format!("//{}", self.request_host); - result = result.replace(&protocol_relative_origin, &protocol_relative_request); + let mut replacements = vec![ + // Replace full URLs first (more specific) + Replacement { + find: origin_url.to_string(), + replace_with: request_url.clone(), + }, + ]; + + // Also handle HTTP variant if origin is HTTPS + if origin_url.starts_with("https://") { + let http_origin_url = origin_url.replace("https://", "http://"); + replacements.push(Replacement { + find: http_origin_url, + replace_with: request_url.clone(), + }); + } - // Replace host in 
various contexts - // This handles cases like: "host": "origin.example.com" in JSON - result = result.replace(&self.origin_host, &self.request_host); + // Replace protocol-relative URLs + replacements.push(Replacement { + find: format!("//{}", origin_host), + replace_with: format!("//{}", request_host), + }); - // Log if replacements were made - if result != content { - log::debug!("URL replacements made in content"); - } + // Replace host in various contexts + replacements.push(Replacement { + find: origin_host.to_string(), + replace_with: request_host.to_string(), + }); - result - } + StreamingReplacer::new(replacements) } #[cfg(test)] @@ -153,12 +238,8 @@ mod tests { #[test] fn test_streaming_replacer_basic() { - let mut replacer = StreamingReplacer::new( - "origin.example.com", - "https://origin.example.com", - "test.example.com", - "https", - ); + let mut replacer = + StreamingReplacer::new_single("https://origin.example.com", "https://test.example.com"); let input = b"Visit https://origin.example.com for more info"; let processed = replacer.process_chunk(input, true); @@ -167,14 +248,32 @@ mod tests { assert_eq!(result, "Visit https://test.example.com for more info"); } + #[test] + fn test_multiple_replacements() { + let replacements = vec![ + Replacement { + find: "foo".to_string(), + replace_with: "bar".to_string(), + }, + Replacement { + find: "hello".to_string(), + replace_with: "hi".to_string(), + }, + ]; + + let mut replacer = StreamingReplacer::new(replacements); + + let input = b"hello world, foo is foo"; + let processed = replacer.process_chunk(input, true); + let result = String::from_utf8(processed).unwrap(); + + assert_eq!(result, "hi world, bar is bar"); + } + #[test] fn test_streaming_replacer_chunks() { - let mut replacer = StreamingReplacer::new( - "origin.example.com", - "https://origin.example.com", - "test.example.com", - "https", - ); + let mut replacer = + StreamingReplacer::new_single("https://origin.example.com", 
"https://test.example.com"); // Test that patterns split across chunks are handled correctly let chunk1 = b"Visit https://origin.exam"; @@ -189,12 +288,18 @@ mod tests { #[test] fn test_streaming_replacer_multiple_patterns() { - let mut replacer = StreamingReplacer::new( - "origin.example.com", - "https://origin.example.com", - "test.example.com", - "https", - ); + let replacements = vec![ + Replacement { + find: "https://origin.example.com".to_string(), + replace_with: "https://test.example.com".to_string(), + }, + Replacement { + find: "//origin.example.com".to_string(), + replace_with: "//test.example.com".to_string(), + }, + ]; + + let mut replacer = StreamingReplacer::new(replacements); let input = b"Link and //origin.example.com/resource"; @@ -207,12 +312,8 @@ mod tests { #[test] fn test_streaming_replacer_edge_cases() { - let mut replacer = StreamingReplacer::new( - "origin.example.com", - "https://origin.example.com", - "test.example.com", - "https", - ); + let mut replacer = + StreamingReplacer::new_single("https://origin.example.com", "https://test.example.com"); // Empty chunk let processed = replacer.process_chunk(b"", true); @@ -243,8 +344,8 @@ mod tests { } #[test] - fn test_replace_origin_urls_comprehensive() { - let replacer = StreamingReplacer::new( + fn test_url_replacer_comprehensive() { + let mut replacer = create_url_replacer( "origin.example.com", "https://origin.example.com", "test.example.com", @@ -266,7 +367,8 @@ mod tests { {"api": "https://origin.example.com/api", "host": "origin.example.com"} "#; - let result = replacer.replace_origin_urls(content, "https"); + let processed = replacer.process_chunk(content.as_bytes(), true); + let result = String::from_utf8(processed).unwrap(); // Verify all patterns were replaced assert!(result.contains("https://test.example.com/page")); @@ -280,8 +382,8 @@ mod tests { } #[test] - fn test_replace_origin_urls_with_port() { - let replacer = StreamingReplacer::new( + fn test_url_replacer_with_port() { + 
let mut replacer = create_url_replacer( "origin.example.com:8080", "https://origin.example.com:8080", "test.example.com:9090", @@ -289,8 +391,9 @@ mod tests { ); let content = - "Visit https://origin.example.com:8080/api or //origin.example.com:8080/resource"; - let result = replacer.replace_origin_urls(content, "https"); + b"Visit https://origin.example.com:8080/api or //origin.example.com:8080/resource"; + let processed = replacer.process_chunk(content, true); + let result = String::from_utf8(processed).unwrap(); assert_eq!( result, @@ -299,8 +402,8 @@ mod tests { } #[test] - fn test_replace_origin_urls_mixed_protocols() { - let replacer = StreamingReplacer::new( + fn test_url_replacer_mixed_protocols() { + let mut replacer = create_url_replacer( "origin.example.com", "https://origin.example.com", "test.example.com", @@ -313,11 +416,319 @@ mod tests { "#; - let result = replacer.replace_origin_urls(content, "http"); + let processed = replacer.process_chunk(content.as_bytes(), true); + let result = String::from_utf8(processed).unwrap(); // When request is HTTP, all URLs should be replaced with HTTP assert!(result.contains("http://test.example.com")); assert!(!result.contains("https://test.example.com")); assert!(result.contains("//test.example.com/script.js")); } + + #[test] + fn test_process_chunk_utf8_boundary() { + let mut replacer = + create_url_replacer("origin.com", "https://origin.com", "test.com", "https"); + + // Create content with multi-byte UTF-8 characters that could cause boundary issues + let content = "https://origin.com/test 思怙ᕏ测试 https://origin.com/more".as_bytes(); + + // Process in small chunks to force potential boundary issues + let chunk_size = 20; + let mut result = Vec::new(); + + for (i, chunk) in content.chunks(chunk_size).enumerate() { + let is_last = i == content.chunks(chunk_size).count() - 1; + result.extend(replacer.process_chunk(chunk, is_last)); + } + + let result_str = String::from_utf8(result).unwrap(); + 
assert!(result_str.contains("https://test.com/test")); + assert!(result_str.contains("https://test.com/more")); + assert!(result_str.contains("思怙ᕏ测试")); + } + + #[test] + fn test_process_chunk_boundary_in_multibyte_char() { + let mut replacer = + create_url_replacer("example.com", "https://example.com", "new.com", "https"); + + // Create a scenario where chunk boundary falls in the middle of a multi-byte character + let content = "https://example.com/før/bår/test".as_bytes(); + + // Split at byte 23, which should be in the middle of 'ø' (2-byte character) + let chunk1 = &content[..23]; + let chunk2 = &content[23..]; + + let mut result = Vec::new(); + result.extend(replacer.process_chunk(chunk1, false)); + result.extend(replacer.process_chunk(chunk2, true)); + + let result_str = String::from_utf8(result).unwrap(); + assert!(result_str.contains("https://new.com/før/bår/test")); + } + + #[test] + fn test_process_chunk_emoji_boundary() { + let mut replacer = + create_url_replacer("emoji.com", "https://emoji.com", "test.com", "https"); + + // Test with 4-byte emoji characters + let content = "https://emoji.com/test 🎉🎊🎋 https://emoji.com/more".as_bytes(); + + // Process the entire content at once to verify it works + let all_at_once = replacer.process_chunk(content, true); + let expected = String::from_utf8(all_at_once).unwrap(); + assert!(expected.contains("https://test.com/test")); + assert!(expected.contains("https://test.com/more")); + } + + #[test] + fn test_process_chunk_large_chunks() { + let mut replacer = + create_url_replacer("example.com", "https://example.com", "test.com", "https"); + + // Test with content that won't have URLs split across chunks + let content = + "Visit https://example.com/page1 and then https://example.com/page2 for more info" + .as_bytes(); + + // Use large chunks to avoid splitting URLs + let chunk_size = 50; + let mut result = Vec::new(); + + for (i, chunk) in content.chunks(chunk_size).enumerate() { + let is_last = i == 
content.chunks(chunk_size).count() - 1; + result.extend(replacer.process_chunk(chunk, is_last)); + } + + let result_str = String::from_utf8(result).unwrap(); + assert!(result_str.contains("https://test.com/page1")); + assert!(result_str.contains("https://test.com/page2")); + } + + #[test] + fn test_process_chunk_utf8_boundary_small_chunks() { + let mut replacer = create_url_replacer("test.com", "https://test.com", "new.com", "https"); + + // Test with multi-byte characters and very small chunks to stress UTF-8 boundaries + let content = "Some text 思怙ᕏ测试 more text with 🎉 emoji".as_bytes(); + + // Use very small chunks to force UTF-8 boundary handling + let chunk_size = 8; + let mut result = Vec::new(); + let chunks: Vec<_> = content.chunks(chunk_size).collect(); + + for (i, chunk) in chunks.iter().enumerate() { + let is_last = i == chunks.len() - 1; + result.extend(replacer.process_chunk(chunk, is_last)); + } + + let result_str = String::from_utf8(result).unwrap(); + // Just verify the content is preserved correctly + assert!(result_str.contains("思怙ᕏ测试")); + assert!(result_str.contains("🎉")); + } + + #[test] + fn test_generic_replacements() { + // Test replacing arbitrary strings + let replacements = vec![ + Replacement { + find: "color".to_string(), + replace_with: "colour".to_string(), + }, + Replacement { + find: "gray".to_string(), + replace_with: "grey".to_string(), + }, + ]; + + let mut replacer = StreamingReplacer::new(replacements); + + let input = b"The color is gray, not light gray."; + let processed = replacer.process_chunk(input, true); + let result = String::from_utf8(processed).unwrap(); + + assert_eq!(result, "The colour is grey, not light grey."); + } + + #[test] + fn test_pattern_priority() { + // Test that longer patterns are replaced first (order matters) + let replacements = vec![ + Replacement { + find: "hello world".to_string(), + replace_with: "greetings universe".to_string(), + }, + Replacement { + find: "hello".to_string(), + replace_with: 
"hi".to_string(), + }, + ]; + + let mut replacer = StreamingReplacer::new(replacements); + + let input = b"Say hello world and hello there!"; + let processed = replacer.process_chunk(input, true); + let result = String::from_utf8(processed).unwrap(); + + // Note: Since we apply replacements in order, "hello world" gets replaced first + assert_eq!(result, "Say greetings universe and hi there!"); + } + + #[test] + fn test_overlapping_patterns() { + // Test handling of overlapping patterns + let replacements = vec![ + Replacement { + find: "abc".to_string(), + replace_with: "xyz".to_string(), + }, + Replacement { + find: "bcd".to_string(), + replace_with: "123".to_string(), + }, + ]; + + let mut replacer = StreamingReplacer::new(replacements); + + let input = b"abcdef"; + let processed = replacer.process_chunk(input, true); + let result = String::from_utf8(processed).unwrap(); + + // "abc" gets replaced first, so "bcd" is no longer found + assert_eq!(result, "xyzdef"); + } + + #[test] + fn test_empty_replacement() { + // Test removing strings (replacing with empty string) + let mut replacer = StreamingReplacer::new_single("REMOVE_ME", ""); + + let input = b"Keep this REMOVE_ME but not this"; + let processed = replacer.process_chunk(input, true); + let result = String::from_utf8(processed).unwrap(); + + assert_eq!(result, "Keep this but not this"); + } + + #[test] + fn test_case_sensitive_replacement() { + // Test that replacements are case-sensitive + let mut replacer = StreamingReplacer::new_single("Hello", "Hi"); + + let input = b"Hello world, hello there, HELLO!"; + let processed = replacer.process_chunk(input, true); + let result = String::from_utf8(processed).unwrap(); + + assert_eq!(result, "Hi world, hello there, HELLO!"); + } + + #[test] + fn test_special_characters_in_pattern() { + // Test replacing patterns with special regex characters + let replacements = vec![ + Replacement { + find: "cost: $10.99".to_string(), + replace_with: "price: €9.99".to_string(), 
+ }, + Replacement { + find: "[TAG]".to_string(), + replace_with: "