diff --git a/crates/common/src/http_util.rs b/crates/common/src/http_util.rs index 132d6bd8..c830aab8 100644 --- a/crates/common/src/http_util.rs +++ b/crates/common/src/http_util.rs @@ -6,6 +6,157 @@ use sha2::{Digest, Sha256}; use crate::settings::Settings; +/// Extracted request information for host rewriting. +/// +/// This struct captures the effective host and scheme from an incoming request, +/// accounting for proxy headers like `X-Forwarded-Host` and `X-Forwarded-Proto`. +#[derive(Debug, Clone)] +pub struct RequestInfo { + /// The effective host for URL rewriting (from Forwarded, X-Forwarded-Host, or Host header) + pub host: String, + /// The effective scheme (from TLS detection, Forwarded, X-Forwarded-Proto, or default) + pub scheme: String, +} + +impl RequestInfo { + /// Extract request info from a Fastly request. + /// + /// Host priority: + /// 1. `Forwarded` header (RFC 7239, `host=...`) + /// 2. `X-Forwarded-Host` header (for chained proxy setups) + /// 3. `Host` header + /// + /// Scheme priority: + /// 1. Fastly SDK TLS detection (most reliable) + /// 2. `Forwarded` header (RFC 7239, `proto=https`) + /// 3. `X-Forwarded-Proto` header + /// 4. `Fastly-SSL` header + /// 5. 
Default to `http` + pub fn from_request(req: &Request) -> Self { + let host = extract_request_host(req); + let scheme = detect_request_scheme(req); + + Self { host, scheme } + } +} + +fn extract_request_host(req: &Request) -> String { + req.get_header("forwarded") + .and_then(|h| h.to_str().ok()) + .and_then(|value| parse_forwarded_param(value, "host")) + .or_else(|| { + req.get_header("x-forwarded-host") + .and_then(|h| h.to_str().ok()) + .and_then(parse_list_header_value) + }) + .or_else(|| req.get_header(header::HOST).and_then(|h| h.to_str().ok())) + .unwrap_or_default() + .to_string() +} + +fn parse_forwarded_param<'a>(forwarded: &'a str, param: &str) -> Option<&'a str> { + for entry in forwarded.split(',') { + for part in entry.split(';') { + let mut iter = part.splitn(2, '='); + let key = iter.next().unwrap_or("").trim(); + let value = iter.next().unwrap_or("").trim(); + if key.is_empty() || value.is_empty() { + continue; + } + if key.eq_ignore_ascii_case(param) { + let value = strip_quotes(value); + if !value.is_empty() { + return Some(value); + } + } + } + } + None +} + +fn parse_list_header_value(value: &str) -> Option<&str> { + value + .split(',') + .map(|part| part.trim()) + .find(|part| !part.is_empty()) + .map(strip_quotes) + .filter(|part| !part.is_empty()) +} + +fn strip_quotes(value: &str) -> &str { + let trimmed = value.trim(); + if trimmed.len() >= 2 && trimmed.starts_with('"') && trimmed.ends_with('"') { + &trimmed[1..trimmed.len() - 1] + } else { + trimmed + } +} + +fn normalize_scheme(value: &str) -> Option { + let scheme = value.trim().to_ascii_lowercase(); + if scheme == "https" || scheme == "http" { + Some(scheme) + } else { + None + } +} + +/// Detects the request scheme (HTTP or HTTPS) using Fastly SDK methods and headers. +/// +/// Tries multiple methods in order of reliability: +/// 1. Fastly SDK TLS detection methods (most reliable) +/// 2. Forwarded header (RFC 7239) +/// 3. X-Forwarded-Proto header +/// 4. 
Fastly-SSL header (least reliable, can be spoofed) +/// 5. Default to HTTP +fn detect_request_scheme(req: &Request) -> String { + // 1. First try Fastly SDK's built-in TLS detection methods + if let Some(tls_protocol) = req.get_tls_protocol() { + log::debug!("TLS protocol detected: {}", tls_protocol); + return "https".to_string(); + } + + // Also check TLS cipher - if present, connection is HTTPS + if req.get_tls_cipher_openssl_name().is_some() { + log::debug!("TLS cipher detected, using HTTPS"); + return "https".to_string(); + } + + // 2. Try the Forwarded header (RFC 7239) + if let Some(forwarded) = req.get_header("forwarded") { + if let Ok(forwarded_str) = forwarded.to_str() { + if let Some(proto) = parse_forwarded_param(forwarded_str, "proto") { + if let Some(scheme) = normalize_scheme(proto) { + return scheme; + } + } + } + } + + // 3. Try X-Forwarded-Proto header + if let Some(proto) = req.get_header("x-forwarded-proto") { + if let Ok(proto_str) = proto.to_str() { + if let Some(value) = parse_list_header_value(proto_str) { + if let Some(scheme) = normalize_scheme(value) { + return scheme; + } + } + } + } + + // 4. Check Fastly-SSL header (can be spoofed by clients, use as last resort) + if let Some(ssl) = req.get_header("fastly-ssl") { + if let Ok(ssl_str) = ssl.to_str() { + if ssl_str == "1" || ssl_str.to_lowercase() == "true" { + return "https".to_string(); + } + } + } + + // Default to HTTP + "http".to_string() +} + /// Build a static text response with strong ETag and standard caching headers. /// Handles If-None-Match to return 304 when appropriate. 
pub fn serve_static_with_etag(body: &str, req: &Request, content_type: &str) -> Response { @@ -166,4 +317,118 @@ mod tests { &t1 )); } + + // RequestInfo tests + + #[test] + fn test_request_info_from_host_header() { + let mut req = Request::new(fastly::http::Method::GET, "https://test.example.com/page"); + req.set_header("host", "test.example.com"); + + let info = RequestInfo::from_request(&req); + assert_eq!( + info.host, "test.example.com", + "Host should use Host header when forwarded headers are missing" + ); + // No TLS or forwarded headers, defaults to http. + assert_eq!( + info.scheme, "http", + "Scheme should default to http without TLS or forwarded headers" + ); + } + + #[test] + fn test_request_info_x_forwarded_host_precedence() { + let mut req = Request::new(fastly::http::Method::GET, "https://test.example.com/page"); + req.set_header("host", "internal-proxy.local"); + req.set_header("x-forwarded-host", "public.example.com, proxy.local"); + + let info = RequestInfo::from_request(&req); + assert_eq!( + info.host, "public.example.com", + "Host should prefer X-Forwarded-Host over Host" + ); + } + + #[test] + fn test_request_info_scheme_from_x_forwarded_proto() { + let mut req = Request::new(fastly::http::Method::GET, "https://test.example.com/page"); + req.set_header("host", "test.example.com"); + req.set_header("x-forwarded-proto", "https, http"); + + let info = RequestInfo::from_request(&req); + assert_eq!( + info.scheme, "https", + "Scheme should prefer the first X-Forwarded-Proto value" + ); + + // Test HTTP + let mut req = Request::new(fastly::http::Method::GET, "http://test.example.com/page"); + req.set_header("host", "test.example.com"); + req.set_header("x-forwarded-proto", "http"); + + let info = RequestInfo::from_request(&req); + assert_eq!( + info.scheme, "http", + "Scheme should use the X-Forwarded-Proto value when present" + ); + } + + #[test] + fn request_info_forwarded_header_precedence() { + // Forwarded header takes precedence over 
X-Forwarded-Proto + let mut req = Request::new(fastly::http::Method::GET, "https://test.example.com/page"); + req.set_header( + "forwarded", + "for=192.0.2.60;proto=\"HTTPS\";host=\"public.example.com:443\"", + ); + req.set_header("host", "internal-proxy.local"); + req.set_header("x-forwarded-host", "proxy.local"); + req.set_header("x-forwarded-proto", "http"); + + let info = RequestInfo::from_request(&req); + assert_eq!( + info.host, "public.example.com:443", + "Host should prefer Forwarded host over X-Forwarded-Host" + ); + assert_eq!( + info.scheme, "https", + "Scheme should prefer Forwarded proto over X-Forwarded-Proto" + ); + } + + #[test] + fn test_request_info_scheme_from_fastly_ssl() { + let mut req = Request::new(fastly::http::Method::GET, "https://test.example.com/page"); + req.set_header("fastly-ssl", "1"); + + let info = RequestInfo::from_request(&req); + assert_eq!( + info.scheme, "https", + "Scheme should fall back to Fastly-SSL when other signals are missing" + ); + } + + #[test] + fn test_request_info_chained_proxy_scenario() { + // Simulate: Client (HTTPS) -> Proxy A -> Trusted Server (HTTP internally) + // Proxy A sets X-Forwarded-Host and X-Forwarded-Proto + let mut req = Request::new( + fastly::http::Method::GET, + "http://trusted-server.internal/page", + ); + req.set_header("host", "trusted-server.internal"); + req.set_header("x-forwarded-host", "public.example.com"); + req.set_header("x-forwarded-proto", "https"); + + let info = RequestInfo::from_request(&req); + assert_eq!( + info.host, "public.example.com", + "Host should use X-Forwarded-Host in chained proxy scenarios" + ); + assert_eq!( + info.scheme, "https", + "Scheme should use X-Forwarded-Proto in chained proxy scenarios" + ); + } } diff --git a/crates/common/src/integrations/prebid.rs b/crates/common/src/integrations/prebid.rs index 70b6e7c4..7fdb440f 100644 --- a/crates/common/src/integrations/prebid.rs +++ b/crates/common/src/integrations/prebid.rs @@ -19,6 +19,7 @@ use 
crate::constants::{HEADER_SYNTHETIC_FRESH, HEADER_SYNTHETIC_TRUSTED_SERVER}; use crate::creative; use crate::error::TrustedServerError; use crate::geo::GeoInfo; +use crate::http_util::RequestInfo; use crate::integrations::{ AttributeRewriteAction, IntegrationAttributeContext, IntegrationAttributeRewriter, IntegrationEndpoint, IntegrationProxy, IntegrationRegistration, @@ -45,14 +46,18 @@ pub struct PrebidIntegrationConfig { deserialize_with = "crate::settings::vec_from_seq_or_map" )] pub bidders: Vec, - #[serde(default = "default_auto_configure")] - pub auto_configure: bool, #[serde(default)] pub debug: bool, #[serde(default)] - pub script_handler: Option, - #[serde(default)] pub debug_query_params: Option, + /// Patterns to match Prebid script URLs for serving empty JS. + /// Supports suffix matching (e.g., "/prebid.min.js" matches any path ending with that) + /// and wildcard patterns (e.g., "/static/prebid/*" matches paths under that prefix). + #[serde( + default = "default_script_patterns", + deserialize_with = "crate::settings::vec_from_seq_or_map" + )] + pub script_patterns: Vec, } impl IntegrationConfig for PrebidIntegrationConfig { @@ -69,12 +74,29 @@ fn default_bidders() -> Vec { vec!["mocktioneer".to_string()] } -fn default_auto_configure() -> bool { +fn default_enabled() -> bool { true } -fn default_enabled() -> bool { - true +/// Default suffixes that identify Prebid scripts +const PREBID_SCRIPT_SUFFIXES: &[&str] = &[ + "/prebid.js", + "/prebid.min.js", + "/prebidjs.js", + "/prebidjs.min.js", +]; + +fn default_script_patterns() -> Vec { + // Default patterns to intercept Prebid scripts and serve empty JS + // - Exact paths like "/prebid.min.js" match only that path + // - Wildcard paths like "/static/prebid/*" match anything under that prefix + // and are filtered by PREBID_SCRIPT_SUFFIXES in matches_script_pattern() + vec![ + "/prebid.js".to_string(), + "/prebid.min.js".to_string(), + "/prebidjs.js".to_string(), + "/prebidjs.min.js".to_string(), + ] } 
#[allow(dead_code)] @@ -126,6 +148,72 @@ impl PrebidIntegration { Arc::new(Self { config }) } + fn matches_script_url(&self, attr_value: &str) -> bool { + let trimmed = attr_value.trim(); + let without_query = trimmed.split(['?', '#']).next().unwrap_or(trimmed); + + if self.matches_script_pattern(without_query) { + return true; + } + + if !without_query.starts_with('/') + && !without_query.starts_with("//") + && !without_query.contains("://") + { + let with_slash = format!("/{without_query}"); + if self.matches_script_pattern(&with_slash) { + return true; + } + } + + let parsed = if without_query.starts_with("//") { + Url::parse(&format!("https:{without_query}")) + } else { + Url::parse(without_query) + }; + + parsed + .ok() + .is_some_and(|url| self.matches_script_pattern(url.path())) + } + + fn matches_script_pattern(&self, path: &str) -> bool { + // Normalize path to lowercase for case-insensitive matching + let path_lower = path.to_ascii_lowercase(); + + // Check if path matches any configured pattern + for pattern in &self.config.script_patterns { + let pattern_lower = pattern.to_ascii_lowercase(); + + // Check for wildcard patterns: /* or {*name} + if pattern_lower.ends_with("/*") || pattern_lower.contains("{*") { + // Extract prefix before the wildcard + let prefix = if pattern_lower.ends_with("/*") { + &pattern_lower[..pattern_lower.len() - 1] // Remove trailing * + } else { + // Find {* and extract prefix before it + pattern_lower.split("{*").next().unwrap_or("") + }; + + if path_lower.starts_with(prefix) { + // Check if it ends with a known Prebid script suffix + if PREBID_SCRIPT_SUFFIXES + .iter() + .any(|suffix| path_lower.ends_with(suffix)) + { + return true; + } + } + } else { + // Exact match or suffix match + if path_lower.ends_with(&pattern_lower) { + return true; + } + } + } + false + } + fn error(message: impl Into) -> TrustedServerError { TrustedServerError::Integration { integration: PREBID_INTEGRATION_ID.to_string(), @@ -133,6 +221,7 @@ impl 
PrebidIntegration { } } + fn handle_script_handler(&self) -> Result> { let body = "// Script overridden by Trusted Server\n"; @@ -246,11 +335,16 @@ impl IntegrationProxy for PrebidIntegration { PREBID_INTEGRATION_ID } + fn routes(&self) -> Vec { let mut routes = vec![]; - if let Some(script_path) = &self.config.script_handler { - routes.push(IntegrationEndpoint::get(script_path.clone())); + // Register routes for script removal patterns + // Patterns can be exact paths (e.g., "/prebid.min.js") or use matchit wildcards + // (e.g., "/static/prebid/{*rest}") + for pattern in &self.config.script_patterns { + let static_path: &'static str = Box::leak(pattern.clone().into_boxed_str()); + routes.push(IntegrationEndpoint::get(static_path)); } routes @@ -265,9 +359,8 @@ impl IntegrationProxy for PrebidIntegration { let method = req.get_method().clone(); match method { - Method::GET if self.config.script_handler.as_ref() == Some(&path) => { - self.handle_script_handler() - } + // Serve empty JS for matching script patterns + Method::GET if self.matches_script_pattern(&path) => self.handle_script_handler(), _ => Err(Report::new(Self::error(format!( "Unsupported Prebid route: {path}" )))), @@ -281,7 +374,7 @@ impl IntegrationAttributeRewriter for PrebidIntegration { } fn handles_attribute(&self, attribute: &str) -> bool { - self.config.auto_configure && matches!(attribute, "src" | "href") + matches!(attribute, "src" | "href") } fn rewrite( @@ -290,7 +383,7 @@ impl IntegrationAttributeRewriter for PrebidIntegration { attr_value: &str, _ctx: &IntegrationAttributeContext<'_>, ) -> AttributeRewriteAction { - if self.config.auto_configure && is_prebid_script_url(attr_value) { + if self.matches_script_url(attr_value) { AttributeRewriteAction::remove_element() } else { AttributeRewriteAction::keep() @@ -359,15 +452,6 @@ fn build_openrtb_from_ts( } } -fn is_prebid_script_url(url: &str) -> bool { - let lower = url.to_ascii_lowercase(); - let without_query = 
lower.split('?').next().unwrap_or(""); - let filename = without_query.rsplit('/').next().unwrap_or(""); - matches!( - filename, - "prebid.js" | "prebid.min.js" | "prebidjs.js" | "prebidjs.min.js" - ) -} #[allow(dead_code)] async fn handle_prebid_auction( @@ -425,9 +509,12 @@ async fn handle_prebid_auction( let response_body = pbs_response.take_body_bytes(); match serde_json::from_slice::(&response_body) { Ok(mut response_json) => { - let request_host = get_request_host(&req); - let request_scheme = get_request_scheme(&req); - transform_prebid_response(&mut response_json, &request_host, &request_scheme)?; + let request_info = RequestInfo::from_request(&req); + transform_prebid_response( + &mut response_json, + &request_info.host, + &request_info.scheme, + )?; let transformed_body = serde_json::to_vec(&response_json).change_context( TrustedServerError::Prebid { @@ -645,26 +732,6 @@ fn copy_request_headers(from: &Request, to: &mut Request) { } } -fn get_request_host(req: &Request) -> String { - req.get_header(header::HOST) - .and_then(|h| h.to_str().ok()) - .unwrap_or("") - .to_string() -} - -fn get_request_scheme(req: &Request) -> String { - if req.get_tls_protocol().is_some() || req.get_tls_cipher_openssl_name().is_some() { - return "https".to_string(); - } - - if let Some(proto) = req.get_header("X-Forwarded-Proto") { - if let Ok(proto_str) = proto.to_str() { - return proto_str.to_lowercase(); - } - } - - "https".to_string() -} /// Appends query parameters to a URL, handling both URLs with and without existing query strings. /// Returns the original URL unchanged if params are empty or already present. 
@@ -798,9 +865,7 @@ impl PrebidAuctionProvider { }; // Build ext object - let request_host = get_request_host(context.request); - let request_scheme = get_request_scheme(context.request); - + let request_info = RequestInfo::from_request(&context.request); let (signature, kid) = signer .map(|(s, sig)| (Some(sig), Some(s.kid.clone()))) .unwrap_or((None, None)); @@ -814,8 +879,8 @@ impl PrebidAuctionProvider { trusted_server: Some(TrustedServerExt { signature, kid, - request_host: Some(request_host), - request_scheme: Some(request_scheme), + request_host: Some(request_info.host), + request_scheme: Some(request_info.scheme), }), }); @@ -1086,10 +1151,9 @@ mod tests { server_url: "https://prebid.example".to_string(), timeout_ms: 1000, bidders: vec!["exampleBidder".to_string()], - auto_configure: true, debug: false, - script_handler: None, debug_query_params: None, + script_patterns: default_script_patterns(), } } @@ -1143,7 +1207,7 @@ mod tests { } #[test] - fn html_processor_keeps_prebid_scripts_when_auto_config_disabled() { + fn html_processor_keeps_prebid_scripts_when_no_patterns() { let html = r#" @@ -1159,7 +1223,7 @@ mod tests { "server_url": "https://test-prebid.com/openrtb2/auction", "timeout_ms": 1000, "bidders": ["mocktioneer"], - "auto_configure": false, + "script_patterns": [], "debug": false }), ) @@ -1184,16 +1248,16 @@ mod tests { ); assert!( processed.contains("prebid.min.js"), - "Prebid script should remain when auto-config is disabled" + "Prebid script should remain when no script patterns configured" ); assert!( processed.contains("cdn.prebid.org/prebid.js"), - "Prebid preload should remain when auto-config is disabled" + "Prebid preload should remain when no script patterns configured" ); } #[test] - fn html_processor_removes_prebid_scripts_when_auto_config_enabled() { + fn html_processor_removes_prebid_scripts_when_patterns_match() { let html = r#" @@ -1209,7 +1273,7 @@ mod tests { "server_url": "https://test-prebid.com/openrtb2/auction", 
"timeout_ms": 1000, "bidders": ["mocktioneer"], - "auto_configure": true, + "script_patterns": ["/prebid.js", "/prebid.min.js"], "debug": false }), ) @@ -1234,11 +1298,11 @@ mod tests { ); assert!( !processed.contains("prebid.min.js"), - "Prebid script should be removed when auto-config is enabled" + "Prebid script should be removed when script patterns match" ); assert!( !processed.contains("cdn.prebid.org/prebid.js"), - "Prebid preload should be removed when auto-config is enabled" + "Prebid preload should be removed when script patterns match" ); } @@ -1396,16 +1460,17 @@ mod tests { } #[test] - fn is_prebid_script_url_matches_common_variants() { - assert!(is_prebid_script_url("https://cdn.com/prebid.js")); - assert!(is_prebid_script_url( + fn matches_script_url_matches_common_variants() { + let integration = PrebidIntegration::new(base_config()); + assert!(integration.matches_script_url("https://cdn.com/prebid.js")); + assert!(integration.matches_script_url( "https://cdn.com/prebid.min.js?version=1" )); - assert!(!is_prebid_script_url("https://cdn.com/app.js")); + assert!(!integration.matches_script_url("https://cdn.com/app.js")); } #[test] - fn test_script_handler_config_parsing() { + fn test_script_patterns_config_parsing() { let toml_str = r#" [publisher] domain = "test-publisher.com" @@ -1422,7 +1487,7 @@ template = "{{client_ip}}:{{user_agent}}" [integrations.prebid] enabled = true server_url = "https://prebid.example" -script_handler = "/prebid.js" +script_patterns = ["/prebid.js", "/custom/prebid.min.js"] "#; let settings = Settings::from_toml(toml_str).expect("should parse TOML"); @@ -1431,11 +1496,13 @@ script_handler = "/prebid.js" .expect("should get config") .expect("should be enabled"); - assert_eq!(config.script_handler, Some("/prebid.js".to_string())); + assert_eq!(config.script_patterns.len(), 2); + assert!(config.script_patterns.contains(&"/prebid.js".to_string())); + 
assert!(config.script_patterns.contains(&"/custom/prebid.min.js".to_string())); } #[test] - fn test_script_handler_none_by_default() { + fn test_script_patterns_defaults() { let toml_str = r#" [publisher] domain = "test-publisher.com" @@ -1460,22 +1527,15 @@ server_url = "https://prebid.example" .expect("should get config") .expect("should be enabled"); - assert_eq!(config.script_handler, None); + // Should have default script patterns + assert!(!config.script_patterns.is_empty()); + assert!(config.script_patterns.contains(&"/prebid.js".to_string())); + assert!(config.script_patterns.contains(&"/prebid.min.js".to_string())); } #[test] fn test_script_handler_returns_empty_js() { - let config = PrebidIntegrationConfig { - enabled: true, - server_url: "https://prebid.example".to_string(), - timeout_ms: 1000, - bidders: vec![], - auto_configure: false, - debug: false, - script_handler: Some("/prebid.js".to_string()), - debug_query_params: None, - }; - let integration = PrebidIntegration::new(config); + let integration = PrebidIntegration::new(base_config()); let response = integration .handle_script_handler() @@ -1499,38 +1559,34 @@ server_url = "https://prebid.example" } #[test] - fn test_routes_includes_script_handler() { - let config = PrebidIntegrationConfig { - enabled: true, - server_url: "https://prebid.example".to_string(), - timeout_ms: 1000, - bidders: vec![], - auto_configure: false, - debug: false, - script_handler: Some("/prebid.js".to_string()), - debug_query_params: None, - }; - let integration = PrebidIntegration::new(config); + fn test_routes_includes_script_patterns() { + let integration = PrebidIntegration::new(base_config()); let routes = integration.routes(); - // Should have 1 route: script handler - assert_eq!(routes.len(), 1); + // Should have routes for default script patterns + assert!(!routes.is_empty()); - let has_script_route = routes + let has_prebid_js_route = routes .iter() .any(|r| r.path == "/prebid.js" && r.method == Method::GET); - 
assert!(has_script_route, "should register script handler route"); + assert!(has_prebid_js_route, "should register /prebid.js route"); + + let has_prebid_min_js_route = routes + .iter() + .any(|r| r.path == "/prebid.min.js" && r.method == Method::GET); + assert!(has_prebid_min_js_route, "should register /prebid.min.js route"); } #[test] - fn test_routes_without_script_handler() { - let config = base_config(); // Has script_handler: None + fn test_routes_with_empty_script_patterns() { + let mut config = base_config(); + config.script_patterns = vec![]; let integration = PrebidIntegration::new(config); let routes = integration.routes(); - // Should have 0 routes when no script handler configured + // Should have 0 routes when no script patterns configured assert_eq!(routes.len(), 0); } diff --git a/crates/common/src/publisher.rs b/crates/common/src/publisher.rs index 5d3b8a08..e0852e1f 100644 --- a/crates/common/src/publisher.rs +++ b/crates/common/src/publisher.rs @@ -3,7 +3,7 @@ use fastly::http::{header, StatusCode}; use fastly::{Body, Request, Response}; use crate::backend::ensure_backend_from_url; -use crate::http_util::serve_static_with_etag; +use crate::http_util::{serve_static_with_etag, RequestInfo}; use crate::constants::{HEADER_SYNTHETIC_TRUSTED_SERVER, HEADER_X_COMPRESS_HINT}; use crate::cookies::create_synthetic_cookie; @@ -15,65 +15,6 @@ use crate::streaming_processor::{Compression, PipelineConfig, StreamProcessor, S use crate::streaming_replacer::create_url_replacer; use crate::synthetic::get_or_generate_synthetic_id; -/// Detects the request scheme (HTTP or HTTPS) using Fastly SDK methods and headers. -/// -/// Tries multiple methods in order of reliability: -/// 1. Fastly SDK TLS detection methods (most reliable) -/// 2. Forwarded header (RFC 7239) -/// 3. X-Forwarded-Proto header -/// 4. Fastly-SSL header (least reliable, can be spoofed) -/// 5. Default to HTTP -fn detect_request_scheme(req: &Request) -> String { - // 1. 
First try Fastly SDK's built-in TLS detection methods - // These are the most reliable as they check the actual connection - if let Some(tls_protocol) = req.get_tls_protocol() { - // If we have a TLS protocol, the connection is definitely HTTPS - log::debug!("TLS protocol detected: {}", tls_protocol); - return "https".to_string(); - } - - // Also check TLS cipher - if present, connection is HTTPS - if req.get_tls_cipher_openssl_name().is_some() { - log::debug!("TLS cipher detected, using HTTPS"); - return "https".to_string(); - } - - // 2. Try the Forwarded header (RFC 7239) - if let Some(forwarded) = req.get_header("forwarded") { - if let Ok(forwarded_str) = forwarded.to_str() { - // Parse the Forwarded header - // Format: Forwarded: for=192.0.2.60;proto=https;by=203.0.113.43 - if forwarded_str.contains("proto=https") { - return "https".to_string(); - } else if forwarded_str.contains("proto=http") { - return "http".to_string(); - } - } - } - - // 3. Try X-Forwarded-Proto header - if let Some(proto) = req.get_header("x-forwarded-proto") { - if let Ok(proto_str) = proto.to_str() { - let proto_lower = proto_str.to_lowercase(); - if proto_lower == "https" || proto_lower == "http" { - return proto_lower; - } - } - } - - // 4. Check Fastly-SSL header (can be spoofed by clients, use as last resort) - if let Some(ssl) = req.get_header("fastly-ssl") { - if let Ok(ssl_str) = ssl.to_str() { - if ssl_str == "1" || ssl_str.to_lowercase() == "true" { - return "https".to_string(); - } - } - } - - // Default to HTTP (changed from HTTPS based on your settings file) - "http".to_string() -} - /// Unified tsjs static serving: `/static/tsjs=` /// Accepts: `tsjs-core(.min).js`, `tsjs-ext(.min).js`, `tsjs-creative(.min).js` pub fn handle_tsjs_dynamic(req: Request) -> Result> { @@ -235,29 +176,20 @@ pub fn handle_publisher_request( // Prebid.js requests are not intercepted here anymore. 
The HTML processor rewrites // any Prebid script references to `/static/tsjs-ext.min.js` when auto-configure is enabled. - // Extract the request host from the incoming request - let request_host = req - .get_header(header::HOST) - .map(|h| h.to_str().unwrap_or_default()) - .unwrap_or_default() - .to_string(); + // Extract request host and scheme from headers (supports X-Forwarded-Host/Proto for chained proxies) + let request_info = RequestInfo::from_request(&req); + let request_host = &request_info.host; + let request_scheme = &request_info.scheme; - // Detect the request scheme using multiple methods - let request_scheme = detect_request_scheme(&req); - - // Log detection details for debugging log::debug!( - "Scheme detection - TLS Protocol: {:?}, TLS Cipher: {:?}, Forwarded: {:?}, X-Forwarded-Proto: {:?}, Fastly-SSL: {:?}, Result: {}", - req.get_tls_protocol(), - req.get_tls_cipher_openssl_name(), - req.get_header("forwarded"), + "Request info: host={}, scheme={} (X-Forwarded-Host: {:?}, Host: {:?}, X-Forwarded-Proto: {:?})", + request_host, + request_scheme, + req.get_header("x-forwarded-host"), + req.get_header(header::HOST), req.get_header("x-forwarded-proto"), - req.get_header("fastly-ssl"), - request_scheme ); - log::debug!("Request host: {}, scheme: {}", request_host, request_scheme); - // Generate synthetic identifiers before the request body is consumed. 
let synthetic_id = get_or_generate_synthetic_id(settings, &req)?; let has_synthetic_cookie = req @@ -331,8 +263,8 @@ pub fn handle_publisher_request( content_encoding: &content_encoding, origin_host: &origin_host, origin_url: &settings.publisher.origin_url, - request_host: &request_host, - request_scheme: &request_scheme, + request_host, + request_scheme, settings, content_type: &content_type, integration_registry, @@ -384,73 +316,6 @@ mod tests { use crate::test_support::tests::create_test_settings; use fastly::http::Method; - #[test] - fn test_detect_request_scheme() { - // Note: In tests, we can't mock the TLS methods on Request, so we test header fallbacks - - // Test Forwarded header with HTTPS - let mut req = Request::new(Method::GET, "https://test.example.com/page"); - req.set_header("forwarded", "for=192.0.2.60;proto=https;by=203.0.113.43"); - assert_eq!(detect_request_scheme(&req), "https"); - - // Test Forwarded header with HTTP - let mut req = Request::new(Method::GET, "http://test.example.com/page"); - req.set_header("forwarded", "for=192.0.2.60;proto=http;by=203.0.113.43"); - assert_eq!(detect_request_scheme(&req), "http"); - - // Test X-Forwarded-Proto with HTTPS - let mut req = Request::new(Method::GET, "https://test.example.com/page"); - req.set_header("x-forwarded-proto", "https"); - assert_eq!(detect_request_scheme(&req), "https"); - - // Test X-Forwarded-Proto with HTTP - let mut req = Request::new(Method::GET, "http://test.example.com/page"); - req.set_header("x-forwarded-proto", "http"); - assert_eq!(detect_request_scheme(&req), "http"); - - // Test Fastly-SSL header - let mut req = Request::new(Method::GET, "https://test.example.com/page"); - req.set_header("fastly-ssl", "1"); - assert_eq!(detect_request_scheme(&req), "https"); - - // Test default to HTTP when no headers present - let req = Request::new(Method::GET, "https://test.example.com/page"); - assert_eq!(detect_request_scheme(&req), "http"); - - // Test priority: Forwarded takes 
precedence over X-Forwarded-Proto - let mut req = Request::new(Method::GET, "https://test.example.com/page"); - req.set_header("forwarded", "proto=https"); - req.set_header("x-forwarded-proto", "http"); - assert_eq!(detect_request_scheme(&req), "https"); - } - - #[test] - fn test_handle_publisher_request_extracts_headers() { - // Test that the function correctly extracts host and scheme from request headers - let mut req = Request::new(Method::GET, "https://test.example.com/page"); - req.set_header("host", "test.example.com"); - req.set_header("x-forwarded-proto", "https"); - - // Extract headers like the function does - let request_host = req - .get_header("host") - .map(|h| h.to_str().unwrap_or_default()) - .unwrap_or_default() - .to_string(); - - let request_scheme = req - .get_header("x-forwarded-proto") - .and_then(|h| h.to_str().ok()) - .unwrap_or("https") - .to_string(); - - assert_eq!(request_host, "test.example.com"); - assert_eq!(request_scheme, "https"); - } - - // Note: test_handle_publisher_request_default_https_scheme and test_handle_publisher_request_http_scheme - // were removed as they're redundant with test_detect_request_scheme which covers all scheme detection cases - #[test] fn test_content_type_detection() { // Test which content types should be processed diff --git a/crates/common/src/synthetic.rs b/crates/common/src/synthetic.rs index d84a95ef..b60736cf 100644 --- a/crates/common/src/synthetic.rs +++ b/crates/common/src/synthetic.rs @@ -14,6 +14,7 @@ use sha2::Sha256; use crate::constants::{HEADER_SYNTHETIC_PUB_USER_ID, HEADER_SYNTHETIC_TRUSTED_SERVER}; use crate::cookies::handle_request_cookies; use crate::error::TrustedServerError; +use crate::http_util::RequestInfo; use crate::settings::Settings; type HmacSha256 = Hmac; @@ -41,9 +42,13 @@ pub fn generate_synthetic_id( let auth_user_id = req .get_header(HEADER_SYNTHETIC_PUB_USER_ID) .map(|h| h.to_str().unwrap_or("anonymous")); - let publisher_domain = req - .get_header(header::HOST) - 
.map(|h| h.to_str().unwrap_or("unknown")); + // Use RequestInfo for consistent host extraction (respects X-Forwarded-Host) + let request_info = RequestInfo::from_request(req); + let publisher_domain = if request_info.host.is_empty() { + None + } else { + Some(request_info.host.as_str()) + }; let client_ip = req.get_client_ip_addr().map(|ip| ip.to_string()); let accept_language = req .get_header(header::ACCEPT_LANGUAGE) diff --git a/docs/guide/api-reference.md b/docs/guide/api-reference.md index b10d345b..eabd1fa1 100644 --- a/docs/guide/api-reference.md +++ b/docs/guide/api-reference.md @@ -421,18 +421,22 @@ See [First-Party Endpoints](#get-first-party-ad) above. #### POST /third-party/ad See [First-Party Endpoints](#post-third-party-ad) above. -#### GET /prebid.js (Optional) -Returns empty JavaScript to override Prebid.js when `script_handler` is configured. +#### GET /prebid.js, /prebid.min.js, etc. (Script Override) +Returns empty JavaScript to override Prebid.js scripts when the Prebid integration is enabled. By default, exact requests to `/prebid.js`, `/prebid.min.js`, `/prebidjs.js`, or `/prebidjs.min.js` will be intercepted and served an empty script. 
**Configuration:** ```toml [integrations.prebid] -script_handler = "/prebid.js" +# Default patterns (exact paths) +script_patterns = ["/prebid.js", "/prebid.min.js", "/prebidjs.js", "/prebidjs.min.js"] + +# Use wildcard patterns to match paths under a prefix +# script_patterns = ["/static/prebid/*"] ``` **Response:** - **Content-Type:** `application/javascript; charset=utf-8` -- **Body:** `// Prebid.js override by Trusted Server` +- **Body:** `// Script overridden by Trusted Server` - **Cache:** `immutable, max-age=31536000` --- diff --git a/docs/guide/configuration-reference.md b/docs/guide/configuration-reference.md index c820ca79..db4d7ec8 100644 --- a/docs/guide/configuration-reference.md +++ b/docs/guide/configuration-reference.md @@ -597,9 +597,15 @@ All integrations support: | `server_url` | String | Required | Prebid Server endpoint URL | | `timeout_ms` | Integer | `1000` | Request timeout in milliseconds | | `bidders` | Array[String] | `[]` | List of enabled bidders | -| `auto_configure` | Boolean | `false` | Auto-inject Prebid.js shim | | `debug` | Boolean | `false` | Enable debug logging | -| `script_handler` | String | Optional | Custom script endpoint path | +| `script_patterns` | Array[String] | See below | Patterns for removing Prebid script tags and intercepting requests | + +**Default `script_patterns`**: +```toml +["/prebid.js", "/prebid.min.js", "/prebidjs.js", "/prebidjs.min.js"] +``` + +These patterns use suffix matching when stripping HTML, so `/static/prebid/v8/prebid.min.js` matches because it ends with `/prebid.min.js`. For request interception, exact paths are registered unless you use wildcard patterns (e.g., `/static/prebid/*`), which match paths under that prefix. 
**Example**: ```toml @@ -608,8 +614,8 @@ enabled = true server_url = "https://prebid-server.example/openrtb2/auction" timeout_ms = 1200 bidders = ["kargo", "rubicon", "appnexus", "openx"] -auto_configure = true debug = false +# script_patterns = ["/static/prebid/*"] # Optional: restrict to specific path ``` **Environment Override**: @@ -618,8 +624,9 @@ TRUSTED_SERVER__INTEGRATIONS__PREBID__ENABLED=true TRUSTED_SERVER__INTEGRATIONS__PREBID__SERVER_URL=https://prebid.example/auction TRUSTED_SERVER__INTEGRATIONS__PREBID__TIMEOUT_MS=1200 TRUSTED_SERVER__INTEGRATIONS__PREBID__BIDDERS=kargo,rubicon,appnexus -TRUSTED_SERVER__INTEGRATIONS__PREBID__AUTO_CONFIGURE=true TRUSTED_SERVER__INTEGRATIONS__PREBID__DEBUG=false +TRUSTED_SERVER__INTEGRATIONS__PREBID__SCRIPT_PATTERNS__0=/prebid.js +TRUSTED_SERVER__INTEGRATIONS__PREBID__SCRIPT_PATTERNS__1=/prebid.min.js ``` ### Next.js Integration diff --git a/docs/guide/configuration.md b/docs/guide/configuration.md index 962e4403..d929a1ca 100644 --- a/docs/guide/configuration.md +++ b/docs/guide/configuration.md @@ -95,7 +95,7 @@ enabled = true server_url = "https://prebid-server.com/openrtb2/auction" timeout_ms = 1200 bidders = ["kargo", "rubicon", "appnexus"] -auto_configure = false +# script_patterns = ["/static/prebid/*"] ``` ### `fastly.toml` @@ -264,7 +264,7 @@ enabled = true server_url = "https://prebid-server.com/openrtb2/auction" timeout_ms = 1200 bidders = ["kargo", "rubicon", "appnexus"] -auto_configure = false +# script_patterns = ["/static/prebid/*"] ``` **Next.js**: diff --git a/docs/guide/environment-variables.md b/docs/guide/environment-variables.md index 82a11f96..ce4b5a6f 100644 --- a/docs/guide/environment-variables.md +++ b/docs/guide/environment-variables.md @@ -201,14 +201,15 @@ TRUSTED_SERVER__INTEGRATIONS__PREBID__TIMEOUT_MS=1000 # Bidders (comma-separated) TRUSTED_SERVER__INTEGRATIONS__PREBID__BIDDERS="appnexus,rubicon,openx" -# Auto-remove Prebid.js scripts 
-TRUSTED_SERVER__INTEGRATIONS__PREBID__AUTO_CONFIGURE=true - # Enable debug logging TRUSTED_SERVER__INTEGRATIONS__PREBID__DEBUG=false -# Optional: Script handler path -TRUSTED_SERVER__INTEGRATIONS__PREBID__SCRIPT_HANDLER="/prebid.js" +# Script patterns to remove Prebid tags and serve empty JS (indexed format) +# Default patterns match common Prebid filenames at exact paths +TRUSTED_SERVER__INTEGRATIONS__PREBID__SCRIPT_PATTERNS__0="/prebid.js" +TRUSTED_SERVER__INTEGRATIONS__PREBID__SCRIPT_PATTERNS__1="/prebid.min.js" +# For versioned paths, use wildcards: +# TRUSTED_SERVER__INTEGRATIONS__PREBID__SCRIPT_PATTERNS__0="/static/prebid/*" ``` **TOML Equivalent:** @@ -218,9 +219,8 @@ enabled = true server_url = "https://prebid-server.example.com" timeout_ms = 1000 bidders = ["appnexus", "rubicon", "openx"] -auto_configure = true debug = false -script_handler = "/prebid.js" +script_patterns = ["/prebid.js", "/prebid.min.js", "/prebidjs.js", "/prebidjs.min.js"] ``` --- @@ -642,7 +642,6 @@ export TRUSTED_SERVER__INTEGRATIONS__PREBID__ENABLED=true export TRUSTED_SERVER__INTEGRATIONS__PREBID__SERVER_URL="https://prebid-server.com" export TRUSTED_SERVER__INTEGRATIONS__PREBID__TIMEOUT_MS=2000 export TRUSTED_SERVER__INTEGRATIONS__PREBID__BIDDERS="appnexus,rubicon,openx" -export TRUSTED_SERVER__INTEGRATIONS__PREBID__AUTO_CONFIGURE=true # Optional: Security Headers export TRUSTED_SERVER__RESPONSE_HEADERS__STRICT_TRANSPORT_SECURITY="max-age=31536000" diff --git a/docs/guide/integration-guide.md b/docs/guide/integration-guide.md index 74f46cff..f96a9a38 100644 --- a/docs/guide/integration-guide.md +++ b/docs/guide/integration-guide.md @@ -276,7 +276,7 @@ enabled = true server_url = "https://prebid.example/openrtb2/auction" timeout_ms = 1200 bidders = ["equativ", "sampleBidder"] -auto_configure = true +# script_patterns = ["/static/prebid/*"] ``` Tests or scaffolding can inject configs by calling `settings.integrations.insert_config("prebid", &serde_json::json!({...}))`, the same 
helper that other integrations use. @@ -287,7 +287,7 @@ Tests or scaffolding can inject configs by calling `settings.integrations.insert **3. HTML Rewrites Through the Registry** -When `auto_configure` is enabled, the integration's `IntegrationAttributeRewriter` removes any `