diff --git a/crates/feedparser-rs-core/src/parser/atom.rs b/crates/feedparser-rs-core/src/parser/atom.rs index bc963b8..4e48f74 100644 --- a/crates/feedparser-rs-core/src/parser/atom.rs +++ b/crates/feedparser-rs-core/src/parser/atom.rs @@ -8,13 +8,14 @@ use crate::{ Content, Entry, FeedVersion, Generator, Link, MediaContent, MediaThumbnail, ParsedFeed, Person, Source, Tag, TextConstruct, TextType, }, - util::parse_date, + util::{base_url::BaseUrlContext, parse_date}, }; use quick_xml::{Reader, events::Event}; use super::common::{ EVENT_BUFFER_CAPACITY, FromAttributes, LimitedCollectionExt, bytes_to_string, check_depth, - init_feed, is_content_tag, is_dc_tag, is_media_tag, read_text, skip_element, skip_to_end, + extract_xml_base, init_feed, is_content_tag, is_dc_tag, is_media_tag, read_text, skip_element, + skip_to_end, }; /// Parse Atom 1.0 feed from raw bytes @@ -63,12 +64,19 @@ pub fn parse_atom10_with_limits(data: &[u8], limits: ParserLimits) -> Result { + if let Some(xml_base) = extract_xml_base(&e, limits.max_attribute_length) { + base_ctx.update_base(&xml_base); + } + depth += 1; - if let Err(e) = parse_feed_element(&mut reader, &mut feed, &limits, &mut depth) { + if let Err(e) = + parse_feed_element(&mut reader, &mut feed, &limits, &mut depth, &base_ctx) + { feed.bozo = true; feed.bozo_exception = Some(e.to_string()); } @@ -95,6 +103,7 @@ fn parse_feed_element( feed: &mut ParsedFeed, limits: &ParserLimits, depth: &mut usize, + base_ctx: &BaseUrlContext, ) -> Result<()> { let mut buf = Vec::with_capacity(EVENT_BUFFER_CAPACITY); @@ -117,10 +126,12 @@ fn parse_feed_element( feed.feed.set_title(text); } b"link" => { - if let Some(link) = Link::from_attributes( + if let Some(mut link) = Link::from_attributes( element.attributes().flatten(), limits.max_attribute_length, ) { + link.href = base_ctx.resolve_safe(&link.href); + if feed.feed.link.is_none() && link.rel.as_deref() == Some("alternate") { feed.feed.link = Some(link.href.clone()); @@ -144,6 +155,10 @@ fn 
parse_feed_element( let text = read_text(reader, &mut buf, limits)?; feed.feed.updated = parse_date(&text); } + b"published" if !is_empty => { + let text = read_text(reader, &mut buf, limits)?; + feed.feed.published = parse_date(&text); + } b"author" if !is_empty => { if let Ok(person) = parse_person(reader, &mut buf, limits, depth) { if feed.feed.author.is_none() { @@ -177,10 +192,12 @@ fn parse_feed_element( feed.feed.set_generator(generator); } b"icon" if !is_empty => { - feed.feed.icon = Some(read_text(reader, &mut buf, limits)?); + let url = read_text(reader, &mut buf, limits)?; + feed.feed.icon = Some(base_ctx.resolve_safe(&url)); } b"logo" if !is_empty => { - feed.feed.logo = Some(read_text(reader, &mut buf, limits)?); + let url = read_text(reader, &mut buf, limits)?; + feed.feed.logo = Some(base_ctx.resolve_safe(&url)); } b"rights" if !is_empty => { let text = parse_text_construct(reader, &mut buf, &element, limits)?; @@ -191,7 +208,14 @@ fn parse_feed_element( continue; } - match parse_entry(reader, &mut buf, limits, depth) { + let mut entry_ctx = base_ctx.child(); + if let Some(xml_base) = + extract_xml_base(&element, limits.max_attribute_length) + { + entry_ctx.update_base(&xml_base); + } + + match parse_entry(reader, &mut buf, limits, depth, &entry_ctx) { Ok(entry) => feed.entries.push(entry), Err(e) => { feed.bozo = true; @@ -249,6 +273,7 @@ fn parse_entry( buf: &mut Vec, limits: &ParserLimits, depth: &mut usize, + base_ctx: &BaseUrlContext, ) -> Result { let mut entry = Entry::with_capacity(); @@ -271,10 +296,12 @@ fn parse_entry( entry.set_title(text); } b"link" => { - if let Some(link) = Link::from_attributes( + if let Some(mut link) = Link::from_attributes( element.attributes().flatten(), limits.max_attribute_length, ) { + link.href = base_ctx.resolve_safe(&link.href); + if entry.link.is_none() && link.rel.as_deref() == Some("alternate") { entry.link = Some(link.href.clone()); } diff --git a/crates/feedparser-rs-core/src/parser/common.rs 
b/crates/feedparser-rs-core/src/parser/common.rs index d906e05..efa5540 100644 --- a/crates/feedparser-rs-core/src/parser/common.rs +++ b/crates/feedparser-rs-core/src/parser/common.rs @@ -244,6 +244,47 @@ pub fn is_itunes_tag(name: &[u8], tag: &[u8]) -> bool { name == tag } +/// Extract xml:base attribute from element +/// +/// Returns the base URL string if an `xml:base` (or unprefixed `base`) attribute exists. +/// Respects the `max_attr_length` limit for `DoS` protection. +/// +/// # Arguments +/// +/// * `element` - The XML element to extract xml:base from +/// * `max_attr_length` - Maximum allowed attribute length (`DoS` protection) +/// +/// # Returns +/// +/// * `Some(String)` - The xml:base value if found and within length limit +/// * `None` - If attribute not found, exceeds length limit, or cannot be unescaped +/// +/// # Examples +/// +/// ```ignore +/// use feedparser_rs::parser::common::extract_xml_base; +/// +/// let element = /* BytesStart from quick-xml */; +/// if let Some(base) = extract_xml_base(&element, 1024) { +/// println!("Base URL: {}", base); +/// } +/// ``` +pub fn extract_xml_base( + element: &quick_xml::events::BytesStart, + max_attr_length: usize, +) -> Option { + element + .attributes() + .flatten() + .find(|attr| { + let key = attr.key.as_ref(); + key == b"xml:base" || key == b"base" + }) + .filter(|attr| attr.value.len() <= max_attr_length) + .and_then(|attr| attr.unescape_value().ok()) + .map(|s| s.to_string()) +} + /// Read text content from current XML element (handles text and CDATA) pub fn read_text( reader: &mut Reader<&[u8]>, diff --git a/crates/feedparser-rs-core/src/parser/rss.rs b/crates/feedparser-rs-core/src/parser/rss.rs index 1341fc2..6223c33 100644 --- a/crates/feedparser-rs-core/src/parser/rss.rs +++ b/crates/feedparser-rs-core/src/parser/rss.rs @@ -10,7 +10,7 @@ use crate::{ PodcastPerson, PodcastTranscript, Source, Tag, TextConstruct, TextType, parse_duration, parse_explicit, }, - util::{parse_date, text::truncate_to_length}, + util::{base_url::BaseUrlContext, 
parse_date, text::truncate_to_length}, }; use quick_xml::{Reader, events::Event}; @@ -105,12 +105,15 @@ pub fn parse_rss20_with_limits(data: &[u8], limits: ParserLimits) -> Result { depth += 1; - if let Err(e) = parse_channel(&mut reader, &mut feed, &limits, &mut depth) { + if let Err(e) = + parse_channel(&mut reader, &mut feed, &limits, &mut depth, &mut base_ctx) + { feed.bozo = true; feed.bozo_exception = Some(e.to_string()); } @@ -136,6 +139,7 @@ fn parse_channel( feed: &mut ParsedFeed, limits: &ParserLimits, depth: &mut usize, + base_ctx: &mut BaseUrlContext, ) -> Result<()> { let mut buf = Vec::with_capacity(EVENT_BUFFER_CAPACITY); @@ -159,7 +163,7 @@ fn parse_channel( match tag.as_slice() { b"title" | b"link" | b"description" | b"language" | b"pubDate" | b"managingEditor" | b"webMaster" | b"generator" | b"ttl" | b"category" => { - parse_channel_standard(reader, &mut buf, &tag, feed, limits)?; + parse_channel_standard(reader, &mut buf, &tag, feed, limits, base_ctx)?; } b"image" => { if let Ok(image) = parse_image(reader, &mut buf, limits, depth) { @@ -171,7 +175,7 @@ fn parse_channel( continue; } - match parse_item(reader, &mut buf, limits, depth) { + match parse_item(reader, &mut buf, limits, depth, base_ctx) { Ok((entry, has_attr_errors)) => { if has_attr_errors { feed.bozo = true; @@ -256,6 +260,7 @@ fn parse_channel_standard( tag: &[u8], feed: &mut ParsedFeed, limits: &ParserLimits, + base_ctx: &mut BaseUrlContext, ) -> Result<()> { match tag { b"title" => { @@ -264,7 +269,11 @@ fn parse_channel_standard( b"link" => { let link_text = read_text(reader, buf, limits)?; feed.feed - .set_alternate_link(link_text, limits.max_links_per_feed); + .set_alternate_link(link_text.clone(), limits.max_links_per_feed); + + if base_ctx.base().is_none() { + base_ctx.update_base(&link_text); + } } b"description" => { feed.feed.subtitle = Some(read_text(reader, buf, limits)?); @@ -275,7 +284,7 @@ fn parse_channel_standard( b"pubDate" => { let text = read_text(reader, buf, 
limits)?; match parse_date(&text) { - Some(dt) => feed.feed.updated = Some(dt), + Some(dt) => feed.feed.published = Some(dt), None if !text.is_empty() => { feed.bozo = true; feed.bozo_exception = Some("Invalid pubDate format".to_string()); @@ -506,6 +515,7 @@ fn parse_item( buf: &mut Vec, limits: &ParserLimits, depth: &mut usize, + base_ctx: &BaseUrlContext, ) -> Result<(Entry, bool)> { let mut entry = Entry::with_capacity(); let mut has_attr_errors = false; @@ -534,10 +544,11 @@ fn parse_item( match tag.as_slice() { b"title" | b"link" | b"description" | b"guid" | b"pubDate" | b"author" | b"category" | b"comments" => { - parse_item_standard(reader, buf, &tag, &mut entry, limits)?; + parse_item_standard(reader, buf, &tag, &mut entry, limits, base_ctx)?; } b"enclosure" => { - if let Some(enclosure) = parse_enclosure(&attrs, limits) { + if let Some(mut enclosure) = parse_enclosure(&attrs, limits) { + enclosure.url = base_ctx.resolve_safe(&enclosure.url); entry .enclosures .try_push_limited(enclosure, limits.max_enclosures); @@ -591,6 +602,7 @@ fn parse_item_standard( tag: &[u8], entry: &mut Entry, limits: &ParserLimits, + base_ctx: &BaseUrlContext, ) -> Result<()> { match tag { b"title" => { @@ -598,10 +610,11 @@ fn parse_item_standard( } b"link" => { let link_text = read_text(reader, buf, limits)?; - entry.link = Some(link_text.clone()); + let resolved_link = base_ctx.resolve_safe(&link_text); + entry.link = Some(resolved_link.clone()); entry.links.try_push_limited( Link { - href: link_text, + href: resolved_link, rel: Some("alternate".to_string()), ..Default::default() }, @@ -1109,10 +1122,10 @@ mod tests { "#; let feed = parse_rss20(xml).unwrap(); - assert!(feed.feed.updated.is_some()); + assert!(feed.feed.published.is_some()); assert!(feed.entries[0].published.is_some()); - let dt = feed.feed.updated.unwrap(); + let dt = feed.feed.published.unwrap(); assert_eq!(dt.year(), 2024); assert_eq!(dt.month(), 12); assert_eq!(dt.day(), 14); diff --git 
a/crates/feedparser-rs-core/src/types/feed.rs b/crates/feedparser-rs-core/src/types/feed.rs index e167396..ede71ad 100644 --- a/crates/feedparser-rs-core/src/types/feed.rs +++ b/crates/feedparser-rs-core/src/types/feed.rs @@ -27,6 +27,8 @@ pub struct FeedMeta { pub subtitle_detail: Option, /// Last update date pub updated: Option>, + /// Initial publication date (RSS pubDate, Atom published) + pub published: Option>, /// Primary author name pub author: Option, /// Detailed author information diff --git a/crates/feedparser-rs-core/src/util/base_url.rs b/crates/feedparser-rs-core/src/util/base_url.rs index df2c007..5f997f3 100644 --- a/crates/feedparser-rs-core/src/util/base_url.rs +++ b/crates/feedparser-rs-core/src/util/base_url.rs @@ -264,6 +264,82 @@ impl BaseUrlContext { resolve_url(href, self.base.as_deref()) } + /// Resolves a URL against the current base with SSRF protection + /// + /// This method performs URL resolution and validates the result to prevent + /// Server-Side Request Forgery (SSRF) attacks via malicious xml:base attributes. + /// + /// # Security + /// + /// If the resolved URL fails SSRF safety checks (localhost, private IPs, + /// dangerous schemes), the original `href` is returned unchanged instead + /// of the resolved URL. When `href` is itself an absolute HTTP(S) URL that fails the checks, an empty string is returned instead. 
+ /// + /// # Arguments + /// + /// * `href` - The URL to resolve (may be relative or absolute) + /// + /// # Returns + /// + /// The resolved URL if safe; otherwise the original `href`, or an empty string when `href` is itself an unsafe absolute HTTP(S) URL + /// + /// # Examples + /// + /// ``` + /// use feedparser_rs::util::base_url::BaseUrlContext; + /// + /// // Safe URL resolution + /// let ctx = BaseUrlContext::with_base("http://example.com/"); + /// assert_eq!(ctx.resolve_safe("page.html"), "http://example.com/page.html"); + /// + /// // SSRF blocked - returns original href + /// let dangerous_ctx = BaseUrlContext::with_base("http://localhost/"); + /// assert_eq!(dangerous_ctx.resolve_safe("admin"), "admin"); + /// ``` + #[must_use] + pub fn resolve_safe(&self, href: &str) -> String { + let resolved = self.resolve(href); + + // Use lowercase for case-insensitive scheme comparison (RFC 3986) + let resolved_lower = resolved.to_lowercase(); + + // Block dangerous schemes (file://, data:, javascript:, ftp://, gopher://) + // Case-insensitive to prevent bypass via FILE://, JAVASCRIPT:, etc. 
+ if resolved_lower.starts_with("file://") + || resolved_lower.starts_with("data:") + || resolved_lower.starts_with("javascript:") + || resolved_lower.starts_with("ftp://") + || resolved_lower.starts_with("gopher://") + { + // Dangerous scheme - return original href + return href.to_string(); + } + + // Validate HTTP(S) URLs for SSRF + if resolved_lower.starts_with("http://") || resolved_lower.starts_with("https://") { + if is_safe_url(&resolved) { + resolved + } else { + // SSRF blocked - check if href itself is an unsafe absolute URL + // If href is an absolute URL pointing to dangerous target, return empty + // Otherwise return original relative href (safe since it requires base to resolve) + let href_is_unsafe_absolute = Url::parse(href).is_ok_and(|parsed_href| { + let is_http_scheme = matches!(parsed_href.scheme(), "http" | "https"); + is_http_scheme && !is_safe_url(href) + }); + + if href_is_unsafe_absolute { + String::new() + } else { + href.to_string() + } + } + } else { + // Other schemes (mailto:, tel:) or relative URLs pass through + resolved + } + } + /// Creates a child context inheriting this context's base #[must_use] pub fn child(&self) -> Self { diff --git a/crates/feedparser-rs-core/tests/test_url_resolution.rs b/crates/feedparser-rs-core/tests/test_url_resolution.rs new file mode 100644 index 0000000..83f8566 --- /dev/null +++ b/crates/feedparser-rs-core/tests/test_url_resolution.rs @@ -0,0 +1,213 @@ +//! URL resolution integration tests for xml:base support. 
+#![allow(missing_docs)] + +use chrono::Datelike; +use feedparser_rs::parse; + +#[test] +fn test_atom_feed_level_xml_base() { + let xml = br#" + + Test Feed + + icon.png + logo.png + + "#; + + let feed = parse(xml).unwrap(); + + assert_eq!( + feed.feed.links[0].href, + "http://example.org/blog/index.html" + ); + assert_eq!( + feed.feed.icon.as_deref(), + Some("http://example.org/blog/icon.png") + ); + assert_eq!( + feed.feed.logo.as_deref(), + Some("http://example.org/blog/logo.png") + ); +} + +#[test] +fn test_atom_entry_level_xml_base() { + let xml = br#" + + Test + + Post 1 + + + + "#; + + let feed = parse(xml).unwrap(); + let entry = &feed.entries[0]; + + assert_eq!( + entry.link.as_deref(), + Some("http://example.org/posts/123.html") + ); + assert_eq!(entry.links[0].href, "http://example.org/posts/123.html"); +} + +#[test] +fn test_atom_nested_xml_base_override() { + let xml = br#" + + Test + + Post 1 + + + + "#; + + let feed = parse(xml).unwrap(); + let entry = &feed.entries[0]; + + // Entry's absolute xml:base overrides feed base + assert_eq!(entry.link.as_deref(), Some("http://other.org/post/123")); +} + +#[test] +fn test_rss_item_link_resolution() { + let xml = br#" + + + Test Podcast + http://example.org/feed/ + + Episode 1 + episodes/ep1.html + + + + "#; + + let feed = parse(xml).unwrap(); + let entry = &feed.entries[0]; + + assert_eq!( + entry.link.as_deref(), + Some("http://example.org/feed/episodes/ep1.html") + ); +} + +#[test] +fn test_rss_enclosure_url_resolution() { + let xml = br#" + + + Podcast + http://example.org/feed/ + + Episode 1 + + + + + "#; + + let feed = parse(xml).unwrap(); + let enclosure = &feed.entries[0].enclosures[0]; + + assert_eq!(enclosure.url, "http://example.org/feed/media/ep1.mp3"); +} + +#[test] +fn test_absolute_urls_unchanged() { + let xml = br#" + + + + + + "#; + + let feed = parse(xml).unwrap(); + + // Absolute URLs should not be modified + assert_eq!( + feed.entries[0].link.as_deref(), + Some("http://absolute.com/page") 
+ ); +} + +#[test] +fn test_mailto_urls_unchanged() { + let xml = br#" + + + + + + "#; + + let feed = parse(xml).unwrap(); + + // Special schemes should be preserved + assert_eq!( + feed.entries[0].link.as_deref(), + Some("mailto:test@example.com") + ); +} + +#[test] +fn test_rss_no_base_url_leaves_relative() { + let xml = br#" + + + Test + + relative/link.html + + + + "#; + + let feed = parse(xml).unwrap(); + + // Without a channel link, relative URLs stay relative + assert_eq!(feed.entries[0].link.as_deref(), Some("relative/link.html")); +} + +#[test] +fn test_atom_feed_published_field() { + let xml = br#" + + Test Feed + 2025-01-01T00:00:00Z + + "#; + + let feed = parse(xml).unwrap(); + + assert!(feed.feed.published.is_some()); + let dt = feed.feed.published.unwrap(); + assert_eq!(dt.year(), 2025); + assert_eq!(dt.month(), 1); + assert_eq!(dt.day(), 1); +} + +#[test] +fn test_rss_channel_pubdate_maps_to_published() { + let xml = br#" + + + Test Podcast + Wed, 18 Dec 2024 10:00:00 +0000 + + + "#; + + let feed = parse(xml).unwrap(); + + assert!(feed.feed.published.is_some()); + let dt = feed.feed.published.unwrap(); + assert_eq!(dt.year(), 2024); + assert_eq!(dt.month(), 12); + assert_eq!(dt.day(), 18); +} diff --git a/crates/feedparser-rs-core/tests/test_url_security.rs b/crates/feedparser-rs-core/tests/test_url_security.rs new file mode 100644 index 0000000..4feaa50 --- /dev/null +++ b/crates/feedparser-rs-core/tests/test_url_security.rs @@ -0,0 +1,412 @@ +//! SSRF Protection Tests for URL Resolution. +//! +//! These tests verify that malicious xml:base attributes cannot be used +//! to create Server-Side Request Forgery (SSRF) attacks. 
+#![allow(missing_docs)] + +use feedparser_rs::parse; + +#[test] +fn test_ssrf_localhost_blocked() { + let xml = br#" + + admin/config + "#; + + let feed = parse(xml).unwrap(); + let icon = feed.feed.icon.as_deref().unwrap(); + assert!( + !icon.starts_with("http://localhost"), + "SSRF to localhost should be blocked, got: {icon}" + ); + // Should return original relative URL instead + assert_eq!(icon, "admin/config"); +} + +#[test] +fn test_ssrf_localhost_domain_blocked() { + let xml = br#" + + secret/api/key + "#; + + let feed = parse(xml).unwrap(); + let logo = feed.feed.logo.as_deref().unwrap(); + assert!( + !logo.contains("localhost"), + "SSRF to localhost domain should be blocked, got: {logo}" + ); +} + +#[test] +fn test_ssrf_loopback_ip_blocked() { + let xml = br#" + + config.php + "#; + + let feed = parse(xml).unwrap(); + let icon = feed.feed.icon.as_deref().unwrap(); + assert!( + !icon.contains("127.0.0.1"), + "SSRF to loopback IP should be blocked, got: {icon}" + ); +} + +#[test] +fn test_ssrf_private_ip_192_168_blocked() { + let xml = br#" + + config.php + "#; + + let feed = parse(xml).unwrap(); + let icon = feed.feed.icon.as_deref().unwrap(); + assert!( + !icon.contains("192.168"), + "SSRF to private IP 192.168.x.x should be blocked, got: {icon}" + ); +} + +#[test] +fn test_ssrf_private_ip_10_x_blocked() { + let xml = br#" + + admin/backup.sql + "#; + + let feed = parse(xml).unwrap(); + let icon = feed.feed.icon.as_deref().unwrap(); + assert!( + !icon.contains("10.0.0.1"), + "SSRF to private IP 10.x.x.x should be blocked, got: {icon}" + ); +} + +#[test] +fn test_ssrf_private_ip_172_16_blocked() { + let xml = br#" + + internal/service + "#; + + let feed = parse(xml).unwrap(); + let icon = feed.feed.icon.as_deref().unwrap(); + assert!( + !icon.contains("172.20"), + "SSRF to private IP 172.16-31.x.x should be blocked, got: {icon}" + ); +} + +#[test] +fn test_ssrf_metadata_endpoint_blocked() { + let xml = br#" + + meta-data/iam/security-credentials/ + "#; + + 
let feed = parse(xml).unwrap(); + let icon = feed.feed.icon.as_deref().unwrap(); + assert!( + !icon.contains("169.254.169.254"), + "SSRF to AWS metadata endpoint should be blocked, got: {icon}" + ); +} + +#[test] +fn test_ssrf_ipv6_loopback_blocked() { + let xml = br#" + + admin/config + "#; + + let feed = parse(xml).unwrap(); + let icon = feed.feed.icon.as_deref().unwrap(); + assert!( + !icon.contains("[::1]"), + "SSRF to IPv6 loopback should be blocked, got: {icon}" + ); +} + +#[test] +fn test_safe_public_urls_work() { + let xml = br#" + + images/icon.png + images/logo.png + "#; + + let feed = parse(xml).unwrap(); + assert_eq!( + feed.feed.icon.as_deref(), + Some("http://example.com/images/icon.png") + ); + assert_eq!( + feed.feed.logo.as_deref(), + Some("http://example.com/images/logo.png") + ); +} + +#[test] +fn test_safe_https_urls_work() { + let xml = br#" + + icon.png + "#; + + let feed = parse(xml).unwrap(); + assert_eq!( + feed.feed.icon.as_deref(), + Some("https://secure.example.com/icon.png") + ); +} + +#[test] +fn test_rss_enclosure_ssrf_blocked() { + // In RSS, the channel link serves as the base URL + // Test with malicious channel link + let xml = br#" + + + Test + http://192.168.1.1/ + + Test Item + + + + "#; + + let feed = parse(xml).unwrap(); + let enclosure = &feed.entries[0].enclosures[0]; + assert!( + !enclosure.url.contains("192.168"), + "SSRF in RSS enclosure should be blocked, got: {}", + enclosure.url + ); + // Should return original relative URL when base is malicious + assert_eq!(enclosure.url, "backup.sql"); +} + +#[test] +fn test_rss_link_ssrf_blocked() { + // In RSS, the channel link serves as the base URL + let xml = br#" + + + Test + http://localhost/ + + Test Item + admin/config.php + + + "#; + + let feed = parse(xml).unwrap(); + let link = feed.entries[0].link.as_deref().unwrap(); + assert!( + !link.contains("localhost"), + "SSRF in RSS item link should be blocked, got: {link}" + ); + // Should return original relative URL when 
base is malicious + assert_eq!(link, "admin/config.php"); +} + +#[test] +fn test_atom_link_ssrf_blocked() { + let xml = br#" + + test + Test + 2024-01-01T00:00:00Z + + "#; + + let feed = parse(xml).unwrap(); + let link_href = &feed.feed.links[0].href; + assert!( + !link_href.contains("10.0.0.1"), + "SSRF in Atom link should be blocked, got: {link_href}" + ); + assert_eq!(link_href, "admin/panel"); +} + +#[test] +fn test_nested_xml_base_ssrf_blocked() { + let xml = br#" + + + test + Test + 2024-01-01T00:00:00Z + + + "#; + + let feed = parse(xml).unwrap(); + let entry_link = &feed.entries[0].links[0].href; + assert!( + !entry_link.contains("192.168"), + "SSRF via nested xml:base should be blocked, got: {entry_link}" + ); +} + +#[test] +fn test_absolute_urls_bypass_malicious_base() { + // Even with malicious base, absolute URLs should work + let xml = br#" + + http://cdn.example.com/icon.png + "#; + + let feed = parse(xml).unwrap(); + assert_eq!( + feed.feed.icon.as_deref(), + Some("http://cdn.example.com/icon.png"), + "Absolute URLs should override base" + ); +} + +#[test] +fn test_relative_urls_without_base_unchanged() { + // Without xml:base, relative URLs should remain relative + let xml = br#" + + images/icon.png + "#; + + let feed = parse(xml).unwrap(); + assert_eq!( + feed.feed.icon.as_deref(), + Some("images/icon.png"), + "Relative URLs without base should remain unchanged" + ); +} + +#[test] +fn test_special_schemes_unaffected() { + // mailto: and tel: should pass through + let xml = br#" + + + "#; + + let feed = parse(xml).unwrap(); + assert_eq!( + feed.feed.links[0].href, "mailto:admin@example.com", + "Special schemes should not be affected by base" + ); +} + +#[test] +fn test_file_scheme_protection() { + // file:// schemes in xml:base should not resolve because resolve_url + // only works with http/https bases. Non-HTTP schemes result in the + // original href being returned unchanged. 
+ let xml = br#" + + passwd + "#; + + let feed = parse(xml).unwrap(); + let icon = feed.feed.icon.as_deref().unwrap(); + // file:// base URL parsing fails in url::Url::parse, so original href is returned + assert_eq!(icon, "passwd", "file:// base should not resolve"); + assert!( + !icon.starts_with("file://"), + "file:// scheme should not be in result" + ); +} + +#[test] +fn test_google_metadata_domain_blocked() { + let xml = br#" + + computeMetadata/v1/instance/service-accounts/default/token + "#; + + let feed = parse(xml).unwrap(); + let icon = feed.feed.icon.as_deref().unwrap(); + assert!( + !icon.contains("metadata.google.internal"), + "Google metadata endpoint should be blocked, got: {icon}" + ); +} + +#[test] +fn test_absolute_malicious_url_in_href_blocked() { + // If href itself is an absolute malicious URL, it should be blocked + // even when base URL is safe (or when there's no base URL) + let xml = br#" + + http://localhost/admin/config + "#; + + let feed = parse(xml).unwrap(); + let icon = feed.feed.icon.as_deref().unwrap_or(""); + assert!( + !icon.contains("localhost"), + "Absolute malicious URL in href should be blocked, got: {icon}" + ); + // Should return empty string for dangerous absolute URLs + assert!( + icon.is_empty(), + "Dangerous absolute URL should result in empty string, got: {icon}" + ); +} + +#[test] +fn test_absolute_malicious_private_ip_in_href_blocked() { + // Private IP in href should be blocked + let xml = br#" + + + "#; + + let feed = parse(xml).unwrap(); + let link_href = &feed.feed.links[0].href; + assert!( + !link_href.contains("192.168"), + "Absolute malicious private IP in href should be blocked, got: {link_href}" + ); + // Should return empty string for dangerous absolute URLs + assert!( + link_href.is_empty(), + "Dangerous absolute URL should result in empty string, got: {link_href}" + ); +} + +#[test] +fn test_case_insensitive_scheme_bypass_blocked() { + // Uppercase schemes should also be blocked (RFC 3986 - schemes are 
case-insensitive) + let xml = br#" + + passwd + "#; + + let feed = parse(xml).unwrap(); + let icon = feed.feed.icon.as_deref().unwrap(); + assert!( + !icon.to_lowercase().starts_with("file://"), + "Uppercase FILE:// scheme should be blocked, got: {icon}" + ); +} + +#[test] +fn test_mixed_case_javascript_scheme_blocked() { + // Mixed case javascript: should be blocked + let xml = br#" + + test + "#; + + let feed = parse(xml).unwrap(); + let icon = feed.feed.icon.as_deref().unwrap(); + assert!( + !icon.to_lowercase().contains("javascript"), + "Mixed case javascript: scheme should be blocked, got: {icon}" + ); +} diff --git a/crates/feedparser-rs-node/__test__/index.spec.mjs b/crates/feedparser-rs-node/__test__/index.spec.mjs index 5c596b1..57134b2 100644 --- a/crates/feedparser-rs-node/__test__/index.spec.mjs +++ b/crates/feedparser-rs-node/__test__/index.spec.mjs @@ -128,6 +128,29 @@ describe('feedparser-rs', () => { } }); + it('should parse feed-level published date', () => { + const xml = ` + + + + Test Feed + Wed, 18 Dec 2024 10:00:00 +0000 + + Test Entry + + + + `; + + const feed = parse(xml); + + assert(feed.feed.published !== null && feed.feed.published !== undefined); + assert.strictEqual(typeof feed.feed.published, 'number'); + assert(feed.feed.published > 0); + // Verify it's the expected timestamp (Wed, 18 Dec 2024 10:00:00 +0000) + assert.strictEqual(feed.feed.published, 1734516000000); + }); + it('should handle multiple entries', () => { const xml = ` diff --git a/crates/feedparser-rs-node/index.d.ts b/crates/feedparser-rs-node/index.d.ts index 835a7f3..66037ae 100644 --- a/crates/feedparser-rs-node/index.d.ts +++ b/crates/feedparser-rs-node/index.d.ts @@ -81,6 +81,10 @@ export interface Entry { comments?: string /** Source feed reference */ source?: Source + /** Podcast transcripts */ + podcastTranscripts: Array + /** Podcast persons */ + podcastPersons: Array } /** Feed metadata */ @@ -99,6 +103,8 @@ export interface FeedMeta { subtitleDetail?: 
TextConstruct /** Last update date (milliseconds since epoch) */ updated?: number + /** Initial publication date (milliseconds since epoch) */ + published?: number /** Primary author name */ author?: string /** Detailed author information */ @@ -214,8 +220,87 @@ export interface ParsedFeed { version: string /** XML namespaces (prefix -> URI) */ namespaces: Record + /** HTTP status code (if fetched from URL) */ + status?: number + /** Final URL after redirects (if fetched from URL) */ + href?: string + /** ETag header from HTTP response */ + etag?: string + /** Last-Modified header from HTTP response */ + modified?: string + /** HTTP response headers (if fetched from URL) */ + headers?: Record } +/** + * Parse feed from HTTP/HTTPS URL with conditional GET support + * + * Fetches the feed from the given URL and parses it. Supports conditional GET + * using ETag and Last-Modified headers for bandwidth-efficient caching. + * + * # Arguments + * + * * `url` - HTTP or HTTPS URL to fetch + * * `etag` - Optional ETag from previous fetch for conditional GET + * * `modified` - Optional Last-Modified timestamp from previous fetch + * * `user_agent` - Optional custom User-Agent header + * + * # Returns + * + * Parsed feed result with HTTP metadata fields populated: + * - `status`: HTTP status code (200, 304, etc.) + * - `href`: Final URL after redirects + * - `etag`: ETag header value (for next request) + * - `modified`: Last-Modified header value (for next request) + * - `headers`: Full HTTP response headers + * + * On 304 Not Modified, returns a feed with empty entries but status=304. 
+ * + * # Examples + * + * ```javascript + * const feedparser = require('feedparser-rs'); + * + * // First fetch + * const feed = await feedparser.parseUrl("https://example.com/feed.xml"); + * console.log(feed.feed.title); + * console.log(`ETag: ${feed.etag}`); + * + * // Subsequent fetch with caching + * const feed2 = await feedparser.parseUrl( + * "https://example.com/feed.xml", + * feed.etag, + * feed.modified + * ); + * + * if (feed2.status === 304) { + * console.log("Feed not modified, use cached version"); + * } + * ``` + */ +export declare function parseUrl(url: string, etag?: string | undefined | null, modified?: string | undefined | null, userAgent?: string | undefined | null): ParsedFeed + +/** + * Parse feed from URL with custom resource limits + * + * Like `parseUrl` but allows specifying custom limits for DoS protection. + * + * # Examples + * + * ```javascript + * const feedparser = require('feedparser-rs'); + * + * const feed = await feedparser.parseUrlWithOptions( + * "https://example.com/feed.xml", + * null, // etag + * null, // modified + * null, // user_agent + * 10485760 // max_size: 10MB + * ); + * ``` + */ +export declare function parseUrlWithOptions(url: string, etag?: string | undefined | null, modified?: string | undefined | null, userAgent?: string | undefined | null, maxSize?: number | undefined | null): ParsedFeed + /** * Parse an RSS/Atom/JSON Feed with custom size limit * @@ -244,6 +329,32 @@ export interface Person { uri?: string } +/** Podcast person metadata */ +export interface PodcastPerson { + /** Person's name */ + name: string + /** Person's role (e.g., "host", "guest") */ + role?: string + /** Person's group (e.g., "cast", "crew") */ + group?: string + /** Person's image URL */ + img?: string + /** Person's URL/website */ + href?: string +} + +/** Podcast transcript metadata */ +export interface PodcastTranscript { + /** Transcript URL */ + url: string + /** Transcript type (e.g., "text/plain", "application/srt") */ + type?: 
string + /** Transcript language */ + language?: string + /** Relationship type (e.g., "captions", "chapters") */ + rel?: string +} + /** Source reference (for entries) */ export interface Source { /** Source title */ diff --git a/crates/feedparser-rs-node/index.js b/crates/feedparser-rs-node/index.js index af88484..04700d2 100644 --- a/crates/feedparser-rs-node/index.js +++ b/crates/feedparser-rs-node/index.js @@ -77,8 +77,8 @@ function requireNative() { try { const binding = require('feedparser-rs-android-arm64') const bindingPackageVersion = require('feedparser-rs-android-arm64/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -93,8 +93,8 @@ function requireNative() { try { const binding = require('feedparser-rs-android-arm-eabi') const bindingPackageVersion = require('feedparser-rs-android-arm-eabi/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. 
You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -114,8 +114,8 @@ function requireNative() { try { const binding = require('feedparser-rs-win32-x64-gnu') const bindingPackageVersion = require('feedparser-rs-win32-x64-gnu/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -130,8 +130,8 @@ function requireNative() { try { const binding = require('feedparser-rs-win32-x64-msvc') const bindingPackageVersion = require('feedparser-rs-win32-x64-msvc/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. 
You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -147,8 +147,8 @@ function requireNative() { try { const binding = require('feedparser-rs-win32-ia32-msvc') const bindingPackageVersion = require('feedparser-rs-win32-ia32-msvc/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -163,8 +163,8 @@ function requireNative() { try { const binding = require('feedparser-rs-win32-arm64-msvc') const bindingPackageVersion = require('feedparser-rs-win32-arm64-msvc/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. 
You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -182,8 +182,8 @@ function requireNative() { try { const binding = require('feedparser-rs-darwin-universal') const bindingPackageVersion = require('feedparser-rs-darwin-universal/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -198,8 +198,8 @@ function requireNative() { try { const binding = require('feedparser-rs-darwin-x64') const bindingPackageVersion = require('feedparser-rs-darwin-x64/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. 
You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -214,8 +214,8 @@ function requireNative() { try { const binding = require('feedparser-rs-darwin-arm64') const bindingPackageVersion = require('feedparser-rs-darwin-arm64/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -234,8 +234,8 @@ function requireNative() { try { const binding = require('feedparser-rs-freebsd-x64') const bindingPackageVersion = require('feedparser-rs-freebsd-x64/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. 
You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -250,8 +250,8 @@ function requireNative() { try { const binding = require('feedparser-rs-freebsd-arm64') const bindingPackageVersion = require('feedparser-rs-freebsd-arm64/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -271,8 +271,8 @@ function requireNative() { try { const binding = require('feedparser-rs-linux-x64-musl') const bindingPackageVersion = require('feedparser-rs-linux-x64-musl/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. 
You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -287,8 +287,8 @@ function requireNative() { try { const binding = require('feedparser-rs-linux-x64-gnu') const bindingPackageVersion = require('feedparser-rs-linux-x64-gnu/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -305,8 +305,8 @@ function requireNative() { try { const binding = require('feedparser-rs-linux-arm64-musl') const bindingPackageVersion = require('feedparser-rs-linux-arm64-musl/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. 
You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -321,8 +321,8 @@ function requireNative() { try { const binding = require('feedparser-rs-linux-arm64-gnu') const bindingPackageVersion = require('feedparser-rs-linux-arm64-gnu/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -339,8 +339,8 @@ function requireNative() { try { const binding = require('feedparser-rs-linux-arm-musleabihf') const bindingPackageVersion = require('feedparser-rs-linux-arm-musleabihf/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. 
You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -355,8 +355,8 @@ function requireNative() { try { const binding = require('feedparser-rs-linux-arm-gnueabihf') const bindingPackageVersion = require('feedparser-rs-linux-arm-gnueabihf/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -373,8 +373,8 @@ function requireNative() { try { const binding = require('feedparser-rs-linux-loong64-musl') const bindingPackageVersion = require('feedparser-rs-linux-loong64-musl/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. 
You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -389,8 +389,8 @@ function requireNative() { try { const binding = require('feedparser-rs-linux-loong64-gnu') const bindingPackageVersion = require('feedparser-rs-linux-loong64-gnu/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -407,8 +407,8 @@ function requireNative() { try { const binding = require('feedparser-rs-linux-riscv64-musl') const bindingPackageVersion = require('feedparser-rs-linux-riscv64-musl/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. 
You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -423,8 +423,8 @@ function requireNative() { try { const binding = require('feedparser-rs-linux-riscv64-gnu') const bindingPackageVersion = require('feedparser-rs-linux-riscv64-gnu/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -440,8 +440,8 @@ function requireNative() { try { const binding = require('feedparser-rs-linux-ppc64-gnu') const bindingPackageVersion = require('feedparser-rs-linux-ppc64-gnu/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. 
You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -456,8 +456,8 @@ function requireNative() { try { const binding = require('feedparser-rs-linux-s390x-gnu') const bindingPackageVersion = require('feedparser-rs-linux-s390x-gnu/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -476,8 +476,8 @@ function requireNative() { try { const binding = require('feedparser-rs-openharmony-arm64') const bindingPackageVersion = require('feedparser-rs-openharmony-arm64/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. 
You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -492,8 +492,8 @@ function requireNative() { try { const binding = require('feedparser-rs-openharmony-x64') const bindingPackageVersion = require('feedparser-rs-openharmony-x64/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -508,8 +508,8 @@ function requireNative() { try { const binding = require('feedparser-rs-openharmony-arm') const bindingPackageVersion = require('feedparser-rs-openharmony-arm/package.json').version - if (bindingPackageVersion !== '0.1.0' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { - throw new Error(`Native binding package version mismatch, expected 0.1.0 but got ${bindingPackageVersion}. You can reinstall dependencies to fix this issue.`) + if (bindingPackageVersion !== '0.2.1' && process.env.NAPI_RS_ENFORCE_VERSION_CHECK && process.env.NAPI_RS_ENFORCE_VERSION_CHECK !== '0') { + throw new Error(`Native binding package version mismatch, expected 0.2.1 but got ${bindingPackageVersion}. 
You can reinstall dependencies to fix this issue.`) } return binding } catch (e) { @@ -574,4 +574,6 @@ if (!nativeBinding) { module.exports = nativeBinding module.exports.detectFormat = nativeBinding.detectFormat module.exports.parse = nativeBinding.parse +module.exports.parseUrl = nativeBinding.parseUrl +module.exports.parseUrlWithOptions = nativeBinding.parseUrlWithOptions module.exports.parseWithOptions = nativeBinding.parseWithOptions diff --git a/crates/feedparser-rs-node/src/lib.rs b/crates/feedparser-rs-node/src/lib.rs index 67e9810..3c1de01 100644 --- a/crates/feedparser-rs-node/src/lib.rs +++ b/crates/feedparser-rs-node/src/lib.rs @@ -281,6 +281,8 @@ pub struct FeedMeta { pub subtitle_detail: Option, /// Last update date (milliseconds since epoch) pub updated: Option, + /// Initial publication date (milliseconds since epoch) + pub published: Option, /// Primary author name pub author: Option, /// Detailed author information @@ -327,6 +329,7 @@ impl From for FeedMeta { subtitle: core.subtitle, subtitle_detail: core.subtitle_detail.map(TextConstruct::from), updated: core.updated.map(|dt| dt.timestamp_millis()), + published: core.published.map(|dt| dt.timestamp_millis()), author: core.author, author_detail: core.author_detail.map(Person::from), authors: core.authors.into_iter().map(Person::from).collect(), diff --git a/crates/feedparser-rs-py/src/types/feed_meta.rs b/crates/feedparser-rs-py/src/types/feed_meta.rs index a85e524..51bb353 100644 --- a/crates/feedparser-rs-py/src/types/feed_meta.rs +++ b/crates/feedparser-rs-py/src/types/feed_meta.rs @@ -69,6 +69,16 @@ impl PyFeedMeta { optional_datetime_to_struct_time(py, &self.inner.updated) } + #[getter] + fn published(&self) -> Option { + self.inner.published.map(|dt| dt.to_rfc3339()) + } + + #[getter] + fn published_parsed(&self, py: Python<'_>) -> PyResult>> { + optional_datetime_to_struct_time(py, &self.inner.published) + } + #[getter] fn author(&self) -> Option<&str> { self.inner.author.as_deref() diff 
--git a/crates/feedparser-rs-py/tests/test_phase1_integration.py b/crates/feedparser-rs-py/tests/test_phase1_integration.py new file mode 100644 index 0000000..83d9fa5 --- /dev/null +++ b/crates/feedparser-rs-py/tests/test_phase1_integration.py @@ -0,0 +1,211 @@ +"""Integration tests for Phase 1: published_parsed and URL resolution""" + +import time + +import feedparser_rs + + +def test_feed_published_parsed_atom(): + """Test feed.published_parsed returns time.struct_time for Atom""" + xml = b""" + + Test Feed + 2025-01-01T00:00:00Z + + """ + + result = feedparser_rs.parse(xml) + assert result.feed.published is not None + assert result.feed.published_parsed is not None + assert isinstance(result.feed.published_parsed, time.struct_time) + assert result.feed.published_parsed.tm_year == 2025 + assert result.feed.published_parsed.tm_mon == 1 + assert result.feed.published_parsed.tm_mday == 1 + assert result.feed.published_parsed.tm_isdst == 0 # UTC + + +def test_feed_published_parsed_rss(): + """Test RSS channel pubDate maps to feed.published_parsed""" + xml = b""" + + + Test Podcast + Wed, 18 Dec 2024 10:00:00 +0000 + + + """ + + result = feedparser_rs.parse(xml) + assert result.feed.published_parsed is not None + assert result.feed.published_parsed.tm_year == 2024 + assert result.feed.published_parsed.tm_mon == 12 + assert result.feed.published_parsed.tm_mday == 18 + + +def test_feed_updated_parsed_still_works(): + """Test feed.updated_parsed still works (backwards compatibility)""" + xml = b""" + + Test Feed + 2024-12-18T10:30:00Z + + """ + + result = feedparser_rs.parse(xml) + assert result.feed.updated_parsed is not None + assert isinstance(result.feed.updated_parsed, time.struct_time) + assert result.feed.updated_parsed.tm_year == 2024 + assert result.feed.updated_parsed.tm_mon == 12 + assert result.feed.updated_parsed.tm_mday == 18 + assert result.feed.updated_parsed.tm_isdst == 0 + + +def test_entry_all_parsed_dates(): + """Test entry has all *_parsed date 
fields""" + xml = b""" + + Test + + Entry 1 + 2025-01-01T12:00:00Z + 2025-01-02T12:00:00Z + + + """ + + result = feedparser_rs.parse(xml) + entry = result.entries[0] + + assert entry.published_parsed is not None + assert entry.published_parsed.tm_year == 2025 + assert entry.published_parsed.tm_mon == 1 + + assert entry.updated_parsed is not None + assert entry.updated_parsed.tm_mday == 2 + + +def test_missing_dates_return_none(): + """Test that missing dates return None, not crash""" + xml = b""" + + Test Feed + + """ + + result = feedparser_rs.parse(xml) + assert result.feed.updated_parsed is None + assert result.feed.published_parsed is None + + +def test_atom_xml_base_resolution(): + """Test relative URLs resolved against xml:base""" + xml = b""" + + Test Feed + + + Post 1 + + + + """ + + result = feedparser_rs.parse(xml) + + # Feed link resolved + assert result.feed.links[0].href == "http://example.org/blog/index.html" + + # Entry link resolved + assert result.entries[0].link == "http://example.org/blog/posts/first.html" + + +def test_nested_xml_base(): + """Test nested xml:base combines with parent""" + xml = b""" + + + + + + """ + + result = feedparser_rs.parse(xml) + assert result.entries[0].link == "http://example.org/posts/123.html" + + +def test_rss_relative_links(): + """Test RSS links resolved against channel link""" + xml = b""" + + + http://example.org/ + + Episode 1 + episodes/ep1.html + + + + """ + + result = feedparser_rs.parse(xml) + assert result.entries[0].link == "http://example.org/episodes/ep1.html" + + +def test_rss_relative_enclosures(): + """Test RSS enclosures resolved against channel link""" + xml = b""" + + + http://podcast.example.com/ + + + + + + """ + + result = feedparser_rs.parse(xml) + assert result.entries[0].enclosures[0].url == "http://podcast.example.com/episodes/ep1.mp3" + + +def test_absolute_urls_not_modified(): + """Test absolute URLs remain unchanged""" + xml = b""" + + + + + + """ + + result = feedparser_rs.parse(xml) + 
assert result.entries[0].link == "http://absolute.com/page" + + +def test_atom_feed_icons_and_logos_resolved(): + """Test Atom feed icon and logo URLs are resolved""" + xml = b""" + + Test + icon.png + logo.png + + """ + + result = feedparser_rs.parse(xml) + assert result.feed.icon == "http://example.org/blog/icon.png" + assert result.feed.logo == "http://example.org/blog/logo.png" + + +def test_special_schemes_preserved(): + """Test special URL schemes like mailto: are preserved""" + xml = b""" + + + + + + """ + + result = feedparser_rs.parse(xml) + assert result.entries[0].link == "mailto:test@example.com"