diff --git a/crates/feedparser-rs-core/src/lib.rs b/crates/feedparser-rs-core/src/lib.rs
index 6fbb8b3..a1bc006 100644
--- a/crates/feedparser-rs-core/src/lib.rs
+++ b/crates/feedparser-rs-core/src/lib.rs
@@ -72,6 +72,8 @@ pub use types::{
TextType, parse_duration, parse_explicit,
};
+pub use namespace::syndication::{SyndicationMeta, UpdatePeriod};
+
#[cfg(feature = "http")]
pub use http::{FeedHttpClient, FeedHttpResponse};
diff --git a/crates/feedparser-rs-core/src/namespace/mod.rs b/crates/feedparser-rs-core/src/namespace/mod.rs
index 7056e7d..668bb53 100644
--- a/crates/feedparser-rs-core/src/namespace/mod.rs
+++ b/crates/feedparser-rs-core/src/namespace/mod.rs
@@ -35,6 +35,8 @@ pub mod dublin_core;
pub mod georss;
/// Media RSS specification
pub mod media_rss;
+/// Syndication Module for RSS 1.0
+pub mod syndication;
/// Common namespace URIs used in feeds
pub mod namespaces {
@@ -56,6 +58,9 @@ pub mod namespaces {
/// RSS 1.0
pub const RSS_10: &str = "http://purl.org/rss/1.0/";
+ /// Syndication Module for RSS 1.0
+ pub const SYNDICATION: &str = "http://purl.org/rss/1.0/modules/syndication/";
+
/// iTunes Podcast
pub const ITUNES: &str = "http://www.itunes.com/dtds/podcast-1.0.dtd";
@@ -88,6 +93,7 @@ pub fn get_namespace_uri(prefix: &str) -> Option<&'static str> {
"media" => Some(namespaces::MEDIA),
"atom" => Some(namespaces::ATOM),
"rdf" => Some(namespaces::RDF),
+ "syn" | "syndication" => Some(namespaces::SYNDICATION),
"itunes" => Some(namespaces::ITUNES),
"podcast" => Some(namespaces::PODCAST),
"georss" => Some(namespaces::GEORSS),
@@ -113,6 +119,7 @@ pub fn get_namespace_prefix(uri: &str) -> Option<&'static str> {
namespaces::MEDIA => Some("media"),
namespaces::ATOM => Some("atom"),
namespaces::RDF => Some("rdf"),
+ namespaces::SYNDICATION => Some("syn"),
namespaces::ITUNES => Some("itunes"),
namespaces::PODCAST => Some("podcast"),
namespaces::GEORSS => Some("georss"),
diff --git a/crates/feedparser-rs-core/src/namespace/syndication.rs b/crates/feedparser-rs-core/src/namespace/syndication.rs
new file mode 100644
index 0000000..f98105e
--- /dev/null
+++ b/crates/feedparser-rs-core/src/namespace/syndication.rs
@@ -0,0 +1,211 @@
+/// Syndication Module for RSS 1.0
+///
+/// Namespace: <http://purl.org/rss/1.0/modules/syndication/>
+/// Prefix: syn
+///
+/// This module provides parsing support for the Syndication namespace,
+/// used in RSS 1.0 feeds to indicate update schedules and frequencies.
+///
+/// Elements:
+/// - `syn:updatePeriod` → Update period (hourly, daily, weekly, monthly, yearly)
+/// - `syn:updateFrequency` → Number of times per period
+/// - `syn:updateBase` → Base date for update schedule (ISO 8601)
+use crate::types::FeedMeta;
+
+/// Syndication namespace URI
+pub const SYNDICATION_NAMESPACE: &str = "http://purl.org/rss/1.0/modules/syndication/";
+
+/// Valid update period values
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum UpdatePeriod {
+ /// Update hourly
+ Hourly,
+ /// Update daily
+ Daily,
+ /// Update weekly
+ Weekly,
+ /// Update monthly
+ Monthly,
+ /// Update yearly
+ Yearly,
+}
+
+impl UpdatePeriod {
+ /// Parse update period from string (case-insensitive)
+ ///
+ /// Returns `None` if the string doesn't match any valid period.
+ #[must_use]
+ pub fn parse(s: &str) -> Option<Self> {
+ match s.to_lowercase().as_str() {
+ "hourly" => Some(Self::Hourly),
+ "daily" => Some(Self::Daily),
+ "weekly" => Some(Self::Weekly),
+ "monthly" => Some(Self::Monthly),
+ "yearly" => Some(Self::Yearly),
+ _ => None,
+ }
+ }
+
+ /// Convert to string representation
+ #[must_use]
+ pub const fn as_str(&self) -> &'static str {
+ match self {
+ Self::Hourly => "hourly",
+ Self::Daily => "daily",
+ Self::Weekly => "weekly",
+ Self::Monthly => "monthly",
+ Self::Yearly => "yearly",
+ }
+ }
+}
+
+/// Syndication metadata
+#[derive(Debug, Clone, Default)]
+pub struct SyndicationMeta {
+ /// Update period (hourly, daily, weekly, monthly, yearly)
+ pub update_period: Option<UpdatePeriod>,
+ /// Number of times updated per period
+ pub update_frequency: Option<u32>,
+ /// Base date for update schedule (ISO 8601)
+ pub update_base: Option<String>,
+}
+
+/// Handle Syndication namespace element at feed level
+///
+/// # Arguments
+///
+/// * `element` - Local name of the element (without namespace prefix)
+/// * `text` - Text content of the element
+/// * `feed` - Feed metadata to update
+pub fn handle_feed_element(element: &str, text: &str, feed: &mut FeedMeta) {
+ match element {
+ "updatePeriod" => {
+ if let Some(period) = UpdatePeriod::parse(text) {
+ if feed.syndication.is_none() {
+ feed.syndication = Some(SyndicationMeta::default());
+ }
+ if let Some(syn) = &mut feed.syndication {
+ syn.update_period = Some(period);
+ }
+ }
+ }
+ "updateFrequency" => {
+ if let Ok(freq) = text.parse::<u32>() {
+ if feed.syndication.is_none() {
+ feed.syndication = Some(SyndicationMeta::default());
+ }
+ if let Some(syn) = &mut feed.syndication {
+ syn.update_frequency = Some(freq);
+ }
+ }
+ }
+ "updateBase" => {
+ if feed.syndication.is_none() {
+ feed.syndication = Some(SyndicationMeta::default());
+ }
+ if let Some(syn) = &mut feed.syndication {
+ syn.update_base = Some(text.to_string());
+ }
+ }
+ _ => {
+ // Ignore unknown syndication elements
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_update_period_parse() {
+ assert_eq!(UpdatePeriod::parse("hourly"), Some(UpdatePeriod::Hourly));
+ assert_eq!(UpdatePeriod::parse("daily"), Some(UpdatePeriod::Daily));
+ assert_eq!(UpdatePeriod::parse("weekly"), Some(UpdatePeriod::Weekly));
+ assert_eq!(UpdatePeriod::parse("monthly"), Some(UpdatePeriod::Monthly));
+ assert_eq!(UpdatePeriod::parse("yearly"), Some(UpdatePeriod::Yearly));
+ assert_eq!(UpdatePeriod::parse("invalid"), None);
+ }
+
+ #[test]
+ fn test_update_period_case_insensitive() {
+ assert_eq!(UpdatePeriod::parse("HOURLY"), Some(UpdatePeriod::Hourly));
+ assert_eq!(UpdatePeriod::parse("Daily"), Some(UpdatePeriod::Daily));
+ assert_eq!(UpdatePeriod::parse("WeeKLY"), Some(UpdatePeriod::Weekly));
+ }
+
+ #[test]
+ fn test_update_period_as_str() {
+ assert_eq!(UpdatePeriod::Hourly.as_str(), "hourly");
+ assert_eq!(UpdatePeriod::Daily.as_str(), "daily");
+ assert_eq!(UpdatePeriod::Weekly.as_str(), "weekly");
+ assert_eq!(UpdatePeriod::Monthly.as_str(), "monthly");
+ assert_eq!(UpdatePeriod::Yearly.as_str(), "yearly");
+ }
+
+ #[test]
+ fn test_handle_update_period() {
+ let mut feed = FeedMeta::default();
+
+ handle_feed_element("updatePeriod", "daily", &mut feed);
+
+ assert!(feed.syndication.is_some());
+ let syn = feed.syndication.as_ref().unwrap();
+ assert_eq!(syn.update_period, Some(UpdatePeriod::Daily));
+ }
+
+ #[test]
+ fn test_handle_update_frequency() {
+ let mut feed = FeedMeta::default();
+
+ handle_feed_element("updateFrequency", "2", &mut feed);
+
+ assert!(feed.syndication.is_some());
+ let syn = feed.syndication.as_ref().unwrap();
+ assert_eq!(syn.update_frequency, Some(2));
+ }
+
+ #[test]
+ fn test_handle_update_base() {
+ let mut feed = FeedMeta::default();
+
+ handle_feed_element("updateBase", "2024-12-18T00:00:00Z", &mut feed);
+
+ assert!(feed.syndication.is_some());
+ let syn = feed.syndication.as_ref().unwrap();
+ assert_eq!(syn.update_base.as_deref(), Some("2024-12-18T00:00:00Z"));
+ }
+
+ #[test]
+ fn test_handle_multiple_elements() {
+ let mut feed = FeedMeta::default();
+
+ handle_feed_element("updatePeriod", "hourly", &mut feed);
+ handle_feed_element("updateFrequency", "1", &mut feed);
+ handle_feed_element("updateBase", "2024-01-01T00:00:00Z", &mut feed);
+
+ let syn = feed.syndication.as_ref().unwrap();
+ assert_eq!(syn.update_period, Some(UpdatePeriod::Hourly));
+ assert_eq!(syn.update_frequency, Some(1));
+ assert_eq!(syn.update_base.as_deref(), Some("2024-01-01T00:00:00Z"));
+ }
+
+ #[test]
+ fn test_handle_invalid_frequency() {
+ let mut feed = FeedMeta::default();
+
+ handle_feed_element("updateFrequency", "not-a-number", &mut feed);
+
+ // Should not create syndication metadata for invalid input
+ assert!(feed.syndication.is_none());
+ }
+
+ #[test]
+ fn test_handle_unknown_element() {
+ let mut feed = FeedMeta::default();
+
+ handle_feed_element("unknown", "value", &mut feed);
+
+ assert!(feed.syndication.is_none());
+ }
+}
diff --git a/crates/feedparser-rs-core/src/parser/common.rs b/crates/feedparser-rs-core/src/parser/common.rs
index 569c24b..55074d7 100644
--- a/crates/feedparser-rs-core/src/parser/common.rs
+++ b/crates/feedparser-rs-core/src/parser/common.rs
@@ -209,6 +209,20 @@ pub fn is_content_tag(name: &[u8]) -> Option<&str> {
extract_ns_local_name(name, b"content:")
}
+/// Check if element is a Syndication namespaced tag
+///
+/// # Examples
+///
+/// ```ignore
+/// assert_eq!(is_syn_tag(b"syn:updatePeriod"), Some("updatePeriod"));
+/// assert_eq!(is_syn_tag(b"syn:updateFrequency"), Some("updateFrequency"));
+/// assert_eq!(is_syn_tag(b"dc:creator"), None);
+/// ```
+#[inline]
+pub fn is_syn_tag(name: &[u8]) -> Option<&str> {
+ extract_ns_local_name(name, b"syn:")
+}
+
/// Check if element is a Media RSS namespaced tag
///
/// # Examples
diff --git a/crates/feedparser-rs-core/src/parser/rss10.rs b/crates/feedparser-rs-core/src/parser/rss10.rs
index bc9bd4d..98b9a3b 100644
--- a/crates/feedparser-rs-core/src/parser/rss10.rs
+++ b/crates/feedparser-rs-core/src/parser/rss10.rs
@@ -10,14 +10,14 @@
use crate::{
ParserLimits,
error::{FeedError, Result},
- namespace::dublin_core,
+ namespace::{content, dublin_core, syndication},
types::{Entry, FeedVersion, Image, ParsedFeed, TextConstruct, TextType},
};
use quick_xml::{Reader, events::Event};
use super::common::{
- EVENT_BUFFER_CAPACITY, LimitedCollectionExt, check_depth, init_feed, is_dc_tag, read_text,
- skip_element,
+ EVENT_BUFFER_CAPACITY, LimitedCollectionExt, check_depth, init_feed, is_content_tag, is_dc_tag,
+ is_syn_tag, read_text, skip_element,
};
/// Parse RSS 1.0 (RDF) feed from raw bytes
@@ -223,6 +223,10 @@ fn parse_channel(
let dc_elem = dc_element.to_string();
let text = read_text(reader, &mut buf, limits)?;
dublin_core::handle_feed_element(&dc_elem, &text, &mut feed.feed);
+ } else if let Some(syn_element) = is_syn_tag(full_name.as_ref()) {
+ let syn_elem = syn_element.to_string();
+ let text = read_text(reader, &mut buf, limits)?;
+ syndication::handle_feed_element(&syn_elem, &text, &mut feed.feed);
} else {
skip_element(reader, &mut buf, limits, *depth)?;
}
@@ -288,6 +292,10 @@ fn parse_item(
let text = read_text(reader, buf, limits)?;
// dublin_core::handle_entry_element already handles dc:date -> published
dublin_core::handle_entry_element(&dc_elem, &text, &mut entry);
+ } else if let Some(content_element) = is_content_tag(full_name.as_ref()) {
+ let content_elem = content_element.to_string();
+ let text = read_text(reader, buf, limits)?;
+ content::handle_entry_element(&content_elem, &text, &mut entry);
} else {
skip_element(reader, buf, limits, *depth)?;
}
@@ -568,4 +576,64 @@ mod tests {
assert!(is_dc_tag(b"link").is_none());
assert!(is_dc_tag(b"atom:title").is_none());
}
+
+ #[test]
+ fn test_parse_rss10_with_content_encoded() {
+        let xml = br#"<?xml version="1.0"?>
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+         xmlns="http://purl.org/rss/1.0/"
+         xmlns:content="http://purl.org/rss/1.0/modules/content/">
+  <channel rdf:about="http://example.com">
+    <title>Test</title>
+    <link>http://example.com</link>
+    <description>Test</description>
+  </channel>
+  <item rdf:about="http://example.com/1">
+    <title>Item 1</title>
+    <link>http://example.com/1</link>
+    <description>Brief summary</description>
+    <content:encoded><![CDATA[<p>Full <b>HTML</b> content</p>]]></content:encoded>
+  </item>
+</rdf:RDF>"#;
+
+ let feed = parse_rss10(xml).unwrap();
+ assert_eq!(feed.entries.len(), 1);
+
+ let entry = &feed.entries[0];
+ assert_eq!(entry.summary.as_deref(), Some("Brief summary"));
+
+ // Verify content:encoded is parsed
+ assert!(!entry.content.is_empty());
+ assert_eq!(entry.content[0].content_type.as_deref(), Some("text/html"));
+ assert!(entry.content[0].value.contains("Full"));
+ assert!(entry.content[0].value.contains("HTML"));
+ }
+
+ #[test]
+ fn test_parse_rss10_with_syndication() {
+        let xml = br#"<?xml version="1.0"?>
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+         xmlns="http://purl.org/rss/1.0/"
+         xmlns:syn="http://purl.org/rss/1.0/modules/syndication/">
+  <channel rdf:about="http://example.com">
+    <title>Test</title>
+    <link>http://example.com</link>
+    <description>Test</description>
+    <syn:updatePeriod>hourly</syn:updatePeriod>
+    <syn:updateFrequency>2</syn:updateFrequency>
+    <syn:updateBase>2024-01-01T00:00:00Z</syn:updateBase>
+  </channel>
+</rdf:RDF>"#;
+
+ let feed = parse_rss10(xml).unwrap();
+ assert!(feed.feed.syndication.is_some());
+
+ let syn = feed.feed.syndication.as_ref().unwrap();
+ assert_eq!(
+ syn.update_period,
+ Some(crate::namespace::syndication::UpdatePeriod::Hourly)
+ );
+ assert_eq!(syn.update_frequency, Some(2));
+ assert_eq!(syn.update_base.as_deref(), Some("2024-01-01T00:00:00Z"));
+ }
}
diff --git a/crates/feedparser-rs-core/src/types/feed.rs b/crates/feedparser-rs-core/src/types/feed.rs
index 247f61d..9419b6f 100644
--- a/crates/feedparser-rs-core/src/types/feed.rs
+++ b/crates/feedparser-rs-core/src/types/feed.rs
@@ -5,6 +5,7 @@ use super::{
podcast::{ItunesFeedMeta, PodcastMeta},
version::FeedVersion,
};
+use crate::namespace::syndication::SyndicationMeta;
use crate::{ParserLimits, error::Result};
use chrono::{DateTime, Utc};
use quick_xml::Reader;
@@ -75,6 +76,8 @@ pub struct FeedMeta {
pub dc_rights: Option<String>,
/// License URL (Creative Commons, etc.)
pub license: Option<String>,
+ /// Syndication module metadata (RSS 1.0)
+ pub syndication: Option<SyndicationMeta>,
}
/// Parsed feed result
diff --git a/crates/feedparser-rs-core/tests/test_rss10.rs b/crates/feedparser-rs-core/tests/test_rss10.rs
new file mode 100644
index 0000000..f77dce3
--- /dev/null
+++ b/crates/feedparser-rs-core/tests/test_rss10.rs
@@ -0,0 +1,520 @@
+//! Integration tests for RSS 1.0 (RDF) parser
+//!
+//! Tests comprehensive RSS 1.0 feed parsing including:
+//! - Basic channel and item elements
+//! - Dublin Core namespace support
+//! - Content namespace support
+//! - RDF structure handling
+//! - Malformed feed tolerance (bozo pattern)
+
+use chrono::{Datelike, Timelike};
+use feedparser_rs::{FeedVersion, ParserLimits, namespace::syndication::UpdatePeriod, parse};
+use std::fmt::Write as _;
+
+#[test]
+fn test_basic_rss10_feed() {
+    let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+         xmlns="http://purl.org/rss/1.0/">
+  <channel rdf:about="http://example.com/">
+    <title>Example RSS 1.0 Feed</title>
+    <link>http://example.com</link>
+    <description>This is an example RSS 1.0 feed</description>
+  </channel>
+  <item rdf:about="http://example.com/article1">
+    <title>First Article</title>
+    <link>http://example.com/article1</link>
+    <description>Summary of first article</description>
+  </item>
+  <item rdf:about="http://example.com/article2">
+    <title>Second Article</title>
+    <link>http://example.com/article2</link>
+    <description>Summary of second article</description>
+  </item>
+</rdf:RDF>"#;
+
+ let feed = parse(xml).expect("Failed to parse RSS 1.0 feed");
+
+ assert_eq!(feed.version, FeedVersion::Rss10);
+ assert!(!feed.bozo, "Feed should not be marked as bozo");
+
+ // Check feed metadata
+ assert_eq!(feed.feed.title.as_deref(), Some("Example RSS 1.0 Feed"));
+ assert_eq!(feed.feed.link.as_deref(), Some("http://example.com"));
+ assert_eq!(
+ feed.feed.subtitle.as_deref(),
+ Some("This is an example RSS 1.0 feed")
+ );
+ assert_eq!(feed.feed.id.as_deref(), Some("http://example.com/"));
+
+ // Check entries
+ assert_eq!(feed.entries.len(), 2);
+
+ let first = &feed.entries[0];
+ assert_eq!(first.title.as_deref(), Some("First Article"));
+ assert_eq!(first.link.as_deref(), Some("http://example.com/article1"));
+ assert_eq!(first.id.as_deref(), Some("http://example.com/article1"));
+ assert_eq!(first.summary.as_deref(), Some("Summary of first article"));
+
+ let second = &feed.entries[1];
+ assert_eq!(second.title.as_deref(), Some("Second Article"));
+ assert_eq!(second.link.as_deref(), Some("http://example.com/article2"));
+}
+
+#[test]
+fn test_rss10_with_dublin_core() {
+ let xml = br#"
+
+
+ News Feed
+ http://example.com
+ Daily news
+ Editorial Team
+ Copyright 2024 Example Corp
+ 2024-12-18T10:00:00Z
+ en-US
+
+ -
+ Breaking News
+ http://example.com/news1
+ Important announcement
+ John Doe
+ 2024-12-18T09:30:00Z
+ politics
+
+ "#;
+
+ let feed = parse(xml).expect("Failed to parse RSS 1.0 with Dublin Core");
+
+ assert_eq!(feed.version, FeedVersion::Rss10);
+ assert!(!feed.bozo);
+
+ // Check Dublin Core elements at feed level
+ assert_eq!(feed.feed.dc_creator.as_deref(), Some("Editorial Team"));
+ assert_eq!(
+ feed.feed.dc_rights.as_deref(),
+ Some("Copyright 2024 Example Corp")
+ );
+ // dc:language is mapped to feed.language
+ assert_eq!(feed.feed.language.as_deref(), Some("en-US"));
+
+ // Check Dublin Core elements at entry level
+ assert_eq!(feed.entries.len(), 1);
+ let entry = &feed.entries[0];
+ assert_eq!(entry.author.as_deref(), Some("John Doe"));
+ assert!(
+ entry.published.is_some(),
+ "dc:date should be parsed as published date"
+ );
+
+ if let Some(published) = entry.published {
+ assert_eq!(published.year(), 2024);
+ assert_eq!(published.month(), 12);
+ assert_eq!(published.day(), 18);
+ assert_eq!(published.hour(), 9);
+ assert_eq!(published.minute(), 30);
+ }
+}
+
+#[test]
+fn test_rss10_with_content_encoded() {
+ let xml = br#"
+
+
+ Blog
+ http://example.com
+ Tech blog
+
+ -
+ Using RSS 1.0
+ http://example.com/post1
+ Brief summary
+ This is the full HTML content of the post.
+
It includes formatting and multiple paragraphs.
+ ]]>
+
+ "#;
+
+ let feed = parse(xml).expect("Failed to parse RSS 1.0 with content:encoded");
+
+ assert_eq!(feed.version, FeedVersion::Rss10);
+ assert!(!feed.bozo);
+
+ assert_eq!(feed.entries.len(), 1);
+ let entry = &feed.entries[0];
+
+ // Check that summary is populated from description
+ assert_eq!(entry.summary.as_deref(), Some("Brief summary"));
+
+ // Check that content:encoded is parsed
+ assert!(
+ !entry.content.is_empty(),
+ "content:encoded should be parsed"
+ );
+ assert_eq!(entry.content[0].content_type.as_deref(), Some("text/html"));
+ assert!(entry.content[0].value.contains("full HTML content"));
+}
+
+#[test]
+fn test_rss10_with_image() {
+ let xml = br#"
+
+
+ Example Feed
+ http://example.com
+ Example
+
+
+ http://example.com/logo.png
+ Example Logo
+ http://example.com
+
+ "#;
+
+ let feed = parse(xml).expect("Failed to parse RSS 1.0 with image");
+
+ assert_eq!(feed.version, FeedVersion::Rss10);
+ assert!(!feed.bozo);
+
+ assert!(feed.feed.image.is_some(), "Image should be parsed");
+ let image = feed.feed.image.as_ref().unwrap();
+ assert_eq!(image.url, "http://example.com/logo.png");
+ assert_eq!(image.title.as_deref(), Some("Example Logo"));
+ assert_eq!(image.link.as_deref(), Some("http://example.com"));
+}
+
+#[test]
+fn test_rss10_empty_items() {
+ let xml = br#"
+
+
+ Empty Feed
+ http://example.com
+ Feed with no items
+
+ "#;
+
+ let feed = parse(xml).expect("Failed to parse RSS 1.0 with empty items");
+
+ assert_eq!(feed.version, FeedVersion::Rss10);
+ assert!(!feed.bozo);
+ assert_eq!(feed.entries.len(), 0);
+ assert_eq!(feed.feed.title.as_deref(), Some("Empty Feed"));
+}
+
+#[test]
+fn test_rss10_missing_required_fields() {
+ let xml = br#"
+
+
+ Incomplete Feed
+
+
+ -
+
+ Only has description
+
+ "#;
+
+ let feed = parse(xml).expect("Parser should be tolerant of missing fields");
+
+ assert_eq!(feed.version, FeedVersion::Rss10);
+ // Should still extract what's available
+ assert_eq!(feed.feed.title.as_deref(), Some("Incomplete Feed"));
+ assert_eq!(feed.entries.len(), 1);
+ assert_eq!(
+ feed.entries[0].summary.as_deref(),
+ Some("Only has description")
+ );
+}
+
+#[test]
+fn test_rss10_malformed_xml_bozo() {
+ let xml = br#"
+
+
+ Test Feed
+ http://example.com
+ Test
+
+ -
+ Unclosed title
+ http://example.com/1
+
+ "#;
+
+ let feed = parse(xml).expect("Should parse despite malformed XML");
+
+ assert_eq!(feed.version, FeedVersion::Rss10);
+ // Bozo pattern: continue parsing but set flag
+ // Note: quick-xml in tolerant mode may or may not set bozo depending on how it handles this
+ // At minimum, feed metadata should be extracted
+ assert_eq!(feed.feed.title.as_deref(), Some("Test Feed"));
+}
+
+#[test]
+fn test_rss10_entry_limit() {
+ let xml = br#"
+
+
+ Limited Feed
+ http://example.com
+ Test entry limits
+
+ -
+ Item 1
+ http://example.com/1
+
+ -
+ Item 2
+ http://example.com/2
+
+ -
+ Item 3
+ http://example.com/3
+
+ -
+ Item 4
+ http://example.com/4
+
+ "#;
+
+ let limits = ParserLimits {
+ max_entries: 2,
+ ..Default::default()
+ };
+
+ let feed =
+ feedparser_rs::parse_with_limits(xml, limits).expect("Failed to parse with entry limit");
+
+ assert_eq!(feed.entries.len(), 2);
+ assert!(feed.bozo, "Should set bozo flag when limit exceeded");
+ assert!(
+ feed.bozo_exception
+ .as_ref()
+ .unwrap()
+ .contains("Entry limit exceeded")
+ );
+}
+
+#[test]
+fn test_rss10_without_rdf_prefix() {
+ let xml = br#"
+
+
+ No Prefix Feed
+ http://example.com
+ RSS 1.0 without rdf: prefix
+
+ -
+ Item Title
+ http://example.com/1
+
+ "#;
+
+ let feed = parse(xml).expect("Should parse RDF without rdf: prefix");
+
+ assert_eq!(feed.version, FeedVersion::Rss10);
+ assert_eq!(feed.feed.title.as_deref(), Some("No Prefix Feed"));
+ assert_eq!(feed.entries.len(), 1);
+}
+
+#[test]
+fn test_rss10_nesting_depth_limit() {
+ let mut xml = String::from(
+ r#"
+
+
+ Deep Nesting
+ http://example.com
+ Test nesting limits"#,
+ );
+
+ // Create deeply nested structure (exceed default max_nesting_depth)
+ for i in 0..150 {
+ write!(&mut xml, "").unwrap();
+ }
+ for i in (0..150).rev() {
+ write!(&mut xml, "").unwrap();
+ }
+
+ xml.push_str(
+ r"
+
+ ",
+ );
+
+ let feed = parse(xml.as_bytes()).expect("Should handle deep nesting");
+
+ // Should set bozo flag when depth limit exceeded
+ assert!(
+ feed.bozo,
+ "Should set bozo flag for excessive nesting depth"
+ );
+ assert!(
+ feed.bozo_exception
+ .as_ref()
+ .is_some_and(|e| e.contains("nesting depth") || e.contains("exceeds maximum"))
+ );
+}
+
+#[test]
+fn test_rss10_real_world_slashdot_like() {
+ let xml = br#"
+
+
+ Slashdot
+ http://slashdot.org/
+ News for nerds, stuff that matters
+ en-us
+ Copyright 1997-2024, OSDN
+ 2024-12-18T10:00:00+00:00
+ OSDN
+
+
+
+
+
+
+
+ -
+ New Technology Breakthrough
+ http://slashdot.org/story/1
+ Scientists discover amazing things
+ BeauHD
+ 2024-12-18T08:30:00+00:00
+ science
+
+ -
+ Open Source Project Released
+ http://slashdot.org/story/2
+ New version available for download
+ msmash
+ 2024-12-18T07:15:00+00:00
+ opensource
+
+ "#;
+
+ let feed = parse(xml).expect("Failed to parse Slashdot-like RSS 1.0");
+
+ assert_eq!(feed.version, FeedVersion::Rss10);
+ assert!(!feed.bozo);
+
+ // Feed metadata
+ assert_eq!(feed.feed.title.as_deref(), Some("Slashdot"));
+ assert_eq!(feed.feed.link.as_deref(), Some("http://slashdot.org/"));
+ assert_eq!(
+ feed.feed.subtitle.as_deref(),
+ Some("News for nerds, stuff that matters")
+ );
+ // dc:language is mapped to feed.language
+ assert_eq!(feed.feed.language.as_deref(), Some("en-us"));
+ assert_eq!(
+ feed.feed.dc_rights.as_deref(),
+ Some("Copyright 1997-2024, OSDN")
+ );
+
+ // Entries
+ assert_eq!(feed.entries.len(), 2);
+
+ let first = &feed.entries[0];
+ assert_eq!(first.title.as_deref(), Some("New Technology Breakthrough"));
+ assert_eq!(first.author.as_deref(), Some("BeauHD"));
+ assert!(first.published.is_some());
+
+ let second = &feed.entries[1];
+ assert_eq!(
+ second.title.as_deref(),
+ Some("Open Source Project Released")
+ );
+ assert_eq!(second.author.as_deref(), Some("msmash"));
+}
+
+#[test]
+fn test_rss10_version_string() {
+ let xml = br#"
+
+
+ Test
+ http://example.com
+ Test
+
+ "#;
+
+ let feed = parse(xml).expect("Failed to parse");
+
+ // Verify version string matches Python feedparser convention
+ assert_eq!(feed.version.as_str(), "rss10");
+ assert_eq!(format!("{}", feed.version), "rss10");
+}
+
+#[test]
+fn test_rss10_with_syndication_module() {
+    let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+         xmlns="http://purl.org/rss/1.0/"
+         xmlns:syn="http://purl.org/rss/1.0/modules/syndication/">
+  <channel rdf:about="http://example.com">
+    <title>Auto-Updated Feed</title>
+    <link>http://example.com</link>
+    <description>This feed updates every 2 hours</description>
+    <syn:updatePeriod>hourly</syn:updatePeriod>
+    <syn:updateFrequency>2</syn:updateFrequency>
+    <syn:updateBase>2024-01-01T00:00:00Z</syn:updateBase>
+  </channel>
+  <item rdf:about="http://example.com/1">
+    <title>Test Item</title>
+    <link>http://example.com/1</link>
+    <description>Test description</description>
+  </item>
+</rdf:RDF>"#;
+
+ let feed = parse(xml).expect("Failed to parse RSS 1.0 with syndication");
+
+ assert_eq!(feed.version, FeedVersion::Rss10);
+ assert!(!feed.bozo);
+
+ // Verify syndication metadata
+ assert!(
+ feed.feed.syndication.is_some(),
+ "Syndication metadata should be present"
+ );
+
+ let syn = feed.feed.syndication.as_ref().unwrap();
+
+ // Check update period (hourly)
+ assert_eq!(
+ syn.update_period,
+ Some(UpdatePeriod::Hourly),
+ "Update period should be hourly"
+ );
+
+ // Check update frequency (2 times per period)
+ assert_eq!(
+ syn.update_frequency,
+ Some(2),
+ "Update frequency should be 2"
+ );
+
+ // Check update base timestamp
+ assert_eq!(
+ syn.update_base.as_deref(),
+ Some("2024-01-01T00:00:00Z"),
+ "Update base should be preserved"
+ );
+}
diff --git a/crates/feedparser-rs-node/__test__/syndication.spec.mjs b/crates/feedparser-rs-node/__test__/syndication.spec.mjs
new file mode 100644
index 0000000..3661da7
--- /dev/null
+++ b/crates/feedparser-rs-node/__test__/syndication.spec.mjs
@@ -0,0 +1,147 @@
+import { describe, it } from 'node:test';
+import assert from 'node:assert';
+import { parse } from '../index.js';
+
+describe('syndication', () => {
+ it('should parse syndication updatePeriod', () => {
+    const xml = `<?xml version="1.0"?>
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+         xmlns="http://purl.org/rss/1.0/"
+         xmlns:syn="http://purl.org/rss/1.0/modules/syndication/">
+  <channel rdf:about="https://example.com">
+    <title>Test Feed</title>
+    <link>https://example.com</link>
+    <syn:updatePeriod>daily</syn:updatePeriod>
+  </channel>
+</rdf:RDF>`;
+
+ const feed = parse(xml);
+ assert.ok(feed.feed.syndication);
+ assert.strictEqual(feed.feed.syndication.updatePeriod, 'daily');
+ });
+
+ it('should parse syndication updateFrequency', () => {
+ const xml = `
+
+
+ Test Feed
+ https://example.com
+ 2
+
+ `;
+
+ const feed = parse(xml);
+ assert.ok(feed.feed.syndication);
+ assert.strictEqual(feed.feed.syndication.updateFrequency, 2);
+ });
+
+ it('should parse complete syndication metadata', () => {
+ const xml = `
+
+
+ Test Feed
+ https://example.com
+ hourly
+ 1
+ 2024-01-01T00:00:00Z
+
+ `;
+
+ const feed = parse(xml);
+ const syn = feed.feed.syndication;
+ assert.ok(syn);
+ assert.strictEqual(syn.updatePeriod, 'hourly');
+ assert.strictEqual(syn.updateFrequency, 1);
+ assert.strictEqual(syn.updateBase, '2024-01-01T00:00:00Z');
+ });
+
+ it('should return undefined when syndication data is missing', () => {
+ const xml = `
+
+
+ Test Feed
+ https://example.com
+
+ `;
+
+ const feed = parse(xml);
+ assert.strictEqual(feed.feed.syndication, undefined);
+ });
+
+ it('should parse Dublin Core fields', () => {
+ const xml = `
+
+
+ Test Feed
+ https://example.com
+ John Doe
+ ACME Corp
+ Copyright 2024
+
+ `;
+
+ const feed = parse(xml);
+ assert.strictEqual(feed.feed.dcCreator, 'John Doe');
+ assert.strictEqual(feed.feed.dcPublisher, 'ACME Corp');
+ assert.strictEqual(feed.feed.dcRights, 'Copyright 2024');
+ });
+
+ it('should handle invalid updatePeriod gracefully (bozo pattern)', () => {
+ const xml = `
+
+
+ Test
+ https://example.com
+ invalid
+
+ `;
+
+ const feed = parse(xml);
+ // Should not crash, syndication should be undefined or updatePeriod undefined
+ assert.ok(!feed.feed.syndication || !feed.feed.syndication.updatePeriod);
+ });
+
+ it('should handle case-insensitive updatePeriod', () => {
+ const xml = `
+
+
+ Test
+ https://example.com
+ HOURLY
+
+ `;
+
+ const feed = parse(xml);
+ assert.ok(feed.feed.syndication);
+ assert.strictEqual(feed.feed.syndication.updatePeriod, 'hourly');
+ });
+
+ it('should parse feed with partial syndication fields', () => {
+ const xml = `
+
+
+ Test
+ https://example.com
+ weekly
+
+ `;
+
+ const feed = parse(xml);
+ assert.ok(feed.feed.syndication);
+ assert.strictEqual(feed.feed.syndication.updatePeriod, 'weekly');
+ assert.strictEqual(feed.feed.syndication.updateFrequency, undefined);
+ assert.strictEqual(feed.feed.syndication.updateBase, undefined);
+ });
+});
diff --git a/crates/feedparser-rs-node/index.d.ts b/crates/feedparser-rs-node/index.d.ts
index c2c0687..a07033b 100644
--- a/crates/feedparser-rs-node/index.d.ts
+++ b/crates/feedparser-rs-node/index.d.ts
@@ -143,6 +143,14 @@ export interface FeedMeta {
ttl?: number
/** License URL (Creative Commons, etc.) */
license?: string
+ /** Syndication module metadata (RSS 1.0) */
+ syndication?: SyndicationMeta
+ /** Dublin Core creator (author fallback) */
+ dcCreator?: string
+ /** Dublin Core publisher */
+ dcPublisher?: string
+ /** Dublin Core rights (copyright) */
+ dcRights?: string
}
/** Generator metadata */
@@ -369,6 +377,16 @@ export interface Source {
id?: string
}
+/** Syndication module metadata (RSS 1.0) */
+export interface SyndicationMeta {
+ /** Update period (hourly, daily, weekly, monthly, yearly) */
+ updatePeriod?: string
+ /** Number of times updated per period */
+ updateFrequency?: number
+ /** Base date for update schedule (ISO 8601) */
+ updateBase?: string
+}
+
/** Tag/category */
export interface Tag {
/** Tag term/label */
diff --git a/crates/feedparser-rs-node/src/lib.rs b/crates/feedparser-rs-node/src/lib.rs
index 02060db..b11f692 100644
--- a/crates/feedparser-rs-node/src/lib.rs
+++ b/crates/feedparser-rs-node/src/lib.rs
@@ -9,7 +9,8 @@ use feedparser_rs::{
FeedMeta as CoreFeedMeta, Generator as CoreGenerator, Image as CoreImage, Link as CoreLink,
ParsedFeed as CoreParsedFeed, ParserLimits, Person as CorePerson,
PodcastPerson as CorePodcastPerson, PodcastTranscript as CorePodcastTranscript,
- Source as CoreSource, Tag as CoreTag, TextConstruct as CoreTextConstruct, TextType,
+ Source as CoreSource, SyndicationMeta as CoreSyndicationMeta, Tag as CoreTag,
+ TextConstruct as CoreTextConstruct, TextType,
};
/// Default maximum feed size (100 MB) - prevents DoS attacks
@@ -264,6 +265,27 @@ impl From<CoreParsedFeed> for ParsedFeed {
}
}
+/// Syndication module metadata (RSS 1.0)
+#[napi(object)]
+pub struct SyndicationMeta {
+ /// Update period (hourly, daily, weekly, monthly, yearly)
+ pub update_period: Option<String>,
+ /// Number of times updated per period
+ pub update_frequency: Option<u32>,
+ /// Base date for update schedule (ISO 8601)
+ pub update_base: Option<String>,
+}
+
+impl From<CoreSyndicationMeta> for SyndicationMeta {
+ fn from(core: CoreSyndicationMeta) -> Self {
+ Self {
+ update_period: core.update_period.map(|p| p.as_str().to_string()),
+ update_frequency: core.update_frequency,
+ update_base: core.update_base,
+ }
+ }
+}
+
/// Feed metadata
#[napi(object)]
pub struct FeedMeta {
@@ -319,6 +341,14 @@ pub struct FeedMeta {
pub ttl: Option<u32>,
/// License URL (Creative Commons, etc.)
pub license: Option<String>,
+ /// Syndication module metadata (RSS 1.0)
+ pub syndication: Option<SyndicationMeta>,
+ /// Dublin Core creator (author fallback)
+ pub dc_creator: Option<String>,
+ /// Dublin Core publisher
+ pub dc_publisher: Option<String>,
+ /// Dublin Core rights (copyright)
+ pub dc_rights: Option<String>,
}
impl From<CoreFeedMeta> for FeedMeta {
@@ -350,6 +380,10 @@ impl From for FeedMeta {
id: core.id,
ttl: core.ttl,
license: core.license,
+ syndication: core.syndication.map(SyndicationMeta::from),
+ dc_creator: core.dc_creator,
+ dc_publisher: core.dc_publisher,
+ dc_rights: core.dc_rights,
}
}
}
@@ -411,77 +445,39 @@ pub struct Entry {
impl From<CoreEntry> for Entry {
fn from(core: CoreEntry) -> Self {
- // Pre-allocate Vec capacity to avoid reallocations
- let links_cap = core.links.len();
- let content_cap = core.content.len();
- let authors_cap = core.authors.len();
- let contributors_cap = core.contributors.len();
- let tags_cap = core.tags.len();
- let enclosures_cap = core.enclosures.len();
- let transcripts_cap = core.podcast_transcripts.len();
- let persons_cap = core.podcast_persons.len();
-
Self {
id: core.id,
title: core.title,
title_detail: core.title_detail.map(TextConstruct::from),
link: core.link,
- links: {
- let mut v = Vec::with_capacity(links_cap);
- v.extend(core.links.into_iter().map(Link::from));
- v
- },
+ links: core.links.into_iter().map(Link::from).collect(),
summary: core.summary,
summary_detail: core.summary_detail.map(TextConstruct::from),
- content: {
- let mut v = Vec::with_capacity(content_cap);
- v.extend(core.content.into_iter().map(Content::from));
- v
- },
+ content: core.content.into_iter().map(Content::from).collect(),
published: core.published.map(|dt| dt.timestamp_millis()),
updated: core.updated.map(|dt| dt.timestamp_millis()),
created: core.created.map(|dt| dt.timestamp_millis()),
expired: core.expired.map(|dt| dt.timestamp_millis()),
author: core.author,
author_detail: core.author_detail.map(Person::from),
- authors: {
- let mut v = Vec::with_capacity(authors_cap);
- v.extend(core.authors.into_iter().map(Person::from));
- v
- },
- contributors: {
- let mut v = Vec::with_capacity(contributors_cap);
- v.extend(core.contributors.into_iter().map(Person::from));
- v
- },
+ authors: core.authors.into_iter().map(Person::from).collect(),
+ contributors: core.contributors.into_iter().map(Person::from).collect(),
publisher: core.publisher,
publisher_detail: core.publisher_detail.map(Person::from),
- tags: {
- let mut v = Vec::with_capacity(tags_cap);
- v.extend(core.tags.into_iter().map(Tag::from));
- v
- },
- enclosures: {
- let mut v = Vec::with_capacity(enclosures_cap);
- v.extend(core.enclosures.into_iter().map(Enclosure::from));
- v
- },
+ tags: core.tags.into_iter().map(Tag::from).collect(),
+ enclosures: core.enclosures.into_iter().map(Enclosure::from).collect(),
comments: core.comments,
source: core.source.map(Source::from),
- podcast_transcripts: {
- let mut v = Vec::with_capacity(transcripts_cap);
- v.extend(
- core.podcast_transcripts
- .into_iter()
- .map(PodcastTranscript::from),
- );
- v
- },
- podcast_persons: {
- let mut v = Vec::with_capacity(persons_cap);
- v.extend(core.podcast_persons.into_iter().map(PodcastPerson::from));
- v
- },
+ podcast_transcripts: core
+ .podcast_transcripts
+ .into_iter()
+ .map(PodcastTranscript::from)
+ .collect(),
+ podcast_persons: core
+ .podcast_persons
+ .into_iter()
+ .map(PodcastPerson::from)
+ .collect(),
license: core.license,
}
}
diff --git a/crates/feedparser-rs-py/src/types/feed_meta.rs b/crates/feedparser-rs-py/src/types/feed_meta.rs
index 5b8c3da..9c241d8 100644
--- a/crates/feedparser-rs-py/src/types/feed_meta.rs
+++ b/crates/feedparser-rs-py/src/types/feed_meta.rs
@@ -4,6 +4,7 @@ use pyo3::prelude::*;
use super::common::{PyGenerator, PyImage, PyLink, PyPerson, PyTag, PyTextConstruct};
use super::datetime::optional_datetime_to_struct_time;
use super::podcast::{PyItunesFeedMeta, PyPodcastMeta};
+use super::syndication::PySyndicationMeta;
#[pyclass(name = "FeedMeta", module = "feedparser_rs")]
#[derive(Clone)]
@@ -212,6 +213,29 @@ impl PyFeedMeta {
self.inner.license.as_deref()
}
+ #[getter]
+    fn syndication(&self) -> Option<PySyndicationMeta> {
+ self.inner
+ .syndication
+ .as_ref()
+ .map(|s| PySyndicationMeta::from_core(s.clone()))
+ }
+
+ #[getter]
+ fn dc_creator(&self) -> Option<&str> {
+ self.inner.dc_creator.as_deref()
+ }
+
+ #[getter]
+ fn dc_publisher(&self) -> Option<&str> {
+ self.inner.dc_publisher.as_deref()
+ }
+
+ #[getter]
+ fn dc_rights(&self) -> Option<&str> {
+ self.inner.dc_rights.as_deref()
+ }
+
fn __repr__(&self) -> String {
format!(
"FeedMeta(title='{}', link='{}')",
diff --git a/crates/feedparser-rs-py/src/types/mod.rs b/crates/feedparser-rs-py/src/types/mod.rs
index df99f58..cafb245 100644
--- a/crates/feedparser-rs-py/src/types/mod.rs
+++ b/crates/feedparser-rs-py/src/types/mod.rs
@@ -4,5 +4,6 @@ pub mod entry;
pub mod feed_meta;
pub mod parsed_feed;
pub mod podcast;
+pub mod syndication;
pub use parsed_feed::PyParsedFeed;
diff --git a/crates/feedparser-rs-py/src/types/syndication.rs b/crates/feedparser-rs-py/src/types/syndication.rs
new file mode 100644
index 0000000..684741c
--- /dev/null
+++ b/crates/feedparser-rs-py/src/types/syndication.rs
@@ -0,0 +1,45 @@
+use feedparser_rs::SyndicationMeta as CoreSyndicationMeta;
+use pyo3::prelude::*;
+
+/// Syndication module metadata
+#[pyclass(name = "SyndicationMeta", module = "feedparser_rs")]
+#[derive(Clone)]
+pub struct PySyndicationMeta {
+ inner: CoreSyndicationMeta,
+}
+
+impl PySyndicationMeta {
+ pub fn from_core(core: CoreSyndicationMeta) -> Self {
+ Self { inner: core }
+ }
+}
+
+#[pymethods]
+impl PySyndicationMeta {
+ /// Update period (hourly, daily, weekly, monthly, yearly)
+ #[getter]
+ fn update_period(&self) -> Option<&str> {
+ self.inner.update_period.as_ref().map(|p| p.as_str())
+ }
+
+ /// Number of times updated per period
+ #[getter]
+    fn update_frequency(&self) -> Option<u32> {
+ self.inner.update_frequency
+ }
+
+ /// Base date for update schedule (ISO 8601)
+ #[getter]
+ fn update_base(&self) -> Option<&str> {
+ self.inner.update_base.as_deref()
+ }
+
+ fn __repr__(&self) -> String {
+ format!(
+ "SyndicationMeta(update_period={:?}, update_frequency={:?}, update_base={:?})",
+ self.inner.update_period.as_ref().map(|p| p.as_str()),
+ self.inner.update_frequency,
+ self.inner.update_base.as_deref()
+ )
+ }
+}
diff --git a/crates/feedparser-rs-py/tests/test_syndication.py b/crates/feedparser-rs-py/tests/test_syndication.py
new file mode 100644
index 0000000..fc7ba9d
--- /dev/null
+++ b/crates/feedparser-rs-py/tests/test_syndication.py
@@ -0,0 +1,166 @@
+import feedparser_rs
+
+
+def test_syndication_update_period():
+ """Test syn:updatePeriod parsing"""
+ feed_xml = b"""
+
+
+ Test Feed
+ https://example.com
+ daily
+
+ """
+
+ d = feedparser_rs.parse(feed_xml)
+ assert d.feed.syndication is not None
+ assert d.feed.syndication.update_period == "daily"
+
+
+def test_syndication_update_frequency():
+ """Test syn:updateFrequency parsing"""
+ feed_xml = b"""
+
+
+ Test Feed
+ https://example.com
+ 2
+
+ """
+
+ d = feedparser_rs.parse(feed_xml)
+ assert d.feed.syndication is not None
+ assert d.feed.syndication.update_frequency == 2
+
+
+def test_syndication_update_base():
+ """Test syn:updateBase parsing"""
+ feed_xml = b"""
+
+
+ Test Feed
+ https://example.com
+ 2024-12-18T00:00:00Z
+
+ """
+
+ d = feedparser_rs.parse(feed_xml)
+ assert d.feed.syndication is not None
+ assert d.feed.syndication.update_base == "2024-12-18T00:00:00Z"
+
+
+def test_syndication_complete():
+ """Test all syndication fields together"""
+ feed_xml = b"""
+
+
+ Test Feed
+ https://example.com
+ hourly
+ 1
+ 2024-01-01T00:00:00Z
+
+ """
+
+ d = feedparser_rs.parse(feed_xml)
+ syn = d.feed.syndication
+ assert syn is not None
+ assert syn.update_period == "hourly"
+ assert syn.update_frequency == 1
+ assert syn.update_base == "2024-01-01T00:00:00Z"
+
+
+def test_syndication_missing():
+ """Test feed without syndication data"""
+ feed_xml = b"""
+
+
+ Test Feed
+ https://example.com
+
+ """
+
+ d = feedparser_rs.parse(feed_xml)
+ assert d.feed.syndication is None
+
+
+def test_dublin_core_fields():
+ """Test Dublin Core fields"""
+ feed_xml = b"""
+
+
+ Test Feed
+ https://example.com
+ John Doe
+ ACME Corp
+ Copyright 2024
+
+ """
+
+ d = feedparser_rs.parse(feed_xml)
+ assert d.feed.dc_creator == "John Doe"
+ assert d.feed.dc_publisher == "ACME Corp"
+ assert d.feed.dc_rights == "Copyright 2024"
+
+
+def test_invalid_update_period():
+ """Test invalid updatePeriod is handled gracefully (bozo pattern)"""
+ feed_xml = b"""
+
+
+ Test
+ https://example.com
+ invalid
+
+ """
+ d = feedparser_rs.parse(feed_xml)
+ # Should not crash, syndication should be None or update_period None
+ assert d.feed.syndication is None or d.feed.syndication.update_period is None
+
+
+def test_case_insensitive_update_period():
+ """Test updatePeriod is case-insensitive"""
+ feed_xml = b"""
+
+
+ Test
+ https://example.com
+ HOURLY
+
+ """
+ d = feedparser_rs.parse(feed_xml)
+ assert d.feed.syndication is not None
+ assert d.feed.syndication.update_period == "hourly"
+
+
+def test_partial_syndication():
+ """Test feed with only some syndication fields"""
+ feed_xml = b"""
+
+
+ Test
+ https://example.com
+ weekly
+
+ """
+ d = feedparser_rs.parse(feed_xml)
+ assert d.feed.syndication is not None
+ assert d.feed.syndication.update_period == "weekly"
+ assert d.feed.syndication.update_frequency is None
+ assert d.feed.syndication.update_base is None