Skip to content

Commit ea0cd24

Browse files
authored
feat(rss10): add syndication namespace with Python and Node.js bindings (#28)
* feat(rss10): add syndication namespace and comprehensive test coverage - Add Syndication Module namespace support (syn:updatePeriod, syn:updateFrequency, syn:updateBase) - Add content:encoded namespace support to RSS 1.0 parser - Create comprehensive RSS 1.0 integration tests (12+ test cases) - Add is_syn_tag() helper for namespace detection - All 510 tests passing, clippy clean Phase 3: RSS 1.0 validation and enhancement * feat(bindings): add syndication and Dublin Core support to Python and Node.js bindings Add complete support for syndication module (RSS 1.0) and Dublin Core metadata fields in both Python (PyO3) and Node.js (napi-rs) bindings. Changes: - Export SyndicationMeta and UpdatePeriod from core library - Python bindings: - New PySyndicationMeta wrapper class with update_period, update_frequency, update_base getters - Added syndication, dc_creator, dc_publisher, dc_rights getters to PyFeedMeta - Added comprehensive test suite for syndication and Dublin Core fields - Node.js bindings: - New SyndicationMeta struct with automatic camelCase conversion (updatePeriod, etc.) - Added syndication, dcCreator, dcPublisher, dcRights fields to FeedMeta - Added test suite for syndication and Dublin Core fields - All tests passing - Clippy clean with no warnings This completes Phase 3 syndication bindings implementation. * perf(bindings): optimize string allocations and add edge case tests - Python: Return &str instead of String for update_period() to avoid allocation - Python: Optimize __repr__ to access fields directly without allocations - Node.js: Simplify Entry conversion using collect() for all Vec fields - Add tests for invalid input handling (bozo pattern) - Add tests for case-insensitive updatePeriod parsing - Add tests for partial syndication metadata All tests passing, clippy clean.
1 parent b8dc6ad commit ea0cd24

File tree

14 files changed

+1280
-58
lines changed

14 files changed

+1280
-58
lines changed

crates/feedparser-rs-core/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ pub use types::{
7272
TextType, parse_duration, parse_explicit,
7373
};
7474

75+
pub use namespace::syndication::{SyndicationMeta, UpdatePeriod};
76+
7577
#[cfg(feature = "http")]
7678
pub use http::{FeedHttpClient, FeedHttpResponse};
7779

crates/feedparser-rs-core/src/namespace/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ pub mod dublin_core;
3535
pub mod georss;
3636
/// Media RSS specification
3737
pub mod media_rss;
38+
/// Syndication Module for RSS 1.0
39+
pub mod syndication;
3840

3941
/// Common namespace URIs used in feeds
4042
pub mod namespaces {
@@ -56,6 +58,9 @@ pub mod namespaces {
5658
/// RSS 1.0
5759
pub const RSS_10: &str = "http://purl.org/rss/1.0/";
5860

61+
/// Syndication Module for RSS 1.0
62+
pub const SYNDICATION: &str = "http://purl.org/rss/1.0/modules/syndication/";
63+
5964
/// iTunes Podcast
6065
pub const ITUNES: &str = "http://www.itunes.com/dtds/podcast-1.0.dtd";
6166

@@ -88,6 +93,7 @@ pub fn get_namespace_uri(prefix: &str) -> Option<&'static str> {
8893
"media" => Some(namespaces::MEDIA),
8994
"atom" => Some(namespaces::ATOM),
9095
"rdf" => Some(namespaces::RDF),
96+
"syn" | "syndication" => Some(namespaces::SYNDICATION),
9197
"itunes" => Some(namespaces::ITUNES),
9298
"podcast" => Some(namespaces::PODCAST),
9399
"georss" => Some(namespaces::GEORSS),
@@ -113,6 +119,7 @@ pub fn get_namespace_prefix(uri: &str) -> Option<&'static str> {
113119
namespaces::MEDIA => Some("media"),
114120
namespaces::ATOM => Some("atom"),
115121
namespaces::RDF => Some("rdf"),
122+
namespaces::SYNDICATION => Some("syn"),
116123
namespaces::ITUNES => Some("itunes"),
117124
namespaces::PODCAST => Some("podcast"),
118125
namespaces::GEORSS => Some("georss"),
crates/feedparser-rs-core/src/namespace/syndication.rs

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
/// Syndication Module for RSS 1.0
2+
///
3+
/// Namespace: <http://purl.org/rss/1.0/modules/syndication/>
4+
/// Prefix: syn
5+
///
6+
/// This module provides parsing support for the Syndication namespace,
7+
/// used in RSS 1.0 feeds to indicate update schedules and frequencies.
8+
///
9+
/// Elements:
10+
/// - `syn:updatePeriod` → Update period (hourly, daily, weekly, monthly, yearly)
11+
/// - `syn:updateFrequency` → Number of times per period
12+
/// - `syn:updateBase` → Base date for update schedule (ISO 8601)
13+
use crate::types::FeedMeta;
14+
15+
/// Syndication namespace URI
16+
pub const SYNDICATION_NAMESPACE: &str = "http://purl.org/rss/1.0/modules/syndication/";
17+
18+
/// Valid `syn:updatePeriod` values.
///
/// The set is closed: text that does not name one of these five periods is
/// rejected by [`UpdatePeriod::parse`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UpdatePeriod {
    /// Update hourly
    Hourly,
    /// Update daily
    Daily,
    /// Update weekly
    Weekly,
    /// Update monthly
    Monthly,
    /// Update yearly
    Yearly,
}

impl UpdatePeriod {
    /// Parse an update period from element text.
    ///
    /// Matching is ASCII case-insensitive and ignores leading/trailing
    /// whitespace, so `"Daily"`, `"DAILY"` and `"  daily\n"` all yield
    /// [`UpdatePeriod::Daily`]. No allocation is performed.
    ///
    /// Returns `None` if the trimmed string doesn't match any valid period.
    #[must_use]
    pub fn parse(s: &str) -> Option<Self> {
        let candidate = s.trim();
        // Compare against the canonical spellings from `as_str` so the two
        // directions of the conversion cannot drift apart.
        let periods = [
            Self::Hourly,
            Self::Daily,
            Self::Weekly,
            Self::Monthly,
            Self::Yearly,
        ];
        periods
            .iter()
            .copied()
            .find(|period| candidate.eq_ignore_ascii_case(period.as_str()))
    }

    /// Canonical lowercase name of the period, as written in feeds.
    #[must_use]
    pub const fn as_str(&self) -> &'static str {
        match self {
            Self::Hourly => "hourly",
            Self::Daily => "daily",
            Self::Weekly => "weekly",
            Self::Monthly => "monthly",
            Self::Yearly => "yearly",
        }
    }
}
61+
62+
/// Syndication metadata
63+
#[derive(Debug, Clone, Default)]
64+
pub struct SyndicationMeta {
65+
/// Update period (hourly, daily, weekly, monthly, yearly)
66+
pub update_period: Option<UpdatePeriod>,
67+
/// Number of times updated per period
68+
pub update_frequency: Option<u32>,
69+
/// Base date for update schedule (ISO 8601)
70+
pub update_base: Option<String>,
71+
}
72+
73+
/// Handle Syndication namespace element at feed level
74+
///
75+
/// # Arguments
76+
///
77+
/// * `element` - Local name of the element (without namespace prefix)
78+
/// * `text` - Text content of the element
79+
/// * `feed` - Feed metadata to update
80+
pub fn handle_feed_element(element: &str, text: &str, feed: &mut FeedMeta) {
81+
match element {
82+
"updatePeriod" => {
83+
if let Some(period) = UpdatePeriod::parse(text) {
84+
if feed.syndication.is_none() {
85+
feed.syndication = Some(SyndicationMeta::default());
86+
}
87+
if let Some(syn) = &mut feed.syndication {
88+
syn.update_period = Some(period);
89+
}
90+
}
91+
}
92+
"updateFrequency" => {
93+
if let Ok(freq) = text.parse::<u32>() {
94+
if feed.syndication.is_none() {
95+
feed.syndication = Some(SyndicationMeta::default());
96+
}
97+
if let Some(syn) = &mut feed.syndication {
98+
syn.update_frequency = Some(freq);
99+
}
100+
}
101+
}
102+
"updateBase" => {
103+
if feed.syndication.is_none() {
104+
feed.syndication = Some(SyndicationMeta::default());
105+
}
106+
if let Some(syn) = &mut feed.syndication {
107+
syn.update_base = Some(text.to_string());
108+
}
109+
}
110+
_ => {
111+
// Ignore unknown syndication elements
112+
}
113+
}
114+
}
115+
116+
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs one element through the handler, starting from an empty feed.
    fn apply(element: &str, text: &str) -> FeedMeta {
        let mut feed = FeedMeta::default();
        handle_feed_element(element, text, &mut feed);
        feed
    }

    #[test]
    fn test_update_period_parse() {
        let cases = [
            ("hourly", Some(UpdatePeriod::Hourly)),
            ("daily", Some(UpdatePeriod::Daily)),
            ("weekly", Some(UpdatePeriod::Weekly)),
            ("monthly", Some(UpdatePeriod::Monthly)),
            ("yearly", Some(UpdatePeriod::Yearly)),
            ("invalid", None),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(UpdatePeriod::parse(input), expected, "input: {:?}", input);
        }
    }

    #[test]
    fn test_update_period_case_insensitive() {
        let cases = [
            ("HOURLY", UpdatePeriod::Hourly),
            ("Daily", UpdatePeriod::Daily),
            ("WeeKLY", UpdatePeriod::Weekly),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(UpdatePeriod::parse(input), Some(expected), "input: {:?}", input);
        }
    }

    #[test]
    fn test_update_period_as_str() {
        let cases = [
            (UpdatePeriod::Hourly, "hourly"),
            (UpdatePeriod::Daily, "daily"),
            (UpdatePeriod::Weekly, "weekly"),
            (UpdatePeriod::Monthly, "monthly"),
            (UpdatePeriod::Yearly, "yearly"),
        ];
        for &(period, expected) in cases.iter() {
            assert_eq!(period.as_str(), expected);
        }
    }

    #[test]
    fn test_handle_update_period() {
        let feed = apply("updatePeriod", "daily");

        let syn = feed
            .syndication
            .as_ref()
            .expect("updatePeriod should create syndication metadata");
        assert_eq!(syn.update_period, Some(UpdatePeriod::Daily));
    }

    #[test]
    fn test_handle_update_frequency() {
        let feed = apply("updateFrequency", "2");

        let syn = feed
            .syndication
            .as_ref()
            .expect("updateFrequency should create syndication metadata");
        assert_eq!(syn.update_frequency, Some(2));
    }

    #[test]
    fn test_handle_update_base() {
        let feed = apply("updateBase", "2024-12-18T00:00:00Z");

        let syn = feed
            .syndication
            .as_ref()
            .expect("updateBase should create syndication metadata");
        assert_eq!(syn.update_base.as_deref(), Some("2024-12-18T00:00:00Z"));
    }

    #[test]
    fn test_handle_multiple_elements() {
        let mut feed = FeedMeta::default();
        let elements = [
            ("updatePeriod", "hourly"),
            ("updateFrequency", "1"),
            ("updateBase", "2024-01-01T00:00:00Z"),
        ];
        for &(element, text) in elements.iter() {
            handle_feed_element(element, text, &mut feed);
        }

        // All three elements must land in the same SyndicationMeta.
        let syn = feed.syndication.as_ref().unwrap();
        assert_eq!(syn.update_period, Some(UpdatePeriod::Hourly));
        assert_eq!(syn.update_frequency, Some(1));
        assert_eq!(syn.update_base.as_deref(), Some("2024-01-01T00:00:00Z"));
    }

    #[test]
    fn test_handle_invalid_frequency() {
        // Should not create syndication metadata for invalid input
        assert!(apply("updateFrequency", "not-a-number").syndication.is_none());
    }

    #[test]
    fn test_handle_unknown_element() {
        // Elements outside the three known names leave the feed untouched
        assert!(apply("unknown", "value").syndication.is_none());
    }
}

crates/feedparser-rs-core/src/parser/common.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,20 @@ pub fn is_content_tag(name: &[u8]) -> Option<&str> {
209209
extract_ns_local_name(name, b"content:")
210210
}
211211

212+
/// Check if element is a Syndication namespaced tag
///
/// Thin wrapper: hands `name` to `extract_ns_local_name` together with the
/// literal prefix `syn:` and returns that helper's result unchanged. Per the
/// examples below, a `syn:`-prefixed name yields `Some(local_name)` and any
/// other name yields `None`.
213+
///
214+
/// # Examples
215+
///
216+
/// ```ignore
217+
/// assert_eq!(is_syn_tag(b"syn:updatePeriod"), Some("updatePeriod"));
218+
/// assert_eq!(is_syn_tag(b"syn:updateFrequency"), Some("updateFrequency"));
219+
/// assert_eq!(is_syn_tag(b"dc:creator"), None);
220+
/// ```
221+
#[inline]
222+
pub fn is_syn_tag(name: &[u8]) -> Option<&str> {
223+
// Matches on the literal `syn:` prefix bytes, not on the namespace URI the
// prefix is bound to. NOTE(review): a feed binding the Syndication
// namespace to a different prefix would presumably not match — confirm
// that this is intended.
extract_ns_local_name(name, b"syn:")
224+
}
225+
212226
/// Check if element is a Media RSS namespaced tag
213227
///
214228
/// # Examples

crates/feedparser-rs-core/src/parser/rss10.rs

Lines changed: 71 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,14 @@
1010
use crate::{
1111
ParserLimits,
1212
error::{FeedError, Result},
13-
namespace::dublin_core,
13+
namespace::{content, dublin_core, syndication},
1414
types::{Entry, FeedVersion, Image, ParsedFeed, TextConstruct, TextType},
1515
};
1616
use quick_xml::{Reader, events::Event};
1717

1818
use super::common::{
19-
EVENT_BUFFER_CAPACITY, LimitedCollectionExt, check_depth, init_feed, is_dc_tag, read_text,
20-
skip_element,
19+
EVENT_BUFFER_CAPACITY, LimitedCollectionExt, check_depth, init_feed, is_content_tag, is_dc_tag,
20+
is_syn_tag, read_text, skip_element,
2121
};
2222

2323
/// Parse RSS 1.0 (RDF) feed from raw bytes
@@ -223,6 +223,10 @@ fn parse_channel(
223223
let dc_elem = dc_element.to_string();
224224
let text = read_text(reader, &mut buf, limits)?;
225225
dublin_core::handle_feed_element(&dc_elem, &text, &mut feed.feed);
226+
} else if let Some(syn_element) = is_syn_tag(full_name.as_ref()) {
227+
let syn_elem = syn_element.to_string();
228+
let text = read_text(reader, &mut buf, limits)?;
229+
syndication::handle_feed_element(&syn_elem, &text, &mut feed.feed);
226230
} else {
227231
skip_element(reader, &mut buf, limits, *depth)?;
228232
}
@@ -288,6 +292,10 @@ fn parse_item(
288292
let text = read_text(reader, buf, limits)?;
289293
// dublin_core::handle_entry_element already handles dc:date -> published
290294
dublin_core::handle_entry_element(&dc_elem, &text, &mut entry);
295+
} else if let Some(content_element) = is_content_tag(full_name.as_ref()) {
296+
let content_elem = content_element.to_string();
297+
let text = read_text(reader, buf, limits)?;
298+
content::handle_entry_element(&content_elem, &text, &mut entry);
291299
} else {
292300
skip_element(reader, buf, limits, *depth)?;
293301
}
@@ -568,4 +576,64 @@ mod tests {
568576
assert!(is_dc_tag(b"link").is_none());
569577
assert!(is_dc_tag(b"atom:title").is_none());
570578
}
579+
580+
/// Parses an RSS 1.0 document whose single item carries both a plain
/// `<description>` and a CDATA-wrapped `<content:encoded>`, and checks that
/// the former ends up in `entry.summary` while the latter ends up in
/// `entry.content`.
#[test]
581+
fn test_parse_rss10_with_content_encoded() {
582+
let xml = br#"<?xml version="1.0"?>
583+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
584+
xmlns="http://purl.org/rss/1.0/"
585+
xmlns:content="http://purl.org/rss/1.0/modules/content/">
586+
<channel rdf:about="http://example.com/">
587+
<title>Test</title>
588+
<link>http://example.com</link>
589+
<description>Test</description>
590+
</channel>
591+
<item rdf:about="http://example.com/1">
592+
<title>Item 1</title>
593+
<link>http://example.com/1</link>
594+
<description>Brief summary</description>
595+
<content:encoded><![CDATA[<p>Full <strong>HTML</strong> content</p>]]></content:encoded>
596+
</item>
597+
</rdf:RDF>"#;
598+
599+
let feed = parse_rss10(xml).unwrap();
600+
assert_eq!(feed.entries.len(), 1);
601+
602+
let entry = &feed.entries[0];
// <description> must still populate the summary when content:encoded is present
603+
assert_eq!(entry.summary.as_deref(), Some("Brief summary"));
604+
605+
// Verify content:encoded is parsed
606+
assert!(!entry.content.is_empty());
607+
assert_eq!(entry.content[0].content_type.as_deref(), Some("text/html"));
// Only substrings are checked, so the test does not pin the exact markup kept in `value`
608+
assert!(entry.content[0].value.contains("Full"));
609+
assert!(entry.content[0].value.contains("HTML"));
610+
}
611+
612+
/// Parses an RSS 1.0 document whose `<channel>` carries all three
/// Syndication elements and checks that each one is reflected in
/// `feed.feed.syndication`.
#[test]
613+
fn test_parse_rss10_with_syndication() {
614+
let xml = br#"<?xml version="1.0"?>
615+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
616+
xmlns="http://purl.org/rss/1.0/"
617+
xmlns:syn="http://purl.org/rss/1.0/modules/syndication/">
618+
<channel rdf:about="http://example.com/">
619+
<title>Test</title>
620+
<link>http://example.com</link>
621+
<description>Test</description>
622+
<syn:updatePeriod>hourly</syn:updatePeriod>
623+
<syn:updateFrequency>2</syn:updateFrequency>
624+
<syn:updateBase>2024-01-01T00:00:00Z</syn:updateBase>
625+
</channel>
626+
</rdf:RDF>"#;
627+
628+
let feed = parse_rss10(xml).unwrap();
// The syn:* elements live on the channel, so the result is feed-level metadata
629+
assert!(feed.feed.syndication.is_some());
630+
631+
let syn = feed.feed.syndication.as_ref().unwrap();
632+
assert_eq!(
633+
syn.update_period,
634+
Some(crate::namespace::syndication::UpdatePeriod::Hourly)
635+
);
636+
assert_eq!(syn.update_frequency, Some(2));
// updateBase is compared as the exact string from the fixture
637+
assert_eq!(syn.update_base.as_deref(), Some("2024-01-01T00:00:00Z"));
638+
}
571639
}

crates/feedparser-rs-core/src/types/feed.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use super::{
55
podcast::{ItunesFeedMeta, PodcastMeta},
66
version::FeedVersion,
77
};
8+
use crate::namespace::syndication::SyndicationMeta;
89
use crate::{ParserLimits, error::Result};
910
use chrono::{DateTime, Utc};
1011
use quick_xml::Reader;
@@ -75,6 +76,8 @@ pub struct FeedMeta {
7576
pub dc_rights: Option<String>,
7677
/// License URL (Creative Commons, etc.)
7778
pub license: Option<String>,
79+
/// Syndication module metadata (RSS 1.0)
80+
pub syndication: Option<SyndicationMeta>,
7881
}
7982

8083
/// Parsed feed result

0 commit comments

Comments
 (0)