Skip to content

Commit 9df92a3

Browse files
committed
feat(rss10): add syndication namespace and comprehensive test coverage
- Add Syndication Module namespace support (syn:updatePeriod, syn:updateFrequency, syn:updateBase)
- Add content:encoded namespace support to RSS 1.0 parser
- Create comprehensive RSS 1.0 integration tests (12+ test cases)
- Add is_syn_tag() helper for namespace detection
- All 510 tests passing, clippy clean

Phase 3: RSS 1.0 validation and enhancement
1 parent 066ab85 commit 9df92a3

File tree

13 files changed

+1142
-20
lines changed

13 files changed

+1142
-20
lines changed

crates/feedparser-rs-core/src/namespace/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ pub mod dublin_core;
3535
pub mod georss;
3636
/// Media RSS specification
3737
pub mod media_rss;
38+
/// Syndication Module for RSS 1.0
39+
pub mod syndication;
3840

3941
/// Common namespace URIs used in feeds
4042
pub mod namespaces {
@@ -56,6 +58,9 @@ pub mod namespaces {
5658
/// RSS 1.0
5759
pub const RSS_10: &str = "http://purl.org/rss/1.0/";
5860

61+
/// Syndication Module for RSS 1.0
62+
pub const SYNDICATION: &str = "http://purl.org/rss/1.0/modules/syndication/";
63+
5964
/// iTunes Podcast
6065
pub const ITUNES: &str = "http://www.itunes.com/dtds/podcast-1.0.dtd";
6166

@@ -88,6 +93,7 @@ pub fn get_namespace_uri(prefix: &str) -> Option<&'static str> {
8893
"media" => Some(namespaces::MEDIA),
8994
"atom" => Some(namespaces::ATOM),
9095
"rdf" => Some(namespaces::RDF),
96+
"syn" | "syndication" => Some(namespaces::SYNDICATION),
9197
"itunes" => Some(namespaces::ITUNES),
9298
"podcast" => Some(namespaces::PODCAST),
9399
"georss" => Some(namespaces::GEORSS),
@@ -113,6 +119,7 @@ pub fn get_namespace_prefix(uri: &str) -> Option<&'static str> {
113119
namespaces::MEDIA => Some("media"),
114120
namespaces::ATOM => Some("atom"),
115121
namespaces::RDF => Some("rdf"),
122+
namespaces::SYNDICATION => Some("syn"),
116123
namespaces::ITUNES => Some("itunes"),
117124
namespaces::PODCAST => Some("podcast"),
118125
namespaces::GEORSS => Some("georss"),
Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
//! Syndication Module for RSS 1.0
//!
//! Namespace: <http://purl.org/rss/1.0/modules/syndication/>
//! Prefix: syn
//!
//! This module provides parsing support for the Syndication namespace,
//! used in RSS 1.0 feeds to indicate update schedules and frequencies.
//!
//! Elements:
//! - `syn:updatePeriod` → Update period (hourly, daily, weekly, monthly, yearly)
//! - `syn:updateFrequency` → Number of times per period
//! - `syn:updateBase` → Base date for update schedule (ISO 8601)
13+
use crate::types::FeedMeta;
14+
15+
/// Syndication namespace URI
16+
pub const SYNDICATION_NAMESPACE: &str = "http://purl.org/rss/1.0/modules/syndication/";
17+
18+
/// Update period advertised by a feed through `syn:updatePeriod`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UpdatePeriod {
    /// Update hourly
    Hourly,
    /// Update daily
    Daily,
    /// Update weekly
    Weekly,
    /// Update monthly
    Monthly,
    /// Update yearly
    Yearly,
}

impl UpdatePeriod {
    /// Parse an update period from element text.
    ///
    /// Matching ignores ASCII case and surrounding whitespace, so text such as
    /// `" Daily\n"` is accepted. No intermediate `String` is allocated.
    ///
    /// Returns `None` if the string doesn't match any valid period.
    #[must_use]
    pub fn parse(s: &str) -> Option<Self> {
        // The accepted spellings are taken from `as_str`, so parsing and
        // formatting cannot drift apart when a variant is added.
        const PERIODS: [UpdatePeriod; 5] = [
            UpdatePeriod::Hourly,
            UpdatePeriod::Daily,
            UpdatePeriod::Weekly,
            UpdatePeriod::Monthly,
            UpdatePeriod::Yearly,
        ];

        // XML text nodes often carry indentation or trailing newlines.
        let s = s.trim();
        PERIODS
            .iter()
            .copied()
            .find(|period| s.eq_ignore_ascii_case(period.as_str()))
    }

    /// Convert to the lowercase string representation (`"hourly"`, `"daily"`, ...).
    #[must_use]
    pub const fn as_str(&self) -> &'static str {
        match self {
            Self::Hourly => "hourly",
            Self::Daily => "daily",
            Self::Weekly => "weekly",
            Self::Monthly => "monthly",
            Self::Yearly => "yearly",
        }
    }
}
61+
62+
/// Syndication metadata
63+
#[derive(Debug, Clone, Default)]
64+
pub struct SyndicationMeta {
65+
/// Update period (hourly, daily, weekly, monthly, yearly)
66+
pub update_period: Option<UpdatePeriod>,
67+
/// Number of times updated per period
68+
pub update_frequency: Option<u32>,
69+
/// Base date for update schedule (ISO 8601)
70+
pub update_base: Option<String>,
71+
}
72+
73+
/// Handle Syndication namespace element at feed level
74+
///
75+
/// # Arguments
76+
///
77+
/// * `element` - Local name of the element (without namespace prefix)
78+
/// * `text` - Text content of the element
79+
/// * `feed` - Feed metadata to update
80+
pub fn handle_feed_element(element: &str, text: &str, feed: &mut FeedMeta) {
81+
match element {
82+
"updatePeriod" => {
83+
if let Some(period) = UpdatePeriod::parse(text) {
84+
if feed.syndication.is_none() {
85+
feed.syndication = Some(SyndicationMeta::default());
86+
}
87+
if let Some(syn) = &mut feed.syndication {
88+
syn.update_period = Some(period);
89+
}
90+
}
91+
}
92+
"updateFrequency" => {
93+
if let Ok(freq) = text.parse::<u32>() {
94+
if feed.syndication.is_none() {
95+
feed.syndication = Some(SyndicationMeta::default());
96+
}
97+
if let Some(syn) = &mut feed.syndication {
98+
syn.update_frequency = Some(freq);
99+
}
100+
}
101+
}
102+
"updateBase" => {
103+
if feed.syndication.is_none() {
104+
feed.syndication = Some(SyndicationMeta::default());
105+
}
106+
if let Some(syn) = &mut feed.syndication {
107+
syn.update_base = Some(text.to_string());
108+
}
109+
}
110+
_ => {
111+
// Ignore unknown syndication elements
112+
}
113+
}
114+
}
115+
116+
#[cfg(test)]
mod tests {
    use super::*;

    /// Feed the given `(element, text)` pairs through `handle_feed_element`,
    /// starting from a default `FeedMeta`, and return the resulting metadata.
    fn feed_after(elements: &[(&str, &str)]) -> FeedMeta {
        let mut feed = FeedMeta::default();
        for &(element, text) in elements {
            handle_feed_element(element, text, &mut feed);
        }
        feed
    }

    #[test]
    fn test_update_period_parse() {
        let cases = [
            ("hourly", Some(UpdatePeriod::Hourly)),
            ("daily", Some(UpdatePeriod::Daily)),
            ("weekly", Some(UpdatePeriod::Weekly)),
            ("monthly", Some(UpdatePeriod::Monthly)),
            ("yearly", Some(UpdatePeriod::Yearly)),
            ("invalid", None),
        ];
        for &(input, expected) in &cases {
            assert_eq!(UpdatePeriod::parse(input), expected);
        }
    }

    #[test]
    fn test_update_period_case_insensitive() {
        let cases = [
            ("HOURLY", UpdatePeriod::Hourly),
            ("Daily", UpdatePeriod::Daily),
            ("WeeKLY", UpdatePeriod::Weekly),
        ];
        for &(input, expected) in &cases {
            assert_eq!(UpdatePeriod::parse(input), Some(expected));
        }
    }

    #[test]
    fn test_update_period_as_str() {
        let cases = [
            (UpdatePeriod::Hourly, "hourly"),
            (UpdatePeriod::Daily, "daily"),
            (UpdatePeriod::Weekly, "weekly"),
            (UpdatePeriod::Monthly, "monthly"),
            (UpdatePeriod::Yearly, "yearly"),
        ];
        for &(period, expected) in &cases {
            assert_eq!(period.as_str(), expected);
        }
    }

    #[test]
    fn test_handle_update_period() {
        let feed = feed_after(&[("updatePeriod", "daily")]);

        let syn = feed
            .syndication
            .as_ref()
            .expect("syndication metadata should be present");
        assert_eq!(syn.update_period, Some(UpdatePeriod::Daily));
    }

    #[test]
    fn test_handle_update_frequency() {
        let feed = feed_after(&[("updateFrequency", "2")]);

        let syn = feed
            .syndication
            .as_ref()
            .expect("syndication metadata should be present");
        assert_eq!(syn.update_frequency, Some(2));
    }

    #[test]
    fn test_handle_update_base() {
        let feed = feed_after(&[("updateBase", "2024-12-18T00:00:00Z")]);

        let syn = feed
            .syndication
            .as_ref()
            .expect("syndication metadata should be present");
        assert_eq!(syn.update_base.as_deref(), Some("2024-12-18T00:00:00Z"));
    }

    #[test]
    fn test_handle_multiple_elements() {
        let feed = feed_after(&[
            ("updatePeriod", "hourly"),
            ("updateFrequency", "1"),
            ("updateBase", "2024-01-01T00:00:00Z"),
        ]);

        let syn = feed
            .syndication
            .as_ref()
            .expect("syndication metadata should be present");
        assert_eq!(syn.update_period, Some(UpdatePeriod::Hourly));
        assert_eq!(syn.update_frequency, Some(1));
        assert_eq!(syn.update_base.as_deref(), Some("2024-01-01T00:00:00Z"));
    }

    #[test]
    fn test_handle_invalid_frequency() {
        // Invalid input must not create syndication metadata.
        let feed = feed_after(&[("updateFrequency", "not-a-number")]);

        assert!(feed.syndication.is_none());
    }

    #[test]
    fn test_handle_unknown_element() {
        let feed = feed_after(&[("unknown", "value")]);

        assert!(feed.syndication.is_none());
    }
}

crates/feedparser-rs-core/src/parser/atom.rs

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,10 @@ fn parse_feed_element(
136136
{
137137
feed.feed.link = Some(link.href.clone());
138138
}
139+
if feed.feed.license.is_none() && link.rel.as_deref() == Some("license")
140+
{
141+
feed.feed.license = Some(link.href.clone());
142+
}
139143
feed.feed
140144
.links
141145
.try_push_limited(link, limits.max_links_per_feed);
@@ -305,6 +309,9 @@ fn parse_entry(
305309
if entry.link.is_none() && link.rel.as_deref() == Some("alternate") {
306310
entry.link = Some(link.href.clone());
307311
}
312+
if entry.license.is_none() && link.rel.as_deref() == Some("license") {
313+
entry.license = Some(link.href.clone());
314+
}
308315
entry
309316
.links
310317
.try_push_limited(link, limits.max_links_per_entry);
@@ -926,4 +933,45 @@ mod tests {
926933
assert_eq!(feed.feed.links.len(), 1);
927934
assert_eq!(feed.feed.tags.len(), 1);
928935
}
936+
937+
/// A feed-level `<link rel="license">` fills `feed.license`, while the
/// `alternate` link is still reported as the feed's primary link.
#[test]
fn test_parse_atom_license_feed() {
    let expected_license = "https://creativecommons.org/licenses/by/4.0/";
    let expected_link = "https://example.com/";

    let xml = br#"<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Test Feed</title>
<link rel="license" href="https://creativecommons.org/licenses/by/4.0/"/>
<link rel="alternate" href="https://example.com/"/>
</feed>"#;

    let parsed = parse_atom10(xml).unwrap();
    let meta = &parsed.feed;

    assert_eq!(meta.license.as_deref(), Some(expected_license));
    assert_eq!(meta.link.as_deref(), Some(expected_link));
}
953+
954+
/// An entry-level `<link rel="license">` fills `entry.license`, while the
/// `alternate` link is still reported as the entry's primary link.
#[test]
fn test_parse_atom_license_entry() {
    let expected_license = "https://creativecommons.org/licenses/by-sa/3.0/";
    let expected_link = "https://example.com/entry/1";

    let xml = br#"<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<title>Licensed Entry</title>
<id>urn:uuid:1</id>
<link rel="license" href="https://creativecommons.org/licenses/by-sa/3.0/"/>
<link rel="alternate" href="https://example.com/entry/1"/>
</entry>
</feed>"#;

    let parsed = parse_atom10(xml).unwrap();
    assert_eq!(parsed.entries.len(), 1);

    let entry = &parsed.entries[0];
    assert_eq!(entry.license.as_deref(), Some(expected_license));
    assert_eq!(entry.link.as_deref(), Some(expected_link));
}
929977
}

crates/feedparser-rs-core/src/parser/common.rs

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,20 @@ pub fn is_content_tag(name: &[u8]) -> Option<&str> {
209209
extract_ns_local_name(name, b"content:")
210210
}
211211

212+
/// Check if element is a Syndication namespaced tag
213+
///
214+
/// # Examples
215+
///
216+
/// ```ignore
217+
/// assert_eq!(is_syn_tag(b"syn:updatePeriod"), Some("updatePeriod"));
218+
/// assert_eq!(is_syn_tag(b"syn:updateFrequency"), Some("updateFrequency"));
219+
/// assert_eq!(is_syn_tag(b"dc:creator"), None);
220+
/// ```
221+
#[inline]
222+
pub fn is_syn_tag(name: &[u8]) -> Option<&str> {
223+
extract_ns_local_name(name, b"syn:")
224+
}
225+
212226
/// Check if element is a Media RSS namespaced tag
213227
///
214228
/// # Examples
@@ -285,6 +299,47 @@ pub fn extract_xml_base(
285299
.map(|s| s.to_string())
286300
}
287301

302+
/// Extract xml:lang attribute from element
303+
///
304+
/// Returns the language code if xml:lang or lang attribute exists.
305+
/// Respects `max_attribute_length` limit for `DoS` protection.
306+
///
307+
/// # Arguments
308+
///
309+
/// * `element` - The XML element to extract xml:lang from
310+
/// * `max_attr_length` - Maximum allowed attribute length (`DoS` protection)
311+
///
312+
/// # Returns
313+
///
314+
/// * `Some(String)` - The xml:lang value if found and within length limit
315+
/// * `None` - If attribute not found or exceeds length limit
316+
///
317+
/// # Examples
318+
///
319+
/// ```ignore
320+
/// use feedparser_rs::parser::common::extract_xml_lang;
321+
///
322+
/// let element = /* BytesStart from quick-xml */;
323+
/// if let Some(lang) = extract_xml_lang(&element, 1024) {
324+
/// println!("Language: {}", lang);
325+
/// }
326+
/// ```
327+
pub fn extract_xml_lang(
328+
element: &quick_xml::events::BytesStart,
329+
max_attr_length: usize,
330+
) -> Option<String> {
331+
element
332+
.attributes()
333+
.flatten()
334+
.find(|attr| {
335+
let key = attr.key.as_ref();
336+
key == b"xml:lang" || key == b"lang"
337+
})
338+
.filter(|attr| attr.value.len() <= max_attr_length)
339+
.and_then(|attr| attr.unescape_value().ok())
340+
.map(|s| s.to_string())
341+
}
342+
288343
/// Read text content from current XML element (handles text and CDATA)
289344
pub fn read_text(
290345
reader: &mut Reader<&[u8]>,

0 commit comments

Comments
 (0)