Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions crates/feedparser-rs-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ pub use types::{
TextType, parse_duration, parse_explicit,
};

pub use namespace::syndication::{SyndicationMeta, UpdatePeriod};

#[cfg(feature = "http")]
pub use http::{FeedHttpClient, FeedHttpResponse};

Expand Down
7 changes: 7 additions & 0 deletions crates/feedparser-rs-core/src/namespace/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ pub mod dublin_core;
pub mod georss;
/// Media RSS specification
pub mod media_rss;
/// Syndication Module for RSS 1.0
pub mod syndication;

/// Common namespace URIs used in feeds
pub mod namespaces {
Expand All @@ -56,6 +58,9 @@ pub mod namespaces {
/// RSS 1.0
pub const RSS_10: &str = "http://purl.org/rss/1.0/";

/// Syndication Module for RSS 1.0
pub const SYNDICATION: &str = "http://purl.org/rss/1.0/modules/syndication/";

/// iTunes Podcast
pub const ITUNES: &str = "http://www.itunes.com/dtds/podcast-1.0.dtd";

Expand Down Expand Up @@ -88,6 +93,7 @@ pub fn get_namespace_uri(prefix: &str) -> Option<&'static str> {
"media" => Some(namespaces::MEDIA),
"atom" => Some(namespaces::ATOM),
"rdf" => Some(namespaces::RDF),
"syn" | "syndication" => Some(namespaces::SYNDICATION),
"itunes" => Some(namespaces::ITUNES),
"podcast" => Some(namespaces::PODCAST),
"georss" => Some(namespaces::GEORSS),
Expand All @@ -113,6 +119,7 @@ pub fn get_namespace_prefix(uri: &str) -> Option<&'static str> {
namespaces::MEDIA => Some("media"),
namespaces::ATOM => Some("atom"),
namespaces::RDF => Some("rdf"),
namespaces::SYNDICATION => Some("syn"),
namespaces::ITUNES => Some("itunes"),
namespaces::PODCAST => Some("podcast"),
namespaces::GEORSS => Some("georss"),
Expand Down
211 changes: 211 additions & 0 deletions crates/feedparser-rs-core/src/namespace/syndication.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
//! Syndication Module for RSS 1.0
//!
//! Namespace: <http://purl.org/rss/1.0/modules/syndication/>
//! Prefix: syn
//!
//! This module provides parsing support for the Syndication namespace,
//! used in RSS 1.0 feeds to indicate update schedules and frequencies.
//!
//! Elements:
//! - `syn:updatePeriod` → Update period (hourly, daily, weekly, monthly, yearly)
//! - `syn:updateFrequency` → Number of times per period
//! - `syn:updateBase` → Base date for update schedule (ISO 8601)
use crate::types::FeedMeta;

/// Namespace URI of the RSS 1.0 Syndication module.
///
/// Holds the same URI string as `namespaces::SYNDICATION` declared in
/// `namespace/mod.rs`.
pub const SYNDICATION_NAMESPACE: &str = "http://purl.org/rss/1.0/modules/syndication/";

/// Update period declared by a feed's `updatePeriod` syndication element.
///
/// Each variant corresponds to one of the keywords accepted by
/// [`UpdatePeriod::parse`] and produced by [`UpdatePeriod::as_str`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UpdatePeriod {
    /// Update hourly
    Hourly,
    /// Update daily
    Daily,
    /// Update weekly
    Weekly,
    /// Update monthly
    Monthly,
    /// Update yearly
    Yearly,
}

impl UpdatePeriod {
    /// Every variant, in declaration order. `parse` scans this table, so a
    /// newly added variant must be listed here to become parseable.
    const ALL: [Self; 5] = [
        Self::Hourly,
        Self::Daily,
        Self::Weekly,
        Self::Monthly,
        Self::Yearly,
    ];

    /// Parse update period from string.
    ///
    /// Leading and trailing whitespace is ignored (element text in
    /// pretty-printed XML is frequently padded with spaces or newlines) and
    /// the comparison is ASCII case-insensitive. No allocation is performed.
    ///
    /// Returns `None` if the trimmed string doesn't match any valid period.
    #[must_use]
    pub fn parse(s: &str) -> Option<Self> {
        let wanted = s.trim();
        Self::ALL
            .iter()
            .copied()
            .find(|period| wanted.eq_ignore_ascii_case(period.as_str()))
    }

    /// Convert to the canonical lowercase keyword for this period.
    #[must_use]
    pub const fn as_str(&self) -> &'static str {
        match self {
            Self::Hourly => "hourly",
            Self::Daily => "daily",
            Self::Weekly => "weekly",
            Self::Monthly => "monthly",
            Self::Yearly => "yearly",
        }
    }
}

/// Syndication metadata collected from a feed's Syndication-module elements.
///
/// Every field is optional. `handle_feed_element` fills in the field that
/// corresponds to the element it is given and leaves the others untouched.
#[derive(Debug, Clone, Default)]
pub struct SyndicationMeta {
/// Update period (hourly, daily, weekly, monthly, yearly), from `updatePeriod`
pub update_period: Option<UpdatePeriod>,
/// Number of times updated per period, from `updateFrequency`
pub update_frequency: Option<u32>,
/// Base date for update schedule (ISO 8601), from `updateBase`.
/// Kept as text; it is not parsed or validated as a date in this module.
pub update_base: Option<String>,
}

/// Handle Syndication namespace element at feed level
///
/// Surrounding whitespace in `text` is ignored. `feed.syndication` is created
/// on demand, and only when the element carries a usable value: an
/// unrecognised period, a frequency that is not a valid `u32`, or an empty
/// base date leave `feed` untouched. Unknown element names are ignored.
///
/// # Arguments
///
/// * `element` - Local name of the element (without namespace prefix)
/// * `text` - Text content of the element
/// * `feed` - Feed metadata to update
pub fn handle_feed_element(element: &str, text: &str, feed: &mut FeedMeta) {
    // Element text in pretty-printed XML is often padded with spaces or
    // newlines; strip it so values like " 2 " are not silently dropped.
    let value = text.trim();

    match element {
        "updatePeriod" => {
            if let Some(period) = UpdatePeriod::parse(value) {
                feed.syndication
                    .get_or_insert_with(SyndicationMeta::default)
                    .update_period = Some(period);
            }
        }
        "updateFrequency" => {
            if let Ok(freq) = value.parse::<u32>() {
                feed.syndication
                    .get_or_insert_with(SyndicationMeta::default)
                    .update_frequency = Some(freq);
            }
        }
        "updateBase" => {
            // Consistent with the branches above: no usable value means no
            // metadata is created.
            if !value.is_empty() {
                feed.syndication
                    .get_or_insert_with(SyndicationMeta::default)
                    .update_base = Some(value.to_string());
            }
        }
        _ => {
            // Ignore unknown syndication elements
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Feeds each `(element, text)` pair through `handle_feed_element`, in
    /// order, starting from a default `FeedMeta`, and returns the result.
    fn feed_after(elements: &[(&str, &str)]) -> FeedMeta {
        let mut feed = FeedMeta::default();
        for &(element, text) in elements {
            handle_feed_element(element, text, &mut feed);
        }
        feed
    }

    #[test]
    fn test_update_period_parse() {
        let expectations = [
            ("hourly", Some(UpdatePeriod::Hourly)),
            ("daily", Some(UpdatePeriod::Daily)),
            ("weekly", Some(UpdatePeriod::Weekly)),
            ("monthly", Some(UpdatePeriod::Monthly)),
            ("yearly", Some(UpdatePeriod::Yearly)),
            ("invalid", None),
        ];

        for &(input, expected) in &expectations {
            assert_eq!(UpdatePeriod::parse(input), expected);
        }
    }

    #[test]
    fn test_update_period_case_insensitive() {
        let expectations = [
            ("HOURLY", UpdatePeriod::Hourly),
            ("Daily", UpdatePeriod::Daily),
            ("WeeKLY", UpdatePeriod::Weekly),
        ];

        for &(input, expected) in &expectations {
            assert_eq!(UpdatePeriod::parse(input), Some(expected));
        }
    }

    #[test]
    fn test_update_period_as_str() {
        let expectations = [
            (UpdatePeriod::Hourly, "hourly"),
            (UpdatePeriod::Daily, "daily"),
            (UpdatePeriod::Weekly, "weekly"),
            (UpdatePeriod::Monthly, "monthly"),
            (UpdatePeriod::Yearly, "yearly"),
        ];

        for &(period, expected) in &expectations {
            assert_eq!(period.as_str(), expected);
        }
    }

    #[test]
    fn test_handle_update_period() {
        let feed = feed_after(&[("updatePeriod", "daily")]);

        let syn = feed
            .syndication
            .as_ref()
            .expect("updatePeriod should create syndication metadata");
        assert_eq!(syn.update_period, Some(UpdatePeriod::Daily));
    }

    #[test]
    fn test_handle_update_frequency() {
        let feed = feed_after(&[("updateFrequency", "2")]);

        let syn = feed
            .syndication
            .as_ref()
            .expect("updateFrequency should create syndication metadata");
        assert_eq!(syn.update_frequency, Some(2));
    }

    #[test]
    fn test_handle_update_base() {
        let feed = feed_after(&[("updateBase", "2024-12-18T00:00:00Z")]);

        let syn = feed
            .syndication
            .as_ref()
            .expect("updateBase should create syndication metadata");
        assert_eq!(syn.update_base.as_deref(), Some("2024-12-18T00:00:00Z"));
    }

    #[test]
    fn test_handle_multiple_elements() {
        let feed = feed_after(&[
            ("updatePeriod", "hourly"),
            ("updateFrequency", "1"),
            ("updateBase", "2024-01-01T00:00:00Z"),
        ]);

        let syn = feed.syndication.as_ref().unwrap();
        assert_eq!(syn.update_period, Some(UpdatePeriod::Hourly));
        assert_eq!(syn.update_frequency, Some(1));
        assert_eq!(syn.update_base.as_deref(), Some("2024-01-01T00:00:00Z"));
    }

    #[test]
    fn test_handle_invalid_frequency() {
        let feed = feed_after(&[("updateFrequency", "not-a-number")]);

        // Invalid input must not bring syndication metadata into existence.
        assert!(feed.syndication.is_none());
    }

    #[test]
    fn test_handle_unknown_element() {
        let feed = feed_after(&[("unknown", "value")]);

        assert!(feed.syndication.is_none());
    }
}
14 changes: 14 additions & 0 deletions crates/feedparser-rs-core/src/parser/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,20 @@ pub fn is_content_tag(name: &[u8]) -> Option<&str> {
extract_ns_local_name(name, b"content:")
}

/// Check if element is a Syndication namespaced tag
///
/// # Examples
///
/// ```ignore
/// assert_eq!(is_syn_tag(b"syn:updatePeriod"), Some("updatePeriod"));
/// assert_eq!(is_syn_tag(b"syn:updateFrequency"), Some("updateFrequency"));
/// assert_eq!(is_syn_tag(b"dc:creator"), None);
/// ```
#[inline]
pub fn is_syn_tag(name: &[u8]) -> Option<&str> {
extract_ns_local_name(name, b"syn:")
}

/// Check if element is a Media RSS namespaced tag
///
/// # Examples
Expand Down
74 changes: 71 additions & 3 deletions crates/feedparser-rs-core/src/parser/rss10.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@
use crate::{
ParserLimits,
error::{FeedError, Result},
namespace::dublin_core,
namespace::{content, dublin_core, syndication},
types::{Entry, FeedVersion, Image, ParsedFeed, TextConstruct, TextType},
};
use quick_xml::{Reader, events::Event};

use super::common::{
EVENT_BUFFER_CAPACITY, LimitedCollectionExt, check_depth, init_feed, is_dc_tag, read_text,
skip_element,
EVENT_BUFFER_CAPACITY, LimitedCollectionExt, check_depth, init_feed, is_content_tag, is_dc_tag,
is_syn_tag, read_text, skip_element,
};

/// Parse RSS 1.0 (RDF) feed from raw bytes
Expand Down Expand Up @@ -223,6 +223,10 @@ fn parse_channel(
let dc_elem = dc_element.to_string();
let text = read_text(reader, &mut buf, limits)?;
dublin_core::handle_feed_element(&dc_elem, &text, &mut feed.feed);
} else if let Some(syn_element) = is_syn_tag(full_name.as_ref()) {
let syn_elem = syn_element.to_string();
let text = read_text(reader, &mut buf, limits)?;
syndication::handle_feed_element(&syn_elem, &text, &mut feed.feed);
} else {
skip_element(reader, &mut buf, limits, *depth)?;
}
Expand Down Expand Up @@ -288,6 +292,10 @@ fn parse_item(
let text = read_text(reader, buf, limits)?;
// dublin_core::handle_entry_element already handles dc:date -> published
dublin_core::handle_entry_element(&dc_elem, &text, &mut entry);
} else if let Some(content_element) = is_content_tag(full_name.as_ref()) {
let content_elem = content_element.to_string();
let text = read_text(reader, buf, limits)?;
content::handle_entry_element(&content_elem, &text, &mut entry);
} else {
skip_element(reader, buf, limits, *depth)?;
}
Expand Down Expand Up @@ -568,4 +576,64 @@ mod tests {
assert!(is_dc_tag(b"link").is_none());
assert!(is_dc_tag(b"atom:title").is_none());
}

// Checks that an RSS 1.0 item carrying both `<description>` and
// `<content:encoded>` yields a summary from the former and a content entry
// from the latter.
#[test]
fn test_parse_rss10_with_content_encoded() {
// Fixture: one channel and one item whose `content:encoded` body is HTML
// wrapped in a CDATA section.
let xml = br#"<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns="http://purl.org/rss/1.0/"
xmlns:content="http://purl.org/rss/1.0/modules/content/">
<channel rdf:about="http://example.com/">
<title>Test</title>
<link>http://example.com</link>
<description>Test</description>
</channel>
<item rdf:about="http://example.com/1">
<title>Item 1</title>
<link>http://example.com/1</link>
<description>Brief summary</description>
<content:encoded><![CDATA[<p>Full <strong>HTML</strong> content</p>]]></content:encoded>
</item>
</rdf:RDF>"#;

let feed = parse_rss10(xml).unwrap();
assert_eq!(feed.entries.len(), 1);

// `<description>` is still expected to populate the summary.
let entry = &feed.entries[0];
assert_eq!(entry.summary.as_deref(), Some("Brief summary"));

// Verify content:encoded is parsed: it must produce at least one content
// entry, typed as HTML, whose value retains the text from the CDATA body.
assert!(!entry.content.is_empty());
assert_eq!(entry.content[0].content_type.as_deref(), Some("text/html"));
assert!(entry.content[0].value.contains("Full"));
assert!(entry.content[0].value.contains("HTML"));
}

// Checks that `syn:`-prefixed Syndication elements inside an RSS 1.0
// `<channel>` are collected into `feed.feed.syndication`.
#[test]
fn test_parse_rss10_with_syndication() {
// Fixture: a channel with no items that declares all three Syndication
// elements under the `syn` prefix.
let xml = br#"<?xml version="1.0"?>
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns="http://purl.org/rss/1.0/"
xmlns:syn="http://purl.org/rss/1.0/modules/syndication/">
<channel rdf:about="http://example.com/">
<title>Test</title>
<link>http://example.com</link>
<description>Test</description>
<syn:updatePeriod>hourly</syn:updatePeriod>
<syn:updateFrequency>2</syn:updateFrequency>
<syn:updateBase>2024-01-01T00:00:00Z</syn:updateBase>
</channel>
</rdf:RDF>"#;

let feed = parse_rss10(xml).unwrap();
assert!(feed.feed.syndication.is_some());

// Each of the three elements should have filled in its own field.
let syn = feed.feed.syndication.as_ref().unwrap();
assert_eq!(
syn.update_period,
Some(crate::namespace::syndication::UpdatePeriod::Hourly)
);
assert_eq!(syn.update_frequency, Some(2));
assert_eq!(syn.update_base.as_deref(), Some("2024-01-01T00:00:00Z"));
}
}
3 changes: 3 additions & 0 deletions crates/feedparser-rs-core/src/types/feed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use super::{
podcast::{ItunesFeedMeta, PodcastMeta},
version::FeedVersion,
};
use crate::namespace::syndication::SyndicationMeta;
use crate::{ParserLimits, error::Result};
use chrono::{DateTime, Utc};
use quick_xml::Reader;
Expand Down Expand Up @@ -75,6 +76,8 @@ pub struct FeedMeta {
pub dc_rights: Option<String>,
/// License URL (Creative Commons, etc.)
pub license: Option<String>,
/// Syndication module metadata (RSS 1.0)
pub syndication: Option<SyndicationMeta>,
}

/// Parsed feed result
Expand Down
Loading
Loading