Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions crates/feedparser-rs-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,10 @@ pub use error::{FeedError, Result};
pub use limits::{LimitError, ParserLimits};
pub use parser::{detect_format, parse, parse_with_limits};
pub use types::{
Content, Enclosure, Entry, FeedMeta, FeedVersion, Generator, Image, LimitedCollectionExt, Link,
ParsedFeed, Person, Source, Tag, TextConstruct, TextType,
Content, Enclosure, Entry, FeedMeta, FeedVersion, Generator, Image, ItunesCategory,
ItunesEntryMeta, ItunesFeedMeta, ItunesOwner, LimitedCollectionExt, Link, ParsedFeed, Person,
PodcastFunding, PodcastMeta, PodcastPerson, PodcastTranscript, Source, Tag, TextConstruct,
TextType, parse_duration, parse_explicit,
};

#[cfg(test)]
Expand Down
247 changes: 240 additions & 7 deletions crates/feedparser-rs-core/src/parser/rss.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ use crate::{
ParserLimits,
error::{FeedError, Result},
types::{
Enclosure, Entry, FeedVersion, Image, Link, ParsedFeed, Source, Tag, TextConstruct,
TextType,
Enclosure, Entry, FeedVersion, Image, ItunesCategory, ItunesEntryMeta, ItunesFeedMeta,
ItunesOwner, Link, ParsedFeed, PodcastFunding, PodcastMeta, Source, Tag, TextConstruct,
TextType, parse_duration, parse_explicit,
},
util::parse_date,
};
Expand Down Expand Up @@ -105,7 +106,9 @@ fn parse_channel(
)));
}

match e.local_name().as_ref() {
// Use full qualified name to distinguish standard RSS tags from namespaced tags
// (e.g., <image> vs <itunes:image>, <category> vs <itunes:category>)
match e.name().as_ref() {
b"title" => {
feed.feed.title = Some(read_text(reader, &mut buf, limits)?);
}
Expand Down Expand Up @@ -185,7 +188,111 @@ fn parse_channel(
}
}
}
_ => skip_element(reader, &mut buf, limits, *depth)?,
tag => {
// Check for iTunes and Podcast 2.0 namespace tags
let handled = if is_itunes_tag(tag, b"author") {
let text = read_text(reader, &mut buf, limits)?;
let itunes =
feed.feed.itunes.get_or_insert_with(ItunesFeedMeta::default);
itunes.author = Some(text);
true
} else if is_itunes_tag(tag, b"owner") {
let itunes =
feed.feed.itunes.get_or_insert_with(ItunesFeedMeta::default);
if let Ok(owner) = parse_itunes_owner(reader, &mut buf, limits, depth) {
itunes.owner = Some(owner);
}
true
} else if is_itunes_tag(tag, b"category") {
// Parse category inline to avoid borrow conflicts
let mut category_text = String::new();
for attr in e.attributes().flatten() {
if attr.key.as_ref() == b"text"
&& let Ok(value) = attr.unescape_value()
{
category_text =
value.chars().take(limits.max_attribute_length).collect();
}
}
let itunes =
feed.feed.itunes.get_or_insert_with(ItunesFeedMeta::default);
itunes.categories.push(ItunesCategory {
text: category_text,
subcategory: None,
});
skip_element(reader, &mut buf, limits, *depth)?;
true
} else if is_itunes_tag(tag, b"explicit") {
let text = read_text(reader, &mut buf, limits)?;
let itunes =
feed.feed.itunes.get_or_insert_with(ItunesFeedMeta::default);
itunes.explicit = parse_explicit(&text);
true
} else if is_itunes_tag(tag, b"image") {
let itunes =
feed.feed.itunes.get_or_insert_with(ItunesFeedMeta::default);
for attr in e.attributes().flatten() {
if attr.key.as_ref() == b"href"
&& let Ok(value) = attr.unescape_value()
{
itunes.image = Some(
value.chars().take(limits.max_attribute_length).collect(),
);
}
}
// NOTE: Don't call skip_element - itunes:image is typically self-closing
// and calling skip_element would consume the next tag's end event
true
} else if is_itunes_tag(tag, b"keywords") {
let text = read_text(reader, &mut buf, limits)?;
let itunes =
feed.feed.itunes.get_or_insert_with(ItunesFeedMeta::default);
itunes.keywords = text
.split(',')
.map(|s| s.trim().to_string())
.filter(|s| !s.is_empty())
.collect();
true
} else if is_itunes_tag(tag, b"type") {
let text = read_text(reader, &mut buf, limits)?;
let itunes =
feed.feed.itunes.get_or_insert_with(ItunesFeedMeta::default);
itunes.podcast_type = Some(text);
true
} else if tag.starts_with(b"podcast:guid") {
let text = read_text(reader, &mut buf, limits)?;
let podcast =
feed.feed.podcast.get_or_insert_with(PodcastMeta::default);
podcast.guid = Some(text);
true
} else if tag.starts_with(b"podcast:funding") {
// Parse funding inline to avoid borrow conflicts
let mut url = String::new();
for attr in e.attributes().flatten() {
if attr.key.as_ref() == b"url"
&& let Ok(value) = attr.unescape_value()
{
url = value.chars().take(limits.max_attribute_length).collect();
}
}
let message_text = read_text(reader, &mut buf, limits)?;
let message = if message_text.is_empty() {
None
} else {
Some(message_text)
};
let podcast =
feed.feed.podcast.get_or_insert_with(PodcastMeta::default);
podcast.funding.push(PodcastFunding { url, message });
true
} else {
false
};

if !handled {
skip_element(reader, &mut buf, limits, *depth)?;
}
}
}
*depth = depth.saturating_sub(1);
}
Expand Down Expand Up @@ -222,7 +329,8 @@ fn parse_item(
)));
}

match e.local_name().as_ref() {
// Use full qualified name to distinguish standard RSS tags from namespaced tags
match e.name().as_ref() {
b"title" => {
entry.title = Some(read_text(reader, buf, limits)?);
}
Expand Down Expand Up @@ -285,8 +393,72 @@ fn parse_item(
entry.source = Some(source);
}
}
_ => {
skip_element(reader, buf, limits, *depth)?;
tag => {
// Check for iTunes and Podcast 2.0 namespace tags
let handled = if is_itunes_tag(tag, b"title") {
let text = read_text(reader, buf, limits)?;
let itunes = entry.itunes.get_or_insert_with(ItunesEntryMeta::default);
itunes.title = Some(text);
true
} else if is_itunes_tag(tag, b"author") {
let text = read_text(reader, buf, limits)?;
let itunes = entry.itunes.get_or_insert_with(ItunesEntryMeta::default);
itunes.author = Some(text);
true
} else if is_itunes_tag(tag, b"duration") {
let text = read_text(reader, buf, limits)?;
let itunes = entry.itunes.get_or_insert_with(ItunesEntryMeta::default);
itunes.duration = parse_duration(&text);
true
} else if is_itunes_tag(tag, b"explicit") {
let text = read_text(reader, buf, limits)?;
let itunes = entry.itunes.get_or_insert_with(ItunesEntryMeta::default);
itunes.explicit = parse_explicit(&text);
true
} else if is_itunes_tag(tag, b"image") {
let itunes = entry.itunes.get_or_insert_with(ItunesEntryMeta::default);
for attr in e.attributes().flatten() {
if attr.key.as_ref() == b"href"
&& let Ok(value) = attr.unescape_value()
{
itunes.image = Some(
value.chars().take(limits.max_attribute_length).collect(),
);
}
}
// NOTE: Don't call skip_element - itunes:image is typically self-closing
true
} else if is_itunes_tag(tag, b"episode") {
let text = read_text(reader, buf, limits)?;
let itunes = entry.itunes.get_or_insert_with(ItunesEntryMeta::default);
itunes.episode = text.parse().ok();
true
} else if is_itunes_tag(tag, b"season") {
let text = read_text(reader, buf, limits)?;
let itunes = entry.itunes.get_or_insert_with(ItunesEntryMeta::default);
itunes.season = text.parse().ok();
true
} else if is_itunes_tag(tag, b"episodeType") {
let text = read_text(reader, buf, limits)?;
let itunes = entry.itunes.get_or_insert_with(ItunesEntryMeta::default);
itunes.episode_type = Some(text);
true
} else if tag.starts_with(b"podcast:transcript") {
// Podcast 2.0 transcript not stored in Entry for now
skip_element(reader, buf, limits, *depth)?;
true
} else if tag.starts_with(b"podcast:person") {
// Parse person inline to avoid borrow conflicts
// Podcast 2.0 person not stored in Entry for now (no podcast field)
skip_element(reader, buf, limits, *depth)?;
true
} else {
false
};

if !handled {
skip_element(reader, buf, limits, *depth)?;
}
}
}
*depth = depth.saturating_sub(1);
Expand Down Expand Up @@ -415,6 +587,67 @@ fn parse_source(
Ok(Source { title, link, id })
}

/// Report whether a raw element name refers to the iTunes tag `tag`.
///
/// Two spellings of `name` are accepted:
/// - the prefixed form `itunes:<tag>`
/// - the bare `<tag>` with no prefix at all
///
/// Only the name bytes are inspected; no namespace URI is consulted, so the
/// bare form matches any element whose name equals `tag`. Callers that pass a
/// fully qualified name do so from a fallback `match` arm that runs after the
/// plain RSS tag literals (`title`, ...) have been tried, so for those callers
/// the standard tags never reach the bare comparison.
#[inline]
fn is_itunes_tag(name: &[u8], tag: &[u8]) -> bool {
    match name.strip_prefix(b"itunes:") {
        // Prefixed spelling: whatever follows `itunes:` must be exactly `tag`.
        Some(unprefixed) if unprefixed == tag => true,
        // No prefix (or a prefixed mismatch): fall back to a whole-name comparison.
        _ => name == tag,
    }
}

/// Collect the `name` and `email` children of an `<itunes:owner>` element.
///
/// Events are pulled from `reader` until an end tag or end of input is seen at
/// this level. Child elements whose local name is `name` or `email` have their
/// text stored in the returned [`ItunesOwner`]; every other child element is
/// skipped. Non-start events (text, empty elements, comments, ...) are ignored.
///
/// `depth` is bumped for each child start tag and restored afterwards.
///
/// # Errors
///
/// Returns `FeedError::InvalidFormat` when a child would exceed
/// `limits.max_nesting_depth`, and propagates reader errors as well as any
/// error from `read_text` / `skip_element`.
fn parse_itunes_owner(
    reader: &mut Reader<&[u8]>,
    buf: &mut Vec<u8>,
    limits: &ParserLimits,
    depth: &mut usize,
) -> Result<ItunesOwner> {
    let mut owner = ItunesOwner::default();

    loop {
        match reader.read_event_into(buf) {
            Err(err) => return Err(err.into()),
            // Any end tag reaching this loop finishes the owner; end of input does too.
            // NOTE(review): presumably `read_text` / `skip_element` consume the end
            // tags of the children they handle, so this is the owner's own end tag.
            Ok(Event::End(_) | Event::Eof) => break,
            Ok(Event::Start(child)) => {
                *depth += 1;
                if *depth > limits.max_nesting_depth {
                    return Err(FeedError::InvalidFormat(format!(
                        "XML nesting depth {} exceeds maximum {}",
                        depth, limits.max_nesting_depth
                    )));
                }

                // Decide what the child is before the buffer is handed to a helper.
                // The local name has its prefix stripped, so this relies on the
                // bare-name match of `is_itunes_tag`.
                let local = child.local_name();
                let wants_name = is_itunes_tag(local.as_ref(), b"name");
                let wants_email = !wants_name && is_itunes_tag(local.as_ref(), b"email");

                if wants_name {
                    owner.name = Some(read_text(reader, buf, limits)?);
                } else if wants_email {
                    owner.email = Some(read_text(reader, buf, limits)?);
                } else {
                    skip_element(reader, buf, limits, *depth)?;
                }

                *depth = depth.saturating_sub(1);
            }
            Ok(_) => {}
        }
        buf.clear();
    }

    Ok(owner)
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
7 changes: 6 additions & 1 deletion crates/feedparser-rs-core/src/types/entry.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use super::common::{Content, Enclosure, Link, Person, Source, Tag, TextConstruct};
use super::{
common::{Content, Enclosure, Link, Person, Source, Tag, TextConstruct},
podcast::ItunesEntryMeta,
};
use chrono::{DateTime, Utc};

/// Feed entry/item
Expand Down Expand Up @@ -48,6 +51,8 @@ pub struct Entry {
pub comments: Option<String>,
/// Source feed reference
pub source: Option<Source>,
/// iTunes episode metadata (if present)
pub itunes: Option<ItunesEntryMeta>,
}

impl Entry {
Expand Down
5 changes: 5 additions & 0 deletions crates/feedparser-rs-core/src/types/feed.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use super::{
common::{Generator, Image, Link, Person, Tag, TextConstruct},
entry::Entry,
podcast::{ItunesFeedMeta, PodcastMeta},
version::FeedVersion,
};
use chrono::{DateTime, Utc};
Expand Down Expand Up @@ -57,6 +58,10 @@ pub struct FeedMeta {
pub id: Option<String>,
/// Time-to-live (update frequency hint) in minutes
pub ttl: Option<u32>,
/// iTunes podcast metadata (if present)
pub itunes: Option<ItunesFeedMeta>,
/// Podcast 2.0 namespace metadata (if present)
pub podcast: Option<PodcastMeta>,
}

/// Parsed feed result
Expand Down
5 changes: 5 additions & 0 deletions crates/feedparser-rs-core/src/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ mod common;
mod entry;
mod feed;
pub mod generics;
mod podcast;
mod version;

pub use common::{
Expand All @@ -10,4 +11,8 @@ pub use common::{
pub use entry::Entry;
pub use feed::{FeedMeta, ParsedFeed};
pub use generics::{FromAttributes, LimitedCollectionExt, ParseFrom};
pub use podcast::{
ItunesCategory, ItunesEntryMeta, ItunesFeedMeta, ItunesOwner, PodcastFunding, PodcastMeta,
PodcastPerson, PodcastTranscript, parse_duration, parse_explicit,
};
pub use version::FeedVersion;
Loading