diff --git a/crates/feedparser-rs-core/src/parser/atom.rs b/crates/feedparser-rs-core/src/parser/atom.rs
index 4e48f74..1f2e4cd 100644
--- a/crates/feedparser-rs-core/src/parser/atom.rs
+++ b/crates/feedparser-rs-core/src/parser/atom.rs
@@ -136,6 +136,10 @@ fn parse_feed_element(
{
feed.feed.link = Some(link.href.clone());
}
+ if feed.feed.license.is_none() && link.rel.as_deref() == Some("license")
+ {
+ feed.feed.license = Some(link.href.clone());
+ }
feed.feed
.links
.try_push_limited(link, limits.max_links_per_feed);
@@ -305,6 +309,9 @@ fn parse_entry(
if entry.link.is_none() && link.rel.as_deref() == Some("alternate") {
entry.link = Some(link.href.clone());
}
+ if entry.license.is_none() && link.rel.as_deref() == Some("license") {
+ entry.license = Some(link.href.clone());
+ }
entry
.links
.try_push_limited(link, limits.max_links_per_entry);
@@ -926,4 +933,45 @@ mod tests {
assert_eq!(feed.feed.links.len(), 1);
assert_eq!(feed.feed.tags.len(), 1);
}
+
+ #[test]
+ fn test_parse_atom_license_feed() {
+ let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom">
+ <title>Test Feed</title>
+ <link rel="alternate" href="https://example.com/"/>
+ <link rel="license" href="https://creativecommons.org/licenses/by/4.0/"/>
+ </feed>
+ "#;
+
+ let feed = parse_atom10(xml).unwrap();
+ assert_eq!(
+ feed.feed.license.as_deref(),
+ Some("https://creativecommons.org/licenses/by/4.0/")
+ );
+ assert_eq!(feed.feed.link.as_deref(), Some("https://example.com/"));
+ }
+
+ #[test]
+ fn test_parse_atom_license_entry() {
+ let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
+ <feed xmlns="http://www.w3.org/2005/Atom">
+ <entry>
+ <title>Licensed Entry</title>
+ <id>urn:uuid:1</id>
+ <link rel="alternate" href="https://example.com/entry/1"/>
+ <link rel="license" href="https://creativecommons.org/licenses/by-sa/3.0/"/>
+ </entry>
+ </feed>
+ "#;
+
+ let feed = parse_atom10(xml).unwrap();
+ assert_eq!(feed.entries.len(), 1);
+ assert_eq!(
+ feed.entries[0].license.as_deref(),
+ Some("https://creativecommons.org/licenses/by-sa/3.0/")
+ );
+ assert_eq!(
+ feed.entries[0].link.as_deref(),
+ Some("https://example.com/entry/1")
+ );
+ }
}
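With the atom.rs change above, the first rel="license" link is now exposed as a plain URL on `feed.feed.license` and `entry.license`. A minimal consumer-side sketch (hypothetical helper, not part of this patch; it assumes only the `ParsedFeed`, `FeedMeta`, and `Entry` types the diff already touches):

    fn print_licenses(parsed: &ParsedFeed) {
        // Feed-level license, taken from the first <link rel="license"> element.
        if let Some(license) = parsed.feed.license.as_deref() {
            println!("feed license: {license}");
        }
        // Entry-level licenses, filled the same way in parse_entry.
        for entry in &parsed.entries {
            if let Some(license) = entry.license.as_deref() {
                println!("entry license: {license}");
            }
        }
    }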
diff --git a/crates/feedparser-rs-core/src/parser/common.rs b/crates/feedparser-rs-core/src/parser/common.rs
index efa5540..569c24b 100644
--- a/crates/feedparser-rs-core/src/parser/common.rs
+++ b/crates/feedparser-rs-core/src/parser/common.rs
@@ -285,6 +285,47 @@ pub fn extract_xml_base(
.map(|s| s.to_string())
}
+/// Extract xml:lang attribute from element
+///
+/// Returns the language code if xml:lang or lang attribute exists.
+/// Respects `max_attribute_length` limit for `DoS` protection.
+///
+/// # Arguments
+///
+/// * `element` - The XML element to extract xml:lang from
+/// * `max_attr_length` - Maximum allowed attribute length (`DoS` protection)
+///
+/// # Returns
+///
+/// * `Some(String)` - The xml:lang value if found and within length limit
+/// * `None` - If attribute not found or exceeds length limit
+///
+/// # Examples
+///
+/// ```ignore
+/// use feedparser_rs::parser::common::extract_xml_lang;
+///
+/// let element = /* BytesStart from quick-xml */;
+/// if let Some(lang) = extract_xml_lang(&element, 1024) {
+/// println!("Language: {}", lang);
+/// }
+/// ```
+pub fn extract_xml_lang(
+ element: &quick_xml::events::BytesStart,
+ max_attr_length: usize,
+) -> Option<String> {
+ element
+ .attributes()
+ .flatten()
+ .find(|attr| {
+ let key = attr.key.as_ref();
+ key == b"xml:lang" || key == b"lang"
+ })
+ .filter(|attr| attr.value.len() <= max_attr_length)
+ .and_then(|attr| attr.unescape_value().ok())
+ .map(|s| s.to_string())
+}
+
/// Read text content from current XML element (handles text and CDATA)
pub fn read_text(
reader: &mut Reader<&[u8]>,
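Because the doc example above is marked `ignore`, here is a runnable sketch of `extract_xml_lang` against an in-memory element (assumed to live in a test module where the function is in scope; `BytesStart::new` and `push_attribute` are existing quick-xml APIs):

    use quick_xml::events::BytesStart;

    #[test]
    fn extracts_xml_lang_from_in_memory_element() {
        // Equivalent to encountering `<item xml:lang="fr-FR">` while parsing.
        let mut element = BytesStart::new("item");
        element.push_attribute(("xml:lang", "fr-FR"));
        assert_eq!(extract_xml_lang(&element, 1024), Some("fr-FR".to_string()));

        // Values longer than the attribute-length limit are dropped (DoS guard).
        let long_value = "x".repeat(64);
        let mut oversized = BytesStart::new("item");
        oversized.push_attribute(("xml:lang", long_value.as_str()));
        assert_eq!(extract_xml_lang(&oversized, 8), None);
    }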
diff --git a/crates/feedparser-rs-core/src/parser/rss.rs b/crates/feedparser-rs-core/src/parser/rss.rs
index 6223c33..0d40799 100644
--- a/crates/feedparser-rs-core/src/parser/rss.rs
+++ b/crates/feedparser-rs-core/src/parser/rss.rs
@@ -15,8 +15,8 @@ use crate::{
use quick_xml::{Reader, events::Event};
use super::common::{
- EVENT_BUFFER_CAPACITY, LimitedCollectionExt, check_depth, init_feed, is_content_tag, is_dc_tag,
- is_itunes_tag, is_media_tag, read_text, skip_element,
+ EVENT_BUFFER_CAPACITY, LimitedCollectionExt, check_depth, extract_xml_lang, init_feed,
+ is_content_tag, is_dc_tag, is_itunes_tag, is_media_tag, read_text, skip_element,
};
/// Error message for malformed XML attributes (shared constant)
@@ -110,10 +110,16 @@ pub fn parse_rss20_with_limits(data: &[u8], limits: ParserLimits) -> Result<ParsedFeed> {
+ let channel_lang = extract_xml_lang(&e, limits.max_attribute_length);
depth += 1;
- if let Err(e) =
- parse_channel(&mut reader, &mut feed, &limits, &mut depth, &mut base_ctx)
- {
+ if let Err(e) = parse_channel(
+ &mut reader,
+ &mut feed,
+ &limits,
+ &mut depth,
+ &mut base_ctx,
+ channel_lang.as_deref(),
+ ) {
feed.bozo = true;
feed.bozo_exception = Some(e.to_string());
}
@@ -140,6 +146,7 @@ fn parse_channel(
limits: &ParserLimits,
depth: &mut usize,
base_ctx: &mut BaseUrlContext,
+ channel_lang: Option<&str>,
) -> Result<()> {
let mut buf = Vec::with_capacity(EVENT_BUFFER_CAPACITY);
@@ -163,7 +170,15 @@ fn parse_channel(
match tag.as_slice() {
b"title" | b"link" | b"description" | b"language" | b"pubDate"
| b"managingEditor" | b"webMaster" | b"generator" | b"ttl" | b"category" => {
- parse_channel_standard(reader, &mut buf, &tag, feed, limits, base_ctx)?;
+ parse_channel_standard(
+ reader,
+ &mut buf,
+ &tag,
+ feed,
+ limits,
+ base_ctx,
+ channel_lang,
+ )?;
}
b"image" => {
if let Ok(image) = parse_image(reader, &mut buf, limits, depth) {
@@ -171,11 +186,16 @@ fn parse_channel(
}
}
b"item" => {
+ let item_lang = extract_xml_lang(&e, limits.max_attribute_length);
+
if !feed.check_entry_limit(reader, &mut buf, limits, depth)? {
continue;
}
- match parse_item(reader, &mut buf, limits, depth, base_ctx) {
+ let effective_lang = item_lang.as_deref().or(channel_lang);
+
+ match parse_item(reader, &mut buf, limits, depth, base_ctx, effective_lang)
+ {
Ok((entry, has_attr_errors)) => {
if has_attr_errors {
feed.bozo = true;
@@ -261,10 +281,17 @@ fn parse_channel_standard(
feed: &mut ParsedFeed,
limits: &ParserLimits,
base_ctx: &mut BaseUrlContext,
+ channel_lang: Option<&str>,
) -> Result<()> {
match tag {
b"title" => {
- feed.feed.title = Some(read_text(reader, buf, limits)?);
+ let text = read_text(reader, buf, limits)?;
+ feed.feed.set_title(TextConstruct {
+ value: text,
+ content_type: TextType::Text,
+ language: channel_lang.map(String::from),
+ base: base_ctx.base().map(String::from),
+ });
}
b"link" => {
let link_text = read_text(reader, buf, limits)?;
@@ -276,7 +303,13 @@ fn parse_channel_standard(
}
}
b"description" => {
- feed.feed.subtitle = Some(read_text(reader, buf, limits)?);
+ let text = read_text(reader, buf, limits)?;
+ feed.feed.set_subtitle(TextConstruct {
+ value: text,
+ content_type: TextType::Html,
+ language: channel_lang.map(String::from),
+ base: base_ctx.base().map(String::from),
+ });
}
b"language" => {
feed.feed.language = Some(read_text(reader, buf, limits)?);
@@ -500,6 +533,9 @@ fn parse_channel_namespace(
} else if let Some(_media_element) = is_media_tag(tag) {
skip_element(reader, buf, limits, depth)?;
Ok(true)
+ } else if tag.starts_with(b"creativeCommons:license") || tag == b"license" {
+ feed.feed.license = Some(read_text(reader, buf, limits)?);
+ Ok(true)
} else {
Ok(false)
}
@@ -516,6 +552,7 @@ fn parse_item(
limits: &ParserLimits,
depth: &mut usize,
base_ctx: &BaseUrlContext,
+ item_lang: Option<&str>,
) -> Result<(Entry, bool)> {
let mut entry = Entry::with_capacity();
let mut has_attr_errors = false;
@@ -544,7 +581,9 @@ fn parse_item(
match tag.as_slice() {
b"title" | b"link" | b"description" | b"guid" | b"pubDate" | b"author"
| b"category" | b"comments" => {
- parse_item_standard(reader, buf, &tag, &mut entry, limits, base_ctx)?;
+ parse_item_standard(
+ reader, buf, &tag, &mut entry, limits, base_ctx, item_lang,
+ )?;
}
b"enclosure" => {
if let Some(mut enclosure) = parse_enclosure(&attrs, limits) {
@@ -603,10 +642,17 @@ fn parse_item_standard(
entry: &mut Entry,
limits: &ParserLimits,
base_ctx: &BaseUrlContext,
+ item_lang: Option<&str>,
) -> Result<()> {
match tag {
b"title" => {
- entry.title = Some(read_text(reader, buf, limits)?);
+ let text = read_text(reader, buf, limits)?;
+ entry.set_title(TextConstruct {
+ value: text,
+ content_type: TextType::Text,
+ language: item_lang.map(String::from),
+ base: base_ctx.base().map(String::from),
+ });
}
b"link" => {
let link_text = read_text(reader, buf, limits)?;
@@ -622,13 +668,12 @@ fn parse_item_standard(
);
}
b"description" => {
- let desc = read_text(reader, buf, limits)?;
- entry.summary = Some(desc.clone());
- entry.summary_detail = Some(TextConstruct {
- value: desc,
+ let text = read_text(reader, buf, limits)?;
+ entry.set_summary(TextConstruct {
+ value: text,
content_type: TextType::Html,
- language: None,
- base: None,
+ language: item_lang.map(String::from),
+ base: base_ctx.base().map(String::from),
});
}
b"guid" => {
@@ -857,6 +902,9 @@ fn parse_item_namespace(
depth,
)?;
Ok(true)
+ } else if tag.starts_with(b"creativeCommons:license") || tag == b"license" {
+ entry.license = Some(read_text(reader, buf, limits)?);
+ Ok(true)
} else {
Ok(false)
}
@@ -1791,4 +1839,142 @@ mod tests {
);
assert!(entry.content[0].value.contains(""));
}
+
+ #[test]
+ fn test_parse_rss_xml_lang_channel() {
+ let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
+ <rss version="2.0">
+ <channel xml:lang="en-US">
+ <title>English Channel</title>
+ <description>Test description</description>
+ </channel>
+ </rss>
+ "#;
+
+ let feed = parse_rss20(xml).unwrap();
+ assert_eq!(feed.feed.title.as_deref(), Some("English Channel"));
+
+ assert!(feed.feed.title_detail.is_some());
+ let title_detail = feed.feed.title_detail.as_ref().unwrap();
+ assert_eq!(title_detail.language.as_deref(), Some("en-US"));
+
+ assert!(feed.feed.subtitle_detail.is_some());
+ let subtitle_detail = feed.feed.subtitle_detail.as_ref().unwrap();
+ assert_eq!(subtitle_detail.language.as_deref(), Some("en-US"));
+ }
+
+ #[test]
+ fn test_parse_rss_xml_lang_item() {
+ let xml = b"
+
+
+ -
+ Article en fran\xc3\xa7ais
+ Description en fran\xc3\xa7ais
+
+ -
+ English Article
+ English description
+
+
+ ";
+
+ let feed = parse_rss20(xml).unwrap();
+ assert_eq!(feed.entries.len(), 2);
+
+ let french_entry = &feed.entries[0];
+ assert!(french_entry.title_detail.is_some());
+ assert_eq!(
+ french_entry
+ .title_detail
+ .as_ref()
+ .unwrap()
+ .language
+ .as_deref(),
+ Some("fr-FR")
+ );
+ assert_eq!(
+ french_entry
+ .summary_detail
+ .as_ref()
+ .unwrap()
+ .language
+ .as_deref(),
+ Some("fr-FR")
+ );
+
+ let english_entry = &feed.entries[1];
+ assert!(english_entry.title_detail.is_some());
+ assert_eq!(
+ english_entry
+ .title_detail
+ .as_ref()
+ .unwrap()
+ .language
+ .as_deref(),
+ Some("en")
+ );
+ }
+
+ #[test]
+ fn test_parse_rss_xml_lang_empty() {
+ let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
+ <rss version="2.0">
+ <channel xml:lang="">
+ <title>Empty Lang Channel</title>
+ <description>Test with empty xml:lang</description>
+ <item xml:lang="">
+ <title>Empty Lang Item</title>
+ </item>
+ </channel>
+ </rss>
+ "#;
+
+ let feed = parse_rss20(xml).unwrap();
+
+ // An empty xml:lang is preserved as an empty string (not normalized to None)
+ if let Some(ref title_detail) = feed.feed.title_detail {
+ assert_eq!(title_detail.language.as_deref(), Some(""));
+ }
+
+ assert_eq!(feed.entries.len(), 1);
+ if let Some(ref title_detail) = feed.entries[0].title_detail {
+ assert_eq!(title_detail.language.as_deref(), Some(""));
+ }
+ }
+
+ #[test]
+ fn test_parse_rss_license_channel() {
+ let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
+ <rss version="2.0" xmlns:creativeCommons="http://backend.userland.com/creativeCommonsRssModule">
+ <channel>
+ <title>Test Feed</title>
+ <creativeCommons:license>https://creativecommons.org/licenses/by/4.0/</creativeCommons:license>
+ </channel>
+ </rss>
+ "#;
+
+ let feed = parse_rss20(xml).unwrap();
+ assert_eq!(
+ feed.feed.license.as_deref(),
+ Some("https://creativecommons.org/licenses/by/4.0/")
+ );
+ }
+
+ #[test]
+ fn test_parse_rss_license_item() {
+ let xml = br#"<?xml version="1.0" encoding="UTF-8"?>
+ <rss version="2.0" xmlns:creativeCommons="http://backend.userland.com/creativeCommonsRssModule">
+ <channel>
+ <item>
+ <title>Licensed Item</title>
+ <creativeCommons:license>https://creativecommons.org/licenses/by-sa/3.0/</creativeCommons:license>
+ </item>
+ </channel>
+ </rss>
+ "#;
+
+ let feed = parse_rss20(xml).unwrap();
+ assert_eq!(feed.entries.len(), 1);
+ assert_eq!(
+ feed.entries[0].license.as_deref(),
+ Some("https://creativecommons.org/licenses/by-sa/3.0/")
+ );
+ }
}
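The rss.rs hunks above implement the language fallback that the new tests exercise: an item-level xml:lang wins, otherwise the channel-level value is inherited into `title_detail`/`summary_detail`. The rule, shown standalone (hypothetical helper mirroring `item_lang.as_deref().or(channel_lang)`; not part of the patch):

    fn effective_language(item_lang: Option<&str>, channel_lang: Option<&str>) -> Option<String> {
        // The item's own xml:lang overrides the channel's; otherwise inherit it.
        item_lang.or(channel_lang).map(String::from)
    }

    #[test]
    fn item_lang_overrides_channel_lang() {
        assert_eq!(effective_language(Some("fr-FR"), Some("en")), Some("fr-FR".to_string()));
        assert_eq!(effective_language(None, Some("en")), Some("en".to_string()));
        assert_eq!(effective_language(None, None), None);
    }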
diff --git a/crates/feedparser-rs-core/src/types/entry.rs b/crates/feedparser-rs-core/src/types/entry.rs
index 6e8b2e6..7120ef4 100644
--- a/crates/feedparser-rs-core/src/types/entry.rs
+++ b/crates/feedparser-rs-core/src/types/entry.rs
@@ -74,6 +74,8 @@ pub struct Entry {
pub podcast_persons: Vec<PodcastPerson>,
/// `GeoRSS` location data
pub geo: Option,
+ /// License URL (Creative Commons, etc.)
+ pub license: Option<String>,
}
impl Entry {
diff --git a/crates/feedparser-rs-core/src/types/feed.rs b/crates/feedparser-rs-core/src/types/feed.rs
index ede71ad..247f61d 100644
--- a/crates/feedparser-rs-core/src/types/feed.rs
+++ b/crates/feedparser-rs-core/src/types/feed.rs
@@ -73,6 +73,8 @@ pub struct FeedMeta {
pub dc_publisher: Option<String>,
/// Dublin Core rights (copyright)
pub dc_rights: Option<String>,
+ /// License URL (Creative Commons, etc.)
+ pub license: Option<String>,
}
/// Parsed feed result
diff --git a/crates/feedparser-rs-node/index.d.ts b/crates/feedparser-rs-node/index.d.ts
index 66037ae..c2c0687 100644
--- a/crates/feedparser-rs-node/index.d.ts
+++ b/crates/feedparser-rs-node/index.d.ts
@@ -85,6 +85,8 @@ export interface Entry {
podcastTranscripts: Array<PodcastTranscript>
/** Podcast persons */
podcastPersons: Array<PodcastPerson>
+ /** License URL (Creative Commons, etc.) */
+ license?: string
}
/** Feed metadata */
@@ -139,6 +141,8 @@ export interface FeedMeta {
id?: string
/** Time-to-live (update frequency hint) in minutes */
ttl?: number
+ /** License URL (Creative Commons, etc.) */
+ license?: string
}
/** Generator metadata */
diff --git a/crates/feedparser-rs-node/src/lib.rs b/crates/feedparser-rs-node/src/lib.rs
index 3c1de01..02060db 100644
--- a/crates/feedparser-rs-node/src/lib.rs
+++ b/crates/feedparser-rs-node/src/lib.rs
@@ -317,6 +317,8 @@ pub struct FeedMeta {
pub id: Option<String>,
/// Time-to-live (update frequency hint) in minutes
pub ttl: Option,
+ /// License URL (Creative Commons, etc.)
+ pub license: Option<String>,
}
impl From for FeedMeta {
@@ -347,6 +349,7 @@ impl From for FeedMeta {
tags: core.tags.into_iter().map(Tag::from).collect(),
id: core.id,
ttl: core.ttl,
+ license: core.license,
}
}
}
@@ -402,6 +405,8 @@ pub struct Entry {
pub podcast_transcripts: Vec<PodcastTranscript>,
/// Podcast persons
pub podcast_persons: Vec<PodcastPerson>,
+ /// License URL (Creative Commons, etc.)
+ pub license: Option<String>,
}
impl From for Entry {
@@ -477,6 +482,7 @@ impl From for Entry {
v.extend(core.podcast_persons.into_iter().map(PodcastPerson::from));
v
},
+ license: core.license,
}
}
}
diff --git a/crates/feedparser-rs-py/src/types/entry.rs b/crates/feedparser-rs-py/src/types/entry.rs
index 41635a8..387313d 100644
--- a/crates/feedparser-rs-py/src/types/entry.rs
+++ b/crates/feedparser-rs-py/src/types/entry.rs
@@ -214,6 +214,11 @@ impl PyEntry {
.collect()
}
+ #[getter]
+ fn license(&self) -> Option<&str> {
+ self.inner.license.as_deref()
+ }
+
fn __repr__(&self) -> String {
format!(
"Entry(title='{}', id='{}')",
diff --git a/crates/feedparser-rs-py/src/types/feed_meta.rs b/crates/feedparser-rs-py/src/types/feed_meta.rs
index 51bb353..5b8c3da 100644
--- a/crates/feedparser-rs-py/src/types/feed_meta.rs
+++ b/crates/feedparser-rs-py/src/types/feed_meta.rs
@@ -207,6 +207,11 @@ impl PyFeedMeta {
.map(|p| PyPodcastMeta::from_core(p.clone()))
}
+ #[getter]
+ fn license(&self) -> Option<&str> {
+ self.inner.license.as_deref()
+ }
+
fn __repr__(&self) -> String {
format!(
"FeedMeta(title='{}', link='{}')",