Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions crates/feedparser-rs-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ pub mod http;
mod limits;
/// Namespace handlers for extended feed formats
pub mod namespace;
mod options;
mod parser;

/// Type definitions for feed data structures
Expand All @@ -62,6 +63,7 @@ pub mod util;

pub use error::{FeedError, Result};
pub use limits::{LimitError, ParserLimits};
pub use options::ParseOptions;
pub use parser::{detect_format, parse, parse_with_limits};
pub use types::{
Content, Enclosure, Entry, FeedMeta, FeedVersion, Generator, Image, ItunesCategory,
Expand Down
279 changes: 279 additions & 0 deletions crates/feedparser-rs-core/src/namespace/cc.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
//! Creative Commons namespace support for license information
//!
//! Handles Creative Commons license metadata in RSS and Atom feeds.
//! Supports both the modern `cc:license` (with `rdf:resource` attribute)
//! and legacy `creativeCommons:license` text elements.
//!
//! # Supported Elements
//!
//! - `cc:license` (with `rdf:resource` attribute) - Modern CC namespace
//! - `creativeCommons:license` (text element) - Legacy Userland namespace
//!
//! # Specification
//!
//! Creative Commons: <http://creativecommons.org/ns>
//! Legacy: <http://backend.userland.com/creativeCommonsRssModule>

use crate::Entry;
use crate::limits::ParserLimits;
use crate::types::generics::LimitedCollectionExt;
use crate::types::{FeedMeta, Link};

/// Creative Commons namespace URI (modern)
pub const CC: &str = "http://creativecommons.org/ns#";

/// Creative Commons legacy namespace URI (Userland)
pub const CREATIVE_COMMONS: &str = "http://backend.userland.com/creativeCommonsRssModule";

/// Handle Creative Commons element at feed level
///
/// Converts CC license information to a link with `rel="license"`
/// and adds it to the feed's links collection.
///
/// # Arguments
///
/// * `tag` - Element local name (e.g., "license")
/// * `attrs` - Element attributes as (name, value) pairs
/// * `text` - Element text content
/// * `feed` - Feed metadata to update
/// * `limits` - Parser limits for bounded collections
///
/// # Returns
///
/// `true` if element was recognized and handled, `false` otherwise
pub fn handle_feed_element(
tag: &[u8],
attrs: &[(Vec<u8>, String)],
text: &str,
feed: &mut FeedMeta,
limits: &ParserLimits,
) -> bool {
match tag {
b"license" => {
if let Some(license_url) = extract_license_url(attrs, text) {
feed.links.try_push_limited(
Link {
href: license_url,
rel: Some("license".to_string()),
..Default::default()
},
limits.max_links_per_feed,
);
}
true
}
_ => false,
}
}

/// Handle Creative Commons element at entry level
///
/// Converts CC license information to a link with `rel="license"`
/// and adds it to the entry's links collection.
///
/// # Arguments
///
/// * `tag` - Element local name (e.g., "license")
/// * `attrs` - Element attributes as (name, value) pairs
/// * `text` - Element text content
/// * `entry` - Entry to update
/// * `limits` - Parser limits for bounded collections
///
/// # Returns
///
/// `true` if element was recognized and handled, `false` otherwise
pub fn handle_entry_element(
tag: &[u8],
attrs: &[(Vec<u8>, String)],
text: &str,
entry: &mut Entry,
limits: &ParserLimits,
) -> bool {
match tag {
b"license" => {
if let Some(license_url) = extract_license_url(attrs, text) {
entry.links.try_push_limited(
Link {
href: license_url,
rel: Some("license".to_string()),
..Default::default()
},
limits.max_links_per_entry,
);
}
true
}
_ => false,
}
}

/// Extract license URL from element
///
/// Tries two methods in order:
/// 1. `rdf:resource` attribute (modern cc:license format)
/// 2. Text content (legacy creativeCommons:license format)
///
/// # Arguments
///
/// * `attrs` - Element attributes
/// * `text` - Element text content
///
/// # Returns
///
/// License URL if found, `None` otherwise
fn extract_license_url(attrs: &[(Vec<u8>, String)], text: &str) -> Option<String> {
// Try rdf:resource attribute first (modern format)
// <cc:license rdf:resource="http://creativecommons.org/licenses/by/4.0/" />
for (name, value) in attrs {
if (name == b"resource" || name.ends_with(b":resource")) && !value.is_empty() {
return Some(value.clone());
}
}

// Fall back to text content (legacy format)
// <creativeCommons:license>http://creativecommons.org/licenses/by/4.0/</creativeCommons:license>
let trimmed = text.trim();
if !trimmed.is_empty() {
return Some(trimmed.to_string());
}

None
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_extract_license_url_from_attribute() {
let attrs = vec![(
b"resource".to_vec(),
"http://creativecommons.org/licenses/by/4.0/".to_string(),
)];
let url = extract_license_url(&attrs, "").unwrap();
assert_eq!(url, "http://creativecommons.org/licenses/by/4.0/");
}

#[test]
fn test_extract_license_url_from_namespaced_attribute() {
let attrs = vec![(
b"rdf:resource".to_vec(),
"http://creativecommons.org/licenses/by-sa/4.0/".to_string(),
)];
let url = extract_license_url(&attrs, "").unwrap();
assert_eq!(url, "http://creativecommons.org/licenses/by-sa/4.0/");
}

#[test]
fn test_extract_license_url_from_text() {
let url =
extract_license_url(&[], "http://creativecommons.org/licenses/by-nc/4.0/").unwrap();
assert_eq!(url, "http://creativecommons.org/licenses/by-nc/4.0/");
}

#[test]
fn test_extract_license_url_from_text_with_whitespace() {
let url =
extract_license_url(&[], " http://creativecommons.org/licenses/by-nd/4.0/ ").unwrap();
assert_eq!(url, "http://creativecommons.org/licenses/by-nd/4.0/");
}

#[test]
fn test_extract_license_url_prefers_attribute() {
// If both attribute and text present, attribute wins
let attrs = vec![(
b"rdf:resource".to_vec(),
"http://creativecommons.org/licenses/by/4.0/".to_string(),
)];
let url =
extract_license_url(&attrs, "http://creativecommons.org/licenses/by-sa/4.0/").unwrap();
assert_eq!(url, "http://creativecommons.org/licenses/by/4.0/");
}

#[test]
fn test_extract_license_url_empty() {
assert!(extract_license_url(&[], "").is_none());
assert!(extract_license_url(&[], " ").is_none());
}

#[test]
fn test_handle_feed_element_license() {
let mut feed = FeedMeta::default();
let limits = ParserLimits::default();

let attrs = vec![(
b"rdf:resource".to_vec(),
"http://creativecommons.org/licenses/by/4.0/".to_string(),
)];

let handled = handle_feed_element(b"license", &attrs, "", &mut feed, &limits);
assert!(handled);
assert_eq!(feed.links.len(), 1);
assert_eq!(
feed.links[0].href,
"http://creativecommons.org/licenses/by/4.0/"
);
assert_eq!(feed.links[0].rel.as_deref(), Some("license"));
}

#[test]
fn test_handle_entry_element_license() {
let mut entry = Entry::default();
let limits = ParserLimits::default();

let handled = handle_entry_element(
b"license",
&[],
"http://creativecommons.org/licenses/by-sa/4.0/",
&mut entry,
&limits,
);
assert!(handled);
assert_eq!(entry.links.len(), 1);
assert_eq!(
entry.links[0].href,
"http://creativecommons.org/licenses/by-sa/4.0/"
);
assert_eq!(entry.links[0].rel.as_deref(), Some("license"));
}

#[test]
fn test_handle_feed_element_unknown() {
let mut feed = FeedMeta::default();
let limits = ParserLimits::default();

let handled = handle_feed_element(b"unknown", &[], "", &mut feed, &limits);
assert!(!handled);
}

#[test]
fn test_handle_entry_element_unknown() {
let mut entry = Entry::default();
let limits = ParserLimits::default();

let handled = handle_entry_element(b"unknown", &[], "", &mut entry, &limits);
assert!(!handled);
}

#[test]
fn test_multiple_licenses() {
let mut feed = FeedMeta::default();
let limits = ParserLimits::default();

let attrs1 = vec![(
b"rdf:resource".to_vec(),
"http://creativecommons.org/licenses/by/4.0/".to_string(),
)];
handle_feed_element(b"license", &attrs1, "", &mut feed, &limits);

let attrs2 = vec![(
b"rdf:resource".to_vec(),
"http://creativecommons.org/licenses/by-sa/4.0/".to_string(),
)];
handle_feed_element(b"license", &attrs2, "", &mut feed, &limits);

assert_eq!(feed.links.len(), 2);
assert_eq!(feed.links[0].rel.as_deref(), Some("license"));
assert_eq!(feed.links[1].rel.as_deref(), Some("license"));
}
}
Loading