Skip to content

Commit 2b973d2

Browse files
authored
feat: complete namespace support (Podcast 2.0, Media RSS, GeoRSS) (#35)
* feat(podcast): add Podcast 2.0 chapters, soundbites and iTunes complete/new-feed-url Phase 1 of namespace completion: - Add PodcastChapters type for podcast:chapters element - Add PodcastSoundbite type for podcast:soundbite element - Add PodcastEntryMeta container for entry-level podcast metadata - Add itunes:complete and itunes:new-feed-url fields to ItunesFeedMeta - Add DoS limits: max_podcast_transcripts, max_podcast_funding, max_podcast_persons - Add SSRF security warnings to all URL fields in podcast types - Optimize parse_duration() and parse_explicit() to avoid allocations - Add comprehensive tests for new functionality BREAKING: Entry struct now has `podcast: Option<PodcastEntryMeta>` field * feat(media): add Media RSS full attributes and podcast:value parsing Phase 2 of namespace completion: - Add MediaContent type with full Media RSS spec (medium, bitrate, framerate, expression, isDefault) - Add MediaThumbnail type with NTP time offset support - Add PodcastValue and PodcastValueRecipient types for value-for-value monetization - Implement podcast:value parsing with nested valueRecipient elements - Add max_value_recipients DoS limit (default: 20) - Add SSRF security warnings to MediaContent.url and MediaThumbnail.url - Add media_content_to_enclosure() conversion helper - Add comprehensive unit tests for all new types * feat(compat): add compatibility module and feed-level GeoRSS support Phase 3 (final) of namespace completion: - Add compat module with Python feedparser compatibility helpers: - normalize_version() converts FeedVersion to Python format - format_duration() converts seconds to HH:MM:SS - is_valid_version() validates version identifiers - Add geo field to FeedMeta for feed-level geographic data - Add handle_feed_element() to GeoRSS namespace handler - Add is_georss_tag() helper to parser common module - Integrate GeoRSS parsing at channel level in RSS and RSS 1.0 parsers - Add comprehensive tests for all new functionality * fix: resolve CI 
lint and formatting issues - Add #![allow(missing_docs)] to test file - Fix clippy warnings (float_cmp, format_push_string) - Apply rustfmt formatting fixes * fix: export PodcastValue and PodcastValueRecipient types
1 parent c9ccaca commit 2b973d2

File tree

15 files changed

+2122
-69
lines changed

15 files changed

+2122
-69
lines changed
Lines changed: 186 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,187 @@
1-
// Compatibility utilities for feedparser API
2-
//
3-
// This module provides utilities to ensure API compatibility with
4-
// Python's feedparser library.
1+
//! Compatibility utilities for feedparser API
2+
//!
3+
//! This module provides utilities to ensure API compatibility with
4+
//! Python's feedparser library.
55
6-
// TODO: Implement in later phases as needed
6+
use crate::types::FeedVersion;
7+
8+
/// Normalize feed type string to Python feedparser format
9+
///
10+
/// Converts version enum to Python feedparser-compatible string format:
11+
/// - "RSS 2.0" -> "rss20"
12+
/// - "Atom 1.0" -> "atom10"
13+
/// - etc.
14+
///
15+
/// # Arguments
16+
///
17+
/// * `version` - Feed version to normalize
18+
///
19+
/// # Returns
20+
///
21+
/// Normalized version string compatible with Python feedparser
22+
///
23+
/// # Examples
24+
///
25+
/// ```
26+
/// use feedparser_rs::{compat::normalize_version, FeedVersion};
27+
///
28+
/// assert_eq!(normalize_version(FeedVersion::Rss20), "rss20");
29+
/// assert_eq!(normalize_version(FeedVersion::Atom10), "atom10");
30+
/// assert_eq!(normalize_version(FeedVersion::Unknown), "");
31+
/// ```
32+
#[must_use]
33+
pub fn normalize_version(version: FeedVersion) -> String {
34+
version.as_str().to_string()
35+
}
36+
37+
/// Convert a duration in seconds to `H:MM:SS` format.
///
/// Formats a duration for display in podcast feeds and other contexts
/// where a human-readable time format is needed.
///
/// Minutes and seconds are always zero-padded to two digits. Hours are
/// printed without padding and are not wrapped at 24, so a duration of a
/// day or longer keeps counting hours (`90061` seconds is `"25:01:01"`).
///
/// # Arguments
///
/// * `seconds` - Duration in seconds
///
/// # Returns
///
/// Duration string in `H:MM:SS` format
///
/// # Examples
///
/// ```
/// use feedparser_rs::compat::format_duration;
///
/// assert_eq!(format_duration(0), "0:00:00");
/// assert_eq!(format_duration(90), "0:01:30");
/// assert_eq!(format_duration(3661), "1:01:01");
/// assert_eq!(format_duration(36000), "10:00:00");
/// ```
#[must_use]
pub fn format_duration(seconds: u32) -> String {
    let hours = seconds / 3600;
    let minutes = (seconds % 3600) / 60;
    let secs = seconds % 60;
    format!("{hours}:{minutes:02}:{secs:02}")
}
67+
68+
/// Check whether a string is a recognized feed version identifier.
///
/// The accepted identifiers are `rss090`, `rss091`, `rss092`, `rss10`,
/// `rss20`, `atom03`, `atom10`, `json10` and `json11`.
///
/// The comparison is exact: it is case-sensitive and performs no trimming,
/// so `"RSS20"`, `" rss20"` and the empty string are all rejected.
///
/// # Arguments
///
/// * `version` - Version string to validate
///
/// # Returns
///
/// `true` if `version` is one of the identifiers listed above, `false` otherwise
///
/// # Examples
///
/// ```
/// use feedparser_rs::compat::is_valid_version;
///
/// assert!(is_valid_version("rss20"));
/// assert!(is_valid_version("atom10"));
/// assert!(is_valid_version("json11"));
/// assert!(!is_valid_version("invalid"));
/// assert!(!is_valid_version(""));
/// ```
#[must_use]
pub fn is_valid_version(version: &str) -> bool {
    matches!(
        version,
        "rss090"
            | "rss091"
            | "rss092"
            | "rss10"
            | "rss20"
            | "atom03"
            | "atom10"
            | "json10"
            | "json11"
    )
}
107+
108+
#[cfg(test)]
mod tests {
    use super::*;

    /// Each listed `FeedVersion` maps to its feedparser identifier, and
    /// `Unknown` maps to the empty string.
    #[test]
    fn test_normalize_version() {
        assert_eq!(normalize_version(FeedVersion::Rss20), "rss20");
        assert_eq!(normalize_version(FeedVersion::Rss10), "rss10");
        assert_eq!(normalize_version(FeedVersion::Atom10), "atom10");
        assert_eq!(normalize_version(FeedVersion::Atom03), "atom03");
        assert_eq!(normalize_version(FeedVersion::JsonFeed10), "json10");
        assert_eq!(normalize_version(FeedVersion::JsonFeed11), "json11");
        assert_eq!(normalize_version(FeedVersion::Unknown), "");
    }

    #[test]
    fn test_format_duration_zero() {
        assert_eq!(format_duration(0), "0:00:00");
    }

    #[test]
    fn test_format_duration_seconds_only() {
        assert_eq!(format_duration(30), "0:00:30");
        assert_eq!(format_duration(59), "0:00:59");
    }

    #[test]
    fn test_format_duration_minutes() {
        assert_eq!(format_duration(60), "0:01:00");
        assert_eq!(format_duration(90), "0:01:30");
        assert_eq!(format_duration(150), "0:02:30");
        assert_eq!(format_duration(3599), "0:59:59");
    }

    #[test]
    fn test_format_duration_hours() {
        assert_eq!(format_duration(3600), "1:00:00");
        assert_eq!(format_duration(3661), "1:01:01");
        assert_eq!(format_duration(7200), "2:00:00");
        assert_eq!(format_duration(36000), "10:00:00");
    }

    /// Hours are not wrapped at 24 and grow to as many digits as needed.
    #[test]
    fn test_format_duration_large() {
        assert_eq!(format_duration(86399), "23:59:59");
        assert_eq!(format_duration(86400), "24:00:00");
        assert_eq!(format_duration(90061), "25:01:01");
        // Largest representable input: 1193046 h, 28 min, 15 s.
        assert_eq!(format_duration(u32::MAX), "1193046:28:15");
    }

    #[test]
    fn test_is_valid_version_valid() {
        assert!(is_valid_version("rss090"));
        assert!(is_valid_version("rss091"));
        assert!(is_valid_version("rss092"));
        assert!(is_valid_version("rss10"));
        assert!(is_valid_version("rss20"));
        assert!(is_valid_version("atom03"));
        assert!(is_valid_version("atom10"));
        assert!(is_valid_version("json10"));
        assert!(is_valid_version("json11"));
    }

    #[test]
    fn test_is_valid_version_invalid() {
        assert!(!is_valid_version(""));
        assert!(!is_valid_version("invalid"));
        assert!(!is_valid_version("rss30"));
        assert!(!is_valid_version("atom20"));
        assert!(!is_valid_version("RSS20")); // Case sensitive
        assert!(!is_valid_version("json12"));
        assert!(!is_valid_version("rdf"));
    }

    /// Surrounding or embedded whitespace is never trimmed before matching.
    #[test]
    fn test_is_valid_version_edge_cases() {
        assert!(!is_valid_version(" rss20"));
        assert!(!is_valid_version("rss20 "));
        assert!(!is_valid_version("rss 20"));
    }
}

crates/feedparser-rs-core/src/lib.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@
3838
//! for representing parsed feed data. The main entry point is the [`parse`] function which
3939
//! automatically detects feed format and returns parsed results.
4040
41-
mod compat;
41+
/// Compatibility utilities for Python feedparser API
42+
pub mod compat;
4243
mod error;
4344
#[cfg(feature = "http")]
4445
/// HTTP client module for fetching feeds from URLs
@@ -68,8 +69,9 @@ pub use parser::{detect_format, parse, parse_with_limits};
6869
pub use types::{
6970
Content, Enclosure, Entry, FeedMeta, FeedVersion, Generator, Image, ItunesCategory,
7071
ItunesEntryMeta, ItunesFeedMeta, ItunesOwner, LimitedCollectionExt, Link, ParsedFeed, Person,
71-
PodcastFunding, PodcastMeta, PodcastPerson, PodcastTranscript, Source, Tag, TextConstruct,
72-
TextType, parse_duration, parse_explicit,
72+
PodcastChapters, PodcastEntryMeta, PodcastFunding, PodcastMeta, PodcastPerson,
73+
PodcastSoundbite, PodcastTranscript, PodcastValue, PodcastValueRecipient, Source, Tag,
74+
TextConstruct, TextType, parse_duration, parse_explicit,
7375
};
7476

7577
pub use namespace::syndication::{SyndicationMeta, UpdatePeriod};

crates/feedparser-rs-core/src/limits.rs

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,42 @@ pub struct ParserLimits {
107107
///
108108
/// Default: 64 KB
109109
pub max_attribute_length: usize,
110+
111+
/// Maximum number of podcast soundbites per entry
112+
///
113+
/// Podcast 2.0 soundbite elements for shareable clips.
114+
///
115+
/// Default: 10 soundbites
116+
pub max_podcast_soundbites: usize,
117+
118+
/// Maximum number of podcast transcripts per entry
119+
///
120+
/// Podcast 2.0 transcript elements.
121+
///
122+
/// Default: 20 transcripts
123+
pub max_podcast_transcripts: usize,
124+
125+
/// Maximum number of podcast funding elements per feed
126+
///
127+
/// Podcast 2.0 funding elements for donation links.
128+
///
129+
/// Default: 20 funding elements
130+
pub max_podcast_funding: usize,
131+
132+
/// Maximum number of podcast person elements per entry
133+
///
134+
/// Podcast 2.0 person elements for hosts, guests, etc.
135+
///
136+
/// Default: 50 persons
137+
pub max_podcast_persons: usize,
138+
139+
/// Maximum number of podcast value recipients per feed
140+
///
141+
/// Podcast 2.0 value recipients for payment splitting.
142+
/// Prevents `DoS` from feeds with excessive recipient lists.
143+
///
144+
/// Default: 20 recipients
145+
pub max_value_recipients: usize,
110146
}
111147

112148
impl Default for ParserLimits {
@@ -129,6 +165,11 @@ impl Default for ParserLimits {
129165
max_text_length: 10 * 1024 * 1024, // 10 MB
130166
max_feed_size_bytes: 100 * 1024 * 1024, // 100 MB
131167
max_attribute_length: 64 * 1024, // 64 KB
168+
max_podcast_soundbites: 10,
169+
max_podcast_transcripts: 20,
170+
max_podcast_funding: 20,
171+
max_podcast_persons: 50,
172+
max_value_recipients: 20,
132173
}
133174
}
134175
}
@@ -163,6 +204,11 @@ impl ParserLimits {
163204
max_text_length: 1024 * 1024, // 1 MB
164205
max_feed_size_bytes: 10 * 1024 * 1024, // 10 MB
165206
max_attribute_length: 8 * 1024, // 8 KB
207+
max_podcast_soundbites: 5,
208+
max_podcast_transcripts: 5,
209+
max_podcast_funding: 5,
210+
max_podcast_persons: 10,
211+
max_value_recipients: 5,
166212
}
167213
}
168214

@@ -195,6 +241,11 @@ impl ParserLimits {
195241
max_text_length: 50 * 1024 * 1024, // 50 MB
196242
max_feed_size_bytes: 500 * 1024 * 1024, // 500 MB
197243
max_attribute_length: 256 * 1024, // 256 KB
244+
max_podcast_soundbites: 50,
245+
max_podcast_transcripts: 100,
246+
max_podcast_funding: 50,
247+
max_podcast_persons: 200,
248+
max_value_recipients: 50,
198249
}
199250
}
200251

@@ -393,4 +444,49 @@ mod tests {
393444
assert!(msg.contains("200000000"));
394445
assert!(msg.contains("100000000"));
395446
}
447+
448+
/// The default limits cap value recipients at 20.
#[test]
fn test_max_value_recipients_default() {
    let limits = ParserLimits::default();
    assert_eq!(limits.max_value_recipients, 20);
}
453+
454+
/// The strict profile caps value recipients at 5, below the default cap.
#[test]
fn test_max_value_recipients_strict() {
    let limits = ParserLimits::strict();
    assert_eq!(limits.max_value_recipients, 5);
    assert!(limits.max_value_recipients < ParserLimits::default().max_value_recipients);
}
460+
461+
/// The permissive profile caps value recipients at 50, above the default cap.
#[test]
fn test_max_value_recipients_permissive() {
    let limits = ParserLimits::permissive();
    assert_eq!(limits.max_value_recipients, 50);
    assert!(limits.max_value_recipients > ParserLimits::default().max_value_recipients);
}
467+
468+
/// `check_collection_size` accepts a count below the recipient cap and
/// rejects a count that equals or exceeds it.
#[test]
fn test_value_recipients_limit_enforcement() {
    let limits = ParserLimits::default();

    // Within limit: one below the cap is accepted.
    assert!(
        limits
            .check_collection_size(19, limits.max_value_recipients, "value_recipients")
            .is_ok()
    );

    // At limit: a count equal to the cap is rejected.
    assert!(
        limits
            .check_collection_size(20, limits.max_value_recipients, "value_recipients")
            .is_err()
    );

    // Exceeds limit: rejected with the collection-size error variant.
    let result =
        limits.check_collection_size(21, limits.max_value_recipients, "value_recipients");
    assert!(result.is_err());
    assert!(matches!(result, Err(LimitError::CollectionTooLarge { .. })));
}
396492
}

0 commit comments

Comments
 (0)