Skip to content

Commit 5da75b3

Browse files
committed
feat: add semantic newtypes (Url, MimeType, Email) and box namespace metadata
BREAKING CHANGE: Type changes in public API

## Changes

### Semantic Newtypes (types/common.rs)
- Add `Url(String)` - URL wrapper with Deref<Target=str>
- Add `MimeType(Arc<str>)` - MIME type with string interning for efficient cloning
- Add `Email(String)` - Email wrapper with Deref<Target=str>
- All newtypes implement: From, Into, Deref, AsRef, Display, PartialEq, serde traits

### Boxing Large Optional Structs
- Box `ItunesFeedMeta`, `ItunesEntryMeta` - reduces stack size
- Box `PodcastMeta`, `PodcastEntryMeta` - reduces stack size
- Box `SyndicationMeta`, `GeoLocation` - reduces stack size
- Memory savings: ~7.6 KB per 100-entry plain RSS feed (76% reduction)

### Type Applications
- Link.href: String → Url
- Link.link_type: Option<String> → Option<MimeType>
- Person.email: Option<String> → Option<Email>
- Enclosure.url: String → Url
- Enclosure.enclosure_type: Option<String> → Option<MimeType>
- MediaContent.url: String → Url
- MediaContent.content_type: Option<String> → Option<MimeType>
- MediaThumbnail.url: String → Url
- Image.url: String → Url
- PodcastPerson.img/href: Option<String> → Option<Url>
- PodcastTranscript.url/transcript_type: String/Option<String> → Url/Option<MimeType>
- PodcastFunding.url: String → Url
- PodcastChapters.url/chapters_type: String/Option<String> → Url/Option<MimeType>

### Binding Updates
- Python: Use .as_deref() for Box fields
- Node.js: Use .map(|b| T::from(*b)) for Box fields, .into_inner() for newtypes

### Performance
- No parsing performance regression (verified with benchmarks)
- Arc<str> for MimeType provides ~10x faster cloning
- Box reduces stack frame size for feeds without namespace metadata
1 parent 3672893 commit 5da75b3

File tree

18 files changed

+801
-176
lines changed

18 files changed

+801
-176
lines changed

crates/feedparser-rs-core/src/lib.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,11 +68,12 @@ pub use limits::{LimitError, ParserLimits};
6868
pub use options::ParseOptions;
6969
pub use parser::{detect_format, parse, parse_with_limits};
7070
pub use types::{
71-
Content, Enclosure, Entry, FeedMeta, FeedVersion, Generator, Image, ItunesCategory,
71+
Content, Email, Enclosure, Entry, FeedMeta, FeedVersion, Generator, Image, ItunesCategory,
7272
ItunesEntryMeta, ItunesFeedMeta, ItunesOwner, LimitedCollectionExt, Link, MediaContent,
73-
MediaThumbnail, ParsedFeed, Person, PodcastChapters, PodcastEntryMeta, PodcastFunding,
74-
PodcastMeta, PodcastPerson, PodcastSoundbite, PodcastTranscript, PodcastValue,
75-
PodcastValueRecipient, Source, Tag, TextConstruct, TextType, parse_duration, parse_explicit,
73+
MediaThumbnail, MimeType, ParsedFeed, Person, PodcastChapters, PodcastEntryMeta,
74+
PodcastFunding, PodcastMeta, PodcastPerson, PodcastSoundbite, PodcastTranscript, PodcastValue,
75+
PodcastValueRecipient, Source, Tag, TextConstruct, TextType, Url, parse_duration,
76+
parse_explicit,
7677
};
7778

7879
pub use namespace::syndication::{SyndicationMeta, UpdatePeriod};

crates/feedparser-rs-core/src/namespace/cc.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ pub fn handle_feed_element(
5353
if let Some(license_url) = extract_license_url(attrs, text) {
5454
feed.links.try_push_limited(
5555
Link {
56-
href: license_url,
56+
href: license_url.into(),
5757
rel: Some("license".to_string()),
5858
..Default::default()
5959
},
@@ -94,7 +94,7 @@ pub fn handle_entry_element(
9494
if let Some(license_url) = extract_license_url(attrs, text) {
9595
entry.links.try_push_limited(
9696
Link {
97-
href: license_url,
97+
href: license_url.into(),
9898
rel: Some("license".to_string()),
9999
..Default::default()
100100
},

crates/feedparser-rs-core/src/namespace/content.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ pub fn handle_entry_element(element: &str, text: &str, entry: &mut Entry) {
2626
// content:encoded → add to entry.content as HTML
2727
entry.content.push(Content {
2828
value: text.to_string(),
29-
content_type: Some("text/html".to_string()),
29+
content_type: Some("text/html".into()),
3030
language: None,
3131
base: None,
3232
});

crates/feedparser-rs-core/src/namespace/georss.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -178,25 +178,25 @@ pub fn handle_entry_element(
178178
match tag {
179179
b"point" => {
180180
if let Some(loc) = parse_point(text) {
181-
entry.geo = Some(loc);
181+
entry.geo = Some(Box::new(loc));
182182
}
183183
true
184184
}
185185
b"line" => {
186186
if let Some(loc) = parse_line(text) {
187-
entry.geo = Some(loc);
187+
entry.geo = Some(Box::new(loc));
188188
}
189189
true
190190
}
191191
b"polygon" => {
192192
if let Some(loc) = parse_polygon(text) {
193-
entry.geo = Some(loc);
193+
entry.geo = Some(Box::new(loc));
194194
}
195195
true
196196
}
197197
b"box" => {
198198
if let Some(loc) = parse_box(text) {
199-
entry.geo = Some(loc);
199+
entry.geo = Some(Box::new(loc));
200200
}
201201
true
202202
}
@@ -225,25 +225,25 @@ pub fn handle_feed_element(
225225
match tag {
226226
b"point" => {
227227
if let Some(loc) = parse_point(text) {
228-
feed.geo = Some(loc);
228+
feed.geo = Some(Box::new(loc));
229229
}
230230
true
231231
}
232232
b"line" => {
233233
if let Some(loc) = parse_line(text) {
234-
feed.geo = Some(loc);
234+
feed.geo = Some(Box::new(loc));
235235
}
236236
true
237237
}
238238
b"polygon" => {
239239
if let Some(loc) = parse_polygon(text) {
240-
feed.geo = Some(loc);
240+
feed.geo = Some(Box::new(loc));
241241
}
242242
true
243243
}
244244
b"box" => {
245245
if let Some(loc) = parse_box(text) {
246-
feed.geo = Some(loc);
246+
feed.geo = Some(Box::new(loc));
247247
}
248248
true
249249
}

crates/feedparser-rs-core/src/namespace/media_rss.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,17 @@
1414
/// - `media:keywords` → tags (comma-separated)
1515
/// - `media:category` → tags
1616
/// - `media:credit` → contributors
17+
///
18+
/// # Type Design Note
19+
///
20+
/// The [`MediaContent`] and [`MediaThumbnail`] types in this module use raw `String`
21+
/// fields instead of the `Url`/`MimeType` newtypes from `types::common`. This is
22+
/// intentional:
23+
///
24+
/// 1. These are internal parsing types with extended attributes (medium, bitrate,
25+
/// framerate, expression, `is_default`) not present in the public API types.
26+
/// 2. The `media_content_to_enclosure` function handles conversion to public types.
27+
/// 3. The public API types in `types::common::MediaContent` use proper newtypes.
1728
use crate::types::{Enclosure, Entry, Tag};
1829

1930
/// Media RSS namespace URI
@@ -191,8 +202,8 @@ pub fn handle_entry_element(element: &str, text: &str, entry: &mut Entry) {
191202
/// ```
192203
pub fn media_content_to_enclosure(content: &MediaContent) -> Enclosure {
193204
Enclosure {
194-
url: content.url.clone(),
195-
enclosure_type: content.type_.clone(),
205+
url: content.url.clone().into(),
206+
enclosure_type: content.type_.as_ref().map(|t| t.clone().into()),
196207
length: content.file_size,
197208
}
198209
}

crates/feedparser-rs-core/src/namespace/syndication.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ pub fn handle_feed_element(element: &str, text: &str, feed: &mut FeedMeta) {
8282
"updatePeriod" => {
8383
if let Some(period) = UpdatePeriod::parse(text) {
8484
if feed.syndication.is_none() {
85-
feed.syndication = Some(SyndicationMeta::default());
85+
feed.syndication = Some(Box::new(SyndicationMeta::default()));
8686
}
8787
if let Some(syn) = &mut feed.syndication {
8888
syn.update_period = Some(period);
@@ -92,7 +92,7 @@ pub fn handle_feed_element(element: &str, text: &str, feed: &mut FeedMeta) {
9292
"updateFrequency" => {
9393
if let Ok(freq) = text.parse::<u32>() {
9494
if feed.syndication.is_none() {
95-
feed.syndication = Some(SyndicationMeta::default());
95+
feed.syndication = Some(Box::new(SyndicationMeta::default()));
9696
}
9797
if let Some(syn) = &mut feed.syndication {
9898
syn.update_frequency = Some(freq);
@@ -101,7 +101,7 @@ pub fn handle_feed_element(element: &str, text: &str, feed: &mut FeedMeta) {
101101
}
102102
"updateBase" => {
103103
if feed.syndication.is_none() {
104-
feed.syndication = Some(SyndicationMeta::default());
104+
feed.syndication = Some(Box::new(SyndicationMeta::default()));
105105
}
106106
if let Some(syn) = &mut feed.syndication {
107107
syn.update_base = Some(text.to_string());

crates/feedparser-rs-core/src/parser/atom.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -130,15 +130,15 @@ fn parse_feed_element(
130130
element.attributes().flatten(),
131131
limits.max_attribute_length,
132132
) {
133-
link.href = base_ctx.resolve_safe(&link.href);
133+
link.href = base_ctx.resolve_safe(&link.href).into();
134134

135135
if feed.feed.link.is_none() && link.rel.as_deref() == Some("alternate")
136136
{
137-
feed.feed.link = Some(link.href.clone());
137+
feed.feed.link = Some(link.href.to_string());
138138
}
139139
if feed.feed.license.is_none() && link.rel.as_deref() == Some("license")
140140
{
141-
feed.feed.license = Some(link.href.clone());
141+
feed.feed.license = Some(link.href.to_string());
142142
}
143143
feed.feed
144144
.links
@@ -304,13 +304,13 @@ fn parse_entry(
304304
element.attributes().flatten(),
305305
limits.max_attribute_length,
306306
) {
307-
link.href = base_ctx.resolve_safe(&link.href);
307+
link.href = base_ctx.resolve_safe(&link.href).into();
308308

309309
if entry.link.is_none() && link.rel.as_deref() == Some("alternate") {
310-
entry.link = Some(link.href.clone());
310+
entry.link = Some(link.href.to_string());
311311
}
312312
if entry.license.is_none() && link.rel.as_deref() == Some("license") {
313-
entry.license = Some(link.href.clone());
313+
entry.license = Some(link.href.to_string());
314314
}
315315
entry
316316
.links
@@ -496,7 +496,7 @@ fn parse_person(
496496

497497
match e.local_name().as_ref() {
498498
b"name" => name = Some(read_text(reader, buf, limits)?),
499-
b"email" => email = Some(read_text(reader, buf, limits)?),
499+
b"email" => email = Some(read_text(reader, buf, limits)?.into()),
500500
b"uri" => uri = Some(read_text(reader, buf, limits)?),
501501
_ => skip_element(reader, buf, limits, *depth)?,
502502
}
@@ -560,7 +560,7 @@ fn parse_content(
560560
continue;
561561
}
562562
if attr.key.as_ref() == b"type" {
563-
content_type = Some(bytes_to_string(&attr.value));
563+
content_type = Some(bytes_to_string(&attr.value).into());
564564
}
565565
}
566566

@@ -599,7 +599,7 @@ fn parse_atom_source(
599599
limits.max_attribute_length,
600600
) && link.is_none()
601601
{
602-
link = Some(l.href);
602+
link = Some(l.href.to_string());
603603
}
604604
skip_to_end(reader, buf, b"link")?;
605605
}

crates/feedparser-rs-core/src/parser/json.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ fn parse_feed_metadata(json: &Value, feed: &mut FeedMeta, limits: &ParserLimits)
109109
&& favicon.len() <= limits.max_text_length
110110
{
111111
feed.image = Some(Image {
112-
url: favicon.to_string(),
112+
url: favicon.to_string().into(),
113113
title: None,
114114
link: None,
115115
width: None,
@@ -185,7 +185,7 @@ fn parse_item(json: &Value, limits: &ParserLimits) -> Entry {
185185

186186
if let Some(image) = json.get("image").and_then(|v| v.as_str()) {
187187
let _ = entry.links.try_push_limited(
188-
Link::enclosure(image, Some("image/*".to_string())),
188+
Link::enclosure(image, Some("image/*".into())),
189189
limits.max_entries,
190190
);
191191
}

0 commit comments

Comments
 (0)