Skip to content

Commit e008ce1

Browse files
committed
feat(types): add compact_str optimization for short strings
- Add SmallString type alias using CompactString
- Strings ≤24 bytes are stored inline without heap allocation
- Applied to fields that typically contain short values:
  - Link.rel, Link.hreflang (language codes)
  - Person.name (author names)
  - Tag.term, Tag.scheme, Tag.label
  - TextConstruct.language, Content.language
  - Generator.version
  - Entry.id, Entry.author, Entry.publisher, Entry.dc_creator
  - FeedMeta.author, FeedMeta.publisher, FeedMeta.language
  - FeedMeta.dc_creator, FeedMeta.dc_publisher

Performance impact: ~3% reduction in allocations (493→478/parse)
- CompactString is same size as String (24 bytes on 64-bit)
- No regression in parsing benchmarks
- All 591 tests pass
1 parent c87f336 commit e008ce1

File tree

14 files changed

+186
-124
lines changed

14 files changed

+186
-124
lines changed

Cargo.lock

Lines changed: 31 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ categories = ["parsing", "web-programming"]
2020
ammonia = "4.1"
2121
anyhow = "1.0"
2222
chrono = { version = "0.4", default-features = false }
23+
compact_str = { version = "0.9", features = ["serde"] }
2324
criterion = "0.8"
2425
encoding_rs = "0.8"
2526
flate2 = "1.1"

crates/feedparser-rs-core/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ repository.workspace = true
1313
[dependencies]
1414
ammonia.workspace = true
1515
chrono = { workspace = true, features = ["std", "clock"] }
16+
compact_str.workspace = true
1617
encoding_rs.workspace = true
1718
html-escape.workspace = true
1819
memchr.workspace = true

crates/feedparser-rs-core/src/namespace/cc.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ pub fn handle_feed_element(
5454
feed.links.try_push_limited(
5555
Link {
5656
href: license_url.into(),
57-
rel: Some("license".to_string()),
57+
rel: Some("license".into()),
5858
..Default::default()
5959
},
6060
limits.max_links_per_feed,
@@ -95,7 +95,7 @@ pub fn handle_entry_element(
9595
entry.links.try_push_limited(
9696
Link {
9797
href: license_url.into(),
98-
rel: Some("license".to_string()),
98+
rel: Some("license".into()),
9999
..Default::default()
100100
},
101101
limits.max_links_per_entry,

crates/feedparser-rs-core/src/namespace/dublin_core.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,10 @@ pub fn handle_feed_element(element: &str, text: &str, feed: &mut FeedMeta) {
3434
"creator" => {
3535
// dc:creator → author (if not already set)
3636
if feed.author.is_none() {
37-
feed.author = Some(text.to_string());
37+
feed.author = Some(text.into());
3838
}
3939
// Store in dc_creator field
40-
feed.dc_creator = Some(text.to_string());
40+
feed.dc_creator = Some(text.into());
4141
// Also add to authors list
4242
feed.authors.push(Person::from_name(text));
4343
}
@@ -62,9 +62,9 @@ pub fn handle_feed_element(element: &str, text: &str, feed: &mut FeedMeta) {
6262
"publisher" => {
6363
// dc:publisher → publisher
6464
if feed.publisher.is_none() {
65-
feed.publisher = Some(text.to_string());
65+
feed.publisher = Some(text.into());
6666
}
67-
feed.dc_publisher = Some(text.to_string());
67+
feed.dc_publisher = Some(text.into());
6868
}
6969
"rights" => {
7070
// dc:rights → rights (if not already set)
@@ -82,7 +82,7 @@ pub fn handle_feed_element(element: &str, text: &str, feed: &mut FeedMeta) {
8282
"language" => {
8383
// dc:language → language
8484
if feed.language.is_none() {
85-
feed.language = Some(text.to_string());
85+
feed.language = Some(text.into());
8686
}
8787
}
8888
"identifier" => {
@@ -112,9 +112,9 @@ pub fn handle_entry_element(element: &str, text: &str, entry: &mut Entry) {
112112
match element {
113113
"creator" => {
114114
if entry.author.is_none() {
115-
entry.author = Some(text.to_string());
115+
entry.author = Some(text.into());
116116
}
117-
entry.dc_creator = Some(text.to_string());
117+
entry.dc_creator = Some(text.into());
118118
entry.authors.push(Person::from_name(text));
119119
}
120120
"date" => {
@@ -142,7 +142,7 @@ pub fn handle_entry_element(element: &str, text: &str, entry: &mut Entry) {
142142
}
143143
"identifier" => {
144144
if entry.id.is_none() {
145-
entry.id = Some(text.to_string());
145+
entry.id = Some(text.into());
146146
}
147147
}
148148
"contributor" => {

crates/feedparser-rs-core/src/parser/atom.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ fn parse_entry(
321321
}
322322
}
323323
b"id" if !is_empty => {
324-
entry.id = Some(read_text(reader, buf, limits)?);
324+
entry.id = Some(read_text(reader, buf, limits)?.into());
325325
}
326326
b"updated" if !is_empty => {
327327
let text = read_text(reader, buf, limits)?;
@@ -495,7 +495,7 @@ fn parse_person(
495495
check_depth(*depth, limits.max_nesting_depth)?;
496496

497497
match e.local_name().as_ref() {
498-
b"name" => name = Some(read_text(reader, buf, limits)?),
498+
b"name" => name = Some(read_text(reader, buf, limits)?.into()),
499499
b"email" => email = Some(read_text(reader, buf, limits)?.into()),
500500
b"uri" => uri = Some(read_text(reader, buf, limits)?),
501501
_ => skip_element(reader, buf, limits, *depth)?,
@@ -534,7 +534,7 @@ fn parse_generator(
534534
}
535535
match attr.key.as_ref() {
536536
b"uri" => uri = Some(bytes_to_string(&attr.value)),
537-
b"version" => version = Some(bytes_to_string(&attr.value)),
537+
b"version" => version = Some(bytes_to_string(&attr.value).into()),
538538
_ => {}
539539
}
540540
}

crates/feedparser-rs-core/src/parser/json.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ pub fn parse_json_feed_with_limits(data: &[u8], limits: ParserLimits) -> Result<
2929
)));
3030
}
3131

32-
let mut feed = ParsedFeed::new();
32+
let mut feed = ParsedFeed::with_capacity(limits.max_entries);
3333

3434
let json: Value = match serde_json::from_slice(data) {
3535
Ok(v) => v,
@@ -129,7 +129,7 @@ fn parse_feed_metadata(json: &Value, feed: &mut FeedMeta, limits: &ParserLimits)
129129
if let Some(language) = json.get("language").and_then(|v| v.as_str())
130130
&& language.len() <= limits.max_text_length
131131
{
132-
feed.language = Some(language.to_string());
132+
feed.language = Some(language.into());
133133
}
134134

135135
if let Some(expired) = json.get("expired").and_then(Value::as_bool)
@@ -143,7 +143,7 @@ fn parse_item(json: &Value, limits: &ParserLimits) -> Entry {
143143
let mut entry = Entry::default();
144144

145145
if let Some(id) = json.get("id").and_then(|v| v.as_str()) {
146-
entry.id = Some(id.to_string());
146+
entry.id = Some(id.into());
147147
}
148148

149149
if let Some(url) = json.get("url").and_then(|v| v.as_str()) {
@@ -218,10 +218,10 @@ fn parse_item(json: &Value, limits: &ParserLimits) -> Entry {
218218

219219
if let Some(language) = json.get("language").and_then(|v| v.as_str()) {
220220
if let Some(detail) = &mut entry.title_detail {
221-
detail.language = Some(language.to_string());
221+
detail.language = Some(language.into());
222222
}
223223
if let Some(detail) = &mut entry.summary_detail {
224-
detail.language = Some(language.to_string());
224+
detail.language = Some(language.into());
225225
}
226226
}
227227

@@ -243,7 +243,7 @@ fn parse_item(json: &Value, limits: &ParserLimits) -> Entry {
243243
/// Extracts authors from JSON Feed format (supports both "authors" array and legacy "author" object)
244244
fn parse_authors(
245245
json: &Value,
246-
author: &mut Option<String>,
246+
author: &mut Option<crate::types::SmallString>,
247247
author_detail: &mut Option<Person>,
248248
authors: &mut Vec<Person>,
249249
limits: &ParserLimits,

crates/feedparser-rs-core/src/parser/rss.rs

Lines changed: 26 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -29,25 +29,28 @@ const MALFORMED_ATTRIBUTES_ERROR: &str = "Malformed XML attributes";
2929
/// Note: Keys are cloned to `Vec<u8>` because `quick_xml::Attribute` owns the key
3030
/// data only for the lifetime of the event, but we need to store attributes across
3131
/// multiple parsing calls in `parse_enclosure` and other functions.
32+
///
33+
/// Pre-allocates space for 4 attributes (typical for enclosures: url, type, length, maybe one more)
3234
#[inline]
3335
fn collect_attributes(e: &quick_xml::events::BytesStart) -> (Vec<(Vec<u8>, String)>, bool) {
3436
let mut has_errors = false;
35-
let attrs = e
36-
.attributes()
37-
.filter_map(|result| {
38-
if let Ok(attr) = result {
37+
let mut attrs = Vec::with_capacity(4);
38+
39+
for result in e.attributes() {
40+
match result {
41+
Ok(attr) => {
3942
if let Ok(v) = attr.unescape_value() {
40-
Some((attr.key.as_ref().to_vec(), v.to_string()))
43+
attrs.push((attr.key.as_ref().to_vec(), v.to_string()));
4144
} else {
4245
has_errors = true;
43-
None
4446
}
45-
} else {
47+
}
48+
Err(_) => {
4649
has_errors = true;
47-
None
4850
}
49-
})
50-
.collect();
51+
}
52+
}
53+
5154
(attrs, has_errors)
5255
}
5356

@@ -330,7 +333,7 @@ fn parse_channel_standard(
330333
feed.feed.set_title(TextConstruct {
331334
value: text,
332335
content_type: TextType::Text,
333-
language: channel_lang.map(String::from),
336+
language: channel_lang.map(std::convert::Into::into),
334337
base: base_ctx.base().map(String::from),
335338
});
336339
}
@@ -348,12 +351,12 @@ fn parse_channel_standard(
348351
feed.feed.set_subtitle(TextConstruct {
349352
value: text,
350353
content_type: TextType::Html,
351-
language: channel_lang.map(String::from),
354+
language: channel_lang.map(std::convert::Into::into),
352355
base: base_ctx.base().map(String::from),
353356
});
354357
}
355358
b"language" => {
356-
feed.feed.language = Some(read_text(reader, buf, limits)?);
359+
feed.feed.language = Some(read_text(reader, buf, limits)?.into());
357360
}
358361
b"pubDate" => {
359362
let text = read_text(reader, buf, limits)?;
@@ -367,10 +370,10 @@ fn parse_channel_standard(
367370
}
368371
}
369372
b"managingEditor" => {
370-
feed.feed.author = Some(read_text(reader, buf, limits)?);
373+
feed.feed.author = Some(read_text(reader, buf, limits)?.into());
371374
}
372375
b"webMaster" => {
373-
feed.feed.publisher = Some(read_text(reader, buf, limits)?);
376+
feed.feed.publisher = Some(read_text(reader, buf, limits)?.into());
374377
}
375378
b"generator" => {
376379
feed.feed.generator = Some(read_text(reader, buf, limits)?);
@@ -383,7 +386,7 @@ fn parse_channel_standard(
383386
let term = read_text(reader, buf, limits)?;
384387
feed.feed.tags.try_push_limited(
385388
Tag {
386-
term,
389+
term: term.into(),
387390
scheme: None,
388391
label: None,
389392
},
@@ -750,7 +753,7 @@ fn parse_item_standard(
750753
entry.set_title(TextConstruct {
751754
value: text,
752755
content_type: TextType::Text,
753-
language: item_lang.map(String::from),
756+
language: item_lang.map(std::convert::Into::into),
754757
base: base_ctx.base().map(String::from),
755758
});
756759
}
@@ -761,7 +764,7 @@ fn parse_item_standard(
761764
entry.links.try_push_limited(
762765
Link {
763766
href: resolved_link.into(),
764-
rel: Some("alternate".to_string()),
767+
rel: Some("alternate".into()),
765768
..Default::default()
766769
},
767770
limits.max_links_per_entry,
@@ -772,25 +775,25 @@ fn parse_item_standard(
772775
entry.set_summary(TextConstruct {
773776
value: text,
774777
content_type: TextType::Html,
775-
language: item_lang.map(String::from),
778+
language: item_lang.map(std::convert::Into::into),
776779
base: base_ctx.base().map(String::from),
777780
});
778781
}
779782
b"guid" => {
780-
entry.id = Some(read_text(reader, buf, limits)?);
783+
entry.id = Some(read_text(reader, buf, limits)?.into());
781784
}
782785
b"pubDate" => {
783786
let text = read_text(reader, buf, limits)?;
784787
entry.published = parse_date(&text);
785788
}
786789
b"author" => {
787-
entry.author = Some(read_text(reader, buf, limits)?);
790+
entry.author = Some(read_text(reader, buf, limits)?.into());
788791
}
789792
b"category" => {
790793
let term = read_text(reader, buf, limits)?;
791794
entry.tags.try_push_limited(
792795
Tag {
793-
term,
796+
term: term.into(),
794797
scheme: None,
795798
label: None,
796799
},
@@ -1348,7 +1351,7 @@ fn parse_podcast_value(
13481351
let suggested = find_attribute(attrs, b"suggested")
13491352
.map(|v| truncate_to_length(v, limits.max_attribute_length));
13501353

1351-
let mut recipients = Vec::new();
1354+
let mut recipients = Vec::with_capacity(2);
13521355

13531356
loop {
13541357
match reader.read_event_into(buf) {

crates/feedparser-rs-core/src/parser/rss10.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ pub fn parse_rss10_with_limits(data: &[u8], limits: ParserLimits) -> Result<Pars
9191
|| attr.key.local_name().as_ref() == b"about")
9292
&& let Ok(value) = attr.unescape_value()
9393
{
94-
feed.feed.id = Some(value.to_string());
94+
feed.feed.id = Some(value.as_ref().into());
9595
}
9696
}
9797
if let Err(e) = parse_channel(&mut reader, &mut feed, &limits, &mut depth) {
@@ -265,7 +265,7 @@ fn parse_item(
265265
item_id: Option<String>,
266266
) -> Result<Entry> {
267267
let mut entry = Entry::with_capacity();
268-
entry.id = item_id;
268+
entry.id = item_id.map(std::convert::Into::into);
269269

270270
loop {
271271
match reader.read_event_into(buf) {

0 commit comments

Comments
 (0)