Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 70 additions & 18 deletions crates/feedparser-rs-core/src/http/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ use super::validation::validate_url;
use crate::error::{FeedError, Result};
use reqwest::blocking::{Client, Response};
use reqwest::header::{
ACCEPT, ACCEPT_ENCODING, HeaderMap, HeaderValue, IF_MODIFIED_SINCE, IF_NONE_MATCH, USER_AGENT,
ACCEPT, ACCEPT_ENCODING, HeaderMap, HeaderName, HeaderValue, IF_MODIFIED_SINCE, IF_NONE_MATCH,
USER_AGENT,
};
use std::collections::HashMap;
use std::time::Duration;
Expand Down Expand Up @@ -63,6 +64,25 @@ impl FeedHttpClient {
self
}

/// Parses `value` as a header value and stores it in `headers` under `name`.
///
/// Shared by the header-building code so that every rejected value is
/// reported with the same message shape.
///
/// # Errors
///
/// Returns `FeedError::Http` with the message `Invalid {field_name}: {e}`
/// when `HeaderValue::from_str` rejects `value`. Nothing is inserted into
/// `headers` in that case.
#[inline]
fn insert_header(
    headers: &mut HeaderMap,
    name: HeaderName,
    value: &str,
    field_name: &str,
) -> Result<()> {
    match HeaderValue::from_str(value) {
        Ok(parsed) => {
            headers.insert(name, parsed);
            Ok(())
        }
        // `field_name` is only used to label the error for the caller.
        Err(e) => Err(FeedError::Http {
            message: format!("Invalid {field_name}: {e}"),
        }),
    }
}

/// Fetches a feed from the given URL
///
/// Supports conditional GET with `ETag` and `Last-Modified` headers.
Expand Down Expand Up @@ -91,12 +111,7 @@ impl FeedHttpClient {
let mut headers = HeaderMap::new();

// Standard headers
headers.insert(
USER_AGENT,
HeaderValue::from_str(&self.user_agent).map_err(|e| FeedError::Http {
message: format!("Invalid User-Agent: {e}"),
})?,
);
Self::insert_header(&mut headers, USER_AGENT, &self.user_agent, "User-Agent")?;

headers.insert(
ACCEPT,
Expand All @@ -112,21 +127,16 @@ impl FeedHttpClient {

// Conditional GET headers
if let Some(etag_val) = etag {
headers.insert(
IF_NONE_MATCH,
HeaderValue::from_str(etag_val).map_err(|e| FeedError::Http {
message: format!("Invalid ETag: {e}"),
})?,
);
Self::insert_header(&mut headers, IF_NONE_MATCH, etag_val, "ETag")?;
}

if let Some(modified_val) = modified {
headers.insert(
Self::insert_header(
&mut headers,
IF_MODIFIED_SINCE,
HeaderValue::from_str(modified_val).map_err(|e| FeedError::Http {
message: format!("Invalid Last-Modified: {e}"),
})?,
);
modified_val,
"Last-Modified",
)?;
}

// Merge extra headers
Expand Down Expand Up @@ -266,4 +276,46 @@ mod tests {
let err_msg = result.err().unwrap().to_string();
assert!(err_msg.contains("Internal domain TLD not allowed"));
}

#[test]
fn test_insert_header_valid() {
    // A well-formed value must be accepted and stored verbatim.
    let mut header_map = HeaderMap::new();
    let outcome =
        FeedHttpClient::insert_header(&mut header_map, USER_AGENT, "TestBot/1.0", "User-Agent");

    assert!(outcome.is_ok());
    let stored = header_map.get(USER_AGENT).unwrap();
    assert_eq!(stored, "TestBot/1.0");
}

#[test]
fn test_insert_header_invalid_value() {
    let mut header_map = HeaderMap::new();
    // The embedded newline (a control character) makes this value invalid.
    let outcome = FeedHttpClient::insert_header(
        &mut header_map,
        USER_AGENT,
        "Invalid\nHeader",
        "User-Agent",
    );

    assert!(outcome.is_err());
    // The error must be the Http variant and carry the field label.
    if let Err(FeedError::Http { message }) = outcome {
        assert!(message.contains("Invalid User-Agent"));
    } else {
        panic!("Expected Http error");
    }
}

#[test]
fn test_insert_header_multiple_headers() {
    let mut header_map = HeaderMap::new();

    // Insert two distinct headers through the helper, in order.
    for (name, value, label) in [
        (USER_AGENT, "TestBot/1.0", "User-Agent"),
        (ACCEPT, "application/xml", "Accept"),
    ] {
        FeedHttpClient::insert_header(&mut header_map, name, value, label).unwrap();
    }

    // Both entries must be present with the values that were supplied.
    assert_eq!(header_map.len(), 2);
    assert_eq!(header_map.get(USER_AGENT).unwrap(), "TestBot/1.0");
    assert_eq!(header_map.get(ACCEPT).unwrap(), "application/xml");
}
}
36 changes: 6 additions & 30 deletions crates/feedparser-rs-core/src/namespace/dublin_core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,7 @@ pub fn handle_feed_element(element: &str, text: &str, feed: &mut FeedMeta) {
// Store in dc_creator field
feed.dc_creator = Some(text.to_string());
// Also add to authors list
feed.authors.push(Person {
name: Some(text.to_string()),
email: None,
uri: None,
});
feed.authors.push(Person::from_name(text));
}
"date" => {
// dc:date → updated (if not already set)
Expand All @@ -55,11 +51,7 @@ pub fn handle_feed_element(element: &str, text: &str, feed: &mut FeedMeta) {
}
"subject" => {
// dc:subject → tags
feed.tags.push(Tag {
term: text.to_string(),
scheme: None,
label: None,
});
feed.tags.push(Tag::new(text));
}
"description" => {
// dc:description → subtitle (if not already set)
Expand Down Expand Up @@ -101,11 +93,7 @@ pub fn handle_feed_element(element: &str, text: &str, feed: &mut FeedMeta) {
}
"contributor" => {
// dc:contributor → contributors
feed.contributors.push(Person {
name: Some(text.to_string()),
email: None,
uri: None,
});
feed.contributors.push(Person::from_name(text));
}
_ => {
// Ignore unknown DC elements (source, type, format, coverage, etc.)
Expand All @@ -127,11 +115,7 @@ pub fn handle_entry_element(element: &str, text: &str, entry: &mut Entry) {
entry.author = Some(text.to_string());
}
entry.dc_creator = Some(text.to_string());
entry.authors.push(Person {
name: Some(text.to_string()),
email: None,
uri: None,
});
entry.authors.push(Person::from_name(text));
}
"date" => {
if let Some(dt) = parse_date(text) {
Expand All @@ -144,11 +128,7 @@ pub fn handle_entry_element(element: &str, text: &str, entry: &mut Entry) {
}
"subject" => {
entry.dc_subject.push(text.to_string());
entry.tags.push(Tag {
term: text.to_string(),
scheme: None,
label: None,
});
entry.tags.push(Tag::new(text));
}
"description" => {
if entry.summary.is_none() {
Expand All @@ -166,11 +146,7 @@ pub fn handle_entry_element(element: &str, text: &str, entry: &mut Entry) {
}
}
"contributor" => {
entry.contributors.push(Person {
name: Some(text.to_string()),
email: None,
uri: None,
});
entry.contributors.push(Person::from_name(text));
}
"rights" => {
entry.dc_rights = Some(text.to_string());
Expand Down
12 changes: 2 additions & 10 deletions crates/feedparser-rs-core/src/namespace/media_rss.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,21 +46,13 @@ pub fn handle_entry_element(element: &str, text: &str, entry: &mut Entry) {
for keyword in text.split(',') {
let keyword = keyword.trim();
if !keyword.is_empty() {
entry.tags.push(Tag {
term: keyword.to_string(),
scheme: None,
label: None,
});
entry.tags.push(Tag::new(keyword));
}
}
}
"category" => {
if !text.is_empty() {
entry.tags.push(Tag {
term: text.to_string(),
scheme: None,
label: None,
});
entry.tags.push(Tag::new(text));
}
}
_ => {
Expand Down
31 changes: 9 additions & 22 deletions crates/feedparser-rs-core/src/parser/atom.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,7 @@ fn parse_feed_element(
match element.name().as_ref() {
b"title" if !is_empty => {
let text = parse_text_construct(reader, &mut buf, &element, limits)?;
feed.feed.title = Some(text.value.clone());
feed.feed.title_detail = Some(text);
feed.feed.set_title(text);
}
b"link" => {
if let Some(link) = Link::from_attributes(
Expand All @@ -141,8 +140,7 @@ fn parse_feed_element(
}
b"subtitle" if !is_empty => {
let text = parse_text_construct(reader, &mut buf, &element, limits)?;
feed.feed.subtitle = Some(text.value.clone());
feed.feed.subtitle_detail = Some(text);
feed.feed.set_subtitle(text);
}
b"id" if !is_empty => {
feed.feed.id = Some(read_text(reader, &mut buf, limits)?);
Expand All @@ -154,8 +152,7 @@ fn parse_feed_element(
b"author" if !is_empty => {
if let Ok(person) = parse_person(reader, &mut buf, limits, depth) {
if feed.feed.author.is_none() {
feed.feed.author.clone_from(&person.name);
feed.feed.author_detail = Some(person.clone());
feed.feed.set_author(person.clone());
}
feed.feed
.authors
Expand All @@ -182,8 +179,7 @@ fn parse_feed_element(
}
b"generator" if !is_empty => {
let generator = parse_generator(reader, &mut buf, &element, limits)?;
feed.feed.generator = Some(generator.value.clone());
feed.feed.generator_detail = Some(generator);
feed.feed.set_generator(generator);
}
b"icon" if !is_empty => {
feed.feed.icon = Some(read_text(reader, &mut buf, limits)?);
Expand All @@ -193,16 +189,10 @@ fn parse_feed_element(
}
b"rights" if !is_empty => {
let text = parse_text_construct(reader, &mut buf, &element, limits)?;
feed.feed.rights = Some(text.value.clone());
feed.feed.rights_detail = Some(text);
feed.feed.set_rights(text);
}
b"entry" if !is_empty => {
if feed.entries.is_at_limit(limits.max_entries) {
feed.bozo = true;
feed.bozo_exception =
Some(format!("Entry limit exceeded: {}", limits.max_entries));
skip_element(reader, &mut buf, limits, *depth)?;
*depth = depth.saturating_sub(1);
if !feed.check_entry_limit(reader, &mut buf, limits, depth)? {
continue;
}

Expand Down Expand Up @@ -288,8 +278,7 @@ fn parse_entry(
match element.name().as_ref() {
b"title" if !is_empty => {
let text = parse_text_construct(reader, buf, &element, limits)?;
entry.title = Some(text.value.clone());
entry.title_detail = Some(text);
entry.set_title(text);
}
b"link" => {
if let Some(link) = Link::from_attributes(
Expand Down Expand Up @@ -320,8 +309,7 @@ fn parse_entry(
}
b"summary" if !is_empty => {
let text = parse_text_construct(reader, buf, &element, limits)?;
entry.summary = Some(text.value.clone());
entry.summary_detail = Some(text);
entry.set_summary(text);
}
b"content" if !is_empty => {
let content = parse_content(reader, buf, &element, limits)?;
Expand All @@ -332,8 +320,7 @@ fn parse_entry(
b"author" if !is_empty => {
if let Ok(person) = parse_person(reader, buf, limits, depth) {
if entry.author.is_none() {
entry.author.clone_from(&person.name);
entry.author_detail = Some(person.clone());
entry.set_author(person.clone());
}
entry.authors.try_push_limited(person, limits.max_authors);
}
Expand Down
63 changes: 51 additions & 12 deletions crates/feedparser-rs-core/src/parser/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,64 @@ use crate::{
use quick_xml::{Reader, events::Event};

pub use crate::types::{FromAttributes, LimitedCollectionExt};
pub use crate::util::text::bytes_to_string;

/// Initial capacity for XML event buffer (fits most elements)
pub const EVENT_BUFFER_CAPACITY: usize = 1024;

/// Initial capacity for text content (typical field size)
pub const TEXT_BUFFER_CAPACITY: usize = 256;

/// Allocates an empty byte buffer sized for reading XML events.
///
/// Centralizes the buffer size so parsers do not each hard-code their own
/// `Vec::with_capacity()` argument; changing `EVENT_BUFFER_CAPACITY` retunes
/// every caller at once.
///
/// # Returns
///
/// An empty `Vec<u8>` with at least `EVENT_BUFFER_CAPACITY` (1024) bytes
/// reserved.
///
/// # Examples
///
/// ```ignore
/// use feedparser_rs_core::parser::common::new_event_buffer;
///
/// let mut buf = new_event_buffer();
/// assert!(buf.capacity() >= 1024);
/// ```
#[inline]
#[must_use]
#[allow(dead_code)] // Future use: Will be adopted when refactoring parsers
pub fn new_event_buffer() -> Vec<u8> {
    Vec::<u8>::with_capacity(EVENT_BUFFER_CAPACITY)
}

/// Allocates an empty `String` sized for accumulating element text.
///
/// Centralizes the initial capacity used for text content so that all
/// parsers share one tunable value, `TEXT_BUFFER_CAPACITY`.
///
/// # Returns
///
/// An empty `String` with at least `TEXT_BUFFER_CAPACITY` (256) bytes
/// reserved.
///
/// # Examples
///
/// ```ignore
/// use feedparser_rs_core::parser::common::new_text_buffer;
///
/// let mut text = new_text_buffer();
/// assert!(text.capacity() >= 256);
/// ```
#[inline]
#[must_use]
#[allow(dead_code)] // Future use: Will be adopted when refactoring parsers
pub fn new_text_buffer() -> String {
    let text = String::with_capacity(TEXT_BUFFER_CAPACITY);
    text
}

/// Context for parsing operations
///
/// Bundles together common parsing state to reduce function parameter count.
Expand Down Expand Up @@ -106,18 +157,6 @@ pub fn check_depth(depth: usize, max_depth: usize) -> Result<()> {
Ok(())
}

/// Converts raw bytes into an owned `String`, tolerating invalid UTF-8.
///
/// Valid UTF-8 input is copied into the returned `String` unchanged. Any
/// invalid byte sequence is replaced with U+FFFD (the Unicode replacement
/// character) rather than reported as an error.
///
/// The result is always a freshly allocated `String`: because owned data is
/// returned, this conversion is not zero-copy even for valid input.
#[inline]
pub fn bytes_to_string(value: &[u8]) -> String {
    // `from_utf8_lossy` borrows the input when it is already valid UTF-8 and
    // only builds a repaired copy otherwise, so the bytes are validated once
    // instead of once per branch.
    String::from_utf8_lossy(value).into_owned()
}

/// Read text content from current XML element (handles text and CDATA)
pub fn read_text(
reader: &mut Reader<&[u8]>,
Expand Down
Loading
Loading