diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5e7421a..ae47e1f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -47,7 +47,7 @@ jobs:
         run: cargo +nightly fmt --all -- --check
 
       - name: Clippy
-        run: cargo clippy --all-targets --all-features -- -D warnings
+        run: cargo +stable clippy --all-targets --all-features -- -D warnings
 
       - name: Check documentation
         run: cargo doc --no-deps --all-features
@@ -97,7 +97,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        node: [18, 20, 22]
+        node: [20, 22]
 
     steps:
       - uses: actions/checkout@v4
@@ -162,16 +162,16 @@ jobs:
   # MSRV check
   msrv:
-    name: Check MSRV (1.86.0)
+    name: Check MSRV (1.88.0)
     runs-on: ubuntu-latest
     timeout-minutes: 15
 
     steps:
       - uses: actions/checkout@v4
 
-      - name: Install Rust 1.86.0
+      - name: Install Rust 1.88.0
        uses: dtolnay/rust-toolchain@master
        with:
-          toolchain: "1.86.0"
+          toolchain: "1.88.0"
 
      - name: Cache Cargo
        uses: Swatinem/rust-cache@v2
@@ -179,7 +179,7 @@ jobs:
           shared-key: "msrv"
 
       - name: Check with MSRV
-        run: cargo +1.86.0 check --all-features
+        run: cargo +1.88.0 check --all-features
 
   # All checks passed gate
   ci-success:
diff --git a/Cargo.toml b/Cargo.toml
index 97b4cf8..4fe417d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,7 +5,7 @@ resolver = "2"
 [workspace.package]
 version = "0.1.0"
 edition = "2024"
-rust-version = "1.86.0"
+rust-version = "1.88.0"
 authors = ["bug-ops"]
 license = "MIT OR Apache-2.0"
 repository = "https://github.com/bug-ops/feedparser-rs"
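Reviewer note: the 1.86.0 → 1.88.0 MSRV bump is consistent with the `if let ... &&` chains this diff introduces in detect.rs, json.rs, and encoding.rs — let chains stabilized in Rust 1.88 on the 2024 edition this workspace already targets. A minimal standalone sketch of the syntax (not code from this crate):

```rust
// Requires rustc >= 1.88 and edition 2024: an `if let` binding can be
// chained with `&&` instead of nesting a second `if`.
fn first_even(values: &[Option<i32>]) -> Option<i32> {
    for v in values {
        if let Some(n) = v
            && n % 2 == 0
        {
            return Some(*n);
        }
    }
    None
}

fn main() {
    assert_eq!(first_even(&[Some(3), Some(4)]), Some(4));
}
```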
diff --git a/crates/feedparser-rs-core/benches/parsing.rs b/crates/feedparser-rs-core/benches/parsing.rs
index abd25f5..939036d 100644
--- a/crates/feedparser-rs-core/benches/parsing.rs
+++ b/crates/feedparser-rs-core/benches/parsing.rs
@@ -1,3 +1,5 @@
+#![allow(missing_docs)]
+
 use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
 use feedparser_rs_core::parse;
 use std::hint::black_box;
@@ -10,7 +12,7 @@ fn bench_parse_feeds(c: &mut Criterion) {
     let mut group = c.benchmark_group("parse");
 
     group.bench_with_input(BenchmarkId::new("rss", "small"), &SMALL_FEED, |b, data| {
-        b.iter(|| parse(black_box(data)))
+        b.iter(|| parse(black_box(data)));
     });
 
     group.bench_with_input(
@@ -20,7 +22,7 @@ fn bench_parse_feeds(c: &mut Criterion) {
     );
 
     group.bench_with_input(BenchmarkId::new("rss", "large"), &LARGE_FEED, |b, data| {
-        b.iter(|| parse(black_box(data)))
+        b.iter(|| parse(black_box(data)));
     });
 
     group.finish();
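The added semicolons keep each `b.iter` closure returning `()`, presumably for clippy's `semicolon_if_nothing_returned`; what is measured does not change, and `black_box` still hides the input from the optimizer. A self-contained bench in the same shape (names here are hypothetical):

```rust
use criterion::{Criterion, criterion_group, criterion_main};
use std::hint::black_box;

fn bench_split(c: &mut Criterion) {
    let line = "a,b,c,d".to_string();
    c.bench_function("split", |b| {
        // Trailing `;` discards the count; black_box prevents the
        // computation from being optimized away.
        b.iter(|| black_box(&line).split(',').count());
    });
}

criterion_group!(benches, bench_split);
criterion_main!(benches);
```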
diff --git a/crates/feedparser-rs-core/src/compat/mod.rs b/crates/feedparser-rs-core/src/compat/mod.rs
index d1917b9..0547b7d 100644
--- a/crates/feedparser-rs-core/src/compat/mod.rs
+++ b/crates/feedparser-rs-core/src/compat/mod.rs
@@ -1,15 +1,6 @@
-/// Compatibility utilities for feedparser API
-///
-/// This module provides utilities to ensure API compatibility with
-/// Python's feedparser library.
+// Compatibility utilities for feedparser API
+//
+// This module provides utilities to ensure API compatibility with
+// Python's feedparser library.
 
 // TODO: Implement in later phases as needed
-
-#[cfg(test)]
-mod tests {
-    #[test]
-    fn test_placeholder() {
-        // Placeholder test
-        assert!(true);
-    }
-}
diff --git a/crates/feedparser-rs-core/src/error.rs b/crates/feedparser-rs-core/src/error.rs
index b11d327..0c5ed89 100644
--- a/crates/feedparser-rs-core/src/error.rs
+++ b/crates/feedparser-rs-core/src/error.rs
@@ -67,9 +67,14 @@ mod tests {
     }
 
     #[test]
+    #[allow(clippy::unnecessary_wraps)]
     fn test_result_type() {
-        let result: Result<i32> = Ok(42);
-        assert_eq!(result.unwrap(), 42);
+        fn get_result() -> Result<i32> {
+            Ok(42)
+        }
+        let result = get_result();
+        assert!(result.is_ok());
+        assert_eq!(result.expect("should be ok"), 42);
 
         let error: Result<i32> = Err(FeedError::Unknown("test".to_string()));
         assert!(error.is_err());
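The inner `get_result` exists so the `Ok(42)` has a function boundary behind it, hence `#[allow(clippy::unnecessary_wraps)]`. For readers outside the crate, the test implies a crate-level alias roughly like the following — the exact shape is an assumption, since the alias itself is not in this diff:

```rust
// Assumed error plumbing; only FeedError::Unknown, FeedError::InvalidFormat,
// and the single-parameter Result alias are actually visible in the diff.
#[derive(Debug)]
pub enum FeedError {
    InvalidFormat(String),
    Unknown(String),
}

pub type Result<T> = std::result::Result<T, FeedError>;

fn get_result() -> Result<i32> {
    Ok(42)
}

fn main() {
    // `expect` reports a message on failure, which is why the test now
    // prefers it over a bare `unwrap`.
    assert_eq!(get_result().expect("should be ok"), 42);
}
```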
diff --git a/crates/feedparser-rs-core/src/parser/atom.rs b/crates/feedparser-rs-core/src/parser/atom.rs
index 81a6507..171e4f3 100644
--- a/crates/feedparser-rs-core/src/parser/atom.rs
+++ b/crates/feedparser-rs-core/src/parser/atom.rs
@@ -101,16 +101,15 @@ fn parse_feed_element(
         match reader.read_event_into(&mut buf) {
             Ok(event @ (Event::Start(_) | Event::Empty(_))) => {
                 let is_empty = matches!(event, Event::Empty(_));
-                let e = match &event {
-                    Event::Start(e) | Event::Empty(e) => e,
-                    _ => unreachable!(),
+                let (Event::Start(e) | Event::Empty(e)) = &event else {
+                    unreachable!()
                 };
 
                 *depth += 1;
                 if *depth > limits.max_nesting_depth {
                     return Err(FeedError::InvalidFormat(format!(
-                        "XML nesting depth {} exceeds maximum {}",
-                        depth, limits.max_nesting_depth
+                        "XML nesting depth {depth} exceeds maximum {}",
+                        limits.max_nesting_depth
                     )));
                 }
@@ -153,7 +152,7 @@ fn parse_feed_element(
                 b"author" if !is_empty => {
                     if let Ok(person) = parse_person(reader, &mut buf, limits, depth) {
                         if feed.feed.author.is_none() {
-                            feed.feed.author = person.name.clone();
+                            feed.feed.author.clone_from(&person.name);
                             feed.feed.author_detail = Some(person.clone());
                         }
                         feed.feed
@@ -200,7 +199,7 @@ fn parse_feed_element(
                         feed.bozo = true;
                         feed.bozo_exception =
                             Some(format!("Entry limit exceeded: {}", limits.max_entries));
-                        skip_element(reader, &mut buf, limits, depth)?;
+                        skip_element(reader, &mut buf, limits, *depth)?;
                         *depth = depth.saturating_sub(1);
                         continue;
                     }
@@ -215,7 +214,7 @@ fn parse_feed_element(
                 }
                 _ => {
                     if !is_empty {
-                        skip_element(reader, &mut buf, limits, depth)?;
+                        skip_element(reader, &mut buf, limits, *depth)?;
                     }
                 }
             }
@@ -246,16 +245,15 @@ fn parse_entry(
         match reader.read_event_into(buf) {
             Ok(event @ (Event::Start(_) | Event::Empty(_))) => {
                 let is_empty = matches!(event, Event::Empty(_));
-                let e = match &event {
-                    Event::Start(e) | Event::Empty(e) => e,
-                    _ => unreachable!(),
+                let (Event::Start(e) | Event::Empty(e)) = &event else {
+                    unreachable!()
                 };
 
                 *depth += 1;
                 if *depth > limits.max_nesting_depth {
                     return Err(FeedError::InvalidFormat(format!(
-                        "XML nesting depth {} exceeds maximum {}",
-                        depth, limits.max_nesting_depth
+                        "XML nesting depth {depth} exceeds maximum {}",
+                        limits.max_nesting_depth
                     )));
                 }
@@ -307,7 +305,7 @@ fn parse_entry(
                 b"author" if !is_empty => {
                     if let Ok(person) = parse_person(reader, buf, limits, depth) {
                         if entry.author.is_none() {
-                            entry.author = person.name.clone();
+                            entry.author.clone_from(&person.name);
                             entry.author_detail = Some(person.clone());
                         }
                         entry.authors.try_push_limited(person, limits.max_authors);
@@ -338,7 +336,7 @@ fn parse_entry(
                 }
                 _ => {
                     if !is_empty {
-                        skip_element(reader, buf, limits, depth)?;
+                        skip_element(reader, buf, limits, *depth)?;
                     }
                 }
             }
@@ -414,7 +412,7 @@ fn parse_person(
             b"name" => name = Some(read_text(reader, buf, limits)?),
             b"email" => email = Some(read_text(reader, buf, limits)?),
             b"uri" => uri = Some(read_text(reader, buf, limits)?),
-            _ => skip_element(reader, buf, limits, depth)?,
+            _ => skip_element(reader, buf, limits, *depth)?,
         }
         *depth = depth.saturating_sub(1);
     }
@@ -517,15 +515,14 @@ fn parse_atom_source(
                     if let Some(l) = Link::from_attributes(
                         element.attributes().flatten(),
                         limits.max_attribute_length,
-                    ) {
-                        if link.is_none() {
-                            link = Some(l.href);
-                        }
+                    ) && link.is_none()
+                    {
+                        link = Some(l.href);
                     }
                     skip_to_end(reader, buf, b"link")?;
                 }
                 b"id" => id = Some(read_text(reader, buf, limits)?),
-                _ => skip_element(reader, buf, limits, depth)?,
+                _ => skip_element(reader, buf, limits, *depth)?,
             }
             *depth = depth.saturating_sub(1);
         }
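Two recurring changes in atom.rs: the `match` + `unreachable!()` collapses into a let-else whose or-pattern binds `e` in both alternatives, and `clone_from` reuses the allocation already held by `author` instead of dropping it and cloning fresh. A standalone sketch of the let-else shape, against quick-xml's `Event` type that the parser already uses (0.3x API assumed):

```rust
use quick_xml::events::Event;

// Both alternatives of the or-pattern bind `e`; the else branch must
// diverge, so `e` is unconditionally available below.
fn start_name(event: &Event<'_>) -> Option<Vec<u8>> {
    let (Event::Start(e) | Event::Empty(e)) = event else {
        return None;
    };
    Some(e.local_name().as_ref().to_vec())
}

fn main() {
    let start = Event::Start(quick_xml::events::BytesStart::new("feed"));
    assert_eq!(start_name(&start).as_deref(), Some(b"feed".as_slice()));
}
```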
diff --git a/crates/feedparser-rs-core/src/parser/common.rs b/crates/feedparser-rs-core/src/parser/common.rs
index 9f3da84..3696fb5 100644
--- a/crates/feedparser-rs-core/src/parser/common.rs
+++ b/crates/feedparser-rs-core/src/parser/common.rs
@@ -36,6 +36,7 @@ pub struct ParseContext<'a> {
 
 impl<'a> ParseContext<'a> {
     /// Create a new parse context from raw data
+    #[allow(dead_code)]
     pub fn new(data: &'a [u8], limits: ParserLimits) -> Result<Self> {
         limits
             .check_feed_size(data.len())
@@ -54,6 +55,7 @@ impl<'a> ParseContext<'a> {
 
     /// Check and increment depth, returning error if limit exceeded
     #[inline]
+    #[allow(dead_code)]
     pub fn check_depth(&mut self) -> Result<()> {
         self.depth += 1;
         if self.depth > self.limits.max_nesting_depth {
@@ -67,18 +69,20 @@ impl<'a> ParseContext<'a> {
 
     /// Decrement depth safely
     #[inline]
-    pub fn decrement_depth(&mut self) {
+    #[allow(dead_code)]
+    pub const fn decrement_depth(&mut self) {
         self.depth = self.depth.saturating_sub(1);
     }
 
     /// Clear the buffer
     #[inline]
+    #[allow(dead_code)]
     pub fn clear_buf(&mut self) {
         self.buf.clear();
     }
 }
 
-/// Initialize a ParsedFeed with common setup for any format
+/// Initialize a `ParsedFeed` with common setup for any format
 #[inline]
 pub fn init_feed(version: FeedVersion, max_entries: usize) -> ParsedFeed {
     let mut feed = ParsedFeed::with_capacity(max_entries);
@@ -89,15 +93,14 @@ pub fn init_feed(version: FeedVersion, max_entries: usize) -> ParsedFeed {
 
 /// Check nesting depth and return error if exceeded
 ///
-/// This is a standalone helper for parsers that don't use ParseContext.
-/// Future use: Will be used when ParseContext is adopted project-wide
+/// This is a standalone helper for parsers that don't use `ParseContext`.
+/// Future use: Will be used when `ParseContext` is adopted project-wide
 #[inline]
 #[allow(dead_code)]
 pub fn check_depth(depth: usize, max_depth: usize) -> Result<()> {
     if depth > max_depth {
         return Err(FeedError::InvalidFormat(format!(
-            "XML nesting depth {} exceeds maximum {}",
-            depth, max_depth
+            "XML nesting depth {depth} exceeds maximum {max_depth}"
         )));
     }
     Ok(())
@@ -109,10 +112,10 @@ pub fn check_depth(depth: usize, max_depth: usize) -> Result<()> {
 /// is valid UTF-8, falling back to lossy conversion otherwise.
 #[inline]
 pub fn bytes_to_string(value: &[u8]) -> String {
-    match std::str::from_utf8(value) {
-        Ok(s) => s.to_string(),
-        Err(_) => String::from_utf8_lossy(value).into_owned(),
-    }
+    std::str::from_utf8(value).map_or_else(
+        |_| String::from_utf8_lossy(value).into_owned(),
+        std::string::ToString::to_string,
+    )
 }
 
 /// Read text content from current XML element (handles text and CDATA)
@@ -160,7 +163,7 @@ pub fn skip_element(
     reader: &mut Reader<&[u8]>,
     buf: &mut Vec<u8>,
     limits: &ParserLimits,
-    current_depth: &mut usize,
+    current_depth: usize,
 ) -> Result<()> {
     let mut local_depth: usize = 1;
 
@@ -168,7 +171,7 @@
         match reader.read_event_into(buf) {
             Ok(Event::Start(_)) => {
                 local_depth += 1;
-                if *current_depth + local_depth > limits.max_nesting_depth {
+                if current_depth + local_depth > limits.max_nesting_depth {
                     return Err(FeedError::InvalidFormat(format!(
                         "XML nesting depth exceeds maximum of {}",
                         limits.max_nesting_depth
@@ -278,7 +281,7 @@ mod tests {
         reader.config_mut().trim_text(true);
         let mut buf = Vec::new();
         let limits = ParserLimits::default();
-        let mut depth = 1;
+        let depth = 1;
 
         // Skip to after the start tag
         loop {
@@ -291,7 +294,7 @@
         }
         buf.clear();
 
-        let result = skip_element(&mut reader, &mut buf, &limits, &mut depth);
+        let result = skip_element(&mut reader, &mut buf, &limits, depth);
         assert!(result.is_ok());
     }
 }
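`skip_element` now takes `current_depth` by value: it only reads the starting depth to bound its own `local_depth`, so threading `&mut usize` through every call site bought nothing — callers run `saturating_sub` themselves either way. The `bytes_to_string` rewrite is behavior-neutral; `map_or_else` takes the fallback (error) closure first. Runnable form of the helper as refactored:

```rust
// Same logic as the refactored helper: validate UTF-8 once, copy the
// valid str, or fall back to a lossy conversion of the raw bytes.
fn bytes_to_string(value: &[u8]) -> String {
    std::str::from_utf8(value).map_or_else(
        |_| String::from_utf8_lossy(value).into_owned(),
        std::string::ToString::to_string,
    )
}

fn main() {
    assert_eq!(bytes_to_string(b"title"), "title");
    // Invalid bytes degrade to U+FFFD replacement characters.
    assert_eq!(bytes_to_string(&[0xff, 0xfe]), "\u{fffd}\u{fffd}");
}
```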
diff --git a/crates/feedparser-rs-core/src/parser/detect.rs b/crates/feedparser-rs-core/src/parser/detect.rs
index cf1183c..2600a56 100644
--- a/crates/feedparser-rs-core/src/parser/detect.rs
+++ b/crates/feedparser-rs-core/src/parser/detect.rs
@@ -60,14 +60,14 @@ fn detect_json_feed_version(data: &[u8]) -> FeedVersion {
     }
 
     // Try to parse as JSON and check version field
-    if let Ok(json) = serde_json::from_slice::<Value>(data) {
-        if let Some(version) = json.get("version").and_then(|v| v.as_str()) {
-            return match version {
-                "https://jsonfeed.org/version/1" => FeedVersion::JsonFeed10,
-                "https://jsonfeed.org/version/1.1" => FeedVersion::JsonFeed11,
-                _ => FeedVersion::Unknown,
-            };
-        }
+    if let Ok(json) = serde_json::from_slice::<Value>(data)
+        && let Some(version) = json.get("version").and_then(|v| v.as_str())
+    {
+        return match version {
+            "https://jsonfeed.org/version/1" => FeedVersion::JsonFeed10,
+            "https://jsonfeed.org/version/1.1" => FeedVersion::JsonFeed11,
+            _ => FeedVersion::Unknown,
+        };
     }
     FeedVersion::Unknown
 }
@@ -209,7 +209,7 @@ mod tests {
 
     #[test]
     fn test_detect_atom10_no_xmlns() {
-        let xml = br#"<feed></feed>"#;
+        let xml = br"<feed></feed>";
         assert_eq!(detect_format(xml), FeedVersion::Atom10);
     }
@@ -233,7 +233,7 @@ mod tests {
 
     #[test]
     fn test_detect_unknown_xml() {
-        let xml = br#"<root></root>"#;
+        let xml = br"<root></root>";
         assert_eq!(detect_format(xml), FeedVersion::Unknown);
     }
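The let-chain removes one level of nesting around the early `return`; the sniffing itself is plain serde_json. Standalone sketch (the string labels stand in for `FeedVersion` variants):

```rust
use serde_json::Value;

// Parse, then probe the "version" member, in one condition chain.
fn json_feed_version(data: &[u8]) -> Option<&'static str> {
    if let Ok(json) = serde_json::from_slice::<Value>(data)
        && let Some(version) = json.get("version").and_then(Value::as_str)
    {
        return match version {
            "https://jsonfeed.org/version/1" => Some("JsonFeed10"),
            "https://jsonfeed.org/version/1.1" => Some("JsonFeed11"),
            _ => None,
        };
    }
    None
}

fn main() {
    let data = br#"{"version": "https://jsonfeed.org/version/1.1"}"#;
    assert_eq!(json_feed_version(data), Some("JsonFeed11"));
}
```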
diff --git a/crates/feedparser-rs-core/src/parser/json.rs b/crates/feedparser-rs-core/src/parser/json.rs
index 199d27b..7d6f43a 100644
--- a/crates/feedparser-rs-core/src/parser/json.rs
+++ b/crates/feedparser-rs-core/src/parser/json.rs
@@ -14,6 +14,7 @@ use crate::{
 use serde_json::Value;
 
 /// Parse JSON Feed with default limits
+#[allow(dead_code)]
 pub fn parse_json_feed(data: &[u8]) -> Result<ParsedFeed> {
     parse_json_feed_with_limits(data, ParserLimits::default())
 }
@@ -80,7 +81,9 @@ fn parse_feed_metadata(json: &Value, feed: &mut FeedMeta, limits: &ParserLimits)
         feed.title = Some(truncated);
     }
 
-    if let Some(url) = json.get("home_page_url").and_then(|v| v.as_str()) {
+    if let Some(url) = json.get("home_page_url").and_then(|v| v.as_str())
+        && url.len() <= limits.max_text_length
+    {
         feed.link = Some(url.to_string());
     }
 
@@ -97,11 +100,15 @@ fn parse_feed_metadata(json: &Value, feed: &mut FeedMeta, limits: &ParserLimits)
         feed.subtitle = Some(truncated);
     }
 
-    if let Some(icon) = json.get("icon").and_then(|v| v.as_str()) {
+    if let Some(icon) = json.get("icon").and_then(|v| v.as_str())
+        && icon.len() <= limits.max_text_length
+    {
         feed.icon = Some(icon.to_string());
     }
 
-    if let Some(favicon) = json.get("favicon").and_then(|v| v.as_str()) {
+    if let Some(favicon) = json.get("favicon").and_then(|v| v.as_str())
+        && favicon.len() <= limits.max_text_length
+    {
         feed.image = Some(Image {
             url: favicon.to_string(),
             title: None,
@@ -120,14 +127,16 @@ fn parse_feed_metadata(json: &Value, feed: &mut FeedMeta, limits: &ParserLimits)
         limits,
     );
 
-    if let Some(language) = json.get("language").and_then(|v| v.as_str()) {
+    if let Some(language) = json.get("language").and_then(|v| v.as_str())
+        && language.len() <= limits.max_text_length
+    {
         feed.language = Some(language.to_string());
     }
 
-    if let Some(expired) = json.get("expired").and_then(Value::as_bool) {
-        if expired {
-            feed.ttl = Some(0);
-        }
+    if let Some(expired) = json.get("expired").and_then(Value::as_bool)
+        && expired
+    {
+        feed.ttl = Some(0);
     }
 }
@@ -259,6 +268,7 @@ fn parse_authors(
     }
 }
 
+/// Truncate text to maximum length
 fn truncate_text(text: &str, max_length: usize) -> String {
     if text.len() <= max_length {
         text.to_string()
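The new length guards drop oversized URL-ish values outright, while `title`/`subtitle` keep going through `truncate_text` — sensible, since a truncated URL is useless. One caveat on `truncate_text` itself: only its fast path is visible in this hunk, and byte-indexed slicing panics off a char boundary, so the slow path presumably has to walk back to one. Hedged sketch of that intent:

```rust
// Assumed slow path; the diff only shows the `len() <= max_length` branch.
fn truncate_text(text: &str, max_length: usize) -> String {
    if text.len() <= max_length {
        return text.to_string();
    }
    // Walk back from the byte limit to a UTF-8 boundary so the slice
    // below cannot panic on multi-byte characters.
    let mut end = max_length;
    while !text.is_char_boundary(end) {
        end -= 1;
    }
    text[..end].to_string()
}

fn main() {
    assert_eq!(truncate_text("héllo", 2), "h"); // 'é' occupies bytes 1..3
}
```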
diff --git a/crates/feedparser-rs-core/src/parser/mod.rs b/crates/feedparser-rs-core/src/parser/mod.rs
index 8c2cab2..419c059 100644
--- a/crates/feedparser-rs-core/src/parser/mod.rs
+++ b/crates/feedparser-rs-core/src/parser/mod.rs
@@ -53,6 +53,13 @@ pub fn parse(data: &[u8]) -> Result<ParsedFeed> {
 /// let limits = ParserLimits::strict();
 /// let feed = parse_with_limits(xml, limits).unwrap();
 /// ```
+///
+/// # Errors
+///
+/// Returns an error if:
+/// - Feed size exceeds limits
+/// - Format is unknown or unsupported
+/// - Fatal parsing error occurs
 pub fn parse_with_limits(data: &[u8], limits: crate::ParserLimits) -> Result<ParsedFeed> {
     use crate::FeedError;
     use crate::types::FeedVersion;
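The `# Errors` section satisfies clippy's `missing_errors_doc` on a public fallible API. From the caller's side the limits turn hostile input into recoverable errors rather than unbounded work — a usage sketch, assuming `parse_with_limits` and `ParserLimits` are re-exported from the crate root as the doc example suggests:

```rust
use feedparser_rs_core::{parse_with_limits, ParserLimits};

fn main() {
    let xml = br#"<rss version="2.0"><channel><title>Example</title></channel></rss>"#;
    // Strict limits bound feed size, nesting depth, entry counts, and so
    // on; violations surface as Err values per the new # Errors section.
    match parse_with_limits(xml, ParserLimits::strict()) {
        Ok(feed) => println!("title: {:?}", feed.feed.title),
        Err(err) => eprintln!("rejected: {err:?}"),
    }
}
```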
diff --git a/crates/feedparser-rs-core/src/parser/rss.rs b/crates/feedparser-rs-core/src/parser/rss.rs
index fc5ac6d..4dac7e7 100644
--- a/crates/feedparser-rs-core/src/parser/rss.rs
+++ b/crates/feedparser-rs-core/src/parser/rss.rs
@@ -43,6 +43,7 @@ use super::common::{
 /// let feed = parse_rss20(xml).unwrap();
 /// assert_eq!(feed.feed.title.as_deref(), Some("Example"));
 /// ```
+#[allow(dead_code)]
 pub fn parse_rss20(data: &[u8]) -> Result<ParsedFeed> {
     parse_rss20_with_limits(data, ParserLimits::default())
 }
@@ -171,7 +172,7 @@ fn parse_channel(
                         feed.bozo = true;
                         feed.bozo_exception =
                             Some(format!("Entry limit exceeded: {}", limits.max_entries));
-                        skip_element(reader, &mut buf, limits, depth)?;
+                        skip_element(reader, &mut buf, limits, *depth)?;
                         *depth = depth.saturating_sub(1);
                         continue;
                     }
@@ -184,7 +185,7 @@ fn parse_channel(
                     }
                 }
             }
-            _ => skip_element(reader, &mut buf, limits, depth)?,
+            _ => skip_element(reader, &mut buf, limits, *depth)?,
         }
         *depth = depth.saturating_sub(1);
     }
@@ -274,7 +275,7 @@ fn parse_item(
                         .enclosures
                         .try_push_limited(enclosure, limits.max_enclosures);
                 }
-                skip_element(reader, buf, limits, depth)?;
+                skip_element(reader, buf, limits, *depth)?;
             }
             b"comments" => {
                 entry.comments = Some(read_text(reader, buf, limits)?);
@@ -285,7 +286,7 @@ fn parse_item(
                 }
             }
             _ => {
-                skip_element(reader, buf, limits, depth)?;
+                skip_element(reader, buf, limits, *depth)?;
             }
         }
         *depth = depth.saturating_sub(1);
@@ -343,7 +344,7 @@ fn parse_image(
                 }
             }
             b"description" => description = Some(read_text(reader, buf, limits)?),
-            _ => skip_element(reader, buf, limits, depth)?,
+            _ => skip_element(reader, buf, limits, *depth)?,
         }
         *depth = depth.saturating_sub(1);
     }
@@ -399,7 +400,7 @@ fn parse_source(
         match e.local_name().as_ref() {
             b"title" => title = Some(read_text(reader, buf, limits)?),
             b"url" => link = Some(read_text(reader, buf, limits)?),
-            _ => skip_element(reader, buf, limits, depth)?,
+            _ => skip_element(reader, buf, limits, *depth)?,
         }
         *depth = depth.saturating_sub(1);
     }
diff --git a/crates/feedparser-rs-core/src/types/common.rs b/crates/feedparser-rs-core/src/types/common.rs
index 11286dc..9e59158 100644
--- a/crates/feedparser-rs-core/src/types/common.rs
+++ b/crates/feedparser-rs-core/src/types/common.rs
@@ -4,10 +4,10 @@ use serde_json::Value;
 /// Helper for efficient bytes to string conversion
 #[inline]
 fn bytes_to_string(value: &[u8]) -> String {
-    match std::str::from_utf8(value) {
-        Ok(s) => s.to_string(),
-        Err(_) => String::from_utf8_lossy(value).into_owned(),
-    }
+    std::str::from_utf8(value).map_or_else(
+        |_| String::from_utf8_lossy(value).into_owned(),
+        std::string::ToString::to_string,
+    )
 }
 
 /// Link in feed or entry
@@ -292,7 +292,7 @@ impl FromAttributes for Link {
             }
         }
 
-        href.map(|href| Link {
+        href.map(|href| Self {
             href,
             rel: rel.or_else(|| Some("alternate".to_string())),
             link_type,
@@ -325,7 +325,7 @@ impl FromAttributes for Tag {
             }
         }
 
-        term.map(|term| Tag {
+        term.map(|term| Self {
             term,
             scheme,
             label,
@@ -355,7 +355,7 @@ impl FromAttributes for Enclosure {
             }
         }
 
-        url.map(|url| Enclosure {
+        url.map(|url| Self {
             url,
             length,
             enclosure_type,
diff --git a/crates/feedparser-rs-core/src/types/entry.rs b/crates/feedparser-rs-core/src/types/entry.rs
index a488be1..d627f3c 100644
--- a/crates/feedparser-rs-core/src/types/entry.rs
+++ b/crates/feedparser-rs-core/src/types/entry.rs
@@ -96,11 +96,16 @@ mod tests {
     }
 
     #[test]
+    #[allow(clippy::redundant_clone)]
     fn test_entry_clone() {
-        let mut entry = Entry::default();
-        entry.title = Some("Test".to_string());
-        entry.links.push(Link::default());
-
+        fn create_entry() -> Entry {
+            Entry {
+                title: Some("Test".to_string()),
+                links: vec![Link::default()],
+                ..Default::default()
+            }
+        }
+        let entry = create_entry();
         let cloned = entry.clone();
         assert_eq!(cloned.title.as_deref(), Some("Test"));
         assert_eq!(cloned.links.len(), 1);
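Struct-update syntax is what clippy's `field_reassign_with_default` nudges toward: build the value in one expression instead of mutating a `Default::default()` binding. Illustrated with a stand-in type (the real `Entry` has many more fields):

```rust
#[derive(Clone, Default)]
struct Entry {
    title: Option<String>,
    links: Vec<String>,
}

fn main() {
    // One expression, no `mut`: unspecified fields come from Default.
    let entry = Entry {
        title: Some("Test".to_string()),
        ..Default::default()
    };
    let cloned = entry.clone();
    assert_eq!(cloned.title.as_deref(), Some("Test"));
    assert!(cloned.links.is_empty());
}
```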
diff --git a/crates/feedparser-rs-core/src/types/feed.rs b/crates/feedparser-rs-core/src/types/feed.rs
index 9bb4471..a6f455e 100644
--- a/crates/feedparser-rs-core/src/types/feed.rs
+++ b/crates/feedparser-rs-core/src/types/feed.rs
@@ -202,12 +202,14 @@ mod tests {
 
     #[test]
+    #[allow(clippy::redundant_clone)]
     fn test_parsed_feed_clone() {
-        let mut feed = ParsedFeed::new();
-        feed.version = FeedVersion::Rss20;
-        feed.bozo = true;
+        let feed = ParsedFeed {
+            version: FeedVersion::Rss20,
+            bozo: true,
+            ..ParsedFeed::new()
+        };
 
         let cloned = feed.clone();
         assert_eq!(cloned.version, FeedVersion::Rss20);
         assert!(cloned.bozo);
     }
 }
diff --git a/crates/feedparser-rs-core/src/types/generics.rs b/crates/feedparser-rs-core/src/types/generics.rs
index ee80850..d6ac43a 100644
--- a/crates/feedparser-rs-core/src/types/generics.rs
+++ b/crates/feedparser-rs-core/src/types/generics.rs
@@ -43,7 +43,7 @@ impl<V, D> DetailedField<V, D> {
     /// Create a field with only a simple value
     #[inline]
     #[must_use]
-    pub fn from_value(value: V) -> Self {
+    pub const fn from_value(value: V) -> Self {
         Self {
             value,
             detail: None,
@@ -53,7 +53,7 @@ impl<V, D> DetailedField<V, D> {
     /// Create a field with both value and detail
     #[inline]
     #[must_use]
-    pub fn with_detail(value: V, detail: D) -> Self {
+    pub const fn with_detail(value: V, detail: D) -> Self {
         Self {
             value,
             detail: Some(detail),
@@ -63,26 +63,26 @@ impl<V, D> DetailedField<V, D> {
     /// Get reference to the simple value
     #[inline]
     #[must_use]
-    pub fn value(&self) -> &V {
+    pub const fn value(&self) -> &V {
         &self.value
     }
 
     /// Get mutable reference to the simple value
     #[inline]
-    pub fn value_mut(&mut self) -> &mut V {
+    pub const fn value_mut(&mut self) -> &mut V {
         &mut self.value
     }
 
     /// Get reference to the detail if present
     #[inline]
     #[must_use]
-    pub fn detail(&self) -> Option<&D> {
+    pub const fn detail(&self) -> Option<&D> {
         self.detail.as_ref()
     }
 
     /// Get mutable reference to the detail if present
     #[inline]
-    pub fn detail_mut(&mut self) -> Option<&mut D> {
+    pub const fn detail_mut(&mut self) -> Option<&mut D> {
         self.detail.as_mut()
     }
 
@@ -94,11 +94,11 @@ impl<V, D> DetailedField<V, D> {
 
     /// Take the detail, leaving None in its place
     #[inline]
-    pub fn take_detail(&mut self) -> Option<D> {
+    pub const fn take_detail(&mut self) -> Option<D> {
         self.detail.take()
     }
 
-    /// Convert into a tuple of (value, Option<D>)
+    /// Convert into a tuple of (value, `Option<D>`)
     #[inline]
     #[must_use]
     pub fn into_parts(self) -> (V, Option<D>) {
@@ -130,7 +130,7 @@ impl<V, D> From<(V, D)> for DetailedField<V, D> {
 /// Extension trait for collections with size limits
 ///
 /// Provides methods for safely adding items to collections while respecting
-/// configured limits, which is essential for DoS protection.
+/// configured limits, which is essential for `DoS` protection.
 ///
 /// # Examples
 ///
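Marking the accessors `const fn` is free API surface: mutable references in const fns and const `Option::take`/`as_mut` stabilized in Rust 1.83 (if I recall the release correctly), comfortably below the new 1.88 MSRV. A reduced sketch of what that buys:

```rust
struct DetailedField<V, D> {
    value: V,
    detail: Option<D>,
}

impl<V, D> DetailedField<V, D> {
    const fn from_value(value: V) -> Self {
        Self { value, detail: None }
    }

    const fn take_detail(&mut self) -> Option<D> {
        self.detail.take() // const-callable on recent Rust
    }
}

// The const constructor can now initialize const items.
const EMPTY: DetailedField<u8, ()> = DetailedField::from_value(0);

fn main() {
    let mut field = DetailedField { value: 1, detail: Some("detail") };
    assert_eq!(field.take_detail(), Some("detail"));
    assert!(field.detail.is_none());
    assert_eq!(EMPTY.value, 0);
}
```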
diff --git a/crates/feedparser-rs-core/src/util/encoding.rs b/crates/feedparser-rs-core/src/util/encoding.rs
index 06fedf4..ebc7cd9 100644
--- a/crates/feedparser-rs-core/src/util/encoding.rs
+++ b/crates/feedparser-rs-core/src/util/encoding.rs
@@ -66,25 +66,25 @@ fn extract_xml_encoding(data: &[u8]) -> Option<&'static str> {
     let search_len = data.len().min(512);
     let search_data = &data[..search_len];
 
-    if let Ok(header) = std::str::from_utf8(search_data) {
-        if let Some(enc_start) = header.find("encoding=") {
-            let after_eq = &header[enc_start + 9..];
-            let quote = after_eq.chars().next()?;
-            if quote == '"' || quote == '\'' {
-                let quote_end = after_eq[1..].find(quote)?;
-                let encoding_name = &after_eq[1..=quote_end];
-                return normalize_encoding_name(encoding_name);
-            }
+    if let Ok(header) = std::str::from_utf8(search_data)
+        && let Some(enc_start) = header.find("encoding=")
+    {
+        let after_eq = &header[enc_start + 9..];
+        let quote = after_eq.chars().next()?;
+        if quote == '"' || quote == '\'' {
+            let quote_end = after_eq[1..].find(quote)?;
+            let encoding_name = &after_eq[1..=quote_end];
+            return normalize_encoding_name(encoding_name);
         }
     }
     None
 }
 
-/// Normalize encoding name to encoding_rs canonical form
+/// Normalize encoding name to `encoding_rs` canonical form
 fn normalize_encoding_name(name: &str) -> Option<&'static str> {
     let normalized = name.trim().to_lowercase();
-    Encoding::for_label(normalized.as_bytes()).map(|enc| enc.name())
+    Encoding::for_label(normalized.as_bytes()).map(encoding_rs::Encoding::name)
 }
 
 /// Convert data to UTF-8 from detected encoding
@@ -108,6 +108,11 @@ fn normalize_encoding_name(name: &str) -> Option<&'static str> {
 /// let utf8 = convert_to_utf8(latin1, "iso-8859-1").unwrap();
 /// assert_eq!(utf8, "é");
 /// ```
+///
+/// # Errors
+///
+/// Returns an error if the encoding conversion encounters invalid byte sequences
+/// that cannot be properly decoded.
 pub fn convert_to_utf8(data: &[u8], encoding_name: &str) -> Result<String, String> {
     let encoding = Encoding::for_label(encoding_name.as_bytes()).unwrap_or(UTF_8);
 
@@ -115,8 +120,7 @@ pub fn convert_to_utf8(data: &[u8], encoding_name: &str) -> Result<String, String>
 ) -> Result<(String, &'static str), String> {
     let encoding_name = detect_encoding(data);
     let utf8_string = convert_to_utf8(data, encoding_name)?;
@@ -231,7 +240,7 @@ mod tests {
 
     #[test]
     fn test_convert_utf8_to_utf8() {
-        let data = "Hello".as_bytes();
+        let data = b"Hello";
         let result = convert_to_utf8(data, "utf-8").unwrap();
         assert_eq!(result, "Hello");
    }
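`Encoding::for_label` is the WHATWG label lookup, so aliases such as `latin1`/`iso-8859-1` resolve to their canonical encoding, and `.map(encoding_rs::Encoding::name)` is the same projection as the old closure, written as a path. Demo of the two calls this module leans on:

```rust
use encoding_rs::Encoding;

fn main() {
    // WHATWG maps the iso-8859-1 family of labels to windows-1252.
    let enc = Encoding::for_label(b"iso-8859-1").expect("known label");
    assert_eq!(enc.name(), "windows-1252");

    // decode() returns (text, encoding actually used, had_errors).
    let (text, _, had_errors) = enc.decode(&[0xe9]);
    assert!(!had_errors);
    assert_eq!(text, "é");
}
```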
diff --git a/crates/feedparser-rs-core/src/util/mod.rs b/crates/feedparser-rs-core/src/util/mod.rs
index c633658..961b38a 100644
--- a/crates/feedparser-rs-core/src/util/mod.rs
+++ b/crates/feedparser-rs-core/src/util/mod.rs
@@ -1,6 +1,11 @@
+//! Utility functions for feed parsing
+//!
+//! This module provides helper functions for common feed parsing tasks.
+
 pub mod date;
 pub mod encoding;
 pub mod sanitize;
+/// Text processing utilities
 pub mod text;
 
 // Re-export commonly used functions
diff --git a/crates/feedparser-rs-core/src/util/sanitize.rs b/crates/feedparser-rs-core/src/util/sanitize.rs
index 3588ce6..7bbcd4b 100644
--- a/crates/feedparser-rs-core/src/util/sanitize.rs
+++ b/crates/feedparser-rs-core/src/util/sanitize.rs
@@ -29,7 +29,9 @@ use std::collections::HashSet;
 /// assert_eq!(safe_html, "<p>Hello</p>");
 /// ```
 pub fn sanitize_html(input: &str) -> String {
-    let safe_tags: HashSet<&str> = [
+    // NOTE: Inline HashSet construction is faster than LazyLock with .clone()
+    // because ammonia requires owned values. See benchmark results in .local/
+    let safe_tags: HashSet<_> = [
         // Text formatting
         "a",
         "abbr",
@@ -87,20 +89,20 @@ pub fn sanitize_html(input: &str) -> String {
         // Media
         "img",
     ]
-    .iter()
-    .copied()
+    .into_iter()
     .collect();
 
-    let safe_attrs: HashSet<&str> = ["alt", "cite", "class", "href", "id", "src", "title"]
-        .iter()
-        .copied()
+    let safe_attrs: HashSet<_> = ["alt", "cite", "class", "href", "id", "src", "title"]
+        .into_iter()
         .collect();
 
+    let safe_url_schemes: HashSet<_> = ["http", "https", "mailto"].into_iter().collect();
+
     Builder::default()
         .tags(safe_tags)
         .generic_attributes(safe_attrs)
         .link_rel(Some("nofollow noopener noreferrer"))
-        .url_schemes(["http", "https", "mailto"].iter().copied().collect())
+        .url_schemes(safe_url_schemes)
         .clean(input)
         .to_string()
 }
@@ -161,7 +163,7 @@ mod tests {
 
     #[test]
     fn test_sanitize_removes_script() {
-        let html = r#"<script>alert('xss')</script><p>Hello</p>"#;
+        let html = r"<script>alert('xss')</script><p>Hello</p>";
         let clean = sanitize_html(html);
         assert!(!clean.contains("script"));
         assert!(clean.contains("Hello"));
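Arrays have been `IntoIterator` by value since the 2021 edition, so `.into_iter().collect()` replaces the `.iter().copied()` dance, and hoisting `safe_url_schemes` into a named binding simply mirrors the other two sets. The idiom in isolation:

```rust
use std::collections::HashSet;

fn main() {
    // By-value array iteration collects HashSet<&'static str> directly
    // from string literals, no copied() required.
    let safe_tags: HashSet<&str> = ["a", "em", "p"].into_iter().collect();
    assert!(safe_tags.contains("p"));
    assert!(!safe_tags.contains("script"));
}
```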
"; let clean = sanitize_html(html); assert!(!clean.contains("script")); assert!(clean.contains("Hello")); diff --git a/crates/feedparser-rs-core/src/util/text.rs b/crates/feedparser-rs-core/src/util/text.rs index 2a9fe97..f026144 100644 --- a/crates/feedparser-rs-core/src/util/text.rs +++ b/crates/feedparser-rs-core/src/util/text.rs @@ -1,15 +1,6 @@ -/// Text processing utilities -/// -/// This module will provide functions for text manipulation, -/// such as trimming, normalizing whitespace, etc. +// Text processing utilities +// +// This module will provide functions for text manipulation, +// such as trimming, normalizing whitespace, etc. // TODO: Implement as needed - -#[cfg(test)] -mod tests { - #[test] - fn test_placeholder() { - // Placeholder test - assert!(true); - } -} diff --git a/crates/feedparser-rs-core/tests/integration_tests.rs b/crates/feedparser-rs-core/tests/integration_tests.rs index f974097..67d9e67 100644 --- a/crates/feedparser-rs-core/tests/integration_tests.rs +++ b/crates/feedparser-rs-core/tests/integration_tests.rs @@ -1,11 +1,17 @@ +#![allow( + missing_docs, + clippy::if_then_some_else_none, + clippy::single_match_else +)] + use feedparser_rs_core::{FeedVersion, detect_format, parse}; /// Helper function to load test fixtures fn load_fixture(path: &str) -> Vec { // Fixtures are in the workspace root tests/fixtures/ directory - let fixture_path = format!("../../tests/fixtures/{}", path); + let fixture_path = format!("../../tests/fixtures/{path}"); std::fs::read(&fixture_path) - .unwrap_or_else(|e| panic!("Failed to load fixture '{}': {}", fixture_path, e)) + .unwrap_or_else(|e| panic!("Failed to load fixture '{fixture_path}': {e}")) } /// Helper to assert basic feed validity