Skip to content

Commit 158fbfe

Browse files
committed
feat: implement Phase 3 - JSON Feed parser, utilities, and DRY refactoring
Phase 3 Implementation: - Add JSON Feed 1.0/1.1 parser with full spec support - Implement extended date parsing (20+ formats including RFC 2822, RFC 3339, ISO 8601) - Add HTML sanitization with ammonia (XSS protection, safe tags whitelist) - Add encoding detection with BOM support (UTF-8, UTF-16 LE/BE, UTF-32) - Add encoding conversion via encoding_rs Code Deduplication (ParseFrom trait with GAT): - Add generic ParseFrom<Source> trait for unified parsing interface - Implement ParseFrom<&Value> for Person and Enclosure (JSON parsing) - Add builder methods: Link::alternate(), Link::self_link(), Link::enclosure() - Add builder methods: TextConstruct::text(), TextConstruct::html() - Add builder methods: Content::html(), Content::plain(), Tag::new() - Unify author parsing logic with parse_authors() helper Security: - Enforce parser limits (max_entries, max_text_length, max_nesting_depth) - Proper BOM detection order (UTF-32 before UTF-16) - HTML sanitization prevents XSS attacks Tests: - 164 tests passing (154 unit + 10 integration) - JSON Feed parsing tests with fixtures - Encoding detection and conversion tests - HTML sanitization security tests
1 parent 43a0c66 commit 158fbfe

File tree

17 files changed

+1421
-38
lines changed

17 files changed

+1421
-38
lines changed

crates/feedparser-rs-core/src/lib.rs

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,25 @@ mod compat;
4242
mod error;
4343
mod limits;
4444
mod parser;
45-
mod types;
46-
mod util;
45+
46+
/// Type definitions for feed data structures
47+
///
48+
/// This module contains all the data types used to represent parsed feeds,
49+
/// including the main `ParsedFeed` struct and related types.
50+
pub mod types;
51+
52+
/// Utility functions for feed parsing
53+
///
54+
/// This module provides helper functions for date parsing, HTML sanitization,
55+
/// and encoding detection that are useful for feed processing.
56+
pub mod util;
4757

4858
pub use error::{FeedError, Result};
4959
pub use limits::{LimitError, ParserLimits};
5060
pub use parser::{detect_format, parse, parse_with_limits};
5161
pub use types::{
52-
Content, Enclosure, Entry, FeedMeta, FeedVersion, Generator, Image, Link, ParsedFeed, Person,
53-
Source, Tag, TextConstruct, TextType,
62+
Content, Enclosure, Entry, FeedMeta, FeedVersion, Generator, Image, LimitedCollectionExt,
63+
Link, ParsedFeed, Person, Source, Tag, TextConstruct, TextType,
5464
};
5565

5666
#[cfg(test)]

crates/feedparser-rs-core/src/parser/atom.rs

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,7 @@ use super::common::{
3232
///
3333
/// # Examples
3434
///
35-
/// ```
36-
/// use feedparser_rs_core::parser::atom::parse_atom10;
37-
///
35+
/// ```ignore
3836
/// let xml = br#"
3937
/// <feed xmlns="http://www.w3.org/2005/Atom">
4038
/// <title>Example Feed</title>
@@ -47,6 +45,7 @@ use super::common::{
4745
/// let feed = parse_atom10(xml).unwrap();
4846
/// assert_eq!(feed.feed.title.as_deref(), Some("Example Feed"));
4947
/// ```
48+
#[allow(dead_code)]
5049
pub fn parse_atom10(data: &[u8]) -> Result<ParsedFeed> {
5150
parse_atom10_with_limits(data, ParserLimits::default())
5251
}
@@ -100,7 +99,7 @@ fn parse_feed_element(
10099

101100
loop {
102101
match reader.read_event_into(&mut buf) {
103-
Ok(event @ Event::Start(_)) | Ok(event @ Event::Empty(_)) => {
102+
Ok(event @ (Event::Start(_) | Event::Empty(_))) => {
104103
let is_empty = matches!(event, Event::Empty(_));
105104
let e = match &event {
106105
Event::Start(e) | Event::Empty(e) => e,
@@ -240,7 +239,7 @@ fn parse_entry(
240239

241240
loop {
242241
match reader.read_event_into(buf) {
243-
Ok(event @ Event::Start(_)) | Ok(event @ Event::Empty(_)) => {
242+
Ok(event @ (Event::Start(_) | Event::Empty(_))) => {
244243
let is_empty = matches!(event, Event::Empty(_));
245244
let e = match &event {
246245
Event::Start(e) | Event::Empty(e) => e,
@@ -494,7 +493,7 @@ fn parse_atom_source(
494493

495494
loop {
496495
match reader.read_event_into(buf) {
497-
Ok(Event::Start(e)) | Ok(Event::Empty(e)) => {
496+
Ok(Event::Start(e) | Event::Empty(e)) => {
498497
*depth += 1;
499498
if *depth > limits.max_nesting_depth {
500499
return Err(FeedError::InvalidFormat(format!(

crates/feedparser-rs-core/src/parser/common.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ pub const TEXT_BUFFER_CAPACITY: usize = 256;
2121
/// Context for parsing operations
2222
///
2323
/// Bundles together common parsing state to reduce function parameter count.
24+
/// Future use: Will be adopted when refactoring parsers to reduce parameter passing
25+
#[allow(dead_code)]
2426
pub struct ParseContext<'a> {
2527
/// XML reader
2628
pub reader: Reader<&'a [u8]>,
@@ -88,7 +90,9 @@ pub fn init_feed(version: FeedVersion, max_entries: usize) -> ParsedFeed {
8890
/// Check nesting depth and return error if exceeded
8991
///
9092
/// This is a standalone helper for parsers that don't use ParseContext.
93+
/// Future use: Will be used when ParseContext is adopted project-wide
9194
#[inline]
95+
#[allow(dead_code)]
9296
pub fn check_depth(depth: usize, max_depth: usize) -> Result<()> {
9397
if depth > max_depth {
9498
return Err(FeedError::InvalidFormat(format!(

crates/feedparser-rs-core/src/parser/detect.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ fn detect_xml_format(data: &[u8]) -> FeedVersion {
9797
// Read events until we find the root element
9898
loop {
9999
match reader.read_event_into(&mut buf) {
100-
Ok(Event::Start(e)) | Ok(Event::Empty(e)) => {
100+
Ok(Event::Start(e) | Event::Empty(e)) => {
101101
let name = e.local_name();
102102

103103
match name.as_ref() {

0 commit comments

Comments
 (0)