Skip to content

Commit d475481

Browse files
committed
feat(examples): add comprehensive real-world usage examples
Add four executable examples demonstrating feedparser-rs features: - parse_file.rs: Local file parsing for RSS/Atom feeds - parse_url.rs: HTTP fetching with ETag/Last-Modified caching - podcast_feed.rs: iTunes and Podcast 2.0 namespace metadata - error_handling.rs: Bozo pattern and graceful error recovery Include sample feed files for offline testing: - sample_rss.xml, sample_atom.xml, sample_podcast.xml - malformed_feed.xml for error handling tests Examples demonstrate: - Type-safe Url, MimeType, Email newtype usage - Deref<Target=str> for transparent string operations - HTTP conditional GET with caching headers - ParserLimits for DoS protection
1 parent 8d8a40c commit d475481

File tree

8 files changed

+1121
-0
lines changed

8 files changed

+1121
-0
lines changed
Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
//! Example: Error handling and the bozo pattern
2+
//!
3+
//! Demonstrates:
4+
//! - The bozo flag for malformed feeds
5+
//! - Graceful error recovery
6+
//! - Extracting data from broken feeds
7+
//! - Different types of parsing errors
8+
//! - Resource limits protection
9+
//!
10+
//! The "bozo" pattern (from Python feedparser) means:
11+
//! - Never panic on malformed input
12+
//! - Set bozo=true flag when issues occur
13+
//! - Continue parsing and extract whatever data is available
14+
//!
15+
//! Run with:
16+
//! ```bash
17+
//! cargo run --example error_handling
18+
//! ```
19+
20+
use feedparser_rs::{parse, parse_with_limits, ParserLimits};
21+
use std::fs;
22+
23+
fn main() -> Result<(), Box<dyn std::error::Error>> {
24+
println!("=== Feed Parser Example: Error Handling ===\n");
25+
26+
// Example 1: Parse malformed feed (bozo pattern)
27+
malformed_feed_example()?;
28+
29+
println!("\n{}\n", "=".repeat(60));
30+
31+
// Example 2: Resource limits
32+
resource_limits_example()?;
33+
34+
println!("\n{}\n", "=".repeat(60));
35+
36+
// Example 3: Invalid XML recovery
37+
invalid_xml_example()?;
38+
39+
println!("\n{}\n", "=".repeat(60));
40+
41+
// Example 4: Network error handling
42+
network_error_example()?;
43+
44+
Ok(())
45+
}
46+
47+
fn malformed_feed_example() -> Result<(), Box<dyn std::error::Error>> {
48+
println!("Example 1: Malformed Feed (Bozo Pattern)");
49+
println!("{}", "-".repeat(40));
50+
51+
let feed_path = "examples/feeds/malformed_feed.xml";
52+
let feed_data = fs::read(feed_path)?;
53+
54+
println!("Parsing feed with known issues...\n");
55+
56+
// The parser will NOT panic, even with malformed XML
57+
let feed = parse(&feed_data)?;
58+
59+
// Check the bozo flag
60+
println!("Bozo flag: {}", feed.bozo);
61+
62+
if feed.bozo {
63+
println!("Feed has parsing issues!");
64+
65+
if let Some(exception) = &feed.bozo_exception {
66+
println!("Exception details: {}", exception);
67+
}
68+
69+
println!("\nDespite errors, we can still extract data:");
70+
}
71+
72+
// Even with errors, we can extract available data
73+
if let Some(title) = &feed.feed.title {
74+
println!(" Feed title: {}", title);
75+
}
76+
77+
if let Some(link) = &feed.feed.link {
78+
println!(" Feed link: {}", link);
79+
}
80+
81+
println!("\nEntries found: {}", feed.entries.len());
82+
for (i, entry) in feed.entries.iter().enumerate() {
83+
println!("\n Entry {}:", i + 1);
84+
if let Some(title) = &entry.title {
85+
println!(" Title: {}", title);
86+
}
87+
if let Some(link) = &entry.link {
88+
println!(" Link: {}", link);
89+
}
90+
if let Some(summary) = &entry.summary {
91+
println!(" Summary: {}", summary);
92+
}
93+
94+
// Some entries may have unparseable dates
95+
if entry.published.is_some() {
96+
println!(" Published: {}", entry.published.as_ref().unwrap());
97+
} else {
98+
println!(" Published: (unable to parse date)");
99+
}
100+
}
101+
102+
println!("\nKey takeaway: The parser extracts as much data as possible,");
103+
println!("even when the feed has errors. Always check the bozo flag!");
104+
105+
Ok(())
106+
}
107+
108+
fn resource_limits_example() -> Result<(), Box<dyn std::error::Error>> {
109+
println!("Example 2: Resource Limits Protection");
110+
println!("{}", "-".repeat(40));
111+
112+
// Create a feed that exceeds limits
113+
let huge_feed = format!(
114+
r#"<?xml version="1.0"?>
115+
<rss version="2.0">
116+
<channel>
117+
<title>{}</title>
118+
<link>https://example.com</link>
119+
</channel>
120+
</rss>"#,
121+
"A".repeat(200_000)
122+
);
123+
124+
println!("Testing with strict limits:");
125+
let strict_limits = ParserLimits::strict();
126+
println!(" Max text length: {}", strict_limits.max_text_length);
127+
println!(" Max entries: {}", strict_limits.max_entries);
128+
129+
match parse_with_limits(huge_feed.as_bytes(), strict_limits) {
130+
Ok(feed) => {
131+
println!("\nParsed with limits:");
132+
if let Some(title) = &feed.feed.title {
133+
println!(" Title length: {} chars (may be truncated)", title.len());
134+
}
135+
}
136+
Err(e) => {
137+
println!("\nLimits exceeded: {}", e);
138+
println!("This protects against DoS attacks and resource exhaustion.");
139+
}
140+
}
141+
142+
// Now try with default (more permissive) limits
143+
println!("\n\nTesting with default limits:");
144+
let default_limits = ParserLimits::default();
145+
println!(" Max text length: {}", default_limits.max_text_length);
146+
147+
match parse_with_limits(huge_feed.as_bytes(), default_limits) {
148+
Ok(feed) => {
149+
println!("\nParsed successfully:");
150+
if let Some(title) = &feed.feed.title {
151+
println!(" Title length: {} chars", title.len());
152+
}
153+
}
154+
Err(e) => {
155+
println!("Error: {}", e);
156+
}
157+
}
158+
159+
println!("\nUse strict limits for untrusted input!");
160+
println!("Use default limits for known/trusted feeds.");
161+
162+
Ok(())
163+
}
164+
165+
fn invalid_xml_example() -> Result<(), Box<dyn std::error::Error>> {
166+
println!("Example 3: Invalid XML Recovery");
167+
println!("{}", "-".repeat(40));
168+
169+
// Various types of invalid XML
170+
let test_cases = vec![
171+
(
172+
"Unclosed tag",
173+
b"<rss version='2.0'><channel><title>Test</channel></rss>".as_slice(),
174+
),
175+
(
176+
"Invalid entity",
177+
b"<rss version='2.0'><channel><title>Test &#xFFFF;</title></channel></rss>".as_slice(),
178+
),
179+
(
180+
"Missing required elements",
181+
b"<rss version='2.0'><channel></channel></rss>".as_slice(),
182+
),
183+
];
184+
185+
for (name, xml) in test_cases {
186+
println!("\nTest case: {}", name);
187+
print!(" ");
188+
189+
match parse(xml) {
190+
Ok(feed) => {
191+
if feed.bozo {
192+
println!("Parsed with bozo flag set");
193+
if let Some(ex) = &feed.bozo_exception {
194+
println!(" Exception: {}", ex);
195+
}
196+
} else {
197+
println!("Parsed successfully");
198+
}
199+
200+
// Show what we recovered
201+
if feed.feed.title.is_some() {
202+
println!(" Recovered title: {:?}", feed.feed.title);
203+
}
204+
}
205+
Err(e) => {
206+
// Some errors are unrecoverable
207+
println!("Unrecoverable error: {}", e);
208+
}
209+
}
210+
}
211+
212+
println!("\n\nThe parser attempts to recover from common XML errors");
213+
println!("and extract as much information as possible.");
214+
215+
Ok(())
216+
}
217+
218+
/// Example 4: calls `parse_url` with three URLs that are expected to fail.
///
/// With the `http` feature enabled, tries an invalid URL, a non-existent domain and an
/// HTTP 404 endpoint, printing the result of each, followed by a short list of
/// error-handling recommendations. Without the feature, prints how to enable it.
///
/// Fetch failures are printed rather than propagated, so this always returns `Ok(())`.
fn network_error_example() -> Result<(), Box<dyn std::error::Error>> {
    println!("Example 4: Network Error Handling");
    println!("{}", "-".repeat(40));

    #[cfg(feature = "http")]
    {
        use feedparser_rs::parse_url;

        println!("Testing various network scenarios:\n");

        // (heading, url) pairs; headings after the first carry their own leading
        // blank line so the output spacing between cases is preserved.
        let scenarios = [
            // Test case 1: Invalid URL
            ("1. Invalid URL:", "not-a-valid-url"),
            // Test case 2: Non-existent domain
            (
                "\n2. Non-existent domain:",
                "https://this-domain-definitely-does-not-exist-12345.com/feed.xml",
            ),
            // Test case 3: 404 Not Found
            ("\n3. HTTP 404:", "https://httpbin.org/status/404"),
        ];

        for (heading, url) in scenarios {
            println!("{}", heading);
            match parse_url(url, None, None, None) {
                Ok(_) => println!(" Unexpected success"),
                Err(e) => println!(" Error (expected): {}", e),
            }
        }

        let recommendations = [
            "\n\nProper error handling:",
            "- Use Result type for all fallible operations",
            "- Match on specific error types for better UX",
            "- Provide helpful error messages to users",
            "- Implement retry logic for transient failures",
            "- Use timeouts to prevent hanging",
        ];
        for line in recommendations {
            println!("{}", line);
        }
    }

    #[cfg(not(feature = "http"))]
    {
        println!("HTTP feature not enabled.");
        println!("Enable with: cargo run --example error_handling --features http");
    }

    Ok(())
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<rss version="2.0">
3+
<channel>
4+
<title>Test Feed with Issues</title>
5+
<link>https://example.com/bad-feed</link>
6+
<description>This feed has various malformations to test error handling</description>
7+
8+
<item>
9+
<title>Post with unclosed tag</title>
10+
<link>https://example.com/post1
11+
<description>Missing closing link tag</description>
12+
<pubDate>Invalid date format here</pubDate>
13+
</item>
14+
15+
<item>
16+
<title>Post with invalid entity: &#xFFFF;</title>
17+
<link>https://example.com/post2</link>
18+
<description>Contains invalid XML character</description>
19+
</item>
20+
21+
<item>
22+
<title>Normal Post</title>
23+
<link>https://example.com/post3</link>
24+
<description>This one is fine</description>
25+
<pubDate>Sat, 28 Dec 2024 12:00:00 GMT</pubDate>
26+
</item>
27+
</channel>
28+
</rss>
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<feed xmlns="http://www.w3.org/2005/Atom">
3+
<title>Example Science Feed</title>
4+
<link href="https://example.com/science"/>
5+
<link rel="self" href="https://example.com/science/atom.xml"/>
6+
<updated>2024-12-28T12:00:00Z</updated>
7+
<id>https://example.com/science</id>
8+
<author>
9+
<name>Dr. Alice Cooper</name>
10+
<email>alice@example.com</email>
11+
<uri>https://example.com/authors/alice</uri>
12+
</author>
13+
<subtitle>Latest discoveries in science and technology</subtitle>
14+
15+
<entry>
16+
<title>Quantum Computing Breakthrough</title>
17+
<link href="https://example.com/science/quantum-2024"/>
18+
<id>https://example.com/science/quantum-2024</id>
19+
<updated>2024-12-28T10:00:00Z</updated>
20+
<published>2024-12-28T10:00:00Z</published>
21+
<author>
22+
<name>Dr. Alice Cooper</name>
23+
<email>alice@example.com</email>
24+
</author>
25+
<summary>New advances in quantum error correction</summary>
26+
<content type="html">
27+
&lt;p&gt;Researchers have made significant progress in quantum error correction,
28+
bringing us closer to practical quantum computers.&lt;/p&gt;
29+
</content>
30+
<category term="Quantum Physics"/>
31+
<category term="Computing"/>
32+
</entry>
33+
34+
<entry>
35+
<title>Climate Change Impact Study</title>
36+
<link href="https://example.com/science/climate-study"/>
37+
<id>https://example.com/science/climate-study</id>
38+
<updated>2024-12-27T15:30:00Z</updated>
39+
<published>2024-12-27T15:30:00Z</published>
40+
<author>
41+
<name>Dr. Bob Zhang</name>
42+
<email>bob@example.com</email>
43+
</author>
44+
<summary>Long-term effects on coastal ecosystems</summary>
45+
<content type="text">
46+
New research shows accelerating changes in coastal ecosystems due to rising temperatures.
47+
</content>
48+
<category term="Climate Science"/>
49+
<category term="Ecology"/>
50+
</entry>
51+
</feed>

0 commit comments

Comments
 (0)