Skip to content

Commit 6a1ddae

Browse files
committed
feat(filter): better error support in filter, fix buffer size
1 parent c9a2095 commit 6a1ddae

File tree

10 files changed

+229
-184
lines changed

10 files changed

+229
-184
lines changed

src/filter/error.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
use std::result;
2+
3+
/// Describes every failure that can occur while filtering a body.
///
/// Marked `#[non_exhaustive]`, so matches written outside this crate need a wildcard arm.
4+
#[derive(Debug)]
5+
#[non_exhaustive]
6+
pub enum FilterBodyError {
7+
/// An I/O failure while reading from or writing to a buffer; wraps the underlying [`std::io::Error`].
8+
IoError(std::io::Error),
9+
/// A failure while parsing HTML; wraps the underlying [`crate::html::HtmlParseError`].
10+
HtmlParseError(crate::html::HtmlParseError),
11+
}
12+
13+
/// Formats the error by delegating to the wrapped error's own `Display` output.
impl std::fmt::Display for FilterBodyError {
14+
/// Writes the inner error's message to `f` as-is; no text identifying the variant is added.
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
15+
match self {
16+
Self::IoError(source) => write!(f, "{}", source),
17+
Self::HtmlParseError(source) => write!(f, "{}", source),
18+
}
19+
}
20+
}
21+
22+
// NOTE(review): the impl body is empty, so the default `source()` (which returns `None`) applies;
// the wrapped error is exposed only through the variant payload and the `Display` output.
impl std::error::Error for FilterBodyError {}
23+
24+
/// Wraps an I/O error in [`FilterBodyError::IoError`], which lets the `?` operator convert it
/// automatically in functions whose error type is `FilterBodyError`.
impl From<std::io::Error> for FilterBodyError {
25+
fn from(error: std::io::Error) -> Self {
26+
Self::IoError(error)
27+
}
28+
}
29+
30+
/// Wraps an HTML parse error in [`FilterBodyError::HtmlParseError`], which lets the `?` operator
/// convert it automatically in functions whose error type is `FilterBodyError`.
impl From<crate::html::HtmlParseError> for FilterBodyError {
31+
fn from(error: crate::html::HtmlParseError) -> Self {
32+
Self::HtmlParseError(error)
33+
}
34+
}
35+
36+
/// Shorthand for a [`result::Result`] whose error type is [`FilterBodyError`].
pub type Result<T> = result::Result<T, FilterBodyError>;

src/filter/filter_body.rs

Lines changed: 56 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use crate::api::{BodyFilter, TextAction};
2+
use crate::filter::error::Result;
23
use crate::filter::gzip_filter_body::{GzDecodeFilterBody, GzEncodeFilterBody};
34
use crate::filter::html_body_action::HtmlBodyVisitor;
45
use crate::filter::text_filter_body::{TextFilterAction, TextFilterBodyAction};
@@ -7,6 +8,7 @@ use crate::http::Header;
78

89
/// A chain of body filters together with a flag recording whether filtering has failed.
pub struct FilterBodyAction {
910
// Filter items; `do_filter` feeds the output of each item into the next, in order.
chain: Vec<FilterBodyActionItem>,
11+
// Set to `true` the first time `do_filter` or `do_end` returns an error. Once set,
// `filter` returns its input untouched and `end` returns an empty vector.
in_error: bool,
1012
}
1113

1214
pub enum FilterBodyActionItem {
@@ -42,35 +44,64 @@ impl FilterBodyAction {
4244
chain.push(FilterBodyActionItem::Gzip(GzEncodeFilterBody::new()));
4345
}
4446

45-
Self { chain }
47+
Self { chain, in_error: false }
4648
}
4749

4850
pub fn is_empty(&self) -> bool {
4951
self.chain.is_empty()
5052
}
5153

52-
// Note: no need for `mut` here ?
53-
pub fn filter(&mut self, mut data: Vec<u8>) -> Vec<u8> {
54+
/// Filters one chunk of body data through the chain.
///
/// Returns the filtered bytes on success. If any step fails, the action is marked as failed
/// and the original `data` is returned unchanged; every later call then returns its input
/// as-is.
pub fn filter(&mut self, data: Vec<u8>) -> Vec<u8> {
55+
// A previous call already failed: pass the data through untouched.
if self.in_error {
56+
return data;
57+
}
58+
59+
// The clone keeps the original bytes available so they can be returned if filtering fails.
match self.do_filter(data.clone()) {
60+
Ok(filtered) => filtered,
61+
// NOTE(review): the error value is dropped here without being logged or reported.
Err(_) => {
62+
self.in_error = true;
63+
64+
data
65+
}
66+
}
67+
}
68+
69+
/// Runs `data` through each item of the chain in order, feeding each item's output to the next.
///
/// Stops early when an item produces empty output, and returns the first error an item reports.
fn do_filter(&mut self, mut data: Vec<u8>) -> Result<Vec<u8>> {
5470
for item in &mut self.chain {
55-
data = item.filter(data);
71+
data = item.filter(data)?;
5672

5773
if data.is_empty() {
5874
break;
5975
}
6076
}
6177

62-
data
78+
Ok(data)
6379
}
6480

6581
/// Finishes filtering and returns whatever bytes `do_end` produces.
///
/// Returns an empty vector if the action has already failed, or if `do_end` fails now (in
/// which case the action is marked as failed).
pub fn end(&mut self) -> Vec<u8> {
82+
// A previous call already failed: there is nothing to emit.
if self.in_error {
83+
return Vec::new();
84+
}
85+
86+
match self.do_end() {
87+
Ok(end) => end,
88+
// NOTE(review): the error value is dropped here without being logged or reported.
Err(_) => {
89+
self.in_error = true;
90+
91+
Vec::new()
92+
}
93+
}
94+
}
95+
96+
fn do_end(&mut self) -> Result<Vec<u8>> {
6697
let mut data = None;
6798

6899
for item in &mut self.chain {
69100
let new_data = match data {
70-
None => item.end(),
101+
None => item.end()?,
71102
Some(str) => {
72-
let mut end_str = item.filter(str);
73-
end_str.extend(item.end());
103+
let mut end_str = item.filter(str)?;
104+
end_str.extend(item.end()?);
74105

75106
end_str
76107
}
@@ -79,7 +110,7 @@ impl FilterBodyAction {
79110
data = if new_data.is_empty() { None } else { Some(new_data) };
80111
}
81112

82-
data.unwrap_or_else(|| Vec::new())
113+
Ok(data.unwrap_or_else(|| Vec::new()))
83114
}
84115
}
85116

@@ -100,22 +131,22 @@ impl FilterBodyActionItem {
100131
}
101132
}
102133

103-
pub fn filter(&mut self, data: Vec<u8>) -> Vec<u8> {
104-
match self {
105-
FilterBodyActionItem::Html(html_body_filter) => html_body_filter.filter(data),
134+
pub fn filter(&mut self, data: Vec<u8>) -> Result<Vec<u8>> {
135+
Ok(match self {
136+
FilterBodyActionItem::Html(html_body_filter) => html_body_filter.filter(data)?,
106137
FilterBodyActionItem::Text(text_body_filter) => text_body_filter.filter(data),
107-
FilterBodyActionItem::UnGzip(gzip_body_filter) => gzip_body_filter.filter(data),
108-
FilterBodyActionItem::Gzip(gzip_body_filter) => gzip_body_filter.filter(data),
109-
}
138+
FilterBodyActionItem::UnGzip(gzip_body_filter) => gzip_body_filter.filter(data)?,
139+
FilterBodyActionItem::Gzip(gzip_body_filter) => gzip_body_filter.filter(data)?,
140+
})
110141
}
111142

112-
pub fn end(&mut self) -> Vec<u8> {
113-
match self {
143+
pub fn end(&mut self) -> Result<Vec<u8>> {
144+
Ok(match self {
114145
FilterBodyActionItem::Html(html_body_filter) => html_body_filter.end(),
115146
FilterBodyActionItem::Text(text_body_filter) => text_body_filter.end(),
116-
FilterBodyActionItem::UnGzip(gzip_body_filter) => gzip_body_filter.end(),
117-
FilterBodyActionItem::Gzip(gzip_body_filter) => gzip_body_filter.end(),
118-
}
147+
FilterBodyActionItem::UnGzip(gzip_body_filter) => gzip_body_filter.end()?,
148+
FilterBodyActionItem::Gzip(gzip_body_filter) => gzip_body_filter.end()?,
149+
})
119150
}
120151
}
121152

@@ -125,7 +156,6 @@ mod tests {
125156
use crate::api::HTMLBodyFilter;
126157
use flate2::write::{GzDecoder, GzEncoder};
127158
use flate2::Compression;
128-
use std::io;
129159
use std::io::prelude::*;
130160

131161
#[test]
@@ -144,11 +174,12 @@ mod tests {
144174
let mut filtered = filter.filter(bytes.clone());
145175
filtered.extend(filter.end());
146176

147-
let mut gz = flate2::read::GzDecoder::new(filtered.as_slice());
148-
let mut s = String::new();
149-
gz.read_to_string(&mut s).unwrap();
177+
let mut gz = GzDecoder::new(Vec::new());
178+
gz.write_all(&filtered).unwrap();
179+
let data = gz.finish().unwrap();
180+
let after_filter = String::from_utf8(data.to_vec()).unwrap();
150181

151-
assert_eq!(before_filter, s);
182+
assert_eq!(before_filter, after_filter);
152183
}
153184

154185
#[test]

src/filter/gzip_filter_body.rs

Lines changed: 13 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use crate::filter::error::Result;
12
use flate2::write::{GzDecoder, GzEncoder};
23
use std::io::{Cursor, Read, Write};
34

@@ -18,42 +19,20 @@ impl GzDecodeFilterBody {
1819
}
1920
}
2021

21-
pub fn filter(&mut self, data: Vec<u8>) -> Vec<u8> {
22+
pub fn filter(&mut self, data: Vec<u8>) -> Result<Vec<u8>> {
2223
let mut decoded = vec![0u8; BUFFER_FRAME_SIZE];
2324

24-
match self.decoder.write_all(data.as_slice()) {
25-
Ok(()) => {}
26-
Err(err) => {
27-
log::error!("Error while decoding gzip: {}", err);
25+
self.decoder.write_all(data.as_slice())?;
26+
let readed = self.decoder.read(&mut decoded)?;
2827

29-
return data;
30-
}
31-
}
32-
33-
self.decoder.read(&mut decoded).expect("Error while decoding gzip");
34-
35-
decoded
36-
37-
// match self.decoder.read(&mut decoded) {
38-
// Ok(size) => decoded[..size].to_vec(),
39-
// Err(err) => {
40-
// panic!("Error while decoding gzip: {}", err);
41-
// }
42-
// }
28+
Ok(decoded[..readed].to_vec())
4329
}
4430

45-
pub fn end(&mut self) -> Vec<u8> {
31+
pub fn end(&mut self) -> Result<Vec<u8>> {
4632
let mut decoder = GzDecoder::new(Cursor::new(Vec::new()));
4733
std::mem::swap(&mut self.decoder, &mut decoder);
4834

49-
match decoder.finish() {
50-
Ok(data) => data.into_inner(),
51-
Err(err) => {
52-
log::error!("Error while encoding gzip: {}", err);
53-
54-
Vec::new()
55-
}
56-
}
35+
Ok(decoder.finish()?.into_inner())
5736
}
5837
}
5938

@@ -64,39 +43,19 @@ impl GzEncodeFilterBody {
6443
}
6544
}
6645

67-
pub fn filter(&mut self, data: Vec<u8>) -> Vec<u8> {
46+
pub fn filter(&mut self, data: Vec<u8>) -> Result<Vec<u8>> {
6847
let mut encoded = vec![0u8; BUFFER_FRAME_SIZE];
6948

70-
match self.encoder.write_all(data.as_slice()) {
71-
Ok(()) => {}
72-
Err(err) => {
73-
panic!("Error while encoding gzip: {}", err);
49+
self.encoder.write_all(data.as_slice())?;
50+
let readed = self.encoder.read(&mut encoded)?;
7451

75-
return data;
76-
}
77-
}
78-
79-
match self.encoder.read(&mut encoded) {
80-
Ok(size) => encoded[..size].to_vec(),
81-
Err(err) => {
82-
log::error!("Error while encoding gzip: {}", err);
83-
84-
return data;
85-
}
86-
}
52+
Ok(encoded[..readed].to_vec())
8753
}
8854

89-
pub fn end(&mut self) -> Vec<u8> {
55+
pub fn end(&mut self) -> Result<Vec<u8>> {
9056
let mut encoder = GzEncoder::new(Cursor::new(Vec::new()), flate2::Compression::default());
9157
std::mem::swap(&mut self.encoder, &mut encoder);
9258

93-
match encoder.finish() {
94-
Ok(data) => data.into_inner(),
95-
Err(err) => {
96-
log::error!("Error while encoding gzip: {}", err);
97-
98-
Vec::new()
99-
}
100-
}
59+
Ok(encoder.finish()?.into_inner())
10160
}
10261
}

src/filter/html_body_action/body_append.rs

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use super::super::html_filter_body::VOID_ELEMENTS;
22
use super::evaluate;
3+
use crate::filter::error::Result;
34
use crate::html;
45

56
#[derive(Debug)]
@@ -39,7 +40,7 @@ impl BodyAppend {
3940
(next_enter, next_leave, should_buffer, data)
4041
}
4142

42-
pub fn leave(&mut self, data: String) -> (Option<String>, Option<String>, String) {
43+
pub fn leave(&mut self, data: String) -> Result<(Option<String>, Option<String>, String)> {
4344
let next_enter = Some(self.element_tree[self.position].clone());
4445
let is_processing = self.position + 1 >= self.element_tree.len();
4546
let next_leave = if self.position as i32 > 0 {
@@ -54,43 +55,43 @@ impl BodyAppend {
5455
if let Some(css_selector) = self.css_selector.as_ref() {
5556
if !css_selector.is_empty() {
5657
if !evaluate(data.as_str(), css_selector.as_str()) {
57-
return (next_enter, next_leave, append_child(data, self.content.clone()));
58+
return Ok((next_enter, next_leave, append_child(data, self.content.clone())?));
5859
}
5960

60-
return (next_enter, next_leave, data);
61+
return Ok((next_enter, next_leave, data));
6162
}
6263
}
6364

6465
let mut new_data = self.content.clone();
6566
new_data.push_str(data.as_str());
6667

67-
return (next_enter, next_leave, new_data);
68+
return Ok((next_enter, next_leave, new_data));
6869
}
6970

70-
(next_enter, next_leave, data)
71+
Ok((next_enter, next_leave, data))
7172
}
7273

7374
pub fn first(&self) -> String {
7475
self.element_tree[0].clone()
7576
}
7677
}
7778

78-
fn append_child(content: String, child: String) -> String {
79+
fn append_child(content: String, child: String) -> Result<String> {
7980
let buffer = &mut content.as_bytes() as &mut dyn std::io::Read;
8081
let mut tokenizer = html::Tokenizer::new(buffer);
8182
let mut output = "".to_string();
8283
let mut level = 0;
8384

8485
loop {
85-
let token_type = tokenizer.next();
86+
let token_type = tokenizer.next()?;
8687

8788
if token_type == html::TokenType::ErrorToken {
88-
return content;
89+
return Ok(content);
8990
}
9091

9192
if token_type == html::TokenType::StartTagToken {
9293
level += 1;
93-
let (tag_name, _) = tokenizer.tag_name();
94+
let (tag_name, _) = tokenizer.tag_name()?;
9495

9596
if VOID_ELEMENTS.contains(tag_name.unwrap().as_str()) {
9697
level -= 1;
@@ -102,13 +103,13 @@ fn append_child(content: String, child: String) -> String {
102103

103104
if level == 0 {
104105
output.push_str(child.as_str());
105-
output.push_str(tokenizer.raw_as_string().as_str());
106-
output.push_str(tokenizer.buffered_as_string().as_str());
106+
output.push_str(tokenizer.raw_as_string()?.as_str());
107+
output.push_str(tokenizer.buffered_as_string()?.as_str());
107108

108-
return output;
109+
return Ok(output);
109110
}
110111
}
111112

112-
output.push_str(tokenizer.raw_as_string().as_str());
113+
output.push_str(tokenizer.raw_as_string()?.as_str());
113114
}
114115
}

0 commit comments

Comments
 (0)