Skip to content

Commit 041113c

Browse files
committed
Use memchr to search for characters to escape
1 parent ca1c09a commit 041113c

File tree

3 files changed

+218
-115
lines changed

3 files changed

+218
-115
lines changed

src/escapei.rs

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
//! Manage xml character escapes
22
3-
use memchr::memchr2_iter;
3+
use memchr::{memchr2_iter, memchr3_iter};
44
use std::borrow::Cow;
55
use std::ops::Range;
66

7+
use crate::utils::MergeIter;
78
#[cfg(test)]
89
use pretty_assertions::assert_eq;
910

@@ -72,7 +73,14 @@ impl std::error::Error for EscapeError {}
7273
/// | `'` | `&apos;`
7374
/// | `"` | `&quot;`
7475
pub fn escape(raw: &str) -> Cow<str> {
75-
_escape(raw, |ch| matches!(ch, b'<' | b'>' | b'&' | b'\'' | b'\"'))
76+
let bytes = raw.as_bytes();
77+
_escape(
78+
raw,
79+
MergeIter::new(
80+
memchr3_iter(b'<', b'>', b'&', bytes),
81+
memchr2_iter(b'\'', b'"', bytes),
82+
),
83+
)
7684
}
7785

7886
/// Escapes an `&str` and replaces xml special characters (`<`, `>`, `&`)
@@ -89,24 +97,23 @@ pub fn escape(raw: &str) -> Cow<str> {
8997
/// | `>` | `&gt;`
9098
/// | `&` | `&amp;`
9199
pub fn partial_escape(raw: &str) -> Cow<str> {
92-
_escape(raw, |ch| matches!(ch, b'<' | b'>' | b'&'))
100+
_escape(raw, memchr3_iter(b'<', b'>', b'&', raw.as_bytes()))
93101
}
94102

95103
/// Escapes an `&str` and replaces a subset of xml special characters (`<`, `>`,
96104
/// `&`, `'`, `"`) with their corresponding xml escaped value.
97-
pub(crate) fn _escape<F: Fn(u8) -> bool>(raw: &str, escape_chars: F) -> Cow<str> {
105+
pub(crate) fn _escape<It>(raw: &str, escapes: It) -> Cow<str>
106+
where
107+
It: Iterator<Item = usize>,
108+
{
98109
let bytes = raw.as_bytes();
99110
let mut escaped = None;
100-
let mut iter = bytes.iter();
101-
let mut pos = 0;
102-
while let Some(i) = iter.position(|&b| escape_chars(b)) {
103-
if escaped.is_none() {
104-
escaped = Some(Vec::with_capacity(raw.len()));
105-
}
106-
let escaped = escaped.as_mut().expect("initialized");
107-
let new_pos = pos + i;
108-
escaped.extend_from_slice(&bytes[pos..new_pos]);
109-
match bytes[new_pos] {
111+
let mut last_pos = 0;
112+
for i in escapes {
113+
let escaped = escaped.get_or_insert_with(|| Vec::with_capacity(raw.len()));
114+
let byte = bytes[i];
115+
escaped.extend_from_slice(&bytes[last_pos..i]);
116+
match byte {
110117
b'<' => escaped.extend_from_slice(b"&lt;"),
111118
b'>' => escaped.extend_from_slice(b"&gt;"),
112119
b'\'' => escaped.extend_from_slice(b"&apos;"),
@@ -124,11 +131,11 @@ pub(crate) fn _escape<F: Fn(u8) -> bool>(raw: &str, escape_chars: F) -> Cow<str>
124131
"Only '<', '>','\', '&', '\"', '\\t', '\\r', '\\n', and ' ' are escaped"
125132
),
126133
}
127-
pos = new_pos + 1;
134+
last_pos = i + 1;
128135
}
129136

130137
if let Some(mut escaped) = escaped {
131-
if let Some(raw) = bytes.get(pos..) {
138+
if let Some(raw) = bytes.get(last_pos..) {
132139
escaped.extend_from_slice(raw);
133140
}
134141
// SAFETY: we operate on UTF-8 input and search for one-byte chars only,

src/se/simple_type.rs

Lines changed: 138 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
use crate::errors::serialize::DeError;
77
use crate::escapei::_escape;
88
use crate::se::{Indent, QuoteLevel};
9+
use crate::utils::MergeIter;
10+
use memchr::{memchr2_iter, memchr3_iter, memchr_iter};
911
use serde::ser::{
1012
Impossible, Serialize, SerializeSeq, SerializeTuple, SerializeTupleStruct, Serializer,
1113
};
@@ -29,67 +31,96 @@ fn escape_item(value: &str, target: QuoteTarget, level: QuoteLevel) -> Cow<str>
2931
use QuoteLevel::*;
3032
use QuoteTarget::*;
3133

34+
let bytes = value.as_bytes();
35+
3236
match (target, level) {
33-
(_, Full) => _escape(value, |ch| match ch {
34-
// Spaces used as delimiters of list items, cannot be used in the item
35-
b' ' | b'\r' | b'\n' | b'\t' => true,
36-
// Required characters to escape
37-
b'&' | b'<' | b'>' | b'\'' | b'\"' => true,
38-
_ => false,
39-
}),
37+
(_, Full) => _escape(
38+
value,
39+
// ' ', '\r', '\n', '\t': Spaces used as delimiters of list items, cannot be used in the item
40+
// '&', '<', '>', '\'', '"': Required characters to escape
41+
MergeIter::new(
42+
MergeIter::new(
43+
memchr3_iter(b' ', b'\r', b'\n', bytes),
44+
memchr3_iter(b'\t', b'&', b'<', bytes),
45+
),
46+
memchr3_iter(b'>', b'\'', b'"', bytes),
47+
),
48+
),
4049
//----------------------------------------------------------------------
41-
(Text, Partial) => _escape(value, |ch| match ch {
42-
// Spaces used as delimiters of list items, cannot be used in the item
43-
b' ' | b'\r' | b'\n' | b'\t' => true,
44-
// Required characters to escape
45-
b'&' | b'<' | b'>' => true,
46-
_ => false,
47-
}),
48-
(Text, Minimal) => _escape(value, |ch| match ch {
49-
// Spaces used as delimiters of list items, cannot be used in the item
50-
b' ' | b'\r' | b'\n' | b'\t' => true,
51-
// Required characters to escape
52-
b'&' | b'<' => true,
53-
_ => false,
54-
}),
50+
(Text, Partial) => _escape(
51+
value,
52+
// ' ', '\r', '\n', '\t': Spaces used as delimiters of list items, cannot be used in the item
53+
// '&', '<', '>': Required characters to escape
54+
MergeIter::new(
55+
MergeIter::new(
56+
memchr3_iter(b' ', b'\r', b'\n', bytes),
57+
memchr3_iter(b'\t', b'&', b'<', bytes),
58+
),
59+
memchr_iter(b'>', bytes),
60+
),
61+
),
62+
(Text, Minimal) => _escape(
63+
value,
64+
// ' ', '\r', '\n', '\t': Spaces used as delimiters of list items, cannot be used in the item
65+
// '&', '<': Required characters to escape
66+
MergeIter::new(
67+
memchr3_iter(b' ', b'\r', b'\n', bytes),
68+
memchr3_iter(b'\t', b'&', b'<', bytes),
69+
),
70+
),
5571
//----------------------------------------------------------------------
56-
(DoubleQAttr, Partial) => _escape(value, |ch| match ch {
57-
// Spaces used as delimiters of list items, cannot be used in the item
58-
b' ' | b'\r' | b'\n' | b'\t' => true,
59-
// Required characters to escape
60-
b'&' | b'<' | b'>' => true,
61-
// Double quoted attribute should escape quote
62-
b'"' => true,
63-
_ => false,
64-
}),
65-
(DoubleQAttr, Minimal) => _escape(value, |ch| match ch {
66-
// Spaces used as delimiters of list items, cannot be used in the item
67-
b' ' | b'\r' | b'\n' | b'\t' => true,
68-
// Required characters to escape
69-
b'&' | b'<' => true,
70-
// Double quoted attribute should escape quote
71-
b'"' => true,
72-
_ => false,
73-
}),
72+
(DoubleQAttr, Partial) => _escape(
73+
value,
74+
// ' ', '\r', '\n', '\t': Spaces used as delimiters of list items, cannot be used in the item
75+
// '&', '<', '>': Required characters to escape
76+
MergeIter::new(
77+
MergeIter::new(
78+
memchr3_iter(b' ', b'\r', b'\n', bytes),
79+
memchr3_iter(b'\t', b'&', b'<', bytes),
80+
),
81+
memchr2_iter(b'>', b'"', bytes),
82+
),
83+
),
84+
(DoubleQAttr, Minimal) => _escape(
85+
value,
86+
// ' ', '\r', '\n', '\t': Spaces used as delimiters of list items, cannot be used in the item
87+
// '&', '<': Required characters to escape
88+
// '"': Double quoted attribute should escape quote
89+
MergeIter::new(
90+
MergeIter::new(
91+
memchr3_iter(b' ', b'\r', b'\n', bytes),
92+
memchr3_iter(b'\t', b'&', b'<', bytes),
93+
),
94+
memchr_iter(b'"', bytes),
95+
),
96+
),
7497
//----------------------------------------------------------------------
75-
(SingleQAttr, Partial) => _escape(value, |ch| match ch {
76-
// Spaces used as delimiters of list items
77-
b' ' | b'\r' | b'\n' | b'\t' => true,
78-
// Required characters to escape
79-
b'&' | b'<' | b'>' => true,
80-
// Single quoted attribute should escape quote
81-
b'\'' => true,
82-
_ => false,
83-
}),
84-
(SingleQAttr, Minimal) => _escape(value, |ch| match ch {
85-
// Spaces used as delimiters of list items
86-
b' ' | b'\r' | b'\n' | b'\t' => true,
87-
// Required characters to escape
88-
b'&' | b'<' => true,
89-
// Single quoted attribute should escape quote
90-
b'\'' => true,
91-
_ => false,
92-
}),
98+
(SingleQAttr, Partial) => _escape(
99+
value,
100+
// ' ', '\r', '\n', '\t': Spaces used as delimiters of list items, cannot be used in the item
101+
// '&', '<', '>': Required characters to escape
102+
// '\'': Single quoted attribute should escape quote
103+
MergeIter::new(
104+
MergeIter::new(
105+
memchr3_iter(b' ', b'\r', b'\n', bytes),
106+
memchr3_iter(b'\t', b'&', b'<', bytes),
107+
),
108+
memchr2_iter(b'>', b'\'', bytes),
109+
),
110+
),
111+
(SingleQAttr, Minimal) => _escape(
112+
value,
113+
// ' ', '\r', '\n', '\t': Spaces used as delimiters of list items, cannot be used in the item
114+
// '&', '<': Required characters to escape
115+
// '\'': Single quoted attribute should escape quote
116+
MergeIter::new(
117+
MergeIter::new(
118+
memchr3_iter(b' ', b'\r', b'\n', bytes),
119+
memchr3_iter(b'\t', b'&', b'<', bytes),
120+
),
121+
memchr_iter(b'\'', bytes),
122+
),
123+
),
93124
}
94125
}
95126

@@ -98,53 +129,61 @@ fn escape_list(value: &str, target: QuoteTarget, level: QuoteLevel) -> Cow<str>
98129
use QuoteLevel::*;
99130
use QuoteTarget::*;
100131

132+
let bytes = value.as_bytes();
133+
101134
match (target, level) {
102-
(_, Full) => _escape(value, |ch| match ch {
103-
// Required characters to escape
104-
b'&' | b'<' | b'>' | b'\'' | b'\"' => true,
105-
_ => false,
106-
}),
135+
(_, Full) => _escape(
136+
value,
137+
// '&', '<', '>', '\'', '"': Required characters to escape
138+
MergeIter::new(
139+
memchr3_iter(b'&', b'<', b'>', bytes),
140+
memchr2_iter(b'\'', b'"', bytes),
141+
),
142+
),
107143
//----------------------------------------------------------------------
108-
(Text, Partial) => _escape(value, |ch| match ch {
109-
// Required characters to escape
110-
b'&' | b'<' | b'>' => true,
111-
_ => false,
112-
}),
113-
(Text, Minimal) => _escape(value, |ch| match ch {
114-
// Required characters to escape
115-
b'&' | b'<' => true,
116-
_ => false,
117-
}),
144+
(Text, Partial) => _escape(
145+
value,
146+
// '&', '<', '>': Required characters to escape
147+
memchr3_iter(b'&', b'<', b'>', bytes),
148+
),
149+
(Text, Minimal) => _escape(
150+
value,
151+
// '&', '<': Required characters to escape
152+
memchr2_iter(b'&', b'<', bytes),
153+
),
118154
//----------------------------------------------------------------------
119-
(DoubleQAttr, Partial) => _escape(value, |ch| match ch {
155+
(DoubleQAttr, Partial) => _escape(
156+
value,
157+
// '&', '<', '>': Required characters to escape
158+
// '"': Double quoted attribute should escape quote
159+
MergeIter::new(
160+
memchr3_iter(b'&', b'<', b'>', bytes),
161+
memchr_iter(b'"', bytes),
162+
),
163+
),
164+
(DoubleQAttr, Minimal) => _escape(
165+
value,
166+
// '&', '<': Required characters to escape
167+
// '"': Double quoted attribute should escape quote
120168
// All three bytes fit in a single `memchr3_iter` search, so no `MergeIter` is needed here
121-
b'&' | b'<' | b'>' => true,
122-
// Double quoted attribute should escape quote
123-
b'"' => true,
124-
_ => false,
125-
}),
126-
(DoubleQAttr, Minimal) => _escape(value, |ch| match ch {
127-
// Required characters to escape
128-
b'&' | b'<' => true,
129-
// Double quoted attribute should escape quote
130-
b'"' => true,
131-
_ => false,
132-
}),
169+
memchr3_iter(b'&', b'<', b'"', bytes),
170+
),
133171
//----------------------------------------------------------------------
134-
(SingleQAttr, Partial) => _escape(value, |ch| match ch {
135-
// Required characters to escape
136-
b'&' | b'<' | b'>' => true,
137-
// Single quoted attribute should escape quote
138-
b'\'' => true,
139-
_ => false,
140-
}),
141-
(SingleQAttr, Minimal) => _escape(value, |ch| match ch {
142-
// Required characters to escape
143-
b'&' | b'<' => true,
144-
// Single quoted attribute should escape quote
145-
b'\'' => true,
146-
_ => false,
147-
}),
172+
(SingleQAttr, Partial) => _escape(
173+
value,
174+
// '&', '<', '>': Required characters to escape
175+
// '\'': Single quoted attribute should escape quote
176+
MergeIter::new(
177+
memchr3_iter(b'&', b'<', b'>', bytes),
178+
memchr_iter(b'\'', bytes),
179+
),
180+
),
181+
(SingleQAttr, Minimal) => _escape(
182+
value,
183+
// '&', '<': Required characters to escape
184+
// '\'': Single quoted attribute should escape quote
185+
memchr3_iter(b'&', b'<', b'\'', bytes),
186+
),
148187
}
149188
}
150189

0 commit comments

Comments
 (0)