Skip to content

Commit ae8db96

Browse files
authored
Merge pull request #659 from Mingun/tests
Fix test for ISO-8859-8-I encoding and small refactoring
2 parents ede4fb9 + 1e19a45 commit ae8db96

File tree

8 files changed

+85
-94
lines changed

8 files changed

+85
-94
lines changed

src/errors.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ use std::sync::Arc;
1212
/// The error type used by this crate.
1313
#[derive(Clone, Debug)]
1414
pub enum Error {
15-
/// IO error.
15+
/// XML document cannot be read from or written to underlying source.
1616
///
17-
/// `Arc<IoError>` instead of `IoError` since `IoError` is not `Clone`.
17+
/// Contains the reference-counted I/O error to make the error type `Clone`able.
1818
Io(Arc<IoError>),
1919
/// Input decoding error. If [`encoding`] feature is disabled, contains `None`,
2020
/// otherwise contains the UTF-8 decoding error

src/reader/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ macro_rules! read_event_impl {
213213
}
214214

215215
/// Read bytes up to `<` and skip it. If current byte (after skipping all space
216-
/// characters if [`Parser::trim_text_start`] is `true`) is already `<`, then
216+
/// characters if [`ReaderState::trim_text_start`] is `true`) is already `<`, then
217217
/// returns the next event, otherwise stays at the position just after the `<` symbol.
218218
///
219219
/// Moves parser to the `OpenedTag` state.
@@ -409,7 +409,7 @@ enum ParseState {
409409
/// [`Event::Start`] event. The next event emitted will be an [`Event::End`],
410410
/// after which the reader returns to the `ClosedTag` state.
411411
///
412-
/// [`expand_empty_elements`]: Parser::expand_empty_elements
412+
/// [`expand_empty_elements`]: ReaderState::expand_empty_elements
413413
Empty,
414414
/// Reader enters this state when an `Eof` event is generated or an error occurs.
415415
/// This is the last state; the reader stays in it forever.

src/reader/state.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,17 +132,18 @@ impl ReaderState {
132132
/// Wraps content of `buf` into the [`Event::End`] event. Does the check that
133133
/// end name matches the last opened start name if `self.check_end_names` is set.
134134
pub fn emit_end<'b>(&mut self, buf: &'b [u8]) -> Result<Event<'b>> {
135+
// Strip the `/` character. `content` contains data between `</` and `>`
136+
let content = &buf[1..];
135137
// XML standard permits whitespaces after the markup name in closing tags.
136138
// Let's strip them from the buffer before comparing tag names.
137139
let name = if self.trim_markup_names_in_closing_tags {
138-
if let Some(pos_end_name) = buf[1..].iter().rposition(|&b| !is_whitespace(b)) {
139-
let (name, _) = buf[1..].split_at(pos_end_name + 1);
140-
name
140+
if let Some(pos_end_name) = content.iter().rposition(|&b| !is_whitespace(b)) {
141+
&content[..pos_end_name + 1]
141142
} else {
142-
&buf[1..]
143+
content
143144
}
144145
} else {
145-
&buf[1..]
146+
content
146147
};
147148

148149
let decoder = self.decoder();

test-gen/src/main.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,15 @@ fn main() {
284284
.expect(&format!("label `{}` is unsupported", label));
285285

286286
process_index(enc, &codepoints);
287+
if enc == ISO_8859_8 {
288+
// ISO_8859_8_I does not have its own index in encoding/indexes.json,
289+
// but it has the same mapping as ISO_8859_8.
290+
//
291+
// Wikipedia (https://en.wikipedia.org/wiki/ISO-8859-8-I):
292+
// The WHATWG Encoding Standard used by HTML5 treats ISO-8859-8 and ISO-8859-8-I
293+
// as distinct encodings with the same mapping due to influence on the layout direction
294+
process_index(ISO_8859_8_I, &codepoints);
295+
}
287296
}
288297
// https://encoding.spec.whatwg.org/#x-user-defined-decoder
289298
make_xml(X_USER_DEFINED, '\u{F780}'..='\u{F7FF}');

tests/documents/encoding/ISO-8859-8-I.xml

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
1-
㰿硭氠敮捯摩湧㴢䥓伭㠸㔹ⴸⵉ∿㸊㱲潯琠慴瑲楢畴攱㴢Ă̄Ԇ܈ऊଌഎ༐ᄒጔᔖ᜘ᤚᬜᴞἠ℣␥✨⤪⬬⴮⼰ㄲ㌴㔶㜸㤺㬽㸿䁁䉃䑅䙇䡉䩋䱍乏偑剓呕噗塙婛屝幟恡扣摥晧桩橫汭湯灱牳瑵癷硹穻籽繿肁芃蒅蚇袉誋貍躏邑銓钕隗颙骛鲝麟ꀢਠ††⁡瑴物扵瑥㈽✁ȃЅ؇ࠉ਋఍ฏထሓᐕᘗ᠙ᨛᰝḟ‡∣␥⠩⨫Ⱝⸯ〱㈳㐵㘷㠹㨻㴾㽀䅂䍄䕆䝈䥊䭌䵎佐兒協啖坘奚孜嵞彠慢捤敦杨楪歬浮潰煲獴當睸祺筼絾羀膂莄薆螈覊讌趎辐醒鎔閖鞘馚鮜鶞龠✊†††㩁䉃䑅䙇䡉䩋䱍乏偑剓呕噗塙婟慢捤敦杨楪歬浮潰煲獴當睸祺㴺䅂䍄䕆䝈䥊䭌䵎佐兒協啖坘奚彡扣摥晧桩橫汭湯灱牳瑵癷硹稊㸊†㰿㩁䉃䑅䙇䡉䩋䱍乏偑剓呕噗塙婟慢捤敦杨楪歬浮潰煲獴當睸祺㼾ਠ‼ℭⴁȃЅ؇ࠉ਋఍ฏထሓᐕᘗ᠙ᨛᰝḟ‡∣␥☧⠩⨫Ⱝⸯ〱㈳㐵㘷㠹㨻㰽㸿䁁䉃䑅䙇䡉䩋䱍乏偑剓呕噗塙婛屝幟恡扣摥晧桩橫汭湯灱牳瑵癷硹穻籽繿肁芃蒅蚇袉誋貍躏邑銓钕隗颙骛鲝麟ꀭⴾਠ ȃЅ؇ࠉ਋఍ฏထሓᐕᘗ᠙ᨛᰝḟ‡∣␥✨⤪⬬⴮⼰ㄲ㌴㔶㜸㤺㬽㸿䁁䉃䑅䙇䡉䩋䱍乏偑剓呕噗塙婛屝幟恡扣摥晧桩橫汭湯灱牳瑵癷硹穻籽繿肁芃蒅蚇袉誋貍躏邑銓钕隗颙骛鲝麟ꀊ†㱮猺敬敭敮琠湳㩡瑴物扵瑥㴢癡汵攱∠硭汮猺湳㴢湡浥獰慣攢⼾ਠ‼孛䍄䅔䅛嬁ȃЅ؇ࠉ਋఍ฏထሓᐕᘗ᠙ᨛᰝḟ‡∣␥✨⤪⬬⴮⼰ㄲ㌴㔶㜸㤺㬽㸿䁁䉃䑅䙇䡉䩋䱍乏偑剓呕噗塙婛屝幟恡扣摥晧桩橫汭湯灱牳瑵癷硹穻籽繿肁芃蒅蚇袉誋貍躏邑銓钕隗颙骛鲝麟ꁝ崾਼⽲潯琾
2-
<root attribute1="
3-
 !#$%'()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~���������������������������������"
4-
attribute2='
5-
 !"#$%()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~���������������������������������'
6-
:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz=:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz
1+
<?xml version="1.1" encoding="ISO-8859-8-I"?>
2+
<!--This is generated file. Edit <quick-xml>/test-gen/src/main.rs instead-->
3+
<root attribute1="
4+
!#$%'()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~…■ぅΗ┤���葦桶患況弦沙悉醤珀矣粤肄蓍裨跋鈿韵鴦���燾"
5+
attribute2='
6+
!"#$%()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~…■ぅΗ┤���葦桶患況弦沙悉醤珀矣粤肄蓍裨跋鈿韵鴦���燾'
7+
:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz珀矣粤肄蓍裨跋鈿韵鴦���=:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz珀矣粤肄蓍裨跋鈿韵鴦���
78
>
8-
<?:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz?>
9-
<!--
10-
!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~���������������������������������-->
11-

12-
!"#$%'()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~���������������������������������
13-
<ns:element ns:attribute="value1" xmlns:ns="namespace"/>
14-
<[[CDATA[[
15-
!"#$%'()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~���������������������������������]]>
9+
<?:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz珀矣粤肄蓍裨跋鈿韵鴦���?>
10+
<!--
11+
!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~…■ぅΗ┤���葦桶患況弦沙悉醤珀矣粤肄蓍裨跋鈿韵鴦���-->
12+
13+
!"#$%'()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~…■ぅΗ┤���葦桶患況弦沙悉醤珀矣粤肄蓍裨跋鈿韵鴦���
14+
<ns::ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz珀矣粤肄蓍裨跋鈿韵鴦��� ns:attribute="value1" xmlns:ns="namespace"/>
15+
<![CDATA[
16+
!"#$%'()*+,-./0123456789:;=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~…■ぅΗ┤���葦桶患況弦沙悉醤珀矣粤肄蓍裨跋鈿韵鴦���]]>
1617
</root>

tests/issues.rs

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,22 @@ use quick_xml::name::QName;
99
use quick_xml::reader::Reader;
1010
use quick_xml::Error;
1111

12+
/// Regression test for https://github.com/tafia/quick-xml/issues/94
13+
#[test]
14+
fn issue94() {
15+
let data = br#"<Run>
16+
<!B>
17+
</Run>"#;
18+
let mut reader = Reader::from_reader(&data[..]);
19+
reader.trim_text(true);
20+
loop {
21+
match reader.read_event() {
22+
Ok(Event::Eof) | Err(..) => break,
23+
_ => (),
24+
}
25+
}
26+
}
27+
1228
/// Regression test for https://github.com/tafia/quick-xml/issues/115
1329
#[test]
1430
fn issue115() {
@@ -22,6 +38,41 @@ fn issue115() {
2238
}
2339
}
2440

41+
/// Regression test for https://github.com/tafia/quick-xml/issues/299
42+
#[test]
43+
fn issue299() -> Result<(), Error> {
44+
let xml = r#"
45+
<?xml version="1.0" encoding="utf8"?>
46+
<MICEX_DOC xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
47+
<SECURITY SecurityId="PLZL" ISIN="RU000A0JNAA8" SecShortName="Short Name" PriceType="CASH">
48+
<RECORDS RecNo="1" TradeNo="1111" TradeDate="2021-07-08" TradeTime="15:00:00" BuySell="S" SettleCode="Y1Dt" Decimals="3" Price="13057.034" Quantity="766" Value="10001688.29" AccInt="0" Amount="10001688.29" Balance="766" TrdAccId="X0011" ClientDetails="2222" CPFirmId="3333" CPFirmShortName="Firm Short Name" Price2="13057.034" RepoPart="2" ReportTime="16:53:27" SettleTime="17:47:06" ClientCode="4444" DueDate="2021-07-09" EarlySettleStatus="N" RepoRate="5.45" RateType="FIX"/>
49+
</SECURITY>
50+
</MICEX_DOC>
51+
"#;
52+
let mut reader = Reader::from_str(xml);
53+
loop {
54+
match reader.read_event()? {
55+
Event::Start(e) | Event::Empty(e) => {
56+
let attr_count = match e.name().as_ref() {
57+
b"MICEX_DOC" => 1,
58+
b"SECURITY" => 4,
59+
b"RECORDS" => 26,
60+
_ => unreachable!(),
61+
};
62+
assert_eq!(
63+
attr_count,
64+
e.attributes().filter(Result::is_ok).count(),
65+
"mismatch att count on '{:?}'",
66+
reader.decoder().decode(e.name().as_ref())
67+
);
68+
}
69+
Event::Eof => break,
70+
_ => (),
71+
}
72+
}
73+
Ok(())
74+
}
75+
2576
/// Regression test for https://github.com/tafia/quick-xml/issues/360
2677
#[test]
2778
fn issue360() {

tests/test.rs

Lines changed: 0 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@ use quick_xml::events::attributes::Attribute;
22
use quick_xml::events::Event::*;
33
use quick_xml::name::QName;
44
use quick_xml::reader::Reader;
5-
use quick_xml::Error;
65
use std::borrow::Cow;
76

87
use pretty_assertions::assert_eq;
@@ -89,21 +88,6 @@ fn test_comment_starting_with_gt() {
8988
}
9089
}
9190

92-
#[test]
93-
fn test_issue94() {
94-
let data = br#"<Run>
95-
<!B>
96-
</Run>"#;
97-
let mut reader = Reader::from_reader(&data[..]);
98-
reader.trim_text(true);
99-
loop {
100-
match reader.read_event() {
101-
Ok(Eof) | Err(..) => break,
102-
_ => (),
103-
}
104-
}
105-
}
106-
10791
#[test]
10892
fn test_no_trim() {
10993
let mut reader = Reader::from_str(" <tag> text </tag> ");
@@ -151,37 +135,3 @@ fn test_clone_reader() {
151135
assert!(matches!(cloned.read_event().unwrap(), Text(_)));
152136
assert!(matches!(cloned.read_event().unwrap(), End(_)));
153137
}
154-
155-
#[test]
156-
fn test_issue299() -> Result<(), Error> {
157-
let xml = r#"
158-
<?xml version="1.0" encoding="utf8"?>
159-
<MICEX_DOC xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
160-
<SECURITY SecurityId="PLZL" ISIN="RU000A0JNAA8" SecShortName="Short Name" PriceType="CASH">
161-
<RECORDS RecNo="1" TradeNo="1111" TradeDate="2021-07-08" TradeTime="15:00:00" BuySell="S" SettleCode="Y1Dt" Decimals="3" Price="13057.034" Quantity="766" Value="10001688.29" AccInt="0" Amount="10001688.29" Balance="766" TrdAccId="X0011" ClientDetails="2222" CPFirmId="3333" CPFirmShortName="Firm Short Name" Price2="13057.034" RepoPart="2" ReportTime="16:53:27" SettleTime="17:47:06" ClientCode="4444" DueDate="2021-07-09" EarlySettleStatus="N" RepoRate="5.45" RateType="FIX"/>
162-
</SECURITY>
163-
</MICEX_DOC>
164-
"#;
165-
let mut reader = Reader::from_str(xml);
166-
loop {
167-
match reader.read_event()? {
168-
Start(e) | Empty(e) => {
169-
let attr_count = match e.name().as_ref() {
170-
b"MICEX_DOC" => 1,
171-
b"SECURITY" => 4,
172-
b"RECORDS" => 26,
173-
_ => unreachable!(),
174-
};
175-
assert_eq!(
176-
attr_count,
177-
e.attributes().filter(Result::is_ok).count(),
178-
"mismatch att count on '{:?}'",
179-
reader.decoder().decode(e.name().as_ref())
180-
);
181-
}
182-
Eof => break,
183-
_ => (),
184-
}
185-
}
186-
Ok(())
187-
}

tests/unit_tests.rs

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -435,27 +435,6 @@ fn test_offset_err_comment() {
435435
}
436436
}
437437

438-
#[test]
439-
fn test_offset_err_comment_2_buf() {
440-
let mut r = Reader::from_str("<a><!--b>");
441-
r.trim_text(true);
442-
443-
let _ = r.read_event().unwrap();
444-
assert_eq!(r.buffer_position(), 3);
445-
446-
match r.read_event() {
447-
// error at char 4: no closing --> tag found
448-
Err(e) => assert_eq!(
449-
r.buffer_position(),
450-
4,
451-
"expecting buf_pos = 4, found {}, err {:?}",
452-
r.buffer_position(),
453-
e
454-
),
455-
e => panic!("expecting error, found {:?}", e),
456-
}
457-
}
458-
459438
#[test]
460439
fn test_offset_err_comment_trim_text() {
461440
let mut r = Reader::from_str("<a>\r\n <!--b>");

0 commit comments

Comments
 (0)