Skip to content

Commit f6a0584

Browse files
committed
refactor: duplicate integer and string parser before removing writer
Remove the writer without affecting other parts of the code.
1 parent 3a7ea5d commit f6a0584

File tree

3 files changed

+975
-4
lines changed

3 files changed

+975
-4
lines changed

src/parsers/tokenizer/integer.rs

Lines changed: 369 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,369 @@
1+
//! Bencoded integer parser.
2+
//!
3+
//! It reads bencoded bytes from the input and writes JSON bytes to the output.
4+
use std::io::{self, Read};
5+
6+
use crate::rw::{byte_reader::ByteReader, writer::Writer};
7+
8+
use super::{
9+
error::{Error, ReadContext, WriteContext},
10+
BENCODE_END_INTEGER,
11+
};
12+
13+
/// The current state parsing the integer.
///
/// Each variant names what the parser expects to read next.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(clippy::enum_variant_names)]
enum StateExpecting {
    /// Nothing has been read yet; the next byte is discarded as the integer
    /// start marker (abbreviated `S`).
    Start,
    /// Expecting the first digit or a `-` sign (abbreviated `DoS`).
    DigitOrSign,
    /// A `-` sign was read; expecting the first digit (abbreviated `DaS`).
    DigitAfterSign,
    /// At least one digit was read; expecting another digit or the end of
    /// the integer (abbreviated `DoE`).
    DigitOrEnd,
}
22+
23+
/// It parses an integer bencoded value.
24+
///
25+
/// # Errors
26+
///
27+
/// Will return an error if it can't read from the input or write to the
28+
/// output.
29+
///
30+
/// # Panics
31+
///
32+
/// Will panic if we reach the end of the input without completing the integer
33+
/// (without reaching the end of the integer `e`).
34+
pub fn parse<R: Read, W: Writer>(
35+
reader: &mut ByteReader<R>,
36+
writer: &mut W,
37+
) -> Result<Vec<u8>, Error> {
38+
let mut state = StateExpecting::Start;
39+
let mut first_digit_is_zero = false;
40+
let mut value = vec![];
41+
42+
loop {
43+
let byte = next_byte(reader, writer)?;
44+
45+
let char = byte as char;
46+
47+
state = match state {
48+
StateExpecting::Start => {
49+
// Discard the 'i' byte
50+
StateExpecting::DigitOrSign
51+
}
52+
StateExpecting::DigitOrSign => {
53+
if char == '-' {
54+
writer.write_byte(byte)?;
55+
value.push(byte);
56+
57+
StateExpecting::DigitAfterSign
58+
} else if char.is_ascii_digit() {
59+
writer.write_byte(byte)?;
60+
value.push(byte);
61+
62+
if char == '0' {
63+
first_digit_is_zero = true;
64+
}
65+
66+
StateExpecting::DigitOrEnd
67+
} else {
68+
return Err(Error::UnexpectedByteParsingInteger(
69+
ReadContext {
70+
byte: Some(byte),
71+
pos: reader.input_byte_counter(),
72+
latest_bytes: reader.captured_bytes(),
73+
},
74+
WriteContext {
75+
byte: Some(byte),
76+
pos: writer.output_byte_counter(),
77+
latest_bytes: writer.captured_bytes(),
78+
},
79+
));
80+
}
81+
}
82+
StateExpecting::DigitAfterSign => {
83+
if char.is_ascii_digit() {
84+
writer.write_byte(byte)?;
85+
value.push(byte);
86+
87+
if char == '0' {
88+
first_digit_is_zero = true;
89+
}
90+
91+
StateExpecting::DigitOrEnd
92+
} else {
93+
return Err(Error::UnexpectedByteParsingInteger(
94+
ReadContext {
95+
byte: Some(byte),
96+
pos: reader.input_byte_counter(),
97+
latest_bytes: reader.captured_bytes(),
98+
},
99+
WriteContext {
100+
byte: Some(byte),
101+
pos: writer.output_byte_counter(),
102+
latest_bytes: writer.captured_bytes(),
103+
},
104+
));
105+
}
106+
}
107+
StateExpecting::DigitOrEnd => {
108+
if char.is_ascii_digit() {
109+
writer.write_byte(byte)?;
110+
value.push(byte);
111+
112+
if char == '0' && first_digit_is_zero {
113+
return Err(Error::LeadingZerosInIntegersNotAllowed(
114+
ReadContext {
115+
byte: Some(byte),
116+
pos: reader.input_byte_counter(),
117+
latest_bytes: reader.captured_bytes(),
118+
},
119+
WriteContext {
120+
byte: Some(byte),
121+
pos: writer.output_byte_counter(),
122+
latest_bytes: writer.captured_bytes(),
123+
},
124+
));
125+
}
126+
127+
StateExpecting::DigitOrEnd
128+
} else if byte == BENCODE_END_INTEGER {
129+
return Ok(value);
130+
} else {
131+
return Err(Error::UnexpectedByteParsingInteger(
132+
ReadContext {
133+
byte: Some(byte),
134+
pos: reader.input_byte_counter(),
135+
latest_bytes: reader.captured_bytes(),
136+
},
137+
WriteContext {
138+
byte: Some(byte),
139+
pos: writer.output_byte_counter(),
140+
latest_bytes: writer.captured_bytes(),
141+
},
142+
));
143+
}
144+
}
145+
};
146+
}
147+
}
148+
149+
/// It reads the next byte from the input.
150+
///
151+
/// # Errors
152+
///
153+
/// Will return an error if the end of input was reached.
154+
fn next_byte<R: Read, W: Writer>(reader: &mut ByteReader<R>, writer: &W) -> Result<u8, Error> {
155+
match reader.read_byte() {
156+
Ok(byte) => Ok(byte),
157+
Err(err) => {
158+
if err.kind() == io::ErrorKind::UnexpectedEof {
159+
return Err(Error::UnexpectedEndOfInputParsingInteger(
160+
ReadContext {
161+
byte: None,
162+
pos: reader.input_byte_counter(),
163+
latest_bytes: reader.captured_bytes(),
164+
},
165+
WriteContext {
166+
byte: None,
167+
pos: writer.output_byte_counter(),
168+
latest_bytes: writer.captured_bytes(),
169+
},
170+
));
171+
}
172+
Err(err.into())
173+
}
174+
}
175+
}
176+
177+
#[cfg(test)]
mod tests {
    // The parser under test is the one defined in this module
    // (`parsers::tokenizer::integer`), not the original `parsers::integer`
    // it was duplicated from.
    use crate::{
        parsers::{error::Error, tokenizer::integer::parse},
        rw::{byte_reader::ByteReader, string_writer::StringWriter},
    };

    /// Parses the input and returns what was written to the output,
    /// panicking if the parser fails.
    fn bencode_to_json_unchecked(input_buffer: &[u8]) -> String {
        let mut output = String::new();

        parse_bencode(input_buffer, &mut output).expect("Bencode to JSON conversion failed");

        output
    }

    /// Parses the input and returns what was written to the output, or the
    /// parser error.
    fn try_bencode_to_json(input_buffer: &[u8]) -> Result<String, Error> {
        let mut output = String::new();

        parse_bencode(input_buffer, &mut output)?;

        Ok(output)
    }

    /// Runs the parser over an in-memory input, writing into `output`.
    fn parse_bencode(input_buffer: &[u8], output: &mut String) -> Result<Vec<u8>, Error> {
        let mut reader = ByteReader::new(input_buffer);

        let mut writer = StringWriter::new(output);

        parse(&mut reader, &mut writer)
    }

    mod for_helpers {
        use crate::parsers::tokenizer::integer::tests::try_bencode_to_json;

        #[test]
        fn bencode_to_json_wrapper_succeeds() {
            assert_eq!(try_bencode_to_json(b"i0e").unwrap(), "0".to_string());
        }

        #[test]
        fn bencode_to_json_wrapper_fails() {
            assert!(try_bencode_to_json(b"i").is_err());
        }
    }

    #[test]
    fn zero() {
        assert_eq!(bencode_to_json_unchecked(b"i0e"), "0".to_string());
    }

    #[test]
    fn one_digit_integer() {
        assert_eq!(bencode_to_json_unchecked(b"i1e"), "1".to_string());
    }

    #[test]
    fn two_digits_integer() {
        assert_eq!(bencode_to_json_unchecked(b"i42e"), "42".to_string());
    }

    #[test]
    fn negative_integer() {
        assert_eq!(bencode_to_json_unchecked(b"i-1e"), "-1".to_string());
    }

    mod it_should_fail {
        use std::io::{self, Read};

        use crate::{
            parsers::{
                error::Error,
                tokenizer::integer::{parse, tests::try_bencode_to_json},
            },
            rw::{byte_reader::ByteReader, string_writer::StringWriter},
        };

        #[test]
        fn when_it_cannot_read_more_bytes_from_input() {
            let unfinished_int = b"i42";

            let result = try_bencode_to_json(unfinished_int);

            assert!(matches!(
                result,
                Err(Error::UnexpectedEndOfInputParsingInteger { .. })
            ));
        }

        #[test]
        fn when_it_finds_an_invalid_byte() {
            let int_with_invalid_byte = b"iae";

            let result = try_bencode_to_json(int_with_invalid_byte);

            assert!(matches!(
                result,
                Err(Error::UnexpectedByteParsingInteger { .. })
            ));
        }

        #[test]
        fn when_it_finds_leading_zeros() {
            // Leading zeros are not allowed. Only the zero integer can start with zero.

            let int_with_leading_zeros = b"i00e";

            let result = try_bencode_to_json(int_with_leading_zeros);

            assert!(matches!(
                result,
                Err(Error::LeadingZerosInIntegersNotAllowed { .. })
            ));
        }

        #[test]
        fn when_it_finds_leading_zeros_in_a_negative_integer() {
            // Leading zeros are not allowed. Only the zero integer can start with zero.

            let int_with_leading_zeros = b"i-00e";

            let result = try_bencode_to_json(int_with_leading_zeros);

            assert!(matches!(
                result,
                Err(Error::LeadingZerosInIntegersNotAllowed { .. })
            ));
        }

        mod when_it_receives_a_unexpected_byte {
            use crate::parsers::{error::Error, tokenizer::integer::tests::try_bencode_to_json};

            #[test]
            fn while_expecting_a_digit_or_sign() {
                let int_with_invalid_byte = b"ia";

                let result = try_bencode_to_json(int_with_invalid_byte);

                assert!(matches!(
                    result,
                    Err(Error::UnexpectedByteParsingInteger { .. })
                ));
            }

            #[test]
            fn while_expecting_digit_after_the_sign() {
                let int_with_invalid_byte = b"i-a";

                let result = try_bencode_to_json(int_with_invalid_byte);

                assert!(matches!(
                    result,
                    Err(Error::UnexpectedByteParsingInteger { .. })
                ));
            }

            #[test]
            fn while_expecting_digit_or_end() {
                let int_with_invalid_byte = b"i-1a";

                let result = try_bencode_to_json(int_with_invalid_byte);

                assert!(matches!(
                    result,
                    Err(Error::UnexpectedByteParsingInteger { .. })
                ));
            }
        }

        #[test]
        fn when_it_receives_a_non_eof_io_error() {
            // A reader whose every read fails with an error other than
            // `UnexpectedEof`.
            struct FaultyReader;

            impl Read for FaultyReader {
                fn read(&mut self, _buf: &mut [u8]) -> io::Result<usize> {
                    Err(io::Error::new(
                        io::ErrorKind::PermissionDenied,
                        "Permission denied",
                    ))
                }
            }

            let mut reader = ByteReader::new(FaultyReader);

            let mut output = String::new();
            let mut writer = StringWriter::new(&mut output);

            let result = parse(&mut reader, &mut writer);

            assert!(matches!(result, Err(Error::Io(_))));
        }
    }
}
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
//! Bencode tokenizer. Given an input stream, it returns a stream of tokens.
2+
pub mod integer;
3+
pub mod string;
4+
25
use std::io::{self, Read};
36

4-
use super::{
5-
error::{self, ReadContext, WriteContext},
6-
integer, string,
7-
};
7+
use super::error::{self, ReadContext, WriteContext};
88

99
use crate::rw::{byte_reader::ByteReader, byte_writer::ByteWriter, writer::Writer};
1010

0 commit comments

Comments
 (0)