Skip to content

Commit 3a7ea5d

Browse files
committed
refactor: extract mod tokenizer
1 parent 63b9b73 commit 3a7ea5d

File tree

3 files changed

+201
-180
lines changed

3 files changed

+201
-180
lines changed

src/parsers/integer.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use crate::rw::{byte_reader::ByteReader, writer::Writer};
77

88
use super::{
99
error::{Error, ReadContext, WriteContext},
10-
BENCODE_END_INTEGER,
10+
tokenizer::BENCODE_END_INTEGER,
1111
};
1212

1313
/// The current state parsing the integer.

src/parsers/mod.rs

Lines changed: 4 additions & 179 deletions
Original file line numberDiff line numberDiff line change
@@ -7,35 +7,26 @@ pub mod error;
77
pub mod integer;
88
pub mod stack;
99
pub mod string;
10+
pub mod tokenizer;
1011

1112
/* TODO:
1213
13-
- Remove writer from tokenizer.
14-
- Implement trait Iterator for tokenizer.
1514
- Rename this parser to generator.
1615
1716
*/
1817

1918
use core::str;
2019
use std::{
2120
fmt::Write as FmtWrite,
22-
io::{self, Read, Write as IoWrite},
21+
io::{Read, Write as IoWrite},
2322
};
2423

2524
use derive_more::derive::Display;
2625
use error::{ReadContext, WriteContext};
2726
use stack::{Stack, State};
27+
use tokenizer::{BencodeToken, BencodeTokenizer};
2828

29-
use crate::rw::{
30-
byte_reader::ByteReader, byte_writer::ByteWriter, string_writer::StringWriter, writer::Writer,
31-
};
32-
33-
// Bencoded reserved bytes
34-
const BENCODE_BEGIN_INTEGER: u8 = b'i';
35-
const BENCODE_END_INTEGER: u8 = b'e';
36-
const BENCODE_BEGIN_LIST: u8 = b'l';
37-
const BENCODE_BEGIN_DICT: u8 = b'd';
38-
const BENCODE_END_LIST_OR_DICT: u8 = b'e';
29+
use crate::rw::{byte_writer::ByteWriter, string_writer::StringWriter, writer::Writer};
3930

4031
#[derive(Debug, PartialEq, Display)]
4132
pub enum BencodeType {
@@ -45,172 +36,6 @@ pub enum BencodeType {
4536
Dict,
4637
}
4738

48-
#[derive(Debug, PartialEq)]
49-
pub enum BencodeToken {
50-
Integer(Vec<u8>),
51-
String(Vec<u8>),
52-
BeginList,
53-
BeginDict,
54-
EndListOrDict,
55-
LineBreak,
56-
}
57-
58-
pub struct BencodeTokenizer<R: Read> {
59-
byte_reader: ByteReader<R>,
60-
}
61-
62-
impl<R: Read> BencodeTokenizer<R> {
63-
pub fn new(reader: R) -> Self {
64-
BencodeTokenizer {
65-
byte_reader: ByteReader::new(reader),
66-
}
67-
}
68-
69-
fn next_token<W: Writer>(
70-
&mut self,
71-
writer: &mut W,
72-
) -> Result<Option<BencodeToken>, error::Error> {
73-
let capture_output = Vec::new();
74-
let mut null_writer = ByteWriter::new(capture_output);
75-
76-
let opt_peeked_byte = Self::peek_byte(&mut self.byte_reader, &null_writer)?;
77-
78-
match opt_peeked_byte {
79-
Some(peeked_byte) => {
80-
match peeked_byte {
81-
BENCODE_BEGIN_INTEGER => {
82-
let value = integer::parse(&mut self.byte_reader, &mut null_writer)?;
83-
Ok(Some(BencodeToken::Integer(value)))
84-
}
85-
b'0'..=b'9' => {
86-
let value = string::parse(&mut self.byte_reader, &mut null_writer)?;
87-
Ok(Some(BencodeToken::String(value)))
88-
}
89-
BENCODE_BEGIN_LIST => {
90-
let _byte = Self::read_peeked_byte(
91-
peeked_byte,
92-
&mut self.byte_reader,
93-
&null_writer,
94-
)?;
95-
Ok(Some(BencodeToken::BeginList))
96-
}
97-
BENCODE_BEGIN_DICT => {
98-
let _byte = Self::read_peeked_byte(
99-
peeked_byte,
100-
&mut self.byte_reader,
101-
&null_writer,
102-
)?;
103-
Ok(Some(BencodeToken::BeginDict))
104-
}
105-
BENCODE_END_LIST_OR_DICT => {
106-
let _byte = Self::read_peeked_byte(
107-
peeked_byte,
108-
&mut self.byte_reader,
109-
&null_writer,
110-
)?;
111-
Ok(Some(BencodeToken::EndListOrDict))
112-
}
113-
b'\n' => {
114-
// todo: we should not return any token and continue to the next token.
115-
// Ignore line breaks at the beginning, the end, or between values
116-
let _byte = Self::read_peeked_byte(
117-
peeked_byte,
118-
&mut self.byte_reader,
119-
&null_writer,
120-
)?;
121-
Ok(Some(BencodeToken::LineBreak))
122-
}
123-
_ => Err(error::Error::UnrecognizedFirstBencodeValueByte(
124-
ReadContext {
125-
byte: Some(peeked_byte),
126-
pos: self.byte_reader.input_byte_counter(),
127-
latest_bytes: self.byte_reader.captured_bytes(),
128-
},
129-
WriteContext {
130-
byte: Some(peeked_byte),
131-
pos: writer.output_byte_counter(),
132-
latest_bytes: writer.captured_bytes(),
133-
},
134-
)),
135-
}
136-
}
137-
None => Ok(None),
138-
}
139-
}
140-
141-
/// It reads the next byte from the input consuming it. It returns `None` if
142-
/// the input has ended.
143-
///
144-
/// # Errors
145-
///
146-
/// Will return an error if:
147-
///
148-
/// - It can't read from the input.
149-
/// - The byte read is not the expected one (the previously peeked byte).
150-
fn read_peeked_byte<W: Writer>(
151-
peeked_byte: u8,
152-
reader: &mut ByteReader<R>,
153-
writer: &W,
154-
) -> Result<Option<u8>, error::Error> {
155-
match reader.read_byte() {
156-
Ok(byte) => {
157-
if byte == peeked_byte {
158-
return Ok(Some(byte));
159-
}
160-
Err(error::Error::ReadByteAfterPeekingDoesMatchPeekedByte(
161-
ReadContext {
162-
byte: Some(byte),
163-
pos: reader.input_byte_counter(),
164-
latest_bytes: reader.captured_bytes(),
165-
},
166-
WriteContext {
167-
byte: Some(byte),
168-
pos: writer.output_byte_counter(),
169-
latest_bytes: writer.captured_bytes(),
170-
},
171-
))
172-
}
173-
Err(err) => {
174-
if err.kind() == io::ErrorKind::UnexpectedEof {
175-
return Ok(None);
176-
}
177-
Err(err.into())
178-
}
179-
}
180-
}
181-
182-
/// It peeks the next byte from the input without consuming it. It returns
183-
/// `None` if the input has ended.
184-
///
185-
/// # Errors
186-
///
187-
/// Will return an error if it can't read from the input.
188-
fn peek_byte<W: Writer>(
189-
reader: &mut ByteReader<R>,
190-
_writer: &W,
191-
) -> Result<Option<u8>, error::Error> {
192-
match reader.peek_byte() {
193-
Ok(byte) => Ok(Some(byte)),
194-
Err(err) => {
195-
if err.kind() == io::ErrorKind::UnexpectedEof {
196-
return Ok(None);
197-
}
198-
Err(err.into())
199-
}
200-
}
201-
}
202-
203-
/// Returns the number of bytes that have been read from the input.
204-
pub fn input_byte_counter(&self) -> u64 {
205-
self.byte_reader.input_byte_counter()
206-
}
207-
208-
/// Returns a copy of the bytes that have been read from the input.
209-
pub fn captured_bytes(&self) -> Vec<u8> {
210-
self.byte_reader.captured_bytes()
211-
}
212-
}
213-
21439
pub struct BencodeParser<R: Read> {
21540
tokenizer: BencodeTokenizer<R>,
21641
num_processed_tokens: u64,

0 commit comments

Comments
 (0)