@@ -7,35 +7,26 @@ pub mod error;
 pub mod integer;
 pub mod stack;
 pub mod string;
+pub mod tokenizer;
 
 /* TODO:
 
-- Remove writer from tokenizer.
-- Implement trait Iterator for tokenizer.
 - Rename this parser to generator.
 
 */
 
 use core::str;
 use std::{
     fmt::Write as FmtWrite,
-    io::{self, Read, Write as IoWrite},
+    io::{Read, Write as IoWrite},
 };
 
 use derive_more::derive::Display;
 use error::{ReadContext, WriteContext};
 use stack::{Stack, State};
+use tokenizer::{BencodeToken, BencodeTokenizer};
 
-use crate::rw::{
-    byte_reader::ByteReader, byte_writer::ByteWriter, string_writer::StringWriter, writer::Writer,
-};
-
-// Bencoded reserved bytes
-const BENCODE_BEGIN_INTEGER: u8 = b'i';
-const BENCODE_END_INTEGER: u8 = b'e';
-const BENCODE_BEGIN_LIST: u8 = b'l';
-const BENCODE_BEGIN_DICT: u8 = b'd';
-const BENCODE_END_LIST_OR_DICT: u8 = b'e';
+use crate::rw::{byte_writer::ByteWriter, string_writer::StringWriter, writer::Writer};
 
 #[derive(Debug, PartialEq, Display)]
 pub enum BencodeType {
@@ -45,172 +36,6 @@ pub enum BencodeType {
     Dict,
 }
 
-#[derive(Debug, PartialEq)]
-pub enum BencodeToken {
-    Integer(Vec<u8>),
-    String(Vec<u8>),
-    BeginList,
-    BeginDict,
-    EndListOrDict,
-    LineBreak,
-}
-
-pub struct BencodeTokenizer<R: Read> {
-    byte_reader: ByteReader<R>,
-}
-
-impl<R: Read> BencodeTokenizer<R> {
-    pub fn new(reader: R) -> Self {
-        BencodeTokenizer {
-            byte_reader: ByteReader::new(reader),
-        }
-    }
-
-    fn next_token<W: Writer>(
-        &mut self,
-        writer: &mut W,
-    ) -> Result<Option<BencodeToken>, error::Error> {
-        let capture_output = Vec::new();
-        let mut null_writer = ByteWriter::new(capture_output);
-
-        let opt_peeked_byte = Self::peek_byte(&mut self.byte_reader, &null_writer)?;
-
-        match opt_peeked_byte {
-            Some(peeked_byte) => {
-                match peeked_byte {
-                    BENCODE_BEGIN_INTEGER => {
-                        let value = integer::parse(&mut self.byte_reader, &mut null_writer)?;
-                        Ok(Some(BencodeToken::Integer(value)))
-                    }
-                    b'0'..=b'9' => {
-                        let value = string::parse(&mut self.byte_reader, &mut null_writer)?;
-                        Ok(Some(BencodeToken::String(value)))
-                    }
-                    BENCODE_BEGIN_LIST => {
-                        let _byte = Self::read_peeked_byte(
-                            peeked_byte,
-                            &mut self.byte_reader,
-                            &null_writer,
-                        )?;
-                        Ok(Some(BencodeToken::BeginList))
-                    }
-                    BENCODE_BEGIN_DICT => {
-                        let _byte = Self::read_peeked_byte(
-                            peeked_byte,
-                            &mut self.byte_reader,
-                            &null_writer,
-                        )?;
-                        Ok(Some(BencodeToken::BeginDict))
-                    }
-                    BENCODE_END_LIST_OR_DICT => {
-                        let _byte = Self::read_peeked_byte(
-                            peeked_byte,
-                            &mut self.byte_reader,
-                            &null_writer,
-                        )?;
-                        Ok(Some(BencodeToken::EndListOrDict))
-                    }
-                    b'\n' => {
-                        // TODO: don't return a token here; skip it and continue to the next token.
-                        // Ignore line breaks at the beginning, the end, or between values.
-                        let _byte = Self::read_peeked_byte(
-                            peeked_byte,
-                            &mut self.byte_reader,
-                            &null_writer,
-                        )?;
-                        Ok(Some(BencodeToken::LineBreak))
-                    }
-                    _ => Err(error::Error::UnrecognizedFirstBencodeValueByte(
-                        ReadContext {
-                            byte: Some(peeked_byte),
-                            pos: self.byte_reader.input_byte_counter(),
-                            latest_bytes: self.byte_reader.captured_bytes(),
-                        },
-                        WriteContext {
-                            byte: Some(peeked_byte),
-                            pos: writer.output_byte_counter(),
-                            latest_bytes: writer.captured_bytes(),
-                        },
-                    )),
-                }
-            }
-            None => Ok(None),
-        }
-    }
-
-    /// It reads the next byte from the input, consuming it. It returns `None`
-    /// if the input has ended.
-    ///
-    /// # Errors
-    ///
-    /// Will return an error if:
-    ///
-    /// - It can't read from the input.
-    /// - The byte read is not the expected one (the previously peeked byte).
-    fn read_peeked_byte<W: Writer>(
-        peeked_byte: u8,
-        reader: &mut ByteReader<R>,
-        writer: &W,
-    ) -> Result<Option<u8>, error::Error> {
-        match reader.read_byte() {
-            Ok(byte) => {
-                if byte == peeked_byte {
-                    return Ok(Some(byte));
-                }
-                Err(error::Error::ReadByteAfterPeekingDoesMatchPeekedByte(
-                    ReadContext {
-                        byte: Some(byte),
-                        pos: reader.input_byte_counter(),
-                        latest_bytes: reader.captured_bytes(),
-                    },
-                    WriteContext {
-                        byte: Some(byte),
-                        pos: writer.output_byte_counter(),
-                        latest_bytes: writer.captured_bytes(),
-                    },
-                ))
-            }
-            Err(err) => {
-                if err.kind() == io::ErrorKind::UnexpectedEof {
-                    return Ok(None);
-                }
-                Err(err.into())
-            }
-        }
-    }
-
-    /// It peeks the next byte from the input without consuming it. It returns
-    /// `None` if the input has ended.
-    ///
-    /// # Errors
-    ///
-    /// Will return an error if it can't read from the input.
-    fn peek_byte<W: Writer>(
-        reader: &mut ByteReader<R>,
-        _writer: &W,
-    ) -> Result<Option<u8>, error::Error> {
-        match reader.peek_byte() {
-            Ok(byte) => Ok(Some(byte)),
-            Err(err) => {
-                if err.kind() == io::ErrorKind::UnexpectedEof {
-                    return Ok(None);
-                }
-                Err(err.into())
-            }
-        }
-    }
-
-    /// Returns the number of bytes that have been read from the input.
-    pub fn input_byte_counter(&self) -> u64 {
-        self.byte_reader.input_byte_counter()
-    }
-
-    /// Returns a copy of the bytes that have been read from the input.
-    pub fn captured_bytes(&self) -> Vec<u8> {
-        self.byte_reader.captured_bytes()
-    }
-}
-
 pub struct BencodeParser<R: Read> {
     tokenizer: BencodeTokenizer<R>,
     num_processed_tokens: u64,
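
Side note on the dropped TODO "Implement trait Iterator for tokenizer": once the writer is out of the tokenizer (the other dropped TODO), the iterator can be a thin adapter over `next_token`. The sketch below is only an illustration of that idea, not part of this commit; it assumes the relocated `BencodeTokenizer` ends up exposing a writer-free `fn next_token(&mut self) -> Result<Option<BencodeToken>, Error>`.

```rust
// Hypothetical sketch, not part of this commit. Assumes the moved tokenizer
// exposes `fn next_token(&mut self) -> Result<Option<BencodeToken>, Error>`
// (writer parameter already removed) and that `Read`, `Error`, `BencodeToken`
// and `BencodeTokenizer` are in scope inside the `tokenizer` module.
impl<R: Read> Iterator for BencodeTokenizer<R> {
    // Errors are yielded as items so the caller decides whether to stop.
    type Item = Result<BencodeToken, Error>;

    fn next(&mut self) -> Option<Self::Item> {
        // `Result<Option<T>, E>::transpose` maps `Ok(None)` (end of input)
        // to `None`, which terminates the iteration.
        self.next_token().transpose()
    }
}
```

A caller could then drive it with `for token in tokenizer { ... }` or `tokenizer.collect::<Result<Vec<_>, _>>()`.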