Skip to content

Do iterative bencode parser using manual stack? #3

@baseplate-admin

Description

@baseplate-admin
use std::collections::BTreeMap;

#[derive(Debug)]
pub enum BencodeValue<'a> {
    Int(i64),
    Str(&'a [u8]),
    List(Vec<BencodeValue<'a>>),
    Dict(BTreeMap<&'a [u8], BencodeValue<'a>>),
}

/// Represents a container being built on the stack
enum Container<'a> {
    List(Vec<BencodeValue<'a>>),
    Dict(BTreeMap<&'a [u8], BencodeValue<'a>>),
}

/// Represents an item on the manual parsing stack
struct StackItem<'a> {
    container: Container<'a>,
    pending_key: Option<&'a [u8]>, // only used for dicts
}

pub struct Decoder<'a> {
    data: &'a [u8],
    pos: usize,
}

impl<'a> Decoder<'a> {
    pub fn new(data: &'a [u8]) -> Self {
        Self { data, pos: 0 }
    }

    fn peek(&self) -> Option<u8> {
        self.data.get(self.pos).copied()
    }

    fn bump(&mut self) -> Option<u8> {
        let b = self.peek()?;
        self.pos += 1;
        Some(b)
    }

    /// Main iterative decode
    pub fn decode(&mut self) -> Result<BencodeValue<'a>, &'static str> {
        let mut stack: Vec<StackItem<'a>> = Vec::new();
        let mut root: Option<BencodeValue<'a>> = None;

        while self.pos < self.data.len() {
            let b = self.peek().ok_or("Unexpected EOF")?;

            let value = match b {
                b'0'..=b'9' => self.decode_string()?,
                b'i' => self.decode_int()?,
                b'l' => {
                    self.bump();
                    stack.push(StackItem {
                        container: Container::List(Vec::new()),
                        pending_key: None,
                    });
                    continue;
                }
                b'd' => {
                    self.bump();
                    stack.push(StackItem {
                        container: Container::Dict(BTreeMap::new()),
                        pending_key: None,
                    });
                    continue;
                }
                b'e' => {
                    self.bump();
                    let completed = stack.pop().ok_or("Unexpected 'e'")?;
                    let completed_val = match completed.container {
                        Container::List(v) => BencodeValue::List(v),
                        Container::Dict(m) => BencodeValue::Dict(m),
                    };

                    if let Some(top) = stack.last_mut() {
                        match &mut top.container {
                            Container::List(v) => v.push(completed_val),
                            Container::Dict(m) => {
                                let key = top.pending_key.take().ok_or("Missing dict key")?;
                                m.insert(key, completed_val);
                            }
                        }
                        continue;
                    } else {
                        return Ok(completed_val); // finished parsing root
                    }
                }
                _ => return Err("Invalid prefix"),
            };

            // Push value into top of stack or set as root
            if let Some(top) = stack.last_mut() {
                match &mut top.container {
                    Container::List(v) => v.push(value),
                    Container::Dict(m) => {
                        if top.pending_key.is_none() {
                            if let BencodeValue::Str(k) = value {
                                top.pending_key = Some(k);
                            } else {
                                return Err("Dict key must be string");
                            }
                        } else {
                            let key = top.pending_key.take().unwrap();
                            m.insert(key, value);
                        }
                    }
                }
            } else {
                // stack empty → this is the root
                root = Some(value);
            }
        }

        root.ok_or("No data parsed")
    }

    /// Decode string like "4:spam"
    fn decode_string(&mut self) -> Result<BencodeValue<'a>, &'static str> {
        let start = self.pos;
        while self.data[self.pos].is_ascii_digit() {
            self.pos += 1;
        }

        if self.pos >= self.data.len() || self.data[self.pos] != b':' {
            return Err("Invalid string");
        }

        let len = std::str::from_utf8(&self.data[start..self.pos])
            .map_err(|_| "Invalid UTF-8")?
            .parse::<usize>()
            .map_err(|_| "Invalid length")?;

        self.pos += 1;
        let end = self.pos + len;
        if end > self.data.len() {
            return Err("Unexpected EOF");
        }

        let s = &self.data[self.pos..end];
        self.pos = end;
        Ok(BencodeValue::Str(s))
    }

    /// Decode integer like "i42e"
    fn decode_int(&mut self) -> Result<BencodeValue<'a>, &'static str> {
        if self.bump() != Some(b'i') {
            return Err("Invalid integer prefix");
        }
        let start = self.pos;
        while self.data[self.pos] != b'e' {
            self.pos += 1;
        }

        let n = std::str::from_utf8(&self.data[start..self.pos])
            .map_err(|_| "Invalid UTF-8")?
            .parse::<i64>()
            .map_err(|_| "Invalid integer")?;

        self.bump(); // skip 'e'
        Ok(BencodeValue::Int(n))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_manual_stack() {
        let mut d = Decoder::new(b"4:spam");
        assert_eq!(d.decode().unwrap(), BencodeValue::Str(b"spam"));

        let mut d = Decoder::new(b"i42e");
        assert_eq!(d.decode().unwrap(), BencodeValue::Int(42));

        let mut d = Decoder::new(b"l4:spam4:eggsi42ee");
        if let BencodeValue::List(v) = d.decode().unwrap() {
            assert_eq!(v.len(), 3);
        } else {
            panic!("Expected list");
        }

        let mut d = Decoder::new(b"d3:cow3:moo4:spam4:eggse");
        if let BencodeValue::Dict(m) = d.decode().unwrap() {
            assert_eq!(m.len(), 2);
        } else {
            panic!("Expected dict");
        }

        // Deep nesting test
        let mut d = Decoder::new(b"l4:spamli1ei2ei3eee");
        if let BencodeValue::List(v) = d.decode().unwrap() {
            assert_eq!(v.len(), 2);
            if let BencodeValue::List(inner) = &v[1] {
                assert_eq!(inner.len(), 3);
            } else {
                panic!("Expected inner list");
            }
        }
    }
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions