diff --git a/include/sexp/parser.hpp b/include/sexp/parser.hpp
index f4435ae..4bb07bd 100644
--- a/include/sexp/parser.hpp
+++ b/include/sexp/parser.hpp
@@ -33,14 +33,18 @@ class Parser
 public:
   enum { USE_ARRAYS = true };
 
+  // `depth` caps how deeply nested lists/arrays are parsed. The outermost
+  // list or array is level 0; anything nested deeper than `depth` is skipped
+  // instead of being built, except that an array one level below the cap
+  // which ends in `_ "text"` is kept. A negative `depth` means no cap.
-  static Value from_string(std::string const& str, bool use_arrays = false);
-  static Value from_stream(std::istream& stream, bool use_arrays = false);
+  static Value from_string(std::string const& str, bool use_arrays = false, int depth = -1);
+  static Value from_stream(std::istream& stream, bool use_arrays = false, int depth = -1);
 
-  static std::vector<Value> from_string_many(std::string const& str, bool use_arrays = false);
-  static std::vector<Value> from_stream_many(std::istream& stream, bool use_arrays = false);
+  static std::vector<Value> from_string_many(std::string const& str, bool use_arrays = false, int depth = -1);
+  static std::vector<Value> from_stream_many(std::istream& stream, bool use_arrays = false, int depth = -1);
 
 public:
-  Parser(Lexer& lexer);
+  Parser(Lexer& lexer, int depth = -1);
   ~Parser();
 
 private:
@@ -51,7 +55,10 @@ class Parser
 
 private:
   Lexer& m_lexer;
+  const int m_depth; // deepest nesting level that is still built; negative disables the cap
+
   Lexer::TokenType m_token;
+  int m_current_depth; // nesting level of the list/array being read; -1 outside any
 
 private:
   Parser(const Parser&);
diff --git a/include/sexp/value.hpp b/include/sexp/value.hpp
index 86887d8..f4769c9 100644
--- a/include/sexp/value.hpp
+++ b/include/sexp/value.hpp
@@ -204,6 +204,7 @@ class Value
   inline bool is_integer() const { return m_type == Type::INTEGER; }
   inline bool is_real() const { return (m_type == Type::REAL || m_type == Type::INTEGER); }
   inline bool is_string() const { return m_type == Type::STRING; }
+  inline bool is_translatable_string() const;
   inline bool is_symbol() const { return m_type == Type::SYMBOL; }
   inline bool is_cons() const { return m_type == Type::CONS; }
   inline bool is_array() const { return m_type == Type::ARRAY; }
@@ -343,6 +344,21 @@ Value::operator==(Value const& rhs) const
   }
 }
 
+// True if this value is a two-element array holding the symbol `_`
+// followed by a string.
+inline bool
+Value::is_translatable_string() const
+{
+  if (m_type != Type::ARRAY)
+    return false;
+
+  const std::vector<Value>& array = *m_data.m_array;
+  return array.size() == 2 &&
+    array[0].is_symbol() &&
+    array[0].as_string() == "_" &&
+    array[1].is_string();
+}
+
 inline Value const&
 Value::get_car() const
 {
diff --git a/src/parser.cpp b/src/parser.cpp
index 47bf095..15526c2 100644
--- a/src/parser.cpp
+++ b/src/parser.cpp
@@ -27,17 +27,17 @@
 namespace sexp {
 
 Value
-Parser::from_string(std::string const& str, bool use_arrays)
+Parser::from_string(std::string const& str, bool use_arrays, int depth)
 {
   std::istringstream is(str);
-  return from_stream(is);
+  return from_stream(is, use_arrays, depth);
 }
 
 Value
-Parser::from_stream(std::istream& stream, bool use_arrays)
+Parser::from_stream(std::istream& stream, bool use_arrays, int depth)
 {
   Lexer lexer(stream, use_arrays);
-  Parser parser(lexer);
+  Parser parser(lexer, depth);
   Value result = parser.read();
   if (parser.m_token != Lexer::TOKEN_EOF)
   {
@@ -47,23 +47,25 @@ Parser::from_stream(std::istream& stream, bool use_arrays)
 }
 
 std::vector<Value>
-Parser::from_string_many(std::string const& str, bool use_arrays)
+Parser::from_string_many(std::string const& str, bool use_arrays, int depth)
 {
   std::istringstream is(str);
-  return from_stream_many(is);
+  return from_stream_many(is, use_arrays, depth);
 }
 
 std::vector<Value>
-Parser::from_stream_many(std::istream& stream, bool use_arrays)
+Parser::from_stream_many(std::istream& stream, bool use_arrays, int depth)
 {
   Lexer lexer(stream, use_arrays);
-  Parser parser(lexer);
+  Parser parser(lexer, depth);
   return parser.read_many();
 }
 
-Parser::Parser(Lexer& lexer) :
+Parser::Parser(Lexer& lexer, int depth) :
   m_lexer(lexer),
-  m_token(m_lexer.get_next_token())
+  m_depth(depth),
+  m_token(m_lexer.get_next_token()),
+  m_current_depth(-1)
 {
 }
 
@@ -101,20 +103,42 @@ Parser::read()
   {
     case Lexer::TOKEN_OPEN_PAREN:
       m_token = m_lexer.get_next_token();
-      if(m_token == Lexer::TOKEN_CLOSE_PAREN)
+      if (m_token == Lexer::TOKEN_CLOSE_PAREN)
+        break;
+
+      ++m_current_depth;
+      // If a depth is specified, do not parse objects below it.
+      if (m_depth >= 0 && m_current_depth > m_depth)
       {
-        result = Value::nil();
+        while (m_token != Lexer::TOKEN_CLOSE_PAREN)
+        {
+          if (m_token == Lexer::TOKEN_EOF)
+          {
+            // Unterminated list: report it instead of polling the lexer forever.
+            parse_error("Unexpected EOF.");
+            break;
+          }
+          else if (m_token == Lexer::TOKEN_OPEN_PAREN || m_token == Lexer::TOKEN_ARRAY_START)
+            read(); // Recursively go through the tokens of child lists and arrays.
+          else
+            m_token = m_lexer.get_next_token();
+        }
       }
       else
       {
         result = Value::cons(read(), Value::nil());
         Value* cur = &result;
-        while(m_token != Lexer::TOKEN_CLOSE_PAREN)
+        while (m_token != Lexer::TOKEN_CLOSE_PAREN)
         {
           if (m_token == Lexer::TOKEN_DOT)
           {
             m_token = m_lexer.get_next_token();
-            cur->set_cdr(read());
+
+            Value val = read();
+            // Do not include values that would be nil for being below the specified depth.
+            if (m_depth < 0 || m_current_depth != m_depth || !val.is_nil())
+              cur->set_cdr(std::move(val));
+
             if (m_token != Lexer::TOKEN_CLOSE_PAREN)
             {
               parse_error("Expected ')'");
@@ -123,11 +147,17 @@ Parser::read()
           }
           else
           {
-            cur->set_cdr(Value::cons(read(), Value::nil()));
-            cur = &cur->get_cdr();
+            Value val = read();
+            // Do not include values that would be nil for being below the specified depth.
+            if (m_depth < 0 || m_current_depth != m_depth || !val.is_nil())
+            {
+              cur->set_cdr(Value::cons(std::move(val), Value::nil()));
+              cur = &cur->get_cdr();
+            }
           }
         }
       }
+      --m_current_depth;
       break;
 
     case Lexer::TOKEN_SYMBOL:
@@ -155,16 +185,65 @@ Parser::read()
       break;
 
     case Lexer::TOKEN_ARRAY_START:
+      m_token = m_lexer.get_next_token();
+      if (m_token == Lexer::TOKEN_CLOSE_PAREN)
+        break;
+
+      ++m_current_depth;
+      // If a depth is specified, do not parse arrays below it.
+      if (m_depth >= 0 && m_current_depth > m_depth)
+      {
+        while (m_token != Lexer::TOKEN_CLOSE_PAREN)
+        {
+          if (m_token == Lexer::TOKEN_EOF)
+          {
+            // Unterminated array: report it instead of polling the lexer forever.
+            parse_error("Unexpected EOF.");
+            break;
+          }
+          else if (m_token == Lexer::TOKEN_ARRAY_START || m_token == Lexer::TOKEN_OPEN_PAREN)
+          {
+            read(); // Recursively go through the tokens of child arrays and lists.
+          }
+          else
+          {
+            // If the array is just below the specified depth, but contains a translatable string, parse it.
+            if (m_current_depth - 1 == m_depth &&
+                m_token == Lexer::TOKEN_SYMBOL && m_lexer.get_string() == "_")
+            {
+              m_token = m_lexer.get_next_token();
+              if (m_token == Lexer::TOKEN_STRING)
+              {
+                const std::string str = m_lexer.get_string();
+                m_token = m_lexer.get_next_token();
+                if (m_token == Lexer::TOKEN_CLOSE_PAREN)
+                {
+                  result = Value::array({ Value::symbol("_"), Value::string(str) });
+                  break;
+                }
+              }
+            }
+            else
+            {
+              m_token = m_lexer.get_next_token();
+            }
+          }
+        }
+      }
+      else
       {
-        m_token = m_lexer.get_next_token();
         std::vector<Value> arr;
         do
         {
-          arr.emplace_back(read());
+          Value val = read();
+          // Do not include values that would be nil for being below the specified depth.
+          if (m_depth < 0 || m_current_depth != m_depth || !val.is_nil())
+            arr.push_back(std::move(val));
         }
-        while(m_token != Lexer::TOKEN_CLOSE_PAREN);
+        while (m_token != Lexer::TOKEN_CLOSE_PAREN);
         result = Value::array(std::move(arr));
       }
+      --m_current_depth;
       break;
 
     case Lexer::TOKEN_EOF:
diff --git a/tests/parser_test.cpp b/tests/parser_test.cpp
index 2c52504..49663fd 100644
--- a/tests/parser_test.cpp
+++ b/tests/parser_test.cpp
@@ -150,6 +150,23 @@ TEST(ParserTest, parse_symbol)
   }
 }
 
+TEST(ParserTest, parse_cons)
+{
+  char const* sx_str = "(1 \"foo\" (bar))";
+  auto sx = sexp::Parser::from_string(sx_str, false);
+  ASSERT_TRUE(sx.is_cons());
+  ASSERT_EQ(sexp::Value::Type::CONS, sx.get_type());
+  ASSERT_EQ(sx_str, sx.str());
+}
+
+TEST(ParserTest, parse_cons_depth_limit)
+{
+  auto sx = sexp::Parser::from_string("(1 \"foo\" (bar foo (foo 1) 2 (\"bar\" . 4)))", false, 1);
+  ASSERT_TRUE(sx.is_cons());
+  ASSERT_EQ(sexp::Value::Type::CONS, sx.get_type());
+  ASSERT_EQ("(1 \"foo\" (bar foo 2))", sx.str());
+}
+
 TEST(ParserTest, parse_array)
 {
   char const* sx_str = "#(1 \"foo\" #(bar))";
@@ -159,6 +176,21 @@ TEST(ParserTest, parse_array)
   ASSERT_EQ(sx_str, sx.str());
 }
 
+TEST(ParserTest, parse_array_depth_limit)
+{
+  auto sx = sexp::Parser::from_string("#(1 \"foo\" #(bar foo #(foo 1) 2 #(\"bar\" 4)))", false, 1);
+  ASSERT_TRUE(sx.is_array());
+  ASSERT_EQ(sexp::Value::Type::ARRAY, sx.get_type());
+  ASSERT_EQ("#(1 \"foo\" #(bar foo 2))", sx.str());
+}
+
+TEST(ParserTest, parse_mixed_depth_limit)
+{
+  auto sx = sexp::Parser::from_string("(1 (a (b #(c d) e) f) 2)", false, 1);
+  ASSERT_TRUE(sx.is_cons());
+  ASSERT_EQ("(1 (a f) 2)", sx.str());
+}
+
 // FIXME: Compare data structure or use simple strings?!
 // "(foo . bar)" as string is ambigous in the current parser as . can be handled as symbol, not pair
 TEST(ParserTest, simple_pair)