Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions include/sexp/parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,14 @@ class Parser
public:
enum { USE_ARRAYS = true };

static Value from_string(std::string const& str, bool use_arrays = false);
static Value from_stream(std::istream& stream, bool use_arrays = false);
static Value from_string(std::string const& str, bool use_arrays = false, int depth = -1);
static Value from_stream(std::istream& stream, bool use_arrays = false, int depth = -1);

static std::vector<Value> from_string_many(std::string const& str, bool use_arrays = false);
static std::vector<Value> from_stream_many(std::istream& stream, bool use_arrays = false);
static std::vector<Value> from_string_many(std::string const& str, bool use_arrays = false, int depth = -1);
static std::vector<Value> from_stream_many(std::istream& stream, bool use_arrays = false, int depth = -1);

public:
Parser(Lexer& lexer);
Parser(Lexer& lexer, int depth = -1);
~Parser();

private:
Expand All @@ -51,7 +51,10 @@ class Parser

private:
Lexer& m_lexer;
const int m_depth;

Lexer::TokenType m_token;
int m_current_depth;

private:
Parser(const Parser&);
Expand Down
14 changes: 14 additions & 0 deletions include/sexp/value.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ class Value
inline bool is_integer() const { return m_type == Type::INTEGER; }
inline bool is_real() const { return (m_type == Type::REAL || m_type == Type::INTEGER); }
inline bool is_string() const { return m_type == Type::STRING; }
inline bool is_translatable_string() const;
inline bool is_symbol() const { return m_type == Type::SYMBOL; }
inline bool is_cons() const { return m_type == Type::CONS; }
inline bool is_array() const { return m_type == Type::ARRAY; }
Expand Down Expand Up @@ -343,6 +344,19 @@ Value::operator==(Value const& rhs) const
}
}

inline bool
Value::is_translatable_string() const
{
if (m_type != Type::ARRAY)
return false;

const std::vector<Value>& array = *m_data.m_array;
return array.size() == 2 &&
array[0].is_symbol() &&
array[0].as_string() == "_" &&
array[1].is_string();
}

inline Value const&
Value::get_car() const
{
Expand Down
105 changes: 86 additions & 19 deletions src/parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,17 @@
namespace sexp {

Value
Parser::from_string(std::string const& str, bool use_arrays)
Parser::from_string(std::string const& str, bool use_arrays, int depth)
{
std::istringstream is(str);
return from_stream(is);
return from_stream(is, use_arrays, depth);
}

Value
Parser::from_stream(std::istream& stream, bool use_arrays)
Parser::from_stream(std::istream& stream, bool use_arrays, int depth)
{
Lexer lexer(stream, use_arrays);
Parser parser(lexer);
Parser parser(lexer, depth);
Value result = parser.read();
if (parser.m_token != Lexer::TOKEN_EOF)
{
Expand All @@ -47,23 +47,25 @@ Parser::from_stream(std::istream& stream, bool use_arrays)
}

std::vector<Value>
Parser::from_string_many(std::string const& str, bool use_arrays)
Parser::from_string_many(std::string const& str, bool use_arrays, int depth)
{
std::istringstream is(str);
return from_stream_many(is);
return from_stream_many(is, use_arrays, depth);
}

std::vector<Value>
Parser::from_stream_many(std::istream& stream, bool use_arrays)
Parser::from_stream_many(std::istream& stream, bool use_arrays, int depth)
{
Lexer lexer(stream, use_arrays);
Parser parser(lexer);
Parser parser(lexer, depth);
return parser.read_many();
}

Parser::Parser(Lexer& lexer) :
Parser::Parser(Lexer& lexer, int depth) :
m_lexer(lexer),
m_token(m_lexer.get_next_token())
m_depth(depth),
m_token(m_lexer.get_next_token()),
m_current_depth(-1)
{
}

Expand Down Expand Up @@ -101,20 +103,36 @@ Parser::read()
{
case Lexer::TOKEN_OPEN_PAREN:
m_token = m_lexer.get_next_token();
if(m_token == Lexer::TOKEN_CLOSE_PAREN)
if (m_token == Lexer::TOKEN_CLOSE_PAREN)
break;

++m_current_depth;
// If a depth is specified, do not parse objects below it.
if (m_depth >= 0 && m_current_depth > m_depth)
{
result = Value::nil();
while (m_token != Lexer::TOKEN_CLOSE_PAREN)
{
if (m_token == Lexer::TOKEN_OPEN_PAREN)
read(); // Recursively go through the tokens of child objects.
else
m_token = m_lexer.get_next_token();
}
}
else
{
result = Value::cons(read(), Value::nil());
Value* cur = &result;
while(m_token != Lexer::TOKEN_CLOSE_PAREN)
while (m_token != Lexer::TOKEN_CLOSE_PAREN)
{
if (m_token == Lexer::TOKEN_DOT)
{
m_token = m_lexer.get_next_token();
cur->set_cdr(read());

Value val = read();
// Do not include values that would be nil for being below the specified depth.
if (m_depth < 0 || m_current_depth != m_depth || !val.is_nil())
cur->set_cdr(std::move(val));

if (m_token != Lexer::TOKEN_CLOSE_PAREN)
{
parse_error("Expected ')'");
Expand All @@ -123,11 +141,17 @@ Parser::read()
}
else
{
cur->set_cdr(Value::cons(read(), Value::nil()));
cur = &cur->get_cdr();
Value val = read();
// Do not include values that would be nil for being below the specified depth.
if (m_depth < 0 || m_current_depth != m_depth || !val.is_nil())
{
cur->set_cdr(Value::cons(std::move(val), Value::nil()));
cur = &cur->get_cdr();
}
}
}
}
--m_current_depth;
break;

case Lexer::TOKEN_SYMBOL:
Expand Down Expand Up @@ -155,16 +179,59 @@ Parser::read()
break;

case Lexer::TOKEN_ARRAY_START:
m_token = m_lexer.get_next_token();
if (m_token == Lexer::TOKEN_CLOSE_PAREN)
break;

++m_current_depth;
// If a depth is specified, do not parse arrays below it.
if (m_depth >= 0 && m_current_depth > m_depth)
{
while (m_token != Lexer::TOKEN_CLOSE_PAREN)
{
if (m_token == Lexer::TOKEN_ARRAY_START)
{
read(); // Recursively go through the tokens of child arrays.
}
else
{
// If the array is just below the specified depth, but contains a translatable string, parse it.
if (m_current_depth - 1 == m_depth &&
m_token == Lexer::TOKEN_SYMBOL && m_lexer.get_string() == "_")
{
m_token = m_lexer.get_next_token();
if (m_token == Lexer::TOKEN_STRING)
{
const std::string str = m_lexer.get_string();
m_token = m_lexer.get_next_token();
if (m_token == Lexer::TOKEN_CLOSE_PAREN)
{
result = Value::array({ Value::symbol("_"), Value::string(str) });
break;
}
}
}
else
{
m_token = m_lexer.get_next_token();
}
}
}
}
else
{
m_token = m_lexer.get_next_token();
std::vector<Value> arr;
do
{
arr.emplace_back(read());
Value val = read();
// Do not include values that would be nil for being below the specified depth.
if (m_depth < 0 || m_current_depth != m_depth || !val.is_nil())
arr.push_back(std::move(val));
}
while(m_token != Lexer::TOKEN_CLOSE_PAREN);
while (m_token != Lexer::TOKEN_CLOSE_PAREN);
result = Value::array(std::move(arr));
}
--m_current_depth;
break;

case Lexer::TOKEN_EOF:
Expand Down
25 changes: 25 additions & 0 deletions tests/parser_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,23 @@ TEST(ParserTest, parse_symbol)
}
}

// A list parsed without a depth limit is a cons value and serializes back
// to exactly the text it was parsed from.
TEST(ParserTest, parse_cons)
{
  char const* input = "(1 \"foo\" (bar))";

  auto parsed = sexp::Parser::from_string(input, false);

  ASSERT_TRUE(parsed.is_cons());
  ASSERT_EQ(sexp::Value::Type::CONS, parsed.get_type());
  ASSERT_EQ(input, parsed.str());
}

// Parsing with depth = 1: the expected output keeps the top-level list and
// its direct child list, while the lists nested inside that child
// ("(foo 1)" and the dotted pair) are absent from the result.
TEST(ParserTest, parse_cons_depth_limit)
{
  char const* input = "(1 \"foo\" (bar foo (foo 1) 2 (\"bar\" . 4)))";

  auto parsed = sexp::Parser::from_string(input, false, 1);

  ASSERT_TRUE(parsed.is_cons());
  ASSERT_EQ(sexp::Value::Type::CONS, parsed.get_type());
  ASSERT_EQ("(1 \"foo\" (bar foo 2))", parsed.str());
}

TEST(ParserTest, parse_array)
{
char const* sx_str = "#(1 \"foo\" #(bar))";
Expand All @@ -159,6 +176,14 @@ TEST(ParserTest, parse_array)
ASSERT_EQ(sx_str, sx.str());
}

// Array counterpart of the depth-limit test: with depth = 1 the expected
// output keeps the outer array and its direct child array, while the arrays
// nested inside that child are absent from the result.
TEST(ParserTest, parse_array_depth_limit)
{
  char const* input = "#(1 \"foo\" #(bar foo #(foo 1) 2 #(\"bar\" 4)))";

  auto parsed = sexp::Parser::from_string(input, false, 1);

  ASSERT_TRUE(parsed.is_array());
  ASSERT_EQ(sexp::Value::Type::ARRAY, parsed.get_type());
  ASSERT_EQ("#(1 \"foo\" #(bar foo 2))", parsed.str());
}

// FIXME: Compare data structure or use simple strings?!
// "(foo . bar)" as a string is ambiguous in the current parser, as "." can be handled as a symbol rather than a pair
TEST(ParserTest, simple_pair)
Expand Down