diff --git a/CMakeLists.txt b/CMakeLists.txt index 9d9dbd245..814b311ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,6 +47,7 @@ c4_add_library(ryml c4/yml/emit.hpp c4/yml/error.hpp c4/yml/error.def.hpp + c4/yml/escape_scalar.hpp c4/yml/event_handler_stack.hpp c4/yml/event_handler_tree.hpp c4/yml/filter_processor.hpp diff --git a/changelog/current.md b/changelog/current.md index 0a4ac4546..9281d198b 100644 --- a/changelog/current.md +++ b/changelog/current.md @@ -57,6 +57,7 @@ - improve behavior of `Tree` methods accepting scalars: all standard buffer types are now accepted (ie, `str`, `bytes`, `bytearray` and `memoryview`). - [PR#565](https://github.com/biojppm/rapidyaml/pull/565) (fixes [#564](https://github.com/biojppm/rapidyaml/issues/564)) - `Tree` arena: allow relocation of zero-length strings when placed at the end (relax assertions triggered in `Tree::_relocated()`) - [PR#563](https://github.com/biojppm/rapidyaml/pull/563) (fixes [#562](https://github.com/biojppm/rapidyaml/issues/562)) - Fix bug in `NodeRef::cend()` +- [PR#568](https://github.com/biojppm/rapidyaml/pull/568) - Move `escape_scalar()` from `c4/yml/extra/scalar.hpp` to `c4/yml/escape_scalar.hpp` (and removed the original header) ### Fixes in YAML parsing diff --git a/src/c4/yml/detail/dbgprint.hpp b/src/c4/yml/detail/dbgprint.hpp index 7c8956392..0a82e1f24 100644 --- a/src/c4/yml/detail/dbgprint.hpp +++ b/src/c4/yml/detail/dbgprint.hpp @@ -1,14 +1,6 @@ #ifndef _C4_YML_DETAIL_DBGPRINT_HPP_ #define _C4_YML_DETAIL_DBGPRINT_HPP_ -#ifndef _C4_YML_COMMON_HPP_ -#include "../common.hpp" -#endif - -#ifdef RYML_DBG -#include -#endif - //----------------------------------------------------------------------------- // debug prints @@ -23,29 +15,31 @@ # define _c4presc(...) # define _c4prscalar(msg, scalar, keep_newlines) #else -# define _c4dbgt(fmt, ...) do { if(_dbg_enabled()) { \ - this->_dbg ("{}:{}: " fmt , __FILE__, __LINE__, __VA_ARGS__); } } while(0) +# define _c4dbgt(fmt, ...) 
do { \ + if(_dbg_enabled()) { \ + this->_dbg ("{}:{}: " fmt , __FILE__, __LINE__, __VA_ARGS__); \ + } \ + } while(0) # define _c4dbgpf(fmt, ...) _dbg_printf("{}:{}: " fmt "\n", __FILE__, __LINE__, __VA_ARGS__) # define _c4dbgpf_(fmt, ...) _dbg_printf("{}:{}: " fmt , __FILE__, __LINE__, __VA_ARGS__) # define _c4dbgp(msg) _dbg_printf("{}:{}: " msg "\n", __FILE__, __LINE__ ) # define _c4dbgp_(msg) _dbg_printf("{}:{}: " msg , __FILE__, __LINE__ ) # define _c4dbgq(msg) _dbg_printf(msg "\n") -# define _c4presc(...) do { if(_dbg_enabled()) __c4presc(__VA_ARGS__); } while(0) +# define _c4presc(...) __c4presc(__VA_ARGS__) # define _c4prscalar(msg, scalar, keep_newlines) \ do { \ - _c4dbgpf_("{}: [{}]~~~", msg, scalar.len); \ if(_dbg_enabled()) { \ - __c4presc((scalar), (keep_newlines)); \ + _c4dbgpf_("{}: [{}]~~~", msg, scalar.len); \ + __c4presc((scalar), (keep_newlines)); \ + _c4dbgq("~~~"); \ } \ - _c4dbgq("~~~"); \ } while(0) -#endif // RYML_DBG //----------------------------------------------------------------------------- +// implementation -#ifdef RYML_DBG - +#include #if defined(C4_MSVC) || defined(C4_MINGW) #include @@ -56,12 +50,19 @@ #include #endif +#ifndef _C4_YML_ESCAPE_SCALAR_HPP_ +#include "c4/yml/escape_scalar.hpp" +#endif + +#ifndef _C4_DUMP_HPP_ +#include "c4/dump.hpp" +#endif -#include C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wattributes") namespace c4 { +namespace yml { inline bool& _dbg_enabled() { static bool enabled = true; return enabled; } inline C4_NO_INLINE void _dbg_set_enabled(bool yes) { _dbg_enabled() = yes; } inline C4_NO_INLINE void _dbg_dumper(csubstr s) @@ -83,7 +84,7 @@ C4_NO_INLINE void _dbg_dump(DumpFn &&dumpfn, csubstr fmt, Args&& ...args) results = format_dump_resume(std::forward(dumpfn), writebuf, fmt, std::forward(args)...); } // if any of the arguments failed to fit the buffer, allocate a - // larger buffer (up to a limit) and resume writing. + // larger buffer (with alloca(), up to a limit) and resume writing. 
// // results.bufsize is set to the size of the largest element // serialized. Eg int(1) will require 1 byte. @@ -106,58 +107,15 @@ C4_NO_INLINE void _dbg_printf(csubstr fmt, Args const& ...args) } inline C4_NO_INLINE void __c4presc(csubstr s, bool keep_newlines=false) { - if(!_dbg_enabled()) - return; // LCOV_EXCL_LINE - _RYML_ASSERT_BASIC(s.str || !s.len); - size_t prev = 0; - for(size_t i = 0; i < s.len; ++i) - { - switch(s.str[i]) - { - case '\n' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\n"); if(keep_newlines) { _dbg_dumper("\n"); } prev = i+1; break; - case '\t' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\t"); prev = i+1; break; - case '\0' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\0"); prev = i+1; break; - case '\r' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\r"); prev = i+1; break; - case '\f' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\f"); prev = i+1; break; - case '\b' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\b"); prev = i+1; break; - case '\v' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\v"); prev = i+1; break; - case '\a' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\a"); prev = i+1; break; - case '\x1b': _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\x1b"); prev = i+1; break; - case _RYML_CHCONST(-0x3e, 0xc2): - if(i+1 < s.len) - { - if(s.str[i+1] == _RYML_CHCONST(-0x60, 0xa0)) - { - _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\_"); prev = i+1; - } - else if(s.str[i+1] == _RYML_CHCONST(-0x7b,0x85)) - { - _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\N"); prev = i+1; - } - } - break; - case _RYML_CHCONST(-0x1e, 0xe2): - if(i+2 < s.len && s.str[i+1] == _RYML_CHCONST(-0x80,0x80)) - { - if(s.str[i+2] == _RYML_CHCONST(-0x58,0xa8)) - { - _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\L"); prev = i+1; - } - else if(s.str[i+2] == _RYML_CHCONST(-0x57,0xa9)) - { - _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\P"); prev = i+1; - } - } - break; - } - } - if(s.len > prev) - _dbg_dumper(s.sub(prev)); + if(_dbg_enabled()) + 
escape_scalar_fn(_dbg_dumper, s, keep_newlines); } inline C4_NO_INLINE void __c4presc(const char *s, size_t len, bool keep_newlines=false) { - __c4presc(csubstr(s, len), keep_newlines); + if(_dbg_enabled()) + escape_scalar_fn(_dbg_dumper, csubstr(s, len), keep_newlines); } +} // namespace yml } // namespace c4 C4_SUPPRESS_WARNING_GCC_POP diff --git a/src/c4/yml/escape_scalar.hpp b/src/c4/yml/escape_scalar.hpp new file mode 100644 index 000000000..32817be6d --- /dev/null +++ b/src/c4/yml/escape_scalar.hpp @@ -0,0 +1,202 @@ +#ifndef _C4_YML_ESCAPE_SCALAR_HPP_ +#define _C4_YML_ESCAPE_SCALAR_HPP_ + +#ifndef _C4_YML_COMMON_HPP_ +#include "c4/yml/common.hpp" +#endif + +namespace c4 { +namespace yml { + + +/** Iterate through a scalar and escape special characters in it. This + * function takes a callback (which accepts a single parameter of + * csubstr type) and, while processing, calls this callback as + * appropriate, passing ranges of the scalar and/or escaped + * characters. + * + * @param fn a sink function receiving a csubstr + * @param scalar the scalar to be escaped + * @param keep_newlines when true, `\n` will be escaped as `\\n\n` instead of just `\\n` + * + * Example usage: + * + * ```c++ + * // escape to a file (eg stdout) + * void escape_scalar(FILE *file, csubstr scalar) + * { + * auto print_ = [file](csubstr repl){ + * fwrite(repl.str, 1, repl.len, file); + * }; + * escape_scalar_fn(std::ref(print_), scalar); + * } + * + * // escape to a different buffer and return the required buffer size + * size_t escape_scalar(substr buffer, csubstr scalar) + * { + * C4_ASSERT(!buffer.overlaps(scalar)); + * size_t pos = 0; + * auto _append = [&](csubstr repl){ + * if(repl.len && (pos + repl.len <= buffer.len)) + * memcpy(buffer.str + pos, repl.str, repl.len); + * pos += repl.len; + * }; + * escape_scalar_fn(std::ref(_append), scalar); + * return pos; + * } + * ``` + */ +template +void escape_scalar_fn(Fn &&fn, csubstr scalar, bool keep_newlines=false) +{ + size_t prev = 0; // the last
position that was flushed + size_t skip = 0; // how much to add to prev + csubstr repl; // replacement string + bool newl = false; // to add a newline + // cast to u8 to avoid having to deal with negative + // signed chars (which are present on some platforms) + uint8_t const* C4_RESTRICT s = reinterpret_cast(scalar.str); // NOLINT(*-reinterpret-cast) + // NOLINTBEGIN(*-goto) + for(size_t i = 0; i < scalar.len; ++i) + { + switch(s[i]) + { + case UINT8_C(0x0a): // \n + repl = "\\n"; + skip = 1; + if(keep_newlines) + newl = true; + goto flush_now; + case UINT8_C(0x5c): // '\\' + repl = "\\\\"; + skip = 1; + goto flush_now; + case UINT8_C(0x09): // \t + repl = "\\t"; + skip = 1; + goto flush_now; + case UINT8_C(0x0d): // \r + repl = "\\r"; + skip = 1; + goto flush_now; + case UINT8_C(0x00): // \0 + repl = "\\0"; + skip = 1; + goto flush_now; + case UINT8_C(0x0c): // \f (form feed) + repl = "\\f"; + skip = 1; + goto flush_now; + case UINT8_C(0x08): // \b (backspace) + repl = "\\b"; + skip = 1; + goto flush_now; + case UINT8_C(0x07): // \a (bell) + repl = "\\a"; + skip = 1; + goto flush_now; + case UINT8_C(0x0b): // \v (vertical tab) + repl = "\\v"; + skip = 1; + goto flush_now; + case UINT8_C(0x1b): // \e (escape) + repl = "\\e"; + skip = 1; + goto flush_now; + case UINT8_C(0xc2): // AKA -0x3e + if(i+1 < scalar.len) + { + if(s[i+1] == UINT8_C(0xa0)) // AKA -0x60 + { + repl = "\\_"; + skip = 2; + goto flush_now; + } + else if(s[i+1] == UINT8_C(0x85)) // AKA -0x7b + { + repl = "\\N"; + skip = 2; + goto flush_now; + } + } + continue; + case UINT8_C(0xe2): // AKA -0x1e + if(i+2 < scalar.len) + { + if(s[i+1] == UINT8_C(0x80)) // AKA -0x80 + { + if(s[i+2] == UINT8_C(0xa8)) // AKA -0x58 + { + repl = "\\L"; + skip = 3; + goto flush_now; + } + else if(s[i+2] == UINT8_C(0xa9)) // AKA -0x57 + { + repl = "\\P"; + skip = 3; + goto flush_now; + } + } + } + continue; + default: + continue; + } + flush_now: + std::forward(fn)(scalar.range(prev, i)); + std::forward(fn)(repl); + if(newl) +
{ + std::forward(fn)("\n"); + newl = false; + } + prev = i + skip; + } + // flush the rest + if(scalar.len > prev) + std::forward(fn)(scalar.sub(prev)); + // NOLINTEND(*-goto) +} + + +C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wattributes") +/** Escape a scalar to an existing buffer, using @ref escape_scalar_fn + * + * @note This is a utility/debugging function, so it is provided in this + * (optional) header. For this reason, we inline it to obey the + * One-Definition Rule. But then we set the noinline attribute to + * ensure it is not inlined in calling code. */ +inline C4_NO_INLINE size_t escape_scalar(substr buffer, csubstr scalar, bool keep_newlines=false) +{ + size_t pos = 0; + auto _append = [&pos, &buffer](csubstr repl){ + if(repl.len && (pos + repl.len <= buffer.len)) + memcpy(buffer.str + pos, repl.str, repl.len); + pos += repl.len; + }; + escape_scalar_fn(_append, scalar, keep_newlines); + return pos; +} +C4_SUPPRESS_WARNING_GCC_POP + + +/** formatting helper to escape a scalar with @ref escape_scalar() */ +struct escaped_scalar +{ + escaped_scalar(csubstr s, bool keep_newl=false) : scalar(s), keep_newlines(keep_newl) {} + csubstr scalar; + bool keep_newlines; +}; + +/** formatting implementation to escape a scalar with @ref escape_scalar() */ +inline size_t to_chars(substr buf, escaped_scalar e) +{ + return escape_scalar(buf, e.scalar, e.keep_newlines); +} + + +} // namespace yml +} // namespace c4 + +#endif /* _C4_YML_ESCAPE_SCALAR_HPP_ */ diff --git a/src/c4/yml/parse_engine.def.hpp b/src/c4/yml/parse_engine.def.hpp index cab45ec66..ab08e4fc1 100644 --- a/src/c4/yml/parse_engine.def.hpp +++ b/src/c4/yml/parse_engine.def.hpp @@ -468,13 +468,13 @@ C4_NO_INLINE void ParseEngine::_fmt_msg(DumpFn &&dumpfn) const size_t offs = 3u + to_chars(substr{}, st->pos.line) + to_chars(substr{}, st->pos.col); if(m_file.len) { - c4::_dbg_dump(std::forward(dumpfn), "{}:", m_file); + _dbg_dump(std::forward(dumpfn), "{}:", m_file); offs += m_file.len + 1; } -
c4::_dbg_dump(std::forward(dumpfn), "{}:{}: ", st->pos.line, st->pos.col); + _dbg_dump(std::forward(dumpfn), "{}:{}: ", st->pos.line, st->pos.col); csubstr maybe_full_content = (contents.len < 80u ? contents : contents.first(80u)); csubstr maybe_ellipsis = (contents.len < 80u ? csubstr{} : csubstr("...")); - c4::_dbg_dump(std::forward(dumpfn), "{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len); + _dbg_dump(std::forward(dumpfn), "{}{} (size={})\n", maybe_full_content, maybe_ellipsis, contents.len); // highlight the remaining portion of the previous line size_t firstcol = (size_t)(lc.rem.begin() - lc.full.begin()); size_t lastcol = firstcol + lc.rem.len; @@ -483,7 +483,7 @@ C4_NO_INLINE void ParseEngine::_fmt_msg(DumpFn &&dumpfn) const std::forward(dumpfn)("^"); for(size_t i = 1, e = (lc.rem.len < 80u ? lc.rem.len : 80u); i < e; ++i) std::forward(dumpfn)("~"); - c4::_dbg_dump(std::forward(dumpfn), "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1); + _dbg_dump(std::forward(dumpfn), "{} (cols {}-{})\n", maybe_ellipsis, firstcol+1, lastcol+1); } else { @@ -492,7 +492,7 @@ C4_NO_INLINE void ParseEngine::_fmt_msg(DumpFn &&dumpfn) const // next line: print the state flags { char flagbuf_[128]; - c4::_dbg_dump(std::forward(dumpfn), "top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags)); + _dbg_dump(std::forward(dumpfn), "top state: {}\n", detail::_parser_flags_to_str(flagbuf_, m_evt_handler->m_curr->flags)); } } #endif @@ -525,9 +525,9 @@ void ParseEngine::_dbg(csubstr fmt, Args const& ...args) const { if(_dbg_enabled()) { - c4::_dbg_printf(fmt, args...); - c4::_dbg_dumper("\n"); - _fmt_msg(c4::_dbg_dumper); + _dbg_printf(fmt, args...); + _dbg_dumper("\n"); + _fmt_msg(_dbg_dumper); } } #endif diff --git a/src_extra/c4/yml/extra/event_handler_ints.hpp b/src_extra/c4/yml/extra/event_handler_ints.hpp index e37d5bab3..833a089ef 100644 --- a/src_extra/c4/yml/extra/event_handler_ints.hpp +++ 
b/src_extra/c4/yml/extra/event_handler_ints.hpp @@ -379,11 +379,11 @@ i : 12 | 13 14 * ``` * * The result of @ref estimate_events_ints_size() (click to see more - * info) must be an overprediction: it overpredicts for every single case - * among the many hundreds of cases covered in the unit tests. This is - * deliberate, and aims at ensuring that a retry parse is not needed. But - * conceivably, it may underpredict in some instances not found in the out - * tests. What to do then? + * info) must be an overprediction: it overpredicts for every single + * case among the many hundreds covered in the unit tests. This is + * deliberate, and aims at ensuring that a retry parse is not + * needed. But conceivably, it may underpredict in some instances not + * found in our tests. What to do then? * * First, [open an issue](https://github.com/biojppm/rapidyaml/issues) to * allow the estimation to be improved! Second, there are two ways to diff --git a/src_extra/c4/yml/extra/event_handler_testsuite.cpp b/src_extra/c4/yml/extra/event_handler_testsuite.cpp index ad31897c0..d39d3ecf7 100644 --- a/src_extra/c4/yml/extra/event_handler_testsuite.cpp +++ b/src_extra/c4/yml/extra/event_handler_testsuite.cpp @@ -14,8 +14,8 @@ #ifndef _C4_YML_EXTRA_EVENT_HANDLER_TESTSUITE_HPP_ #include "c4/yml/extra/event_handler_testsuite.hpp" #endif -#ifndef _C4_YML_EXTRA_SCALAR_HPP_ -#include "c4/yml/extra/scalar.hpp" +#ifndef _C4_YML_ESCAPE_SCALAR_HPP_ +#include "c4/yml/escape_scalar.hpp" #endif #ifndef _C4_YML_EXTRA_STRING_HPP_ #include "c4/yml/extra/string.hpp" diff --git a/src_extra/c4/yml/extra/ints_to_testsuite.cpp b/src_extra/c4/yml/extra/ints_to_testsuite.cpp index 7d27f9981..c304c164a 100644 --- a/src_extra/c4/yml/extra/ints_to_testsuite.cpp +++ b/src_extra/c4/yml/extra/ints_to_testsuite.cpp @@ -8,8 +8,8 @@ #endif #endif -#ifndef _C4_YML_EXTRA_SCALAR_HPP_ -#include "c4/yml/extra/scalar.hpp" +#ifndef _C4_YML_ESCAPE_SCALAR_HPP_ +#include "c4/yml/escape_scalar.hpp" #endif #ifndef
_C4_YML_EXTRA_INTS_UTILS_HPP_ diff --git a/src_extra/c4/yml/extra/ints_utils.cpp b/src_extra/c4/yml/extra/ints_utils.cpp index f2e4911ad..1b655e011 100644 --- a/src_extra/c4/yml/extra/ints_utils.cpp +++ b/src_extra/c4/yml/extra/ints_utils.cpp @@ -8,10 +8,6 @@ #endif #endif -#ifndef _C4_YML_EXTRA_SCALAR_HPP_ -#include "c4/yml/extra/scalar.hpp" -#endif - #ifndef _C4_YML_EXTRA_INTS_UTILS_HPP_ #include "c4/yml/extra/ints_utils.hpp" #endif diff --git a/src_extra/c4/yml/extra/scalar.cpp b/src_extra/c4/yml/extra/scalar.cpp deleted file mode 100644 index 67ed1116d..000000000 --- a/src_extra/c4/yml/extra/scalar.cpp +++ /dev/null @@ -1,95 +0,0 @@ -#ifdef RYML_SINGLE_HEADER_INTS - #ifndef _RYML_SINGLE_HEADER_AMALGAMATED_HPP_ - #include - #endif -#elif defined(RYML_SINGLE_HEADER) - #ifndef _RYML_SINGLE_HEADER_AMALGAMATED_HPP_ - #include - #endif -#endif - -#ifndef _C4_YML_EXTRA_SCALAR_HPP_ -#include -#endif - - -namespace c4 { -namespace yml { -namespace extra { - -size_t escape_scalar(substr buffer, csubstr val) -{ - size_t pos = 0; - #define _append(repl) \ - do { \ - if(repl.len && (pos + repl.len <= buffer.len)) \ - memcpy(buffer.str + pos, repl.str, repl.len); \ - pos += repl.len; \ - } while(0) - #define _c4flush_use_instead(i, repl, skip) \ - do { \ - _append(val.range(prev, i)); \ - _append(csubstr(repl)); \ - prev = i + skip; \ - } \ - while(0) - uint8_t const* C4_RESTRICT s = reinterpret_cast(val.str); - size_t prev = 0; - for(size_t i = 0; i < val.len; ++i) - { - switch(s[i]) - { - case UINT8_C(0x0a): // \n - _c4flush_use_instead(i, "\\n", 1); break; - case UINT8_C(0x5c): // '\\' - _c4flush_use_instead(i, "\\\\", 1); break; - case UINT8_C(0x09): // \t - _c4flush_use_instead(i, "\\t", 1); break; - case UINT8_C(0x0d): // \r - _c4flush_use_instead(i, "\\r", 1); break; - case UINT8_C(0x00): // \0 - _c4flush_use_instead(i, "\\0", 1); break; - case UINT8_C(0x0c): // \f (form feed) - _c4flush_use_instead(i, "\\f", 1); break; - case UINT8_C(0x08): // \b (backspace) - 
_c4flush_use_instead(i, "\\b", 1); break; - case UINT8_C(0x07): // \a (bell) - _c4flush_use_instead(i, "\\a", 1); break; - case UINT8_C(0x0b): // \v (vertical tab) - _c4flush_use_instead(i, "\\v", 1); break; - case UINT8_C(0x1b): // \e (escape) - _c4flush_use_instead(i, "\\e", 1); break; - case UINT8_C(0xc2): - if(i+1 < val.len) - { - const uint8_t np1 = s[i+1]; - if(np1 == UINT8_C(0xa0)) - _c4flush_use_instead(i, "\\_", 2); - else if(np1 == UINT8_C(0x85)) - _c4flush_use_instead(i, "\\N", 2); - } - break; - case UINT8_C(0xe2): - if(i+2 < val.len) - { - if(s[i+1] == UINT8_C(0x80)) - { - if(s[i+2] == UINT8_C(0xa8)) - _c4flush_use_instead(i, "\\L", 3); - else if(s[i+2] == UINT8_C(0xa9)) - _c4flush_use_instead(i, "\\P", 3); - } - } - break; - } - } - // flush the rest - _append(val.sub(prev)); - #undef _c4flush_use_instead - #undef _append - return pos; -} - -} // namespace extra -} // namespace yml -} // namespace c4 diff --git a/src_extra/c4/yml/extra/scalar.hpp b/src_extra/c4/yml/extra/scalar.hpp deleted file mode 100644 index 22082f5e0..000000000 --- a/src_extra/c4/yml/extra/scalar.hpp +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef _C4_YML_EXTRA_SCALAR_HPP_ -#define _C4_YML_EXTRA_SCALAR_HPP_ - -#ifndef _C4_SUBSTR_HPP_ -#include "c4/substr.hpp" -#endif - -namespace c4 { -namespace yml { -namespace extra { - -/** @addtogroup doc_event_handlers - * @{ */ - -size_t escape_scalar(substr s, csubstr val); - -/** @} */ - -} // namespace extra -} // namespace yml -} // namespace c4 - -#endif /* _C4_YML_EVT_EXTRA_SCALAR_HPP_ */ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 94c2e8fa0..65e204bdb 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -40,8 +40,6 @@ c4_add_library(ryml-_testlib LIBRARY_TYPE STATIC ../src_extra/c4/yml/extra/ints_utils.hpp ../src_extra/c4/yml/extra/ints_to_testsuite.cpp ../src_extra/c4/yml/extra/ints_to_testsuite.hpp - ../src_extra/c4/yml/extra/scalar.hpp - ../src_extra/c4/yml/extra/scalar.cpp ../src_extra/c4/yml/extra/string.hpp 
INC_DIRS ${CMAKE_CURRENT_LIST_DIR} ${CMAKE_CURRENT_LIST_DIR}/../src_extra LIBS ryml c4fs gtest @@ -110,6 +108,7 @@ ryml_add_test(preprocess) ryml_add_test(merge) ryml_add_test(location) ryml_add_test(bom) +ryml_add_test(escape_scalar) ryml_add_test_case_group(empty_file) ryml_add_test_case_group(doc) ryml_add_test_case_group(seq) @@ -258,8 +257,6 @@ if(RYML_TEST_SUITE) ../src_extra/c4/yml/extra/ints_utils.hpp ../src_extra/c4/yml/extra/ints_to_testsuite.cpp ../src_extra/c4/yml/extra/ints_to_testsuite.hpp - ../src_extra/c4/yml/extra/scalar.cpp - ../src_extra/c4/yml/extra/scalar.hpp ../src_extra/c4/yml/extra/string.hpp testsuite.cpp testsuite/testsuite_common.hpp @@ -379,12 +376,8 @@ if(RYML_TEST_FUZZ) ryml_add_fuzz_test(events_testsuite ../src_extra/c4/yml/extra/event_handler_testsuite.hpp ../src_extra/c4/yml/extra/event_handler_testsuite.cpp - ../src_extra/c4/yml/extra/scalar.hpp - ../src_extra/c4/yml/extra/scalar.cpp ../src_extra/c4/yml/extra/string.hpp) ryml_add_fuzz_test(events_ints ../src_extra/c4/yml/extra/event_handler_ints.hpp - ../src_extra/c4/yml/extra/event_handler_ints.cpp - ../src_extra/c4/yml/extra/scalar.hpp - ../src_extra/c4/yml/extra/scalar.cpp) + ../src_extra/c4/yml/extra/event_handler_ints.cpp) endif() diff --git a/test/test_escape_scalar.cpp b/test/test_escape_scalar.cpp new file mode 100644 index 000000000..4bffe4293 --- /dev/null +++ b/test/test_escape_scalar.cpp @@ -0,0 +1,95 @@ +#ifndef RYML_SINGLE_HEADER +#include +#endif +#include + +#include "./test_lib/test_case.hpp" + +namespace c4 { +namespace yml { + +struct EscapeScalarCase +{ + const char* file; + int line; + csubstr scalar; + csubstr escaped; +}; + +class EscapeScalarTest : public testing::TestWithParam {}; + + +TEST_P(EscapeScalarTest, escape_scalar) +{ + EscapeScalarCase const& ec = GetParam(); + printf("%s:%d: %s\n", ec.file, ec.line, ec.escaped.str); + RYML_TRACE_FMT("defined in:\n{}:{}: {}", ec.file, ec.line, ec.escaped); + std::string buf_; + // empty + substr buf = 
to_substr(buf_); + EXPECT_EQ(0, buf.len); + EXPECT_EQ(ec.escaped.len, escape_scalar(buf, ec.scalar)); + EXPECT_EQ(ec.escaped.len, to_chars(buf, escaped_scalar(ec.scalar))); + // insufficient + buf_.resize(ec.escaped.len / 2u); + buf = to_substr(buf_); + buf.fill('\0'); + EXPECT_EQ(ec.escaped.len, escape_scalar(buf, ec.scalar)); + buf.fill('\0'); + EXPECT_EQ(ec.escaped.len, to_chars(buf, escaped_scalar(ec.scalar))); + // enough size + buf_.resize(ec.escaped.len * 2u); + buf = to_substr(buf_); + buf.fill('\0'); + ASSERT_EQ(ec.escaped.len, escape_scalar(buf, ec.scalar)); + EXPECT_EQ(ec.escaped, buf.first(ec.escaped.len)); + buf.fill('\0'); + ASSERT_EQ(ec.escaped.len, to_chars(buf, escaped_scalar(ec.scalar))); + EXPECT_EQ(ec.escaped, buf.first(ec.escaped.len)); +} + +#define _ec(scalar, escaped) EscapeScalarCase{__FILE__, __LINE__, csubstr(scalar), csubstr(escaped)} +const EscapeScalarCase escape_cases[] = { + _ec("", ""), + _ec(" ", " "), + _ec("a", "a"), + _ec("\n", "\\n"), + _ec("\\", "\\\\"), + _ec("\t", "\\t"), + _ec("\r", "\\r"), + _ec("\0", "\\0"), + _ec("\f", "\\f"), + _ec("\b", "\\b"), + _ec("\a", "\\a"), + _ec("\v", "\\v"), + _ec("\x1b", "\\e"), + _ec("\xc2\xa0", "\\_"), + _ec("\xc2\x85", "\\N"), + _ec("\xc2\x86", "\xc2\x86"), + _ec("\xe2\x80\xa8", "\\L"), + _ec("\xe2\x80\xa9", "\\P"), + _ec("\xe2\x80\xa0", "\xe2\x80\xa0"), + _ec(" \t\r\n\0\f\a\v\x1b\xc2\x85\xc2\xa0\xe2\x80\xa8\xe2\x80\xa9 \b", + "\\t\\t\\r\\n\\0\\f\\a\\v\\e\\N\\_\\L\\P \\b"), +}; + + +INSTANTIATE_TEST_SUITE_P(EscapeScalar, EscapeScalarTest, testing::ValuesIn(escape_cases)); + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +// The other test executables are written to contain the declarative-style +// YmlTestCases. 
This executable does not have any but the build setup +// assumes it does, and links with the test lib, which requires an existing +// get_case() function. So this is here to act as placeholder until (if?) +// proper test cases are added here. This was detected in #47 (thanks +// @cburgard). +Case const* get_case(csubstr) +{ + return nullptr; +} + +} // namespace yml +} // namespace c4 diff --git a/test/test_extra_testsuite.cpp b/test/test_extra_testsuite.cpp index 9d0393901..f9d33e3b0 100644 --- a/test/test_extra_testsuite.cpp +++ b/test/test_extra_testsuite.cpp @@ -18,7 +18,7 @@ struct EventsCase int line; // previously, the strings below were of type std::string, but // valgrind was complaining of a problem during initialization of - // the parameterized test cases. Probably some SIOF? + // the parameterized test cases. Probably some SIOF from gtest code? // // So we use csubstr: csubstr name; @@ -61,9 +61,9 @@ TEST_P(EventsTest, from_parser) csubstr result = sink; _c4dbgpf("~~~\n{}~~~\n", result); // use the diff from std::string which is nice - std::string exp_copy(ec.expected_events_from_parser.str, ec.expected_events_from_parser.len); - std::string result_copy(result.str, result.len); - EXPECT_EQ(result_copy, exp_copy); + const std::string expected(ec.expected_events_from_parser.str, ec.expected_events_from_parser.len); + const std::string actual(result.str, result.len); + EXPECT_EQ(expected, actual); } TEST_P(EventsTest, from_tree) @@ -73,8 +73,9 @@ TEST_P(EventsTest, from_tree) RYML_TRACE_FMT("defined in:\n{}:{}: {}", ec.file, ec.line, ec.name); const Tree tree = parse_in_arena(to_csubstr(ec.src)); _c4dbg_tree("parsed tree", tree); - std::string exp_copy(ec.expected_events_from_tree.str, ec.expected_events_from_tree.len); - EXPECT_EQ(emit_events_from_tree(tree), exp_copy); + const std::string expected(ec.expected_events_from_tree.str, ec.expected_events_from_tree.len); + const std::string actual = emit_events_from_tree(tree); + EXPECT_EQ(expected, actual); 
} diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 89172eeb5..7fb2d7077 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -14,8 +14,6 @@ c4_add_executable(ryml-yaml-events ../src_extra/c4/yml/extra/ints_utils.cpp ../src_extra/c4/yml/extra/ints_to_testsuite.hpp ../src_extra/c4/yml/extra/ints_to_testsuite.cpp - ../src_extra/c4/yml/extra/scalar.hpp - ../src_extra/c4/yml/extra/scalar.cpp ../src_extra/c4/yml/extra/string.hpp ../test/testsuite/testsuite_events.hpp ../test/testsuite/testsuite_events_emitter.cpp diff --git a/tools/amalgamate.py b/tools/amalgamate.py index ec55f489b..de55dde67 100644 --- a/tools/amalgamate.py +++ b/tools/amalgamate.py @@ -127,6 +127,7 @@ def has_evt(*which): "src/c4/yml/common.hpp", "src/c4/yml/error.hpp", "src/c4/yml/error.def.hpp", + "src/c4/yml/escape_scalar.hpp", "src/c4/yml/node_type.hpp", "src/c4/yml/tag.hpp", am.onlyif(has_evt(Event.tree), "src/c4/yml/tree.hpp"), @@ -146,7 +147,6 @@ def has_evt(*which): am.onlyif(has_evt(Event.ints_to_testsuite), "src_extra/c4/yml/extra/ints_to_testsuite.hpp"), am.onlyif(has_evt(Event.testsuite), "src_extra/c4/yml/extra/string.hpp"), am.onlyif(has_evt(Event.testsuite), "src_extra/c4/yml/extra/event_handler_testsuite.hpp"), - am.onlyif(has_evt(Event.ints_utils, Event.testsuite), "src_extra/c4/yml/extra/scalar.hpp"), "src/c4/yml/parse_engine.hpp", "src/c4/yml/preprocess.hpp", am.onlyif(has_evt(Event.tree), "src/c4/yml/reference_resolver.hpp"), @@ -162,7 +162,6 @@ def has_evt(*which): "src/c4/yml/parse_engine.def.hpp", am.onlyif(has_evt(Event.tree), "src/c4/yml/tree.cpp"), am.onlyif(has_evt(Event.ints), "src_extra/c4/yml/extra/event_handler_ints.cpp"), - am.onlyif(has_evt(Event.ints_utils, Event.testsuite), "src_extra/c4/yml/extra/scalar.cpp"), am.onlyif(has_evt(Event.ints_utils), "src_extra/c4/yml/extra/ints_utils.cpp"), am.onlyif(has_evt(Event.ints_to_testsuite), "src_extra/c4/yml/extra/ints_to_testsuite.cpp"), am.onlyif(has_evt(Event.tree), 
"src/c4/yml/reference_resolver.cpp"), diff --git a/tools/yaml_events.cpp b/tools/yaml_events.cpp index 100c35298..c5e72a787 100644 --- a/tools/yaml_events.cpp +++ b/tools/yaml_events.cpp @@ -7,9 +7,9 @@ #include #include #include +#include #endif #include -#include #include #include #include