Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ c4_add_library(ryml
c4/yml/emit.hpp
c4/yml/error.hpp
c4/yml/error.def.hpp
c4/yml/escape_scalar.hpp
c4/yml/event_handler_stack.hpp
c4/yml/event_handler_tree.hpp
c4/yml/filter_processor.hpp
Expand Down
1 change: 1 addition & 0 deletions changelog/current.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
- improve behavior of `Tree` methods accepting scalars: all standard buffer types are now accepted (ie, `str`, `bytes`, `bytearray` and `memoryview`).
- [PR#565](https://github.com/biojppm/rapidyaml/pull/565) (fixes [#564](https://github.com/biojppm/rapidyaml/issues/564)) - `Tree` arena: allow relocation of zero-length strings when placed at the end (relax assertions triggered in `Tree::_relocated()`)
- [PR#563](https://github.com/biojppm/rapidyaml/pull/563) (fixes [#562](https://github.com/biojppm/rapidyaml/issues/562)) - Fix bug in `NodeRef::cend()`
- [PR#568](https://github.com/biojppm/rapidyaml/pull/568) - Move `escape_scalar()` from `c4/yml/extra/scalar.hpp` to `c4/yml/escape_scalar.hpp` (and remove the original header)


### Fixes in YAML parsing
Expand Down
92 changes: 25 additions & 67 deletions src/c4/yml/detail/dbgprint.hpp
Original file line number Diff line number Diff line change
@@ -1,14 +1,6 @@
#ifndef _C4_YML_DETAIL_DBGPRINT_HPP_
#define _C4_YML_DETAIL_DBGPRINT_HPP_

#ifndef _C4_YML_COMMON_HPP_
#include "../common.hpp"
#endif

#ifdef RYML_DBG
#include <cstdio>
#endif


//-----------------------------------------------------------------------------
// debug prints
Expand All @@ -23,29 +15,31 @@
# define _c4presc(...)
# define _c4prscalar(msg, scalar, keep_newlines)
#else
# define _c4dbgt(fmt, ...) do { if(_dbg_enabled()) { \
this->_dbg ("{}:{}: " fmt , __FILE__, __LINE__, __VA_ARGS__); } } while(0)
# define _c4dbgt(fmt, ...) do { \
if(_dbg_enabled()) { \
this->_dbg ("{}:{}: " fmt , __FILE__, __LINE__, __VA_ARGS__); \
} \
} while(0)
# define _c4dbgpf(fmt, ...) _dbg_printf("{}:{}: " fmt "\n", __FILE__, __LINE__, __VA_ARGS__)
# define _c4dbgpf_(fmt, ...) _dbg_printf("{}:{}: " fmt , __FILE__, __LINE__, __VA_ARGS__)
# define _c4dbgp(msg) _dbg_printf("{}:{}: " msg "\n", __FILE__, __LINE__ )
# define _c4dbgp_(msg) _dbg_printf("{}:{}: " msg , __FILE__, __LINE__ )
# define _c4dbgq(msg) _dbg_printf(msg "\n")
# define _c4presc(...) do { if(_dbg_enabled()) __c4presc(__VA_ARGS__); } while(0)
# define _c4presc(...) __c4presc(__VA_ARGS__)
# define _c4prscalar(msg, scalar, keep_newlines) \
do { \
_c4dbgpf_("{}: [{}]~~~", msg, scalar.len); \
if(_dbg_enabled()) { \
__c4presc((scalar), (keep_newlines)); \
_c4dbgpf_("{}: [{}]~~~", msg, scalar.len); \
__c4presc((scalar), (keep_newlines)); \
_c4dbgq("~~~"); \
} \
_c4dbgq("~~~"); \
} while(0)
#endif // RYML_DBG


//-----------------------------------------------------------------------------
// implementation

#ifdef RYML_DBG

#include <cstdio>

#if defined(C4_MSVC) || defined(C4_MINGW)
#include <malloc.h>
Expand All @@ -56,12 +50,19 @@
#include <alloca.h>
#endif

#ifndef _C4_YML_ESCAPE_SCALAR_HPP_
#include "c4/yml/escape_scalar.hpp"
#endif

#ifndef _C4_DUMP_HPP_
#include "c4/dump.hpp"
#endif

#include <c4/dump.hpp>

C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wattributes")

namespace c4 {
namespace yml {
inline bool& _dbg_enabled() { static bool enabled = true; return enabled; }
inline C4_NO_INLINE void _dbg_set_enabled(bool yes) { _dbg_enabled() = yes; }
inline C4_NO_INLINE void _dbg_dumper(csubstr s)
Expand All @@ -83,7 +84,7 @@ C4_NO_INLINE void _dbg_dump(DumpFn &&dumpfn, csubstr fmt, Args&& ...args)
results = format_dump_resume(std::forward<DumpFn>(dumpfn), writebuf, fmt, std::forward<Args>(args)...);
}
// if any of the arguments failed to fit the buffer, allocate a
// larger buffer (up to a limit) and resume writing.
// larger buffer (with alloca(), up to a limit) and resume writing.
//
// results.bufsize is set to the size of the largest element
// serialized. Eg int(1) will require 1 byte.
Expand All @@ -106,58 +107,15 @@ C4_NO_INLINE void _dbg_printf(csubstr fmt, Args const& ...args)
}
inline C4_NO_INLINE void __c4presc(csubstr s, bool keep_newlines=false)
{
if(!_dbg_enabled())
return; // LCOV_EXCL_LINE
_RYML_ASSERT_BASIC(s.str || !s.len);
size_t prev = 0;
for(size_t i = 0; i < s.len; ++i)
{
switch(s.str[i])
{
case '\n' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\n"); if(keep_newlines) { _dbg_dumper("\n"); } prev = i+1; break;
case '\t' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\t"); prev = i+1; break;
case '\0' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\0"); prev = i+1; break;
case '\r' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\r"); prev = i+1; break;
case '\f' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\f"); prev = i+1; break;
case '\b' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\b"); prev = i+1; break;
case '\v' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\v"); prev = i+1; break;
case '\a' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\a"); prev = i+1; break;
case '\x1b': _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\x1b"); prev = i+1; break;
case _RYML_CHCONST(-0x3e, 0xc2):
if(i+1 < s.len)
{
if(s.str[i+1] == _RYML_CHCONST(-0x60, 0xa0))
{
_dbg_dumper(s.range(prev, i)); _dbg_dumper("\\_"); prev = i+1;
}
else if(s.str[i+1] == _RYML_CHCONST(-0x7b,0x85))
{
_dbg_dumper(s.range(prev, i)); _dbg_dumper("\\N"); prev = i+1;
}
}
break;
case _RYML_CHCONST(-0x1e, 0xe2):
if(i+2 < s.len && s.str[i+1] == _RYML_CHCONST(-0x80,0x80))
{
if(s.str[i+2] == _RYML_CHCONST(-0x58,0xa8))
{
_dbg_dumper(s.range(prev, i)); _dbg_dumper("\\L"); prev = i+1;
}
else if(s.str[i+2] == _RYML_CHCONST(-0x57,0xa9))
{
_dbg_dumper(s.range(prev, i)); _dbg_dumper("\\P"); prev = i+1;
}
}
break;
}
}
if(s.len > prev)
_dbg_dumper(s.sub(prev));
if(_dbg_enabled())
escape_scalar_fn(_dbg_dumper, s, keep_newlines);
}
inline C4_NO_INLINE void __c4presc(const char *s, size_t len, bool keep_newlines=false)
{
__c4presc(csubstr(s, len), keep_newlines);
if(_dbg_enabled())
escape_scalar_fn(_dbg_dumper, csubstr(s, len), keep_newlines);
}
} // namespace yml
} // namespace c4

C4_SUPPRESS_WARNING_GCC_POP
Expand Down
202 changes: 202 additions & 0 deletions src/c4/yml/escape_scalar.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
#ifndef _C4_YML_ESCAPE_SCALAR_HPP_
#define _C4_YML_ESCAPE_SCALAR_HPP_

#ifndef _C4_YML_COMMON_HPP_
#include "c4/yml/common.hpp"
#endif

namespace c4 {
namespace yml {


/** Iterate through a scalar and escape special characters in it. This
 * function takes a callback (which accepts a single parameter of
 * csubstr type) and, while processing, calls this callback as
 * appropriate, passing ranges of the scalar and/or escaped
 * characters.
 *
 * The following input sequences are replaced:
 *
 *   - `0x0a` -> `\n` (followed by a real newline when keep_newlines is true)
 *   - `0x5c` -> `\\`
 *   - `0x09` -> `\t`
 *   - `0x0d` -> `\r`
 *   - `0x00` -> `\0`
 *   - `0x0c` -> `\f`
 *   - `0x08` -> `\b`
 *   - `0x07` -> `\a`
 *   - `0x0b` -> `\v`
 *   - `0x1b` -> `\e`
 *   - `0xc2 0xa0` (UTF-8 for U+00A0, non-breaking space) -> `\_`
 *   - `0xc2 0x85` (UTF-8 for U+0085, next line) -> `\N`
 *   - `0xe2 0x80 0xa8` (UTF-8 for U+2028, line separator) -> `\L`
 *   - `0xe2 0x80 0xa9` (UTF-8 for U+2029, paragraph separator) -> `\P`
 *
 * Everything else is passed through unchanged, as sub-ranges of the
 * original scalar.
 *
 * @param fn a sink function receiving a csubstr. It is called several
 *        times, in output order, and is always invoked as an lvalue
 *        (it is never moved-from between calls). Note that it may be
 *        called with an empty range, eg when the scalar starts with
 *        an escaped character or when two escaped characters are
 *        adjacent.
 * @param scalar the scalar to be escaped
 * @param keep_newlines when true, `\n` will be escaped as `\\n\n` instead of just `\\n`
 *
 * Example usage:
 *
 * ```c++
 * // escape to a file
 * void escape_scalar(FILE *file, csubstr scalar)
 * {
 *     auto print_ = [file](csubstr repl){
 *         if(repl.len)
 *             fwrite(repl.str, 1, repl.len, file);
 *     };
 *     escape_scalar_fn(print_, scalar);
 * }
 *
 * // escape to a different buffer and return the required buffer size
 * size_t escape_scalar(substr buffer, csubstr scalar)
 * {
 *     C4_ASSERT(!buffer.overlaps(scalar));
 *     size_t pos = 0;
 *     auto _append = [&](csubstr repl){
 *         if(repl.len && (pos + repl.len <= buffer.len))
 *             memcpy(buffer.str + pos, repl.str, repl.len);
 *         pos += repl.len;
 *     };
 *     escape_scalar_fn(_append, scalar);
 *     return pos;
 * }
 * ```
 */
template<class Fn>
void escape_scalar_fn(Fn &&fn, csubstr scalar, bool keep_newlines=false)
{
    size_t prev = 0; // start of the range of the scalar not yet handed to fn
    csubstr repl;    // replacement string for the sequence starting at i
    // cast to u8 to avoid having to deal with negative
    // signed chars (which are present on some platforms)
    uint8_t const* C4_RESTRICT s = reinterpret_cast<uint8_t const*>(scalar.str); // NOLINT(*-reinterpret-cast)
    for(size_t i = 0; i < scalar.len; ++i)
    {
        // number of input bytes replaced by repl; stays 0 when
        // there is nothing to escape at position i
        size_t skip = 0;
        // whether to emit a real newline after the replacement
        bool newl = false;
        switch(s[i])
        {
        case UINT8_C(0x0a): // \n
            repl = "\\n";
            skip = 1;
            newl = keep_newlines;
            break;
        case UINT8_C(0x5c): // '\\'
            repl = "\\\\";
            skip = 1;
            break;
        case UINT8_C(0x09): // \t
            repl = "\\t";
            skip = 1;
            break;
        case UINT8_C(0x0d): // \r
            repl = "\\r";
            skip = 1;
            break;
        case UINT8_C(0x00): // \0
            repl = "\\0";
            skip = 1;
            break;
        case UINT8_C(0x0c): // \f (form feed)
            repl = "\\f";
            skip = 1;
            break;
        case UINT8_C(0x08): // \b (backspace)
            repl = "\\b";
            skip = 1;
            break;
        case UINT8_C(0x07): // \a (bell)
            repl = "\\a";
            skip = 1;
            break;
        case UINT8_C(0x0b): // \v (vertical tab)
            repl = "\\v";
            skip = 1;
            break;
        case UINT8_C(0x1b): // \e (escape)
            repl = "\\e";
            skip = 1;
            break;
        case UINT8_C(0xc2): // lead byte of a two-byte UTF-8 sequence
            if(i+1 < scalar.len)
            {
                if(s[i+1] == UINT8_C(0xa0)) // U+00A0, non-breaking space
                {
                    repl = "\\_";
                    skip = 2;
                }
                else if(s[i+1] == UINT8_C(0x85)) // U+0085, next line
                {
                    repl = "\\N";
                    skip = 2;
                }
            }
            break;
        case UINT8_C(0xe2): // lead byte of a three-byte UTF-8 sequence
            if(i+2 < scalar.len && s[i+1] == UINT8_C(0x80))
            {
                if(s[i+2] == UINT8_C(0xa8)) // U+2028, line separator
                {
                    repl = "\\L";
                    skip = 3;
                }
                else if(s[i+2] == UINT8_C(0xa9)) // U+2029, paragraph separator
                {
                    repl = "\\P";
                    skip = 3;
                }
            }
            break;
        default:
            break;
        }
        if(skip == 0)
            continue;
        // flush the pending unescaped range, then the replacement.
        // fn is deliberately called as an lvalue: it is invoked
        // several times, so it must not be forwarded as an rvalue.
        fn(scalar.range(prev, i));
        fn(repl);
        if(newl)
            fn("\n");
        // for multi-byte sequences the loop still visits the trailing
        // bytes, but none of them matches a case above, so no flush
        // can happen while i < prev
        prev = i + skip;
    }
    // flush the rest
    if(scalar.len > prev)
        fn(scalar.sub(prev));
}


C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wattributes")
/** Escape a scalar into an existing buffer, using @ref escape_scalar_fn
 *
 * Each piece produced by escape_scalar_fn() is copied to the buffer
 * only if it fits entirely; pieces that do not fit are not written,
 * but are still counted in the returned size.
 *
 * @param buffer the destination buffer
 * @param scalar the scalar to be escaped
 * @param keep_newlines forwarded to escape_scalar_fn()
 * @return the size of the complete escaped output. When this is
 *         larger than buffer.len, the buffer was too small and its
 *         contents are incomplete.
 *
 * @note This is a utility/debugging function, so it is provided in this
 * (optional) header. For this reason, it is declared inline to obey the
 * One-Definition Rule. But then the noinline attribute is set to
 * ensure it is not inlined in calling code. */
inline C4_NO_INLINE size_t escape_scalar(substr buffer, csubstr scalar, bool keep_newlines=false)
{
    size_t required = 0; // running size of the escaped output
    auto write_piece = [&required, &buffer](csubstr piece){
        const size_t end = required + piece.len;
        // copy only non-empty pieces that fit entirely in the buffer
        if(piece.len && (end <= buffer.len))
            memcpy(buffer.str + required, piece.str, piece.len);
        required = end;
    };
    escape_scalar_fn(write_piece, scalar, keep_newlines);
    return required;
}
C4_SUPPRESS_WARNING_GCC_POP


/** Formatting helper: bundles a scalar with the newline-handling option,
 * so that the scalar is escaped with @ref escape_scalar() when it is
 * serialized through to_chars(). */
struct escaped_scalar
{
    csubstr scalar;     ///< the scalar to be escaped
    bool keep_newlines; ///< passed as the keep_newlines argument of escape_scalar()

    escaped_scalar(csubstr s, bool keep_newl=false)
        : scalar(s)
        , keep_newlines(keep_newl)
    {
    }
};

/** Formatting implementation for @ref escaped_scalar: writes the escaped
 * scalar to the given buffer with @ref escape_scalar().
 *
 * @param buf the destination buffer
 * @param e the scalar and the newline option to use
 * @return the value returned by escape_scalar() */
inline size_t to_chars(substr buf, escaped_scalar e)
{
    const size_t num_chars = escape_scalar(buf, e.scalar, e.keep_newlines);
    return num_chars;
}


} // namespace yml
} // namespace c4

#endif /* _C4_YML_ESCAPE_SCALAR_HPP_ */
Loading
Loading