Skip to content

Commit 5efa4da

Browse files
committed
Move escape_scalar() to c4/yml/escape_scalar.hpp
Move `escape_scalar()` from `c4/yml/extra/scalar.hpp` to `c4/yml/escape_scalar.hpp` (and removed the original header)
1 parent 806b8d8 commit 5efa4da

17 files changed

+349
-226
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ c4_add_library(ryml
4747
c4/yml/emit.hpp
4848
c4/yml/error.hpp
4949
c4/yml/error.def.hpp
50+
c4/yml/escape_scalar.hpp
5051
c4/yml/event_handler_stack.hpp
5152
c4/yml/event_handler_tree.hpp
5253
c4/yml/filter_processor.hpp

changelog/current.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
- improve behavior of `Tree` methods accepting scalars: all standard buffer types are now accepted (ie, `str`, `bytes`, `bytearray` and `memoryview`).
5858
- [PR#565](https://github.com/biojppm/rapidyaml/pull/565) (fixes [#564](https://github.com/biojppm/rapidyaml/issues/564)) - `Tree` arena: allow relocation of zero-length strings when placed at the end (relax assertions triggered in `Tree::_relocated()`)
5959
- [PR#563](https://github.com/biojppm/rapidyaml/pull/563) (fixes [#562](https://github.com/biojppm/rapidyaml/issues/562)) - Fix bug in `NodeRef::cend()`
60+
- [PR#568](https://github.com/biojppm/rapidyaml/pull/568) - Move `escape_scalar()` from `c4/yml/extra/scalar.hpp` to `c4/yml/escape_scalar.hpp` (the original header was removed)
6061

6162

6263
### Fixes in YAML parsing

src/c4/yml/detail/dbgprint.hpp

Lines changed: 25 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,6 @@
11
#ifndef _C4_YML_DETAIL_DBGPRINT_HPP_
22
#define _C4_YML_DETAIL_DBGPRINT_HPP_
33

4-
#ifndef _C4_YML_COMMON_HPP_
5-
#include "../common.hpp"
6-
#endif
7-
8-
#ifdef RYML_DBG
9-
#include <cstdio>
10-
#endif
11-
124

135
//-----------------------------------------------------------------------------
146
// debug prints
@@ -23,29 +15,31 @@
2315
# define _c4presc(...)
2416
# define _c4prscalar(msg, scalar, keep_newlines)
2517
#else
26-
# define _c4dbgt(fmt, ...) do { if(_dbg_enabled()) { \
27-
this->_dbg ("{}:{}: " fmt , __FILE__, __LINE__, __VA_ARGS__); } } while(0)
18+
# define _c4dbgt(fmt, ...) do { \
19+
if(_dbg_enabled()) { \
20+
this->_dbg ("{}:{}: " fmt , __FILE__, __LINE__, __VA_ARGS__); \
21+
} \
22+
} while(0)
2823
# define _c4dbgpf(fmt, ...) _dbg_printf("{}:{}: " fmt "\n", __FILE__, __LINE__, __VA_ARGS__)
2924
# define _c4dbgpf_(fmt, ...) _dbg_printf("{}:{}: " fmt , __FILE__, __LINE__, __VA_ARGS__)
3025
# define _c4dbgp(msg) _dbg_printf("{}:{}: " msg "\n", __FILE__, __LINE__ )
3126
# define _c4dbgp_(msg) _dbg_printf("{}:{}: " msg , __FILE__, __LINE__ )
3227
# define _c4dbgq(msg) _dbg_printf(msg "\n")
33-
# define _c4presc(...) do { if(_dbg_enabled()) __c4presc(__VA_ARGS__); } while(0)
28+
# define _c4presc(...) __c4presc(__VA_ARGS__)
3429
# define _c4prscalar(msg, scalar, keep_newlines) \
3530
do { \
36-
_c4dbgpf_("{}: [{}]~~~", msg, scalar.len); \
3731
if(_dbg_enabled()) { \
38-
__c4presc((scalar), (keep_newlines)); \
32+
_c4dbgpf_("{}: [{}]~~~", msg, scalar.len); \
33+
__c4presc((scalar), (keep_newlines)); \
34+
_c4dbgq("~~~"); \
3935
} \
40-
_c4dbgq("~~~"); \
4136
} while(0)
42-
#endif // RYML_DBG
4337

4438

4539
//-----------------------------------------------------------------------------
40+
// implementation
4641

47-
#ifdef RYML_DBG
48-
42+
#include <cstdio>
4943

5044
#if defined(C4_MSVC) || defined(C4_MINGW)
5145
#include <malloc.h>
@@ -56,12 +50,19 @@
5650
#include <alloca.h>
5751
#endif
5852

53+
#ifndef _C4_YML_ESCAPE_SCALAR_HPP_
54+
#include "c4/yml/escape_scalar.hpp"
55+
#endif
56+
57+
#ifndef _C4_DUMP_HPP_
58+
#include "c4/dump.hpp"
59+
#endif
5960

60-
#include <c4/dump.hpp>
6161

6262
C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wattributes")
6363

6464
namespace c4 {
65+
namespace yml {
6566
inline bool& _dbg_enabled() { static bool enabled = true; return enabled; }
6667
inline C4_NO_INLINE void _dbg_set_enabled(bool yes) { _dbg_enabled() = yes; }
6768
inline C4_NO_INLINE void _dbg_dumper(csubstr s)
@@ -83,7 +84,7 @@ C4_NO_INLINE void _dbg_dump(DumpFn &&dumpfn, csubstr fmt, Args&& ...args)
8384
results = format_dump_resume(std::forward<DumpFn>(dumpfn), writebuf, fmt, std::forward<Args>(args)...);
8485
}
8586
// if any of the arguments failed to fit the buffer, allocate a
86-
// larger buffer (up to a limit) and resume writing.
87+
// larger buffer (with alloca(), up to a limit) and resume writing.
8788
//
8889
// results.bufsize is set to the size of the largest element
8990
// serialized. Eg int(1) will require 1 byte.
@@ -106,58 +107,15 @@ C4_NO_INLINE void _dbg_printf(csubstr fmt, Args const& ...args)
106107
}
107108
inline C4_NO_INLINE void __c4presc(csubstr s, bool keep_newlines=false)
108109
{
109-
if(!_dbg_enabled())
110-
return; // LCOV_EXCL_LINE
111-
_RYML_ASSERT_BASIC(s.str || !s.len);
112-
size_t prev = 0;
113-
for(size_t i = 0; i < s.len; ++i)
114-
{
115-
switch(s.str[i])
116-
{
117-
case '\n' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\n"); if(keep_newlines) { _dbg_dumper("\n"); } prev = i+1; break;
118-
case '\t' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\t"); prev = i+1; break;
119-
case '\0' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\0"); prev = i+1; break;
120-
case '\r' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\r"); prev = i+1; break;
121-
case '\f' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\f"); prev = i+1; break;
122-
case '\b' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\b"); prev = i+1; break;
123-
case '\v' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\v"); prev = i+1; break;
124-
case '\a' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\a"); prev = i+1; break;
125-
case '\x1b': _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\x1b"); prev = i+1; break;
126-
case _RYML_CHCONST(-0x3e, 0xc2):
127-
if(i+1 < s.len)
128-
{
129-
if(s.str[i+1] == _RYML_CHCONST(-0x60, 0xa0))
130-
{
131-
_dbg_dumper(s.range(prev, i)); _dbg_dumper("\\_"); prev = i+1;
132-
}
133-
else if(s.str[i+1] == _RYML_CHCONST(-0x7b,0x85))
134-
{
135-
_dbg_dumper(s.range(prev, i)); _dbg_dumper("\\N"); prev = i+1;
136-
}
137-
}
138-
break;
139-
case _RYML_CHCONST(-0x1e, 0xe2):
140-
if(i+2 < s.len && s.str[i+1] == _RYML_CHCONST(-0x80,0x80))
141-
{
142-
if(s.str[i+2] == _RYML_CHCONST(-0x58,0xa8))
143-
{
144-
_dbg_dumper(s.range(prev, i)); _dbg_dumper("\\L"); prev = i+1;
145-
}
146-
else if(s.str[i+2] == _RYML_CHCONST(-0x57,0xa9))
147-
{
148-
_dbg_dumper(s.range(prev, i)); _dbg_dumper("\\P"); prev = i+1;
149-
}
150-
}
151-
break;
152-
}
153-
}
154-
if(s.len > prev)
155-
_dbg_dumper(s.sub(prev));
110+
if(_dbg_enabled())
111+
escape_scalar_fn(std::ref(_dbg_dumper), s, keep_newlines);
156112
}
157113
inline C4_NO_INLINE void __c4presc(const char *s, size_t len, bool keep_newlines=false)
158114
{
159-
__c4presc(csubstr(s, len), keep_newlines);
115+
if(_dbg_enabled())
116+
escape_scalar_fn(std::ref(_dbg_dumper), csubstr(s, len), keep_newlines);
160117
}
118+
} // namespace yml
161119
} // namespace c4
162120

163121
C4_SUPPRESS_WARNING_GCC_POP

src/c4/yml/escape_scalar.hpp

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
#ifndef _C4_YML_ESCAPE_SCALAR_HPP_
2+
#define _C4_YML_ESCAPE_SCALAR_HPP_
3+
4+
#ifndef _C4_YML_COMMON_HPP_
5+
#include "c4/yml/common.hpp"
6+
#endif
7+
8+
namespace c4 {
9+
namespace yml {
10+
11+
12+
/** Iterate through a scalar and escape special characters in it. This
13+
* function takes a callback (which accepts a single parameter of
14+
* csubstr type) and, while processing, calls this callback as
15+
* appropriate, passing ranges of the scalar and/or escaped
16+
* characters.
17+
*
18+
* @param fn a sink function receiving a csubstr
19+
* @param scalar the scalar to be escaped
20+
* @param keep_newlines when true, each newline is emitted as the escaped `\\n` followed by an actual newline character, instead of just `\\n`
21+
*
22+
* Example usage:
23+
*
24+
* ```c++
25+
* // escape to a FILE stream (eg stdout)
26+
* void escape_scalar(FILE *file, csubstr scalar)
27+
* {
28+
* auto print_ = [file](csubstr repl){
29+
* fwrite(repl.str, 1, repl.len, file);
30+
* };
31+
* escape_scalar_fn(std::ref(print_), scalar);
32+
* }
33+
*
34+
* // escape to a different buffer and return the required buffer size
35+
* size_t escape_scalar(substr buffer, csubstr scalar)
36+
* {
37+
* C4_ASSERT(!buffer.overlaps(scalar));
38+
* size_t pos = 0;
39+
* auto _append = [&](csubstr repl){
40+
* if(repl.len && (pos + repl.len <= buffer.len))
41+
* memcpy(buffer.str + pos, repl.str, repl.len);
42+
* pos += repl.len;
43+
* };
44+
* escape_scalar_fn(std::ref(_append), scalar);
45+
* return pos;
46+
* }
47+
* ```
48+
*/
49+
template<class Fn>
50+
void escape_scalar_fn(Fn &&fn, csubstr scalar, bool keep_newlines=false)
51+
{
52+
size_t prev = 0; // the last position that was flushed
53+
size_t skip = 0; // how much to add to prev
54+
csubstr repl; // replacement string
55+
bool newl = false; // to add a newline
56+
// cast to u8 to avoid having to deal with negative
57+
// signed chars (which are present on some platforms)
58+
uint8_t const* C4_RESTRICT s = reinterpret_cast<uint8_t const*>(scalar.str);
59+
for(size_t i = 0; i < scalar.len; ++i)
60+
{
61+
switch(s[i])
62+
{
63+
case UINT8_C(0x0a): // \n
64+
repl = "\\n";
65+
skip = 1;
66+
if(keep_newlines)
67+
newl = true;
68+
goto flush_now;
69+
case UINT8_C(0x5c): // '\\'
70+
repl = "\\\\";
71+
skip = 1;
72+
goto flush_now;
73+
case UINT8_C(0x09): // \t
74+
repl = "\\t";
75+
skip = 1;
76+
goto flush_now;
77+
case UINT8_C(0x0d): // \r
78+
repl = "\\r";
79+
skip = 1;
80+
goto flush_now;
81+
case UINT8_C(0x00): // \0
82+
repl = "\\0";
83+
skip = 1;
84+
goto flush_now;
85+
case UINT8_C(0x0c): // \f (form feed)
86+
repl = "\\f";
87+
skip = 1;
88+
goto flush_now;
89+
case UINT8_C(0x08): // \b (backspace)
90+
repl = "\\b";
91+
skip = 1;
92+
goto flush_now;
93+
case UINT8_C(0x07): // \a (bell)
94+
repl = "\\a";
95+
skip = 1;
96+
goto flush_now;
97+
case UINT8_C(0x0b): // \v (vertical tab)
98+
repl = "\\v";
99+
skip = 1;
100+
goto flush_now;
101+
case UINT8_C(0x1b): // \e (escape)
102+
repl = "\\e";
103+
skip = 1;
104+
goto flush_now;
105+
case UINT8_C(0xc2): // AKA -0x3e
106+
if(i+1 < scalar.len)
107+
{
108+
if(s[i+1] == UINT8_C(0xa0)) // AKA -0x60
109+
{
110+
repl = "\\_";
111+
skip = 2;
112+
goto flush_now;
113+
}
114+
else if(s[i+1] == UINT8_C(0x85)) // AKA -0x7b
115+
{
116+
repl = "\\N";
117+
skip = 2;
118+
goto flush_now;
119+
}
120+
}
121+
continue;
122+
case UINT8_C(0xe2): // AKA -0x1e
123+
if(i+2 < scalar.len)
124+
{
125+
if(s[i+1] == UINT8_C(0x80)) // AKA -0x80
126+
{
127+
if(s[i+2] == UINT8_C(0xa8)) // AKA -0x58
128+
{
129+
repl = "\\L";
130+
skip = 3;
131+
goto flush_now;
132+
}
133+
else if(s[i+2] == UINT8_C(0xa9)) // AKA -0x57
134+
{
135+
repl = "\\P";
136+
skip = 3;
137+
goto flush_now;
138+
}
139+
}
140+
}
141+
continue;
142+
default:
143+
continue;
144+
}
145+
flush_now:
146+
std::forward<Fn>(fn)(scalar.range(prev, i));
147+
std::forward<Fn>(fn)(repl);
148+
if(newl)
149+
{
150+
std::forward<Fn>(fn)("\n");
151+
newl = false;
152+
}
153+
prev = i + skip;
154+
}
155+
// flush the rest
156+
if(scalar.len > prev)
157+
std::forward<Fn>(fn)(scalar.sub(prev));
158+
#undef c4fn
159+
}
160+
161+
162+
C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wattributes")
163+
/** Escape a scalar to an existing buffer, using @ref escape_scalar_fn
164+
*
165+
* @note This is a utility/debugging function, so it is provided in this
166+
* (optional) header. For this reason, we inline it to comply with the
167+
* One-Definition Rule. But then we set the noinline attribute to
168+
* ensure it is not inlined in calling code. */
169+
inline C4_NO_INLINE size_t escape_scalar(substr buffer, csubstr scalar, bool keep_newlines=false)
170+
{
171+
size_t pos = 0;
172+
auto _append = [&pos, &buffer](csubstr repl){
173+
if(repl.len && (pos + repl.len <= buffer.len))
174+
memcpy(buffer.str + pos, repl.str, repl.len);
175+
pos += repl.len;
176+
};
177+
escape_scalar_fn(std::ref(_append), scalar, keep_newlines);
178+
return pos;
179+
}
180+
C4_SUPPRESS_WARNING_GCC_POP
181+
182+
183+
/** formatting helper to escape a scalar with @ref escape_scalar() */
184+
struct escaped_scalar
185+
{
186+
escaped_scalar(csubstr s, bool keep_newl=false) : scalar(s), keep_newlines(keep_newl) {}
187+
csubstr scalar;
188+
bool keep_newlines;
189+
};
190+
191+
/** formatting implementation to escape a scalar with @ref escape_scalar() */
192+
inline size_t to_chars(substr buf, escaped_scalar e)
193+
{
194+
return escape_scalar(buf, e.scalar, e.keep_newlines);
195+
}
196+
197+
198+
} // namespace yml
199+
} // namespace c4
200+
201+
#endif /* _C4_YML_ESCAPE_SCALAR_HPP_ */

0 commit comments

Comments
 (0)