Skip to content

Commit f89fc87

Browse files
committed
Move escape_scalar() to c4/yml/escape_scalar.hpp
Move `escape_scalar()` from `c4/yml/extra/scalar.hpp` to `c4/yml/escape_scalar.hpp` (and removed the original header)
1 parent 806b8d8 commit f89fc87

17 files changed

+352
-226
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ c4_add_library(ryml
4747
c4/yml/emit.hpp
4848
c4/yml/error.hpp
4949
c4/yml/error.def.hpp
50+
c4/yml/escape_scalar.hpp
5051
c4/yml/event_handler_stack.hpp
5152
c4/yml/event_handler_tree.hpp
5253
c4/yml/filter_processor.hpp

changelog/current.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
- improve behavior of `Tree` methods accepting scalars: all standard buffer types are now accepted (ie, `str`, `bytes`, `bytearray` and `memoryview`).
5858
- [PR#565](https://github.com/biojppm/rapidyaml/pull/565) (fixes [#564](https://github.com/biojppm/rapidyaml/issues/564)) - `Tree` arena: allow relocation of zero-length strings when placed at the end (relax assertions triggered in `Tree::_relocated()`)
5959
- [PR#563](https://github.com/biojppm/rapidyaml/pull/563) (fixes [#562](https://github.com/biojppm/rapidyaml/issues/562)) - Fix bug in `NodeRef::cend()`
60+
- [PR#568](https://github.com/biojppm/rapidyaml/pull/568) - Move `escape_scalar()` from `c4/yml/extra/scalar.hpp` to `c4/yml/escape_scalar.hpp` (and remove the original header)
6061

6162

6263
### Fixes in YAML parsing

src/c4/yml/detail/dbgprint.hpp

Lines changed: 25 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,6 @@
11
#ifndef _C4_YML_DETAIL_DBGPRINT_HPP_
22
#define _C4_YML_DETAIL_DBGPRINT_HPP_
33

4-
#ifndef _C4_YML_COMMON_HPP_
5-
#include "../common.hpp"
6-
#endif
7-
8-
#ifdef RYML_DBG
9-
#include <cstdio>
10-
#endif
11-
124

135
//-----------------------------------------------------------------------------
146
// debug prints
@@ -23,29 +15,31 @@
2315
# define _c4presc(...)
2416
# define _c4prscalar(msg, scalar, keep_newlines)
2517
#else
26-
# define _c4dbgt(fmt, ...) do { if(_dbg_enabled()) { \
27-
this->_dbg ("{}:{}: " fmt , __FILE__, __LINE__, __VA_ARGS__); } } while(0)
18+
# define _c4dbgt(fmt, ...) do { \
19+
if(_dbg_enabled()) { \
20+
this->_dbg ("{}:{}: " fmt , __FILE__, __LINE__, __VA_ARGS__); \
21+
} \
22+
} while(0)
2823
# define _c4dbgpf(fmt, ...) _dbg_printf("{}:{}: " fmt "\n", __FILE__, __LINE__, __VA_ARGS__)
2924
# define _c4dbgpf_(fmt, ...) _dbg_printf("{}:{}: " fmt , __FILE__, __LINE__, __VA_ARGS__)
3025
# define _c4dbgp(msg) _dbg_printf("{}:{}: " msg "\n", __FILE__, __LINE__ )
3126
# define _c4dbgp_(msg) _dbg_printf("{}:{}: " msg , __FILE__, __LINE__ )
3227
# define _c4dbgq(msg) _dbg_printf(msg "\n")
33-
# define _c4presc(...) do { if(_dbg_enabled()) __c4presc(__VA_ARGS__); } while(0)
28+
# define _c4presc(...) __c4presc(__VA_ARGS__)
3429
# define _c4prscalar(msg, scalar, keep_newlines) \
3530
do { \
36-
_c4dbgpf_("{}: [{}]~~~", msg, scalar.len); \
3731
if(_dbg_enabled()) { \
38-
__c4presc((scalar), (keep_newlines)); \
32+
_c4dbgpf_("{}: [{}]~~~", msg, scalar.len); \
33+
__c4presc((scalar), (keep_newlines)); \
34+
_c4dbgq("~~~"); \
3935
} \
40-
_c4dbgq("~~~"); \
4136
} while(0)
42-
#endif // RYML_DBG
4337

4438

4539
//-----------------------------------------------------------------------------
40+
// implementation
4641

47-
#ifdef RYML_DBG
48-
42+
#include <cstdio>
4943

5044
#if defined(C4_MSVC) || defined(C4_MINGW)
5145
#include <malloc.h>
@@ -56,12 +50,19 @@
5650
#include <alloca.h>
5751
#endif
5852

53+
#ifndef _C4_YML_ESCAPE_SCALAR_HPP_
54+
#include "c4/yml/escape_scalar.hpp"
55+
#endif
56+
57+
#ifndef _C4_DUMP_HPP_
58+
#include "c4/dump.hpp"
59+
#endif
5960

60-
#include <c4/dump.hpp>
6161

6262
C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wattributes")
6363

6464
namespace c4 {
65+
namespace yml {
6566
inline bool& _dbg_enabled() { static bool enabled = true; return enabled; }
6667
inline C4_NO_INLINE void _dbg_set_enabled(bool yes) { _dbg_enabled() = yes; }
6768
inline C4_NO_INLINE void _dbg_dumper(csubstr s)
@@ -83,7 +84,7 @@ C4_NO_INLINE void _dbg_dump(DumpFn &&dumpfn, csubstr fmt, Args&& ...args)
8384
results = format_dump_resume(std::forward<DumpFn>(dumpfn), writebuf, fmt, std::forward<Args>(args)...);
8485
}
8586
// if any of the arguments failed to fit the buffer, allocate a
86-
// larger buffer (up to a limit) and resume writing.
87+
// larger buffer (with alloca(), up to a limit) and resume writing.
8788
//
8889
// results.bufsize is set to the size of the largest element
8990
// serialized. Eg int(1) will require 1 byte.
@@ -106,58 +107,15 @@ C4_NO_INLINE void _dbg_printf(csubstr fmt, Args const& ...args)
106107
}
107108
inline C4_NO_INLINE void __c4presc(csubstr s, bool keep_newlines=false)
108109
{
109-
if(!_dbg_enabled())
110-
return; // LCOV_EXCL_LINE
111-
_RYML_ASSERT_BASIC(s.str || !s.len);
112-
size_t prev = 0;
113-
for(size_t i = 0; i < s.len; ++i)
114-
{
115-
switch(s.str[i])
116-
{
117-
case '\n' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\n"); if(keep_newlines) { _dbg_dumper("\n"); } prev = i+1; break;
118-
case '\t' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\t"); prev = i+1; break;
119-
case '\0' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\0"); prev = i+1; break;
120-
case '\r' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\r"); prev = i+1; break;
121-
case '\f' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\f"); prev = i+1; break;
122-
case '\b' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\b"); prev = i+1; break;
123-
case '\v' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\v"); prev = i+1; break;
124-
case '\a' : _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\a"); prev = i+1; break;
125-
case '\x1b': _dbg_dumper(s.range(prev, i)); _dbg_dumper("\\x1b"); prev = i+1; break;
126-
case _RYML_CHCONST(-0x3e, 0xc2):
127-
if(i+1 < s.len)
128-
{
129-
if(s.str[i+1] == _RYML_CHCONST(-0x60, 0xa0))
130-
{
131-
_dbg_dumper(s.range(prev, i)); _dbg_dumper("\\_"); prev = i+1;
132-
}
133-
else if(s.str[i+1] == _RYML_CHCONST(-0x7b,0x85))
134-
{
135-
_dbg_dumper(s.range(prev, i)); _dbg_dumper("\\N"); prev = i+1;
136-
}
137-
}
138-
break;
139-
case _RYML_CHCONST(-0x1e, 0xe2):
140-
if(i+2 < s.len && s.str[i+1] == _RYML_CHCONST(-0x80,0x80))
141-
{
142-
if(s.str[i+2] == _RYML_CHCONST(-0x58,0xa8))
143-
{
144-
_dbg_dumper(s.range(prev, i)); _dbg_dumper("\\L"); prev = i+1;
145-
}
146-
else if(s.str[i+2] == _RYML_CHCONST(-0x57,0xa9))
147-
{
148-
_dbg_dumper(s.range(prev, i)); _dbg_dumper("\\P"); prev = i+1;
149-
}
150-
}
151-
break;
152-
}
153-
}
154-
if(s.len > prev)
155-
_dbg_dumper(s.sub(prev));
110+
if(_dbg_enabled())
111+
escape_scalar_fn(_dbg_dumper, s, keep_newlines);
156112
}
157113
inline C4_NO_INLINE void __c4presc(const char *s, size_t len, bool keep_newlines=false)
158114
{
159-
__c4presc(csubstr(s, len), keep_newlines);
115+
if(_dbg_enabled())
116+
escape_scalar_fn(_dbg_dumper, csubstr(s, len), keep_newlines);
160117
}
118+
} // namespace yml
161119
} // namespace c4
162120

163121
C4_SUPPRESS_WARNING_GCC_POP

src/c4/yml/escape_scalar.hpp

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
#ifndef _C4_YML_ESCAPE_SCALAR_HPP_
2+
#define _C4_YML_ESCAPE_SCALAR_HPP_
3+
4+
#ifndef _C4_YML_COMMON_HPP_
5+
#include "c4/yml/common.hpp"
6+
#endif
7+
8+
namespace c4 {
9+
namespace yml {
10+
11+
12+
/** Iterate through a scalar and escape special characters in it. This
13+
* function takes a callback (which accepts a single parameter of
14+
* csubstr type) and, while processing, calls this callback as
15+
* appropriate, passing ranges of the scalar and/or escaped
16+
* characters.
17+
*
18+
* @param fn a sink function receiving a csubstr
19+
* @param scalar the scalar to be escaped
20+
* @param keep_newlines when true, `\n` will be escaped as `\\n\n` instead of just `\\n`
21+
*
22+
* Example usage:
23+
*
24+
* ```c++
25+
* // escape to a file (eg, stdout)
26+
* void escape_scalar(FILE *file, csubstr scalar)
27+
* {
28+
* auto print_ = [file](csubstr repl){
29+
* fwrite(repl.str, 1, repl.len, file);
30+
* };
31+
* escape_scalar_fn(std::ref(print_), scalar);
32+
* }
33+
*
34+
* // escape to a different buffer and return the required buffer size
35+
* size_t escape_scalar(substr buffer, csubstr scalar)
36+
* {
37+
* C4_ASSERT(!buffer.overlaps(scalar));
38+
* size_t pos = 0;
39+
* auto _append = [&](csubstr repl){
40+
* if(repl.len && (pos + repl.len <= buffer.len))
41+
* memcpy(buffer.str + pos, repl.str, repl.len);
42+
* pos += repl.len;
43+
* };
44+
* escape_scalar_fn(std::ref(_append), scalar);
45+
* return pos;
46+
* }
47+
* ```
48+
*/
49+
template<class Fn>
50+
void escape_scalar_fn(Fn &&fn, csubstr scalar, bool keep_newlines=false)
51+
{
52+
size_t prev = 0; // the last position that was flushed
53+
size_t skip = 0; // how much to add to prev
54+
csubstr repl; // replacement string
55+
bool newl = false; // to add a newline
56+
// cast to u8 to avoid having to deal with negative
57+
// signed chars (which are present on some platforms)
58+
uint8_t const* C4_RESTRICT s = reinterpret_cast<uint8_t const*>(scalar.str); // NOLINT(*-reinterpret-cast)
59+
// NOLINTBEGIN(*-goto)
60+
for(size_t i = 0; i < scalar.len; ++i)
61+
{
62+
switch(s[i])
63+
{
64+
case UINT8_C(0x0a): // \n
65+
repl = "\\n";
66+
skip = 1;
67+
if(keep_newlines)
68+
newl = true;
69+
goto flush_now;
70+
case UINT8_C(0x5c): // '\\'
71+
repl = "\\\\";
72+
skip = 1;
73+
goto flush_now;
74+
case UINT8_C(0x09): // \t
75+
repl = "\\t";
76+
skip = 1;
77+
goto flush_now;
78+
case UINT8_C(0x0d): // \r
79+
repl = "\\r";
80+
skip = 1;
81+
goto flush_now;
82+
case UINT8_C(0x00): // \0
83+
repl = "\\0";
84+
skip = 1;
85+
goto flush_now;
86+
case UINT8_C(0x0c): // \f (form feed)
87+
repl = "\\f";
88+
skip = 1;
89+
goto flush_now;
90+
case UINT8_C(0x08): // \b (backspace)
91+
repl = "\\b";
92+
skip = 1;
93+
goto flush_now;
94+
case UINT8_C(0x07): // \a (bell)
95+
repl = "\\a";
96+
skip = 1;
97+
goto flush_now;
98+
case UINT8_C(0x0b): // \v (vertical tab)
99+
repl = "\\v";
100+
skip = 1;
101+
goto flush_now;
102+
case UINT8_C(0x1b): // \e (escape)
103+
repl = "\\e";
104+
skip = 1;
105+
goto flush_now;
106+
case UINT8_C(0xc2): // AKA -0x3e
107+
if(i+1 < scalar.len)
108+
{
109+
if(s[i+1] == UINT8_C(0xa0)) // AKA -0x60
110+
{
111+
repl = "\\_";
112+
skip = 2;
113+
goto flush_now;
114+
}
115+
else if(s[i+1] == UINT8_C(0x85)) // AKA -0x7b
116+
{
117+
repl = "\\N";
118+
skip = 2;
119+
goto flush_now;
120+
}
121+
}
122+
continue;
123+
case UINT8_C(0xe2): // AKA -0x1e
124+
if(i+2 < scalar.len)
125+
{
126+
if(s[i+1] == UINT8_C(0x80)) // AKA -0x80
127+
{
128+
if(s[i+2] == UINT8_C(0xa8)) // AKA -0x58
129+
{
130+
repl = "\\L";
131+
skip = 3;
132+
goto flush_now;
133+
}
134+
else if(s[i+2] == UINT8_C(0xa9)) // AKA -0x57
135+
{
136+
repl = "\\P";
137+
skip = 3;
138+
goto flush_now;
139+
}
140+
}
141+
}
142+
continue;
143+
default:
144+
continue;
145+
}
146+
flush_now:
147+
std::forward<Fn>(fn)(scalar.range(prev, i));
148+
std::forward<Fn>(fn)(repl);
149+
if(newl)
150+
{
151+
std::forward<Fn>(fn)("\n");
152+
newl = false;
153+
}
154+
prev = i + skip;
155+
}
156+
// flush the rest
157+
if(scalar.len > prev)
158+
std::forward<Fn>(fn)(scalar.sub(prev));
159+
// NOLINTEND(*-goto)
160+
}
161+
162+
163+
C4_SUPPRESS_WARNING_GCC_WITH_PUSH("-Wattributes")
164+
/** Escape a scalar to an existing buffer, using @ref escape_scalar_fn
165+
*
166+
* @note This is a utility/debugging function, so it is provided in this
167+
* (optional) header. For this reason, we inline it to obey the
168+
* One-Definition Rule. But then we set the noinline attribute to
169+
* ensure it is not inlined in calling code. */
170+
inline C4_NO_INLINE size_t escape_scalar(substr buffer, csubstr scalar, bool keep_newlines=false)
171+
{
172+
size_t pos = 0;
173+
auto _append = [&pos, &buffer](csubstr repl){
174+
if(repl.len && (pos + repl.len <= buffer.len))
175+
memcpy(buffer.str + pos, repl.str, repl.len);
176+
pos += repl.len;
177+
};
178+
escape_scalar_fn(_append, scalar, keep_newlines);
179+
return pos;
180+
}
181+
C4_SUPPRESS_WARNING_GCC_POP
182+
183+
184+
/** formatting helper to escape a scalar with @ref escape_scalar() */
185+
struct escaped_scalar
186+
{
187+
escaped_scalar(csubstr s, bool keep_newl=false) : scalar(s), keep_newlines(keep_newl) {}
188+
csubstr scalar;
189+
bool keep_newlines;
190+
};
191+
192+
/** formatting implementation to escape a scalar with @ref escape_scalar() */
193+
inline size_t to_chars(substr buf, escaped_scalar e)
194+
{
195+
return escape_scalar(buf, e.scalar, e.keep_newlines);
196+
}
197+
198+
199+
} // namespace yml
200+
} // namespace c4
201+
202+
#endif /* _C4_YML_ESCAPE_SCALAR_HPP_ */

0 commit comments

Comments
 (0)