Skip to content

Commit 2f72ff8

Browse files
authored
Merge pull request #917 from RcppCore/bugfix/string-embedded-nul
use Rf_mkCharLenCE() as appropriate
2 parents a669a19 + 29980a7 commit 2f72ff8

File tree

6 files changed

+39
-4
lines changed

6 files changed

+39
-4
lines changed

ChangeLog

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
2018-10-25 Kevin Ushey <[email protected]>
2+
3+
* inst/include/Rcpp/String.h: Use Rf_mkCharLenCE() as appropriate
4+
* inst/unitTests/cpp/String.cpp: Add unit tests
5+
* inst/unitTests/runit.String.R: Add unit tests
6+
17
2018-10-12 Dirk Eddelbuettel <[email protected]>
28

39
* DESCRIPTION (Date, Version): Roll minor version

inst/NEWS.Rd

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
\item The constructor for \code{NumericMatrix(not_init(n,k))} was
1313
corrected (Romain in \ghpr{904}, Dirk in \ghpr{905}, and also
1414
Romain in \ghpr{908} fixing \ghpr{907}).
15+
\item Rcpp::String no longer silently drops embedded NUL bytes
16+
in strings. Instead, the new Rcpp exception \code{embedded_nul_in_string}
17+
is thrown. (\ghit{916})
1518
}
1619
\item Changes in Rcpp Deployment:
1720
\itemize{

inst/include/Rcpp/String.h

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -364,9 +364,22 @@ namespace Rcpp {
364364
}
365365

366366

367+
// Build an R character object from `buffer`, tagged with the encoding `enc`.
//
// Default path: throws embedded_nul_in_string when `buffer` contains a NUL
// byte; otherwise the string is created with an explicit length through
// Rf_mkCharLenCE().
// When __H5Cpp_H is defined: no NUL check is made and `buffer` is handed to
// Rf_mkCharCE() as a plain C string.
inline SEXP get_sexp_impl() const {
368+
369+
// workaround for h5 package (currently deprecated so updates
370+
// to CRAN may not be timely)
371+
#ifdef __H5Cpp_H
372+
return Rf_mkCharCE(buffer.c_str(), enc);
373+
#else
374+
// reject embedded NUL bytes up front instead of passing them on to R
if (buffer.find('\0') != std::string::npos)
375+
throw embedded_nul_in_string();
376+
// length is given explicitly rather than inferred from NUL termination
return Rf_mkCharLenCE(buffer.c_str(), buffer.size(), enc);
377+
#endif
378+
}
379+
367380
// Const accessor: returns `data` when `valid` is set, otherwise creates a
// new SEXP from `buffer`. In this hunk the removed line called Rf_mkCharCE()
// directly; the added line delegates to get_sexp_impl() instead.
inline SEXP get_sexp() const {
368381
RCPP_STRING_DEBUG_1("String::get_sexp const (valid = %d) ", valid);
369-
return valid ? data : Rf_mkCharCE(buffer.c_str(), enc);
382+
return valid ? data : get_sexp_impl();
370383
}
371384

372385
inline SEXP get_sexp() {
@@ -395,9 +408,11 @@ namespace Rcpp {
395408
enc = encoding;
396409

397410
if (valid) {
398-
data = Rcpp_ReplaceObject(data, Rf_mkCharCE(Rf_translateCharUTF8(data), encoding));
411+
// TODO: may longjmp on failure to translate?
412+
const char* translated = Rf_translateCharUTF8(data);
413+
data = Rcpp_ReplaceObject(data, Rf_mkCharCE(translated, encoding));
399414
} else {
400-
data = Rf_mkCharCE(buffer.c_str(), encoding);
415+
data = get_sexp_impl();
401416
Rcpp_PreserveObject(data);
402417
valid = true;
403418
}
@@ -469,7 +484,7 @@ namespace Rcpp {
469484
inline void setData() {
470485
RCPP_STRING_DEBUG("setData");
471486
if (!valid) {
472-
data = Rf_mkCharCE(buffer.c_str(), enc);
487+
data = get_sexp_impl();
473488
Rcpp_PreserveObject(data);
474489
valid = true;
475490
}

inst/include/Rcpp/exceptions.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ namespace Rcpp {
208208
RCPP_SIMPLE_EXCEPTION_CLASS(no_such_field, "No such field.") // not used internally
209209
RCPP_SIMPLE_EXCEPTION_CLASS(no_such_function, "No such function.")
210210
RCPP_SIMPLE_EXCEPTION_CLASS(unevaluated_promise, "Promise not yet evaluated.")
211+
RCPP_SIMPLE_EXCEPTION_CLASS(embedded_nul_in_string, "Embedded NUL in string.")
211212

212213
// Promoted
213214
RCPP_EXCEPTION_CLASS(no_such_slot, "No such slot")

inst/unitTests/cpp/String.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,3 +114,9 @@ String test_String_ctor_encoding2() {
114114
y.set_encoding(CE_UTF8);
115115
return y;
116116
}
117+
118+
// Unit-test helper: returns a String built from a std::string that holds a
// NUL byte in the middle. The matching R test expects calling this function
// to raise an exception.
// [[Rcpp::export]]
119+
String test_String_embeddedNul() {
120+
// the explicit length 7 keeps the embedded NUL and the trailing "abc"
std::string bad("abc\0abc", 7);
121+
return String(bad);
122+
}

inst/unitTests/runit.String.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,4 +94,8 @@ if (.runThisTest) {
9494
checkEquals(Encoding(test_String_ctor_encoding2()), "UTF-8")
9595
}
9696

97+
# Check that calling the C++ helper test_String_embeddedNul() signals an
# error; checkException() fails the test if no error is raised.
test.String.embeddedNul <- function() {
98+
checkException(test_String_embeddedNul())
99+
}
100+
97101
}

0 commit comments

Comments
 (0)