@@ -53,54 +53,74 @@ namespace Rcpp {
5353 typedef internal::const_string_proxy<STRSXP> const_StringProxy;
5454
5555 /* * default constructor */
56- String ( ): data( Rf_mkChar(" " ) ), buffer(), valid(true ), buffer_ready(true ) {
56+ String ( ): data( Rf_mkChar(" " ) ), buffer(), valid(true ), buffer_ready(true ), enc(CE_NATIVE) {
5757 RCPP_STRING_DEBUG ( " String()" ) ;
5858 }
5959
6060 /* * copy constructor */
61- String ( const String& other) : data( other.get_sexp()), valid(true ), buffer_ready(false ) {
61+ String ( const String& other) : data( other.get_sexp()), valid(true ), buffer_ready(false ), enc(Rf_getCharCE(other.get_sexp())) {
62+ RCPP_STRING_DEBUG ( " String(const String&)" ) ;
63+ }
64+
65+ String ( const String& other, const std::string& enc) : data( other.get_sexp()), valid(true ), buffer_ready(false ) {
66+ set_encoding (enc);
6267 RCPP_STRING_DEBUG ( " String(const String&)" ) ;
6368 }
6469
6570 /* * construct a string from a single CHARSXP SEXP */
66- String (SEXP charsxp) : data(charsxp), valid(true ), buffer_ready(false ) {
71+ String (SEXP charsxp) : data(charsxp), valid(true ), buffer_ready(false ), enc(Rf_getCharCE(charsxp)) {
72+ RCPP_STRING_DEBUG ( " String(SEXP)" ) ;
73+ }
74+
75+ String (SEXP charsxp, const std::string& enc) : data(charsxp), valid(true ), buffer_ready(false ) {
76+ set_encoding (enc);
6777 RCPP_STRING_DEBUG ( " String(SEXP)" ) ;
6878 }
6979
7080 /* * from string proxy */
71- String ( const StringProxy& proxy ): data( proxy.get() ), valid(true ), buffer_ready(false ){
81+ String ( const StringProxy& proxy ): data( proxy.get() ), valid(true ), buffer_ready(false ), enc(Rf_getCharCE(proxy.get())){
82+ RCPP_STRING_DEBUG ( " String( const StringProxy&)" ) ;
83+ }
84+
85+ String ( const StringProxy& proxy, const std::string& enc ): data( proxy.get() ), valid(true ), buffer_ready(false ) {
86+ set_encoding (enc);
7287 RCPP_STRING_DEBUG ( " String( const StringProxy&)" ) ;
7388 }
89+
7490 /* * from string proxy */
75- String ( const const_StringProxy& proxy ): data( proxy.get() ), valid(true ), buffer_ready(false ){
91+ String ( const const_StringProxy& proxy ): data( proxy.get() ), valid(true ), buffer_ready(false ), enc(Rf_getCharCE(proxy.get())){
92+ RCPP_STRING_DEBUG ( " String( const const_StringProxy&)" ) ;
93+ }
94+
95+ String ( const const_StringProxy& proxy, const std::string& enc ): data( proxy.get() ), valid(true ), buffer_ready(false ) {
96+ set_encoding (enc);
7697 RCPP_STRING_DEBUG ( " String( const const_StringProxy&)" ) ;
7798 }
7899
79100 /* * from a std::string */
80- String ( const std::string& s) : buffer(s), valid(false ), buffer_ready(true ) {
101+ String ( const std::string& s) : buffer(s), valid(false ), buffer_ready(true ), enc(CE_NATIVE) {
81102 RCPP_STRING_DEBUG ( " String(const std::string& )" ) ;
82103 }
83104
84- String ( const std::wstring& s) : data(internal::make_charsexp(s)), valid(true ), buffer_ready(false ) {
105+ String ( const std::wstring& s) : data(internal::make_charsexp(s)), valid(true ), buffer_ready(false ), enc(CE_NATIVE) {
85106 RCPP_STRING_DEBUG ( " String(const std::wstring& )" ) ;
86107 }
87108
88109 /* * from a const char* */
89- String ( const char * s) : buffer(s), valid(false ), buffer_ready(true ){
110+ String ( const char * s) : buffer(s), valid(false ), buffer_ready(true ), enc(CE_NATIVE) {
90111 RCPP_STRING_DEBUG ( " String(const char*)" ) ;
91112 }
92113
93- String ( const wchar_t * s) : data(internal::make_charsexp(s)), valid(true ), buffer_ready(false ) {
114+ String ( const wchar_t * s) : data(internal::make_charsexp(s)), valid(true ), buffer_ready(false ), enc(CE_NATIVE) {
94115 RCPP_STRING_DEBUG ( " String(const wchar_t* s)" ) ;
95116 }
96117
97-
98118 /* * constructors from R primitives */
99- String ( int x ) : data( internal::r_coerce<INTSXP,STRSXP>(x) ), valid(true ), buffer_ready(false ) {}
100- String ( double x ) : data( internal::r_coerce<REALSXP,STRSXP>(x) ), valid(true ), buffer_ready(false ){}
101- String ( bool x ) : data( internal::r_coerce<LGLSXP,STRSXP>(x) ), valid( true ) , buffer_ready(false ){}
102- String ( Rcomplex x ) : data( internal::r_coerce<CPLXSXP,STRSXP>(x) ), valid( true ), buffer_ready(false ){}
103- String ( Rbyte x ) : data( internal::r_coerce<RAWSXP,STRSXP>(x) ), valid(true ), buffer_ready(false ){}
119+ String ( int x ) : data( internal::r_coerce<INTSXP,STRSXP>(x) ), valid(true ), buffer_ready(false ), enc(CE_NATIVE) {}
120+ String ( double x ) : data( internal::r_coerce<REALSXP,STRSXP>(x) ), valid(true ), buffer_ready(false ), enc(CE_NATIVE) {}
121+ String ( bool x ) : data( internal::r_coerce<LGLSXP,STRSXP>(x) ), valid( true ) , buffer_ready(false ), enc(CE_NATIVE) {}
122+ String ( Rcomplex x ) : data( internal::r_coerce<CPLXSXP,STRSXP>(x) ), valid( true ), buffer_ready(false ), enc(CE_NATIVE) {}
123+ String ( Rbyte x ) : data( internal::r_coerce<RAWSXP,STRSXP>(x) ), valid(true ), buffer_ready(false ), enc(CE_NATIVE) {}
104124
105125
106126 inline String& operator =( int x ){ data = internal::r_coerce<INTSXP ,STRSXP>( x ) ; valid = true ; buffer_ready = false ; return *this ; }
@@ -127,13 +147,13 @@ namespace Rcpp {
127147 inline String& operator =( const std::wstring& s){ return assign_wide_string (s) ; }
128148 inline String& operator =( const wchar_t * s){ return assign_wide_string (s) ; }
129149
130-
131150 inline String& operator +=( const std::string& s){
132151 RCPP_STRING_DEBUG ( " String::operator+=( std::string )" ) ;
133152 if ( is_na () ) return *this ;
134153 setBuffer () ; buffer += s ; valid = false ;
135154 return *this ;
136155 }
156+
137157 inline String& operator +=( const char * s){
138158 RCPP_STRING_DEBUG ( " String::operator+=( const char*)" ) ;
139159 if ( is_na () ) return *this ;
@@ -157,8 +177,8 @@ namespace Rcpp {
157177
158178 public:
159179
160- inline String& operator +=( const std::wstring& s){ return append_wide_string ( s ); }
161- inline String& operator +=( const wchar_t * s){ return append_wide_string ( s ); }
180+ inline String& operator +=( const std::wstring& s){ return append_wide_string ( s ); }
181+ inline String& operator +=( const wchar_t * s){ return append_wide_string ( s ); }
162182
163183 inline String& operator +=( const String& other ){
164184 RCPP_STRING_DEBUG ( " String::operator+=( const char*)" ) ;
@@ -214,8 +234,6 @@ namespace Rcpp {
214234 return replace_first ( s.get_cstring (), news.get_cstring () ) ;
215235 }
216236
217-
218-
219237 inline String& replace_last ( const char * s, const char * news ){
220238 RCPP_STRING_DEBUG_2 ( " String::replace_last( const char* = '%s' , const char* = '%s')" , s, news ) ;
221239 if ( is_na () ) return *this ;
@@ -312,7 +330,7 @@ namespace Rcpp {
312330
313331 inline SEXP get_sexp () const {
314332 RCPP_STRING_DEBUG_1 ( " String::get_sexp const ( valid = %d) " , valid ) ;
315- return valid ? data : Rf_mkChar ( buffer.c_str () ) ;
333+ return valid ? data : Rf_mkCharCE ( buffer.c_str (), enc ) ;
316334 }
317335
318336 inline SEXP get_sexp () {
@@ -329,11 +347,46 @@ namespace Rcpp {
329347 return std::wstring ( s, s + strlen (s) );
330348 }
331349
332-
333350 inline const char * get_cstring () const {
334351 return buffer_ready ? buffer.c_str () : CHAR (data) ;
335352 }
336353
354+ inline const std::string get_encoding () const {
355+ switch (enc) {
356+ case CE_BYTES:
357+ return " bytes" ;
358+ case CE_LATIN1:
359+ return " latin1" ;
360+ case CE_UTF8:
361+ return " UTF-8" ;
362+ default :
363+ return " unknown" ;
364+ }
365+ }
366+
367+ inline void set_encoding ( cetype_t encoding ) {
368+ enc = encoding;
369+
370+ if (valid) {
371+ data = Rf_mkCharCE (Rf_translateCharUTF8 (data), encoding);
372+ } else {
373+ data = Rf_mkCharCE (buffer.c_str (), encoding) ;
374+ valid = true ;
375+ }
376+ }
377+
378+ inline void set_encoding (const std::string & encoding) {
379+ if ( encoding == " bytes" ) {
380+ set_encoding ( CE_BYTES );
381+ } else if ( encoding == " latin1" ) {
382+ set_encoding ( CE_LATIN1 );
383+ } else if ( encoding == " UTF-8" ) {
384+ set_encoding ( CE_UTF8 );
385+ } else {
386+ set_encoding ( CE_ANY );
387+ }
388+ }
389+
337390 bool operator <( const Rcpp::String& other ) const {
338391 return strcmp ( get_cstring (), other.get_cstring () ) < 0 ;
339392 }
@@ -363,6 +416,9 @@ namespace Rcpp {
363416 /* * is the buffer initialized */
364417 bool buffer_ready ;
365418
419+ /* * the encoding of encapsulated CHARSXP */
420+ cetype_t enc;
421+
366422 inline bool is_na () const { return data == NA_STRING ; }
367423 inline void setBuffer (){
368424 if ( !buffer_ready){
@@ -373,7 +429,7 @@ namespace Rcpp {
373429 inline void setData (){
374430 RCPP_STRING_DEBUG ( " setData" ) ;
375431 if (!valid) {
376- data = Rf_mkChar (buffer.c_str ()) ;
432+ data = Rf_mkCharCE (buffer.c_str (), enc ) ;
377433 valid = true ;
378434 }
379435 }
@@ -403,8 +459,8 @@ namespace Rcpp {
403459 return s.get_sexp () ;
404460 }
405461
406- template <int RTYPE>
407- template <typename T>
462+ template <int RTYPE>
463+ template <typename T>
408464 string_proxy<RTYPE>& string_proxy<RTYPE>::operator +=(const T& rhs) {
409465 String tmp = get () ;
410466 tmp += rhs ;
@@ -415,7 +471,7 @@ namespace Rcpp {
415471 }
416472
417473
418- template <>
474+ template <>
419475 inline SEXP wrap<Rcpp::String>( const Rcpp::String& object) {
420476 RCPP_STRING_DEBUG ( " wrap<String>()" ) ;
421477 Shield<SEXP> res ( Rf_allocVector ( STRSXP, 1 ) ) ;
0 commit comments