@@ -53,54 +53,81 @@ namespace Rcpp {
53
53
typedef internal::const_string_proxy<STRSXP> const_StringProxy;
54
54
55
55
/* * default constructor */
56
- String ( ): data( Rf_mkChar(" " ) ), buffer(), valid(true ), buffer_ready(true ) {
56
+ String ( ): data( Rf_mkChar(" " ) ), buffer(), valid(true ), buffer_ready(true ), enc(CE_NATIVE) {
57
57
RCPP_STRING_DEBUG ( " String()" ) ;
58
58
}
59
59
60
60
/* * copy constructor */
61
- String ( const String& other) : data( other.get_sexp()), valid(true ), buffer_ready(false ) {
61
+ String ( const String& other) : data( other.get_sexp()), valid(true ), buffer_ready(false ),
62
+ enc (Rf_getCharCE(other.get_sexp())) {
63
+ RCPP_STRING_DEBUG ( " String(const String&)" ) ;
64
+ }
65
+
66
+ String ( const String& other, const char * enc) : data( other.get_sexp()), valid(true ), buffer_ready(false ) {
67
+ set_encoding (enc);
62
68
RCPP_STRING_DEBUG ( " String(const String&)" ) ;
63
69
}
64
70
65
71
/* * construct a string from a single CHARSXP SEXP */
66
- String (SEXP charsxp) : data(charsxp), valid(true ), buffer_ready(false ) {
72
+ String (SEXP charsxp) : data(charsxp), valid(true ), buffer_ready(false ),
73
+ enc(Rf_getCharCE(charsxp)) {
74
+ RCPP_STRING_DEBUG ( " String(SEXP)" ) ;
75
+ }
76
+
77
+ String (SEXP charsxp, const char * enc) : data(charsxp), valid(true ), buffer_ready(false ) {
78
+ set_encoding (enc);
67
79
RCPP_STRING_DEBUG ( " String(SEXP)" ) ;
68
80
}
69
81
70
82
/* * from string proxy */
71
- String ( const StringProxy& proxy ): data( proxy.get() ), valid(true ), buffer_ready(false ){
83
+ String ( const StringProxy& proxy ): data( proxy.get() ), valid(true ), buffer_ready(false ),
84
+ enc(Rf_getCharCE(proxy.get())){
72
85
RCPP_STRING_DEBUG ( " String( const StringProxy&)" ) ;
73
86
}
87
+
88
+ String ( const StringProxy& proxy, const char * enc ): data( proxy.get() ), valid(true ), buffer_ready(false ) {
89
+ set_encoding (enc);
90
+ RCPP_STRING_DEBUG ( " String( const StringProxy&)" ) ;
91
+ }
92
+
74
93
/* * from string proxy */
75
- String ( const const_StringProxy& proxy ): data( proxy.get() ), valid(true ), buffer_ready(false ){
94
+ String ( const const_StringProxy& proxy ): data( proxy.get() ), valid(true ), buffer_ready(false ),
95
+ enc(Rf_getCharCE(proxy.get())){
96
+ RCPP_STRING_DEBUG ( " String( const const_StringProxy&)" ) ;
97
+ }
98
+
99
+ String ( const const_StringProxy& proxy, const char * enc ): data( proxy.get() ), valid(true ), buffer_ready(false ) {
100
+ set_encoding (enc);
76
101
RCPP_STRING_DEBUG ( " String( const const_StringProxy&)" ) ;
77
102
}
78
103
79
104
/* * from a std::string */
80
- String ( const std::string& s) : buffer(s), valid(false ), buffer_ready(true ) {
105
+ String ( const std::string& s) : buffer(s), valid(false ), buffer_ready(true ),
106
+ enc(CE_NATIVE) {
81
107
RCPP_STRING_DEBUG ( " String(const std::string& )" ) ;
82
108
}
83
109
84
- String ( const std::wstring& s) : data(internal::make_charsexp(s)), valid(true ), buffer_ready(false ) {
110
+ String ( const std::wstring& s) : data(internal::make_charsexp(s)), valid(true ), buffer_ready(false ),
111
+ enc(CE_NATIVE) {
85
112
RCPP_STRING_DEBUG ( " String(const std::wstring& )" ) ;
86
113
}
87
114
88
115
/* * from a const char* */
89
- String ( const char * s) : buffer(s), valid(false ), buffer_ready(true ){
116
+ String ( const char * s) : buffer(s), valid(false ), buffer_ready(true ), enc(CE_NATIVE) {
90
117
RCPP_STRING_DEBUG ( " String(const char*)" ) ;
91
118
}
92
119
93
- String ( const wchar_t * s) : data(internal::make_charsexp(s)), valid(true ), buffer_ready(false ) {
120
+ String ( const wchar_t * s) : data(internal::make_charsexp(s)), valid(true ), buffer_ready(false ),
121
+ enc(CE_NATIVE) {
94
122
RCPP_STRING_DEBUG ( " String(const wchar_t* s)" ) ;
95
123
}
96
124
97
-
98
125
/* * constructors from R primitives */
99
- String ( int x ) : data( internal::r_coerce<INTSXP,STRSXP>(x) ), valid(true ), buffer_ready(false ) {}
100
- String ( double x ) : data( internal::r_coerce<REALSXP,STRSXP>(x) ), valid(true ), buffer_ready(false ){}
101
- String ( bool x ) : data( internal::r_coerce<LGLSXP,STRSXP>(x) ), valid( true ) , buffer_ready(false ){}
102
- String ( Rcomplex x ) : data( internal::r_coerce<CPLXSXP,STRSXP>(x) ), valid( true ), buffer_ready(false ){}
103
- String ( Rbyte x ) : data( internal::r_coerce<RAWSXP,STRSXP>(x) ), valid(true ), buffer_ready(false ){}
126
+ String ( int x ) : data( internal::r_coerce<INTSXP,STRSXP>(x) ), valid(true ), buffer_ready(false ), enc(CE_NATIVE) {}
127
+ String ( double x ) : data( internal::r_coerce<REALSXP,STRSXP>(x) ), valid(true ), buffer_ready(false ), enc(CE_NATIVE) {}
128
+ String ( bool x ) : data( internal::r_coerce<LGLSXP,STRSXP>(x) ), valid( true ) , buffer_ready(false ), enc(CE_NATIVE) {}
129
+ String ( Rcomplex x ) : data( internal::r_coerce<CPLXSXP,STRSXP>(x) ), valid( true ), buffer_ready(false ), enc(CE_NATIVE) {}
130
+ String ( Rbyte x ) : data( internal::r_coerce<RAWSXP,STRSXP>(x) ), valid(true ), buffer_ready(false ), enc(CE_NATIVE) {}
104
131
105
132
106
133
/** Assign from an int: store the coerced CHARSXP and mark the buffer as not loaded. */
inline String& operator=(int x) {
    data = internal::r_coerce<INTSXP, STRSXP>(x);
    valid = true;
    buffer_ready = false;
    return *this;
}
@@ -127,13 +154,13 @@ namespace Rcpp {
127
154
/** Assign from a std::wstring; forwards to assign_wide_string(). */
inline String& operator=(const std::wstring& s) {
    String& self = assign_wide_string(s);
    return self;
}
128
155
/** Assign from a wide C string; forwards to assign_wide_string(). */
inline String& operator=(const wchar_t* s) {
    String& self = assign_wide_string(s);
    return self;
}
129
156
130
-
131
157
/**
 * Append a std::string.
 *
 * Does nothing when is_na() reports true. Otherwise calls setBuffer(),
 * appends to the buffer and marks the stored CHARSXP as out of date.
 */
inline String& operator+=(const std::string& s) {
    RCPP_STRING_DEBUG(" String::operator+=( std::string )");
    if (!is_na()) {
        setBuffer();
        buffer.append(s);
        valid = false;
    }
    return *this;
}
163
+
137
164
inline String& operator +=( const char * s){
138
165
RCPP_STRING_DEBUG ( " String::operator+=( const char*)" ) ;
139
166
if ( is_na () ) return *this ;
@@ -214,8 +241,6 @@ namespace Rcpp {
214
241
return replace_first ( s.get_cstring (), news.get_cstring () ) ;
215
242
}
216
243
217
-
218
-
219
244
inline String& replace_last ( const char * s, const char * news ){
220
245
RCPP_STRING_DEBUG_2 ( " String::replace_last( const char* = '%s' , const char* = '%s')" , s, news ) ;
221
246
if ( is_na () ) return *this ;
@@ -312,7 +337,7 @@ namespace Rcpp {
312
337
313
338
/**
 * Return the CHARSXP for this string without modifying the object.
 *
 * When the stored SEXP is valid it is returned directly; otherwise a
 * CHARSXP is created from the buffer using the recorded encoding.
 */
inline SEXP get_sexp() const {
    RCPP_STRING_DEBUG_1(" String::get_sexp const ( valid = %d) ", valid);
    if (valid) {
        return data;
    }
    return Rf_mkCharCE(buffer.c_str(), enc);
}
317
342
318
343
inline SEXP get_sexp () {
@@ -333,6 +358,39 @@ namespace Rcpp {
333
358
inline const char * get_cstring () const {
334
359
return buffer_ready ? buffer.c_str () : CHAR (data) ;
335
360
}
361
+
362
+ inline const char * get_encoding () const {
363
+ switch (enc) {
364
+ case CE_BYTES:
365
+ return " bytes" ;
366
+ case CE_LATIN1:
367
+ return " latin1" ;
368
+ case CE_UTF8:
369
+ return " UTF-8" ;
370
+ default :
371
+ return " unknown" ;
372
+ }
373
+ }
374
+
375
+ inline void set_encoding ( cetype_t encoding ) {
376
+ enc = encoding;
377
+ if (data != NULL )
378
+ data = Rf_mkCharCE (Rf_translateCharUTF8 (data), enc);
379
+ }
380
+
381
+ inline void set_encoding (const char * encoding) {
382
+ if ( encoding == " bytes" ) {
383
+ enc = CE_BYTES;
384
+ } else if ( encoding == " latin1" ) {
385
+ enc = CE_LATIN1;
386
+ } else if ( encoding == " UTF-8" ) {
387
+ enc = CE_UTF8;
388
+ } else {
389
+ enc = CE_ANY;
390
+ Rcout << " Unknown encoding" << std::endl;
391
+ }
392
+ set_encoding (enc);
393
+ }
336
394
337
395
bool operator <( const Rcpp::String& other ) const {
338
396
return strcmp ( get_cstring (), other.get_cstring () ) < 0 ;
@@ -353,6 +411,9 @@ namespace Rcpp {
353
411
354
412
/* * the CHARSXP this String encapsulates */
355
413
SEXP data ;
414
+
415
+ /* * the encoding of encapsulated CHARSXP */
416
+ cetype_t enc;
356
417
357
418
/* * a buffer used to do string operations without going back to the SEXP */
358
419
std::string buffer ;
@@ -373,7 +434,7 @@ namespace Rcpp {
373
434
inline void setData (){
374
435
RCPP_STRING_DEBUG ( " setData" ) ;
375
436
if (!valid) {
376
- data = Rf_mkChar (buffer.c_str ()) ;
437
+ data = Rf_mkCharCE (buffer.c_str (), enc ) ;
377
438
valid = true ;
378
439
}
379
440
}
0 commit comments