@@ -53,54 +53,74 @@ namespace Rcpp {
53
53
typedef internal::const_string_proxy<STRSXP> const_StringProxy;
54
54
55
55
/* * default constructor */
56
- String ( ): data( Rf_mkChar(" " ) ), buffer(), valid(true ), buffer_ready(true ) {
56
+ String ( ): data( Rf_mkChar(" " ) ), buffer(), valid(true ), buffer_ready(true ), enc(CE_NATIVE) {
57
57
RCPP_STRING_DEBUG ( " String()" ) ;
58
58
}
59
59
60
60
/* * copy constructor */
61
- String ( const String& other) : data( other.get_sexp()), valid(true ), buffer_ready(false ) {
61
+ String ( const String& other) : data( other.get_sexp()), valid(true ), buffer_ready(false ), enc(Rf_getCharCE(other.get_sexp())) {
62
+ RCPP_STRING_DEBUG ( " String(const String&)" ) ;
63
+ }
64
+
65
+ String ( const String& other, const std::string& enc) : data( other.get_sexp()), valid(true ), buffer_ready(false ) {
66
+ set_encoding (enc);
62
67
RCPP_STRING_DEBUG ( " String(const String&)" ) ;
63
68
}
64
69
65
70
/* * construct a string from a single CHARSXP SEXP */
66
- String (SEXP charsxp) : data(charsxp), valid(true ), buffer_ready(false ) {
71
+ String (SEXP charsxp) : data(charsxp), valid(true ), buffer_ready(false ), enc(Rf_getCharCE(charsxp)) {
72
+ RCPP_STRING_DEBUG ( " String(SEXP)" ) ;
73
+ }
74
+
75
+ String (SEXP charsxp, const std::string& enc) : data(charsxp), valid(true ), buffer_ready(false ) {
76
+ set_encoding (enc);
67
77
RCPP_STRING_DEBUG ( " String(SEXP)" ) ;
68
78
}
69
79
70
80
/* * from string proxy */
71
- String ( const StringProxy& proxy ): data( proxy.get() ), valid(true ), buffer_ready(false ){
81
+ String ( const StringProxy& proxy ): data( proxy.get() ), valid(true ), buffer_ready(false ), enc(Rf_getCharCE(proxy.get())){
82
+ RCPP_STRING_DEBUG ( " String( const StringProxy&)" ) ;
83
+ }
84
+
85
+ String ( const StringProxy& proxy, const std::string& enc ): data( proxy.get() ), valid(true ), buffer_ready(false ) {
86
+ set_encoding (enc);
72
87
RCPP_STRING_DEBUG ( " String( const StringProxy&)" ) ;
73
88
}
89
+
74
90
/** from const string proxy */
75
- String ( const const_StringProxy& proxy ): data( proxy.get() ), valid(true ), buffer_ready(false ){
91
+ String ( const const_StringProxy& proxy ): data( proxy.get() ), valid(true ), buffer_ready(false ), enc(Rf_getCharCE(proxy.get())){
92
+ RCPP_STRING_DEBUG ( " String( const const_StringProxy&)" ) ;
93
+ }
94
+
95
+ String ( const const_StringProxy& proxy, const std::string& enc ): data( proxy.get() ), valid(true ), buffer_ready(false ) {
96
+ set_encoding (enc);
76
97
RCPP_STRING_DEBUG ( " String( const const_StringProxy&)" ) ;
77
98
}
78
99
79
100
/* * from a std::string */
80
- String ( const std::string& s) : buffer(s), valid(false ), buffer_ready(true ) {
101
+ String ( const std::string& s) : buffer(s), valid(false ), buffer_ready(true ), enc(CE_NATIVE) {
81
102
RCPP_STRING_DEBUG ( " String(const std::string& )" ) ;
82
103
}
83
104
84
- String ( const std::wstring& s) : data(internal::make_charsexp(s)), valid(true ), buffer_ready(false ) {
105
+ String ( const std::wstring& s) : data(internal::make_charsexp(s)), valid(true ), buffer_ready(false ), enc(CE_NATIVE) {
85
106
RCPP_STRING_DEBUG ( " String(const std::wstring& )" ) ;
86
107
}
87
108
88
109
/* * from a const char* */
89
- String ( const char * s) : buffer(s), valid(false ), buffer_ready(true ){
110
+ String ( const char * s) : buffer(s), valid(false ), buffer_ready(true ), enc(CE_NATIVE) {
90
111
RCPP_STRING_DEBUG ( " String(const char*)" ) ;
91
112
}
92
113
93
- String ( const wchar_t * s) : data(internal::make_charsexp(s)), valid(true ), buffer_ready(false ) {
114
+ String ( const wchar_t * s) : data(internal::make_charsexp(s)), valid(true ), buffer_ready(false ), enc(CE_NATIVE) {
94
115
RCPP_STRING_DEBUG ( " String(const wchar_t* s)" ) ;
95
116
}
96
117
97
-
98
118
/* * constructors from R primitives */
99
- String ( int x ) : data( internal::r_coerce<INTSXP,STRSXP>(x) ), valid(true ), buffer_ready(false ) {}
100
- String ( double x ) : data( internal::r_coerce<REALSXP,STRSXP>(x) ), valid(true ), buffer_ready(false ){}
101
- String ( bool x ) : data( internal::r_coerce<LGLSXP,STRSXP>(x) ), valid( true ) , buffer_ready(false ){}
102
- String ( Rcomplex x ) : data( internal::r_coerce<CPLXSXP,STRSXP>(x) ), valid( true ), buffer_ready(false ){}
103
- String ( Rbyte x ) : data( internal::r_coerce<RAWSXP,STRSXP>(x) ), valid(true ), buffer_ready(false ){}
119
+ String ( int x ) : data( internal::r_coerce<INTSXP,STRSXP>(x) ), valid(true ), buffer_ready(false ), enc(CE_NATIVE) {}
120
+ String ( double x ) : data( internal::r_coerce<REALSXP,STRSXP>(x) ), valid(true ), buffer_ready(false ), enc(CE_NATIVE) {}
121
+ String ( bool x ) : data( internal::r_coerce<LGLSXP,STRSXP>(x) ), valid( true ) , buffer_ready(false ), enc(CE_NATIVE) {}
122
+ String ( Rcomplex x ) : data( internal::r_coerce<CPLXSXP,STRSXP>(x) ), valid( true ), buffer_ready(false ), enc(CE_NATIVE) {}
123
+ String ( Rbyte x ) : data( internal::r_coerce<RAWSXP,STRSXP>(x) ), valid(true ), buffer_ready(false ), enc(CE_NATIVE) {}
104
124
105
125
106
126
// Assigns from an int: replaces the CHARSXP with the coerced value and
// flags the buffer as not ready.
inline String& operator=(int x) {
    data = internal::r_coerce<INTSXP, STRSXP>(x);
    valid = true;
    buffer_ready = false;
    return *this;
}
@@ -127,13 +147,13 @@ namespace Rcpp {
127
147
// Assigns from a wide string by delegating to assign_wide_string().
inline String& operator=(const std::wstring& s) {
    return assign_wide_string(s);
}
128
148
// Assigns from a wide C string by delegating to assign_wide_string().
inline String& operator=(const wchar_t* s) {
    return assign_wide_string(s);
}
129
149
130
-
131
150
// Appends a std::string. An NA string is returned unchanged; otherwise the
// buffer is made ready, extended with `s`, and the CHARSXP is marked stale.
inline String& operator+=(const std::string& s) {
    RCPP_STRING_DEBUG(" String::operator+=( std::string )");
    if (!is_na()) {
        setBuffer();
        buffer += s;
        valid = false;
    }
    return *this;
}
156
+
137
157
inline String& operator +=( const char * s){
138
158
RCPP_STRING_DEBUG ( " String::operator+=( const char*)" ) ;
139
159
if ( is_na () ) return *this ;
@@ -157,8 +177,8 @@ namespace Rcpp {
157
177
158
178
public:
159
179
160
- inline String& operator +=( const std::wstring& s){ return append_wide_string ( s ); }
161
- inline String& operator +=( const wchar_t * s){ return append_wide_string ( s ); }
180
+ inline String& operator +=( const std::wstring& s){ return append_wide_string ( s ); }
181
+ inline String& operator +=( const wchar_t * s){ return append_wide_string ( s ); }
162
182
163
183
inline String& operator +=( const String& other ){
164
184
RCPP_STRING_DEBUG ( " String::operator+=( const char*)" ) ;
@@ -214,8 +234,6 @@ namespace Rcpp {
214
234
return replace_first ( s.get_cstring (), news.get_cstring () ) ;
215
235
}
216
236
217
-
218
-
219
237
inline String& replace_last ( const char * s, const char * news ){
220
238
RCPP_STRING_DEBUG_2 ( " String::replace_last( const char* = '%s' , const char* = '%s')" , s, news ) ;
221
239
if ( is_na () ) return *this ;
@@ -312,7 +330,7 @@ namespace Rcpp {
312
330
313
331
// Const accessor for the CHARSXP. When the stored CHARSXP is current it is
// returned as is; otherwise a new one is created from the buffer with the
// recorded encoding (the object itself is not modified).
inline SEXP get_sexp() const {
    RCPP_STRING_DEBUG_1(" String::get_sexp const ( valid = %d) ", valid);
    if (valid) {
        return data;
    }
    return Rf_mkCharCE(buffer.c_str(), enc);
}
317
335
318
336
inline SEXP get_sexp () {
@@ -329,11 +347,46 @@ namespace Rcpp {
329
347
return std::wstring ( s, s + strlen (s) );
330
348
}
331
349
332
-
333
350
inline const char * get_cstring () const {
334
351
return buffer_ready ? buffer.c_str () : CHAR (data) ;
335
352
}
336
353
354
+ inline const std::string get_encoding () const {
355
+ switch (enc) {
356
+ case CE_BYTES:
357
+ return " bytes" ;
358
+ case CE_LATIN1:
359
+ return " latin1" ;
360
+ case CE_UTF8:
361
+ return " UTF-8" ;
362
+ default :
363
+ return " unknown" ;
364
+ }
365
+ }
366
+
367
+ inline void set_encoding ( cetype_t encoding ) {
368
+ enc = encoding;
369
+
370
+ if (valid) {
371
+ data = Rf_mkCharCE (Rf_translateCharUTF8 (data), encoding);
372
+ } else {
373
+ data = Rf_mkCharCE (buffer.c_str (), encoding) ;
374
+ valid = true ;
375
+ }
376
+ }
377
+
378
+ inline void set_encoding (const std::string & encoding) {
379
+ if ( encoding == " bytes" ) {
380
+ set_encoding ( CE_BYTES );
381
+ } else if ( encoding == " latin1" ) {
382
+ set_encoding ( CE_LATIN1 );
383
+ } else if ( encoding == " UTF-8" ) {
384
+ set_encoding ( CE_UTF8 );
385
+ } else {
386
+ set_encoding ( CE_ANY );
387
+ }
388
+ }
389
+
337
390
// Orders two strings by strcmp() on their C-string representations.
bool operator<(const Rcpp::String& other) const {
    const int order = strcmp(get_cstring(), other.get_cstring());
    return order < 0;
}
@@ -363,6 +416,9 @@ namespace Rcpp {
363
416
/* * is the buffer initialized */
364
417
bool buffer_ready ;
365
418
419
+ /* * the encoding of encapsulated CHARSXP */
420
+ cetype_t enc;
421
+
366
422
// True when the stored CHARSXP is R's NA_STRING.
inline bool is_na() const {
    return NA_STRING == data;
}
367
423
inline void setBuffer (){
368
424
if ( !buffer_ready){
@@ -373,7 +429,7 @@ namespace Rcpp {
373
429
inline void setData (){
374
430
RCPP_STRING_DEBUG ( " setData" ) ;
375
431
if (!valid) {
376
- data = Rf_mkChar (buffer.c_str ()) ;
432
+ data = Rf_mkCharCE (buffer.c_str (), enc ) ;
377
433
valid = true ;
378
434
}
379
435
}
@@ -403,8 +459,8 @@ namespace Rcpp {
403
459
return s.get_sexp () ;
404
460
}
405
461
406
- template <int RTYPE>
407
- template <typename T>
462
+ template <int RTYPE>
463
+ template <typename T>
408
464
string_proxy<RTYPE>& string_proxy<RTYPE>::operator +=(const T& rhs) {
409
465
String tmp = get () ;
410
466
tmp += rhs ;
@@ -415,7 +471,7 @@ namespace Rcpp {
415
471
}
416
472
417
473
418
- template <>
474
+ template <>
419
475
inline SEXP wrap<Rcpp::String>( const Rcpp::String& object) {
420
476
RCPP_STRING_DEBUG ( " wrap<String>()" ) ;
421
477
Shield<SEXP> res ( Rf_allocVector ( STRSXP, 1 ) ) ;
0 commit comments