Skip to content

Commit 0bf8e97

Browse files
author
Qiang Kou
committed
Remove the string representation of encoding; use CE_UTF8 as the default encoding
1 parent 1fb8865 commit 0bf8e97

File tree

4 files changed

+32
-71
lines changed

4 files changed

+32
-71
lines changed

ChangeLog

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
2016-08-02 Qiang Kou <[email protected]>
2+
3+
* inst/include/Rcpp/String.h: CE_UTF8 as default encoding
4+
* inst/unitTests/cpp/String.cpp: Update unit test
5+
* inst/unitTests/runit.String.R: Idem
6+
17
2016-08-01 Nathan Russell <[email protected]>
28

39
* inst/include/Rcpp/vector/Vector.h: Added decreasing option for Vector

inst/include/Rcpp/String.h

Lines changed: 20 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@
4040
#define RCPP_STRING_DEBUG_3(fmt, M1, M2, M3)
4141
#endif
4242

43-
4443
namespace Rcpp {
4544

4645
/**
@@ -53,7 +52,7 @@ namespace Rcpp {
5352
typedef internal::const_string_proxy<STRSXP> const_StringProxy;
5453

5554
/** default constructor */
56-
String(): data(Rf_mkChar("")), buffer(), valid(true), buffer_ready(true), enc(CE_NATIVE) {
55+
String(): data(Rf_mkCharCE("", CE_UTF8)), buffer(), valid(true), buffer_ready(true), enc(CE_UTF8) {
5756
Rcpp_PreserveObject(data);
5857
RCPP_STRING_DEBUG("String()");
5958
}
@@ -64,12 +63,6 @@ namespace Rcpp {
6463
RCPP_STRING_DEBUG("String(const String&)");
6564
}
6665

67-
String(const String& other, const std::string& enc) : data(other.get_sexp()), valid(true), buffer_ready(false) {
68-
Rcpp_PreserveObject(data);
69-
set_encoding(enc);
70-
RCPP_STRING_DEBUG("String(const String&)");
71-
}
72-
7366
/** construct a string from a single CHARSXP SEXP */
7467
String(SEXP charsxp) : data(R_NilValue) {
7568
if (TYPEOF(charsxp) == STRSXP) {
@@ -88,33 +81,16 @@ namespace Rcpp {
8881
RCPP_STRING_DEBUG("String(SEXP)");
8982
}
9083

91-
String(SEXP charsxp, const std::string& enc) : data(R_NilValue) {
92-
if (TYPEOF(charsxp) == STRSXP) {
93-
data = STRING_ELT(charsxp, 0);
94-
} else if (TYPEOF(charsxp) == CHARSXP) {
95-
data = charsxp;
96-
}
97-
98-
if (::Rf_isString(data) && ::Rf_length(data) != 1)
99-
throw ::Rcpp::not_compatible("expecting a single value");
100-
101-
valid = true;
102-
buffer_ready = false;
103-
Rcpp_PreserveObject(data);
104-
set_encoding(enc);
105-
RCPP_STRING_DEBUG("String(SEXP)");
106-
}
107-
10884
/** from string proxy */
10985
String(const StringProxy& proxy): data(proxy.get()), valid(true), buffer_ready(false), enc(Rf_getCharCE(proxy.get())) {
11086
Rcpp_PreserveObject(data);
11187
RCPP_STRING_DEBUG("String(const StringProxy&)");
11288
}
11389

114-
String(const StringProxy& proxy, const std::string& enc): data(proxy.get()), valid(true), buffer_ready(false) {
90+
String(const StringProxy& proxy, cetype_t enc): data(proxy.get()), valid(true), buffer_ready(false) {
11591
Rcpp_PreserveObject(data);
11692
set_encoding(enc);
117-
RCPP_STRING_DEBUG("String(const StringProxy&)");
93+
RCPP_STRING_DEBUG("String(const StringProxy&, cetype_t)");
11894
}
11995

12096
/** from string proxy */
@@ -123,40 +99,40 @@ namespace Rcpp {
12399
RCPP_STRING_DEBUG("String(const const_StringProxy&)");
124100
}
125101

126-
String(const const_StringProxy& proxy, const std::string& enc): data(proxy.get()), valid(true), buffer_ready(false) {
102+
String(const const_StringProxy& proxy, cetype_t enc): data(proxy.get()), valid(true), buffer_ready(false) {
127103
Rcpp_PreserveObject(data);
128104
set_encoding(enc);
129-
RCPP_STRING_DEBUG("String(const const_StringProxy&)");
105+
RCPP_STRING_DEBUG("String(const const_StringProxy&, cetype_t)");
130106
}
131107

132108
/** from a std::string */
133-
String(const std::string& s) : buffer(s), valid(false), buffer_ready(true), enc(CE_NATIVE) {
109+
String(const std::string& s, cetype_t enc = CE_UTF8) : buffer(s), valid(false), buffer_ready(true), enc(enc) {
134110
data = R_NilValue;
135-
RCPP_STRING_DEBUG("String(const std::string&)");
111+
RCPP_STRING_DEBUG("String(const std::string&, cetype_t)");
136112
}
137113

138-
String(const std::wstring& s) : data(internal::make_charsexp(s)), valid(true), buffer_ready(false), enc(CE_NATIVE) {
114+
String(const std::wstring& s, cetype_t enc = CE_UTF8) : data(internal::make_charsexp(s)), valid(true), buffer_ready(false), enc(enc) {
139115
Rcpp_PreserveObject(data);
140-
RCPP_STRING_DEBUG("String(const std::wstring&)");
116+
RCPP_STRING_DEBUG("String(const std::wstring&, cetype_t)");
141117
}
142118

143119
/** from a const char* */
144-
String(const char* s) : buffer(s), valid(false), buffer_ready(true), enc(CE_NATIVE) {
120+
String(const char* s, cetype_t enc = CE_UTF8) : buffer(s), valid(false), buffer_ready(true), enc(enc) {
145121
data = R_NilValue;
146-
RCPP_STRING_DEBUG("String(const char*)");
122+
RCPP_STRING_DEBUG("String(const char*, cetype_t)");
147123
}
148124

149-
String(const wchar_t* s) : data(internal::make_charsexp(s)), valid(true), buffer_ready(false), enc(CE_NATIVE) {
125+
String(const wchar_t* s, cetype_t enc = CE_UTF8) : data(internal::make_charsexp(s)), valid(true), buffer_ready(false), enc(enc) {
150126
Rcpp_PreserveObject(data);
151-
RCPP_STRING_DEBUG("String(const wchar_t* s)");
127+
RCPP_STRING_DEBUG("String(const wchar_t* s, cetype_t)");
152128
}
153129

154130
/** constructors from R primitives */
155-
String(int x) : data(internal::r_coerce<INTSXP,STRSXP>(x)), valid(true), buffer_ready(false), enc(CE_NATIVE) {Rcpp_PreserveObject(data);}
156-
String(double x) : data(internal::r_coerce<REALSXP,STRSXP>(x)), valid(true), buffer_ready(false), enc(CE_NATIVE) {Rcpp_PreserveObject(data);}
157-
String(bool x) : data(internal::r_coerce<LGLSXP,STRSXP>(x)), valid(true) , buffer_ready(false), enc(CE_NATIVE) {Rcpp_PreserveObject(data);}
158-
String(Rcomplex x) : data(internal::r_coerce<CPLXSXP,STRSXP>(x)), valid(true), buffer_ready(false), enc(CE_NATIVE) {Rcpp_PreserveObject(data);}
159-
String(Rbyte x) : data(internal::r_coerce<RAWSXP,STRSXP>(x)), valid(true), buffer_ready(false), enc(CE_NATIVE) {Rcpp_PreserveObject(data);}
131+
String(int x) : data(internal::r_coerce<INTSXP,STRSXP>(x)), valid(true), buffer_ready(false), enc(CE_UTF8) {Rcpp_PreserveObject(data);}
132+
String(double x) : data(internal::r_coerce<REALSXP,STRSXP>(x)), valid(true), buffer_ready(false), enc(CE_UTF8) {Rcpp_PreserveObject(data);}
133+
String(bool x) : data(internal::r_coerce<LGLSXP,STRSXP>(x)), valid(true) , buffer_ready(false), enc(CE_UTF8) {Rcpp_PreserveObject(data);}
134+
String(Rcomplex x) : data(internal::r_coerce<CPLXSXP,STRSXP>(x)), valid(true), buffer_ready(false), enc(CE_UTF8) {Rcpp_PreserveObject(data);}
135+
String(Rbyte x) : data(internal::r_coerce<RAWSXP,STRSXP>(x)), valid(true), buffer_ready(false), enc(CE_UTF8) {Rcpp_PreserveObject(data);}
160136

161137
~String() {
162138
Rcpp_ReleaseObject(data);
@@ -406,17 +382,8 @@ namespace Rcpp {
406382
return buffer_ready ? buffer.c_str() : CHAR(data);
407383
}
408384

409-
inline const std::string get_encoding() const {
410-
switch (enc) {
411-
case CE_BYTES:
412-
return "bytes";
413-
case CE_LATIN1:
414-
return "latin1";
415-
case CE_UTF8:
416-
return "UTF-8";
417-
default:
418-
return "unknown";
419-
}
385+
inline cetype_t get_encoding() const {
386+
return enc;
420387
}
421388

422389
inline void set_encoding(cetype_t encoding) {
@@ -431,18 +398,6 @@ namespace Rcpp {
431398
}
432399
}
433400

434-
inline void set_encoding(const std::string & encoding) {
435-
if (encoding == "bytes") {
436-
set_encoding(CE_BYTES);
437-
} else if (encoding == "latin1") {
438-
set_encoding(CE_LATIN1);
439-
} else if (encoding == "UTF-8") {
440-
set_encoding(CE_UTF8);
441-
} else {
442-
set_encoding(CE_ANY);
443-
}
444-
}
445-
446401
bool operator<(const Rcpp::String& other) const {
447402
return strcmp(get_cstring(), other.get_cstring()) < 0;
448403
}

inst/unitTests/cpp/String.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,27 +90,27 @@ String test_push_front(String x) {
9090
}
9191

9292
// [[Rcpp::export]]
93-
String test_String_encoding(String x) {
93+
int test_String_encoding(String x) {
9494
return x.get_encoding();
9595
}
9696

9797
// [[Rcpp::export]]
9898
String test_String_set_encoding(String x) {
99-
x.set_encoding("UTF-8");
99+
x.set_encoding(CE_UTF8);
100100
return x;
101101
}
102102

103103
// [[Rcpp::export]]
104104
String test_String_ctor_encoding(String x) {
105105
String y(x);
106-
y.set_encoding("UTF-8");
106+
y.set_encoding(CE_UTF8);
107107
return y;
108108
}
109109

110110

111111
// [[Rcpp::export]]
112112
String test_String_ctor_encoding2() {
113113
String y("å");
114-
y.set_encoding("UTF-8");
114+
y.set_encoding(CE_UTF8);
115115
return y;
116116
}

inst/unitTests/runit.String.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ if (.runThisTest) {
8787
a <- b <- "å"
8888
Encoding(a) <- "unknown"
8989
Encoding(b) <- "UTF-8"
90-
checkEquals(test_String_encoding(a), "unknown")
91-
checkEquals(test_String_encoding(b), "UTF-8")
90+
checkEquals(test_String_encoding(a), 0)
91+
checkEquals(test_String_encoding(b), 1)
9292
checkEquals(Encoding(test_String_set_encoding(a)), "UTF-8")
9393
checkEquals(Encoding(test_String_ctor_encoding(a)), "UTF-8")
9494
checkEquals(Encoding(test_String_ctor_encoding2()), "UTF-8")

0 commit comments

Comments
 (0)