Skip to content

Commit e81493b

Browse files
authored
Merge pull request #680 from nathan-russell/feature/sugar-trimws
Sugar function 'trimws' with unit tests (closes #679)
2 parents 7492cff + c56e54b commit e81493b

File tree

6 files changed

+382
-0
lines changed

6 files changed

+382
-0
lines changed

ChangeLog

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
2017-04-22 Nathan Russell <[email protected]>
2+
3+
* inst/include/Rcpp/sugar/functions/strings/trimws.h: Added sugar
4+
function trimws with unit tests
5+
* inst/include/Rcpp/sugar/functions/strings/strings.h: Idem
6+
* inst/unitTests/cpp/sugar.cpp: Idem
7+
* inst/unitTests/runit.sugar.R: Idem
8+
19
2017-04-20 Dirk Eddelbuettel <[email protected]>
210

311
* DESCRIPTION (Version, Date): Roll minor version

inst/NEWS.Rd

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,11 @@
2525
(James Balamuta in \ghpr{661} addressing \ghit{628}, \ghit{563},
2626
\ghit{552}, \ghit{460}, \ghit{419}, and \ghit{251}).
2727
}
28+
\item Changes in Rcpp Sugar:
29+
\itemize{
30+
\item Added sugar function \code{trimws} (Nathan Russell in \ghpr{680}
31+
addressing \ghit{679}).
32+
}
2833
}
2934
}
3035

inst/include/Rcpp/sugar/functions/strings/strings.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,5 +23,6 @@
2323
#define RCPP_SUGAR_FUNCTIONS_STRINGS_H
2424

2525
#include <Rcpp/sugar/functions/strings/collapse.h>
26+
#include <Rcpp/sugar/functions/strings/trimws.h>
2627

2728
#endif
Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
// -*- mode: C++; c-indent-level: 4; c-basic-offset: 4; indent-tabs-mode: nil; -*-
2+
//
3+
// trimws.h: Rcpp R/C++ interface class library -- trimws
4+
//
5+
// Copyright (C) 2017 Nathan Russell
6+
//
7+
// This file is part of Rcpp.
8+
//
9+
// Rcpp is free software: you can redistribute it and/or modify it
10+
// under the terms of the GNU General Public License as published by
11+
// the Free Software Foundation, either version 2 of the License, or
12+
// (at your option) any later version.
13+
//
14+
// Rcpp is distributed in the hope that it will be useful, but
15+
// WITHOUT ANY WARRANTY; without even the implied warranty of
16+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17+
// GNU General Public License for more details.
18+
//
19+
// You should have received a copy of the GNU General Public License
20+
// along with Rcpp. If not, see <http://www.gnu.org/licenses/>.
21+
22+
#ifndef Rcpp__sugar__trimws_h
23+
#define Rcpp__sugar__trimws_h
24+
25+
#include <string>
26+
#include <cstring>
27+
28+
namespace Rcpp {
29+
namespace sugar {
30+
namespace detail {
31+
32+
33+
/* Whitespace predicate shared by the trimming helpers in this namespace.

   std::isspace is deliberately not used: it also classifies '\f' and
   '\v' as whitespace, whereas base::trimws only strips ' ', '\t',
   '\r', and '\n'. */
inline bool isws(const char c) {
    switch (c) {
    case ' ':
    case '\t':
    case '\n':
    case '\r':
        return true;
    default:
        return false;
    }
}
39+
40+
inline const char* trim_left(const char* str) {
41+
if (!str) {
42+
return "";
43+
}
44+
45+
while (isws(*str)) {
46+
++str;
47+
}
48+
49+
return str;
50+
}
51+
52+
/* Copy `str` without its trailing whitespace into `*buff`.

   str  - input characters; a null pointer yields "".
   sz   - number of characters in `str` (the callers in this file pass
          LENGTH() of the underlying CHARSXP).
   buff - scratch string; it is cleared and then receives the result.

   Returns buff->c_str(), i.e. a pointer owned by `*buff` that stays valid
   only until `*buff` is modified again. */
inline const char* trim_right(const char* str, R_len_t sz, std::string* buff) {
    if (!str) {
        return "";
    }

    buff->clear();

    // Shrink the length over trailing whitespace. `sz > 0` is tested before
    // str[sz - 1] is read, so an empty input never reads str[-1] and never
    // asks append() for a negative (i.e. huge unsigned) length.
    while (sz > 0 && isws(str[sz - 1])) {
        --sz;
    }

    if (sz > 0) {
        buff->append(str, static_cast<std::string::size_type>(sz));
    }
    return buff->c_str();
}
65+
66+
/* Copy `str` without leading or trailing whitespace into `*buff`.

   str  - input characters; a null pointer yields "".
   sz   - number of characters in `str` (the callers in this file pass
          LENGTH() of the underlying CHARSXP).
   buff - scratch string; it is cleared and then receives the result.

   Returns buff->c_str(), i.e. a pointer owned by `*buff` that stays valid
   only until `*buff` is modified again. */
inline const char* trim_both(const char* str, R_len_t sz, std::string* buff) {
    if (!str) {
        return "";
    }

    buff->clear();

    // Drop leading whitespace, keeping `sz` in step with `str`.
    while (sz > 0 && isws(*str)) {
        ++str;
        --sz;
    }

    // Drop trailing whitespace. Indexing from the remaining length (rather
    // than starting from `str + sz - 1`) avoids forming a pointer in front
    // of the buffer when nothing is left after the leading trim.
    while (sz > 0 && isws(str[sz - 1])) {
        --sz;
    }

    if (sz > 0) {
        buff->append(str, static_cast<std::string::size_type>(sz));
    }
    return buff->c_str();
}
84+
85+
86+
} // detail
87+
} // sugar
88+
89+
90+
/* trimws for character vectors.

   x     - input character vector.
   which - side(s) to trim; only its first character is inspected:
           'b' (both), 'l' (left) or 'r' (right). Any other value calls
           stop() with an "Invalid `which` argument" message.

   Returns a new vector of the same length. NA elements are copied through
   unchanged; every other element is replaced by its trimmed value. */
inline Vector<STRSXP> trimws(const Vector<STRSXP>& x, const char* which = "both") {
    const R_xlen_t n = x.size();
    Vector<STRSXP> out = no_init(n);
    std::string scratch;  // reused by trim_both / trim_right for every element

    switch (*which) {
    case 'b':
        for (R_xlen_t k = 0; k < n; ++k) {
            if (traits::is_na<STRSXP>(x[k])) {
                out[k] = x[k];
                continue;
            }
            out[k] = sugar::detail::trim_both(x[k], LENGTH(x[k]), &scratch);
        }
        break;

    case 'l':
        for (R_xlen_t k = 0; k < n; ++k) {
            if (traits::is_na<STRSXP>(x[k])) {
                out[k] = x[k];
                continue;
            }
            out[k] = sugar::detail::trim_left(x[k]);
        }
        break;

    case 'r':
        for (R_xlen_t k = 0; k < n; ++k) {
            if (traits::is_na<STRSXP>(x[k])) {
                out[k] = x[k];
                continue;
            }
            out[k] = sugar::detail::trim_right(x[k], LENGTH(x[k]), &scratch);
        }
        break;

    default:
        stop("Invalid `which` argument '%s'!", which);
        return Vector<STRSXP>::create("Unreachable");
    }

    return out;
}
134+
135+
/* trimws for character matrices.

   x     - input character matrix.
   which - side(s) to trim; only its first character is inspected:
           'b' (both), 'l' (left) or 'r' (right). Any other value calls
           stop() with an "Invalid `which` argument" message.

   Returns a new matrix with the same number of rows and columns. Elements
   are visited through the matrix's linear index; NA elements are copied
   through unchanged, all others are replaced by their trimmed value. */
inline Matrix<STRSXP> trimws(const Matrix<STRSXP>& x, const char* which = "both") {
    const R_xlen_t nr = x.nrow();
    const R_xlen_t nc = x.ncol();
    const R_xlen_t n = x.size();
    Matrix<STRSXP> out = no_init(nr, nc);
    std::string scratch;  // reused by trim_both / trim_right for every element

    switch (*which) {
    case 'b':
        for (R_xlen_t k = 0; k < n; ++k) {
            if (traits::is_na<STRSXP>(x[k])) {
                out[k] = x[k];
                continue;
            }
            out[k] = sugar::detail::trim_both(x[k], LENGTH(x[k]), &scratch);
        }
        break;

    case 'l':
        for (R_xlen_t k = 0; k < n; ++k) {
            if (traits::is_na<STRSXP>(x[k])) {
                out[k] = x[k];
                continue;
            }
            out[k] = sugar::detail::trim_left(x[k]);
        }
        break;

    case 'r':
        for (R_xlen_t k = 0; k < n; ++k) {
            if (traits::is_na<STRSXP>(x[k])) {
                out[k] = x[k];
                continue;
            }
            out[k] = sugar::detail::trim_right(x[k], LENGTH(x[k]), &scratch);
        }
        break;

    default:
        stop("Invalid `which` argument '%s'!", which);
        return Matrix<STRSXP>();
    }

    return out;
}
179+
180+
/* trimws for a single Rcpp::String.

   str   - input string.
   which - side(s) to trim; only its first character is inspected:
           'b' (both), 'l' (left) or 'r' (right). Any other value calls
           stop() with an "Invalid `which` argument" message.

   `which` is checked before the NA test, so an invalid `which` is reported
   even for an NA input. For a valid `which`, an NA input is returned as a
   String built from the same SEXP; otherwise the trimmed text is returned. */
inline String trimws(const String& str, const char* which = "both") {
    std::string scratch;  // backing storage for trim_both / trim_right

    switch (*which) {
    case 'b':
        if (traits::is_na<STRSXP>(str.get_sexp())) {
            return String(str.get_sexp());
        }
        return sugar::detail::trim_both(
            str.get_cstring(), LENGTH(str.get_sexp()), &scratch);

    case 'l':
        if (traits::is_na<STRSXP>(str.get_sexp())) {
            return String(str.get_sexp());
        }
        return sugar::detail::trim_left(str.get_cstring());

    case 'r':
        if (traits::is_na<STRSXP>(str.get_sexp())) {
            return String(str.get_sexp());
        }
        return sugar::detail::trim_right(
            str.get_cstring(), LENGTH(str.get_sexp()), &scratch);

    default:
        break;
    }

    stop("Invalid `which` argument '%s'!", which);
    return String("Unreachable");
}
215+
216+
217+
} // Rcpp
218+
219+
#endif // Rcpp__sugar__trimws_h

inst/unitTests/cpp/sugar.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1201,3 +1201,21 @@ LogicalMatrix UpperTri(NumericMatrix x, bool diag = false) {
12011201
LogicalMatrix LowerTri(NumericMatrix x, bool diag = false) {
12021202
return lower_tri(x, diag);
12031203
}
1204+
1205+
1206+
// 22 April 2017: trimws
1207+
1208+
// [[Rcpp::export]]
1209+
CharacterVector vtrimws(CharacterVector x, const char* which = "both") {
1210+
return trimws(x, which);
1211+
}
1212+
1213+
// [[Rcpp::export]]
1214+
CharacterMatrix mtrimws(CharacterMatrix x, const char* which = "both") {
1215+
return trimws(x, which);
1216+
}
1217+
1218+
// [[Rcpp::export]]
1219+
String strimws(String x, const char* which = "both") {
1220+
return trimws(x, which);
1221+
}

0 commit comments

Comments
 (0)